btrfs: defrag: bring back the old file extent search behavior

author Qu Wenruo <wqu@suse.com>

Fri, 11 Feb 2022 06:46:12 +0000 (14:46 +0800)

committer David Sterba <dsterba@suse.com>

Wed, 23 Feb 2022 16:43:07 +0000 (17:43 +0100)
author Qu Wenruo <wqu@suse.com>
Fri, 11 Feb 2022 06:46:12 +0000 (14:46 +0800)
committer David Sterba <dsterba@suse.com>
Wed, 23 Feb 2022 16:43:07 +0000 (17:43 +0100)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index 212157473ad886fdd9959fdc8e588187cce9576e..ffebd420829e8f936eb3c9f72148202d2858ef21 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1012,8 +1012,155 @@ out:
         return ret;
  }
  
+/*
+ * Defrag specific helper to get an extent map covering @start.
+ *
+ * Differences between this and btrfs_get_extent() are:
+ *
+ * - No extent_map will be added to inode->extent_tree
+ *   This is to reduce memory usage in the long run.
+ *
+ * - Extra optimization to skip file extents older than @newer_than
+ *   By using btrfs_search_forward() we can skip entire file ranges that
+ *   have extents created in past transactions, because btrfs_search_forward()
+ *   will not visit leaves and nodes with a generation smaller than the given
+ *   minimal generation threshold (@newer_than). 0 means a plain tree search.
+ *
+ * Return a valid em (a file extent, or a hole starting at @start) if found.
+ * Return NULL if we can not find a file extent matching the requirement.
+ *
+ * Return ERR_PTR() for error.
+ */
+static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
+                                           u64 start, u64 newer_than)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_path path = { 0 };
+       struct extent_map *em;
+       struct btrfs_key key;
+       u64 ino = btrfs_ino(inode);
+       int ret;
+
+       em = alloc_extent_map();
+       if (!em) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       key.objectid = ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = start;
+
+       if (newer_than) {
+               ret = btrfs_search_forward(root, &key, &path, newer_than);
+               if (ret < 0)
+                       goto err;
+               /* Nothing at or after the search key is newer than @newer_than */
+               if (ret > 0)
+                       goto not_found;
+       } else {
+               ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+               if (ret < 0)
+                       goto err;
+       }
+       if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
+               /*
+                * If the search left the path pointing beyond nritems, clamp
+                * it to the last item. The leaf should not be empty, as this
+                * inode must at least have its INODE_ITEM.
+                */
+               ASSERT(btrfs_header_nritems(path.nodes[0]));
+               path.slots[0] = btrfs_header_nritems(path.nodes[0]) - 1;
+       }
+       btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+       /* Perfect match, no need to go one slot back */
+       if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY &&
+           key.offset == start)
+               goto iterate;
+
+       /* We didn't find a perfect match, so we need to go one slot back */
+       if (path.slots[0] > 0) {
+               btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+               if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
+                       path.slots[0]--;
+       }
+
+iterate:
+       /* Iterate through the path to find a file extent covering @start */
+       while (true) {
+               u64 extent_end;
+
+               if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
+                       goto next;
+
+               btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+
+               /*
+                * We may have gone one slot back to an INODE_REF/XATTR item,
+                * then we need to go forward until we reach an EXTENT_DATA.
+                * But we should still have the correct ino as key.objectid.
+                */
+               if (WARN_ON(key.objectid < ino) || key.type < BTRFS_EXTENT_DATA_KEY)
+                       goto next;
+
+               /* It's beyond our target range, definitely no extent found */
+               if (key.objectid > ino || key.type > BTRFS_EXTENT_DATA_KEY)
+                       goto not_found;
+
+               /*
+                *      |       |<- File extent ->|
+                *      \- start
+                *
+                * This means there is a hole between start and key.offset.
+                */
+               if (key.offset > start) {
+                       em->start = start;
+                       em->orig_start = start;
+                       em->block_start = EXTENT_MAP_HOLE;
+                       em->len = key.offset - start;
+                       break;
+               }
+
+               fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                                   struct btrfs_file_extent_item);
+               extent_end = btrfs_file_extent_end(&path);
+
+               /*
+                *      |<- file extent ->|     |
+                *                              \- start
+                *
+                * We haven't reached start, search next slot.
+                */
+               if (extent_end <= start)
+                       goto next;
+
+               /* Now this extent covers @start, convert it to em */
+               btrfs_extent_item_to_extent_map(inode, &path, fi, false, em);
+               break;
+next:
+               ret = btrfs_next_item(root, &path);
+               if (ret < 0)
+                       goto err;
+               if (ret > 0)
+                       goto not_found;
+       }
+       btrfs_release_path(&path);
+       return em;
+
+not_found:
+       btrfs_release_path(&path);
+       free_extent_map(em);
+       return NULL;
+
+err:
+       btrfs_release_path(&path);
+       free_extent_map(em);
+       return ERR_PTR(ret);
+}
+
  static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
-                                              bool locked)
+                                              u64 newer_than, bool locked)
  {
         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -1035,7 +1182,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
                 /* get the big lock and read metadata off disk */
                 if (!locked)
                         lock_extent_bits(io_tree, start, end, &cached);
-               em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, sectorsize);
+               em = defrag_get_extent(BTRFS_I(inode), start, newer_than);
                 if (!locked)
                         unlock_extent_cached(io_tree, start, end, &cached);
  
@@ -1063,7 +1210,12 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
         if (em->start + em->len >= i_size_read(inode))
                 return false;
  
-       next = defrag_lookup_extent(inode, em->start + em->len, locked);
+       /*
+        * We want to check if the next extent can be merged with the current
+        * one, which can be an extent created in a past generation, so we pass
+        * a minimum generation of 0 to defrag_lookup_extent().
+        */
+       next = defrag_lookup_extent(inode, em->start + em->len, 0, locked);
         /* No more em or hole */
         if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
                 goto out;
@@ -1214,7 +1366,8 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
                 u64 range_len;
  
                 last_is_target = false;
-               em = defrag_lookup_extent(&inode->vfs_inode, cur, locked);
+               em = defrag_lookup_extent(&inode->vfs_inode, cur,
+                                         newer_than, locked);
                 if (!em)
                         break;
author	Qu Wenruo <wqu@suse.com>
	Fri, 11 Feb 2022 06:46:12 +0000 (14:46 +0800)
committer	David Sterba <dsterba@suse.com>
	Wed, 23 Feb 2022 16:43:07 +0000 (17:43 +0100)