btrfs: send: optimize clone detection to increase extent sharing

author Filipe Manana <fdmanana@suse.com>

Tue, 1 Nov 2022 16:15:45 +0000 (16:15 +0000)

committer David Sterba <dsterba@suse.com>

Mon, 5 Dec 2022 17:00:50 +0000 (18:00 +0100)
author Filipe Manana <fdmanana@suse.com>
Tue, 1 Nov 2022 16:15:45 +0000 (16:15 +0000)
committer David Sterba <dsterba@suse.com>
Mon, 5 Dec 2022 17:00:50 +0000 (18:00 +0100)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c

index 04cb608..9be11a3 100644 (file)
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -27,6 +27,7 @@
  struct extent_inode_elem {
         u64 inum;
         u64 offset;
+       u64 num_bytes;
         struct extent_inode_elem *next;
  };
  
@@ -37,6 +38,7 @@ static int check_extent_in_eb(const struct btrfs_key *key,
                               struct extent_inode_elem **eie,
                               bool ignore_offset)
  {
+       const u64 data_len = btrfs_file_extent_num_bytes(eb, fi);
         u64 offset = 0;
         struct extent_inode_elem *e;
  
@@ -45,10 +47,8 @@ static int check_extent_in_eb(const struct btrfs_key *key,
             !btrfs_file_extent_encryption(eb, fi) &&
             !btrfs_file_extent_other_encoding(eb, fi)) {
                 u64 data_offset;
-               u64 data_len;
  
                 data_offset = btrfs_file_extent_offset(eb, fi);
-               data_len = btrfs_file_extent_num_bytes(eb, fi);
  
                 if (extent_item_pos < data_offset ||
                     extent_item_pos >= data_offset + data_len)
@@ -63,6 +63,7 @@ static int check_extent_in_eb(const struct btrfs_key *key,
         e->next = *eie;
         e->inum = key->objectid;
         e->offset = key->offset + offset;
+       e->num_bytes = data_len;
         *eie = e;
  
         return 0;
@@ -2265,7 +2266,7 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
                             "ref for %llu resolved, key (%llu EXTEND_DATA %llu), root %llu",
                             extent_item_objectid, eie->inum,
                             eie->offset, root);
-               ret = iterate(eie->inum, eie->offset, root, ctx);
+               ret = iterate(eie->inum, eie->offset, eie->num_bytes, root, ctx);
                 if (ret) {
                         btrfs_debug(fs_info,
                                     "stopping iteration for %llu due to ret=%d",
@@ -2357,7 +2358,7 @@ out:
         return ret;
  }
  
-static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
+static int build_ino_list(u64 inum, u64 offset, u64 num_bytes, u64 root, void *ctx)
  {
         struct btrfs_data_container *inodes = ctx;
         const size_t c = 3 * sizeof(u64);
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h

index 2dd68f8..8d35981 100644 (file)
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -75,8 +75,8 @@ struct btrfs_backref_share_check_ctx {
         int prev_extents_cache_slot;
  };
  
-typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
-               void *ctx);
+typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 num_bytes,
+                                     u64 root, void *ctx);
  
  struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void);
  void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c

index 8c89e63..3c22573 100644 (file)
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -809,8 +809,8 @@ nomem:
         return ERR_PTR(-ENOMEM);
  }
  
-static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
-                                    void *warn_ctx)
+static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
+                                    u64 root, void *warn_ctx)
  {
         u32 nlink;
         int ret;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

index d9e01b6..49759cd 100644 (file)
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -76,7 +76,7 @@ struct clone_root {
         struct btrfs_root *root;
         u64 ino;
         u64 offset;
-
+       u64 num_bytes;
         u64 found_refs;
  };
  
@@ -1273,7 +1273,8 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2)
   * Called for every backref that is found for the current extent.
   * Results are collected in sctx->clone_roots->ino/offset/found_refs
   */
-static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
+static int __iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root,
+                             void *ctx_)
  {
         struct backref_ctx *bctx = ctx_;
         struct clone_root *found;
@@ -1318,15 +1319,17 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
  
         bctx->found++;
         found->found_refs++;
-       if (ino < found->ino) {
+
+       /*
+        * If the given backref refers to a file extent item with a larger
+        * number of bytes than what we found before, use the new one so that
+        * we clone more optimally and end up doing less writes and getting
+        * less exclusive, non-shared extents at the destination.
+        */
+       if (num_bytes > found->num_bytes) {
                 found->ino = ino;
                 found->offset = offset;
-       } else if (found->ino == ino) {
-               /*
-                * same extent found more then once in the same file.
-                */
-               if (found->offset > offset + bctx->extent_len)
-                       found->offset = offset;
+               found->num_bytes = num_bytes;
         }
  
         return 0;
@@ -1437,6 +1440,7 @@ static int find_extent_clone(struct send_ctx *sctx,
                 cur_clone_root = sctx->clone_roots + i;
                 cur_clone_root->ino = (u64)-1;
                 cur_clone_root->offset = 0;
+               cur_clone_root->num_bytes = 0;
                 cur_clone_root->found_refs = 0;
         }
  
@@ -1506,14 +1510,27 @@ static int find_extent_clone(struct send_ctx *sctx,
  
         cur_clone_root = NULL;
         for (i = 0; i < sctx->clone_roots_cnt; i++) {
-               if (sctx->clone_roots[i].found_refs) {
-                       if (!cur_clone_root)
-                               cur_clone_root = sctx->clone_roots + i;
-                       else if (sctx->clone_roots[i].root == sctx->send_root)
-                               /* prefer clones from send_root over others */
-                               cur_clone_root = sctx->clone_roots + i;
-               }
+               struct clone_root *clone_root = &sctx->clone_roots[i];
  
+               if (clone_root->found_refs == 0)
+                       continue;
+
+               /*
+                * Choose the root from which we can clone more bytes, to
+                * minimize write operations and therefore have more extent
+                * sharing at the destination (the same as in the source).
+                */
+               if (!cur_clone_root ||
+                   clone_root->num_bytes > cur_clone_root->num_bytes) {
+                       cur_clone_root = clone_root;
+
+                       /*
+                        * We found an optimal clone candidate (any inode from
+                        * any root is fine), so we're done.
+                        */
+                       if (clone_root->num_bytes >= backref_ctx.extent_len)
+                               break;
+               }
         }
  
         if (cur_clone_root) {
author	Filipe Manana <fdmanana@suse.com>
	Tue, 1 Nov 2022 16:15:45 +0000 (16:15 +0000)
committer	David Sterba <dsterba@suse.com>
	Mon, 5 Dec 2022 17:00:50 +0000 (18:00 +0100)
fs/btrfs/backref.c		patch \| blob \| history
fs/btrfs/backref.h		patch \| blob \| history
fs/btrfs/scrub.c		patch \| blob \| history
fs/btrfs/send.c		patch \| blob \| history