erofs-utils: mkfs: implement extent-based deduplication
author: Gao Xiang <hsiangkao@linux.alibaba.com>
Thu, 3 Apr 2025 08:14:03 +0000 (16:14 +0800)
committer: Gao Xiang <hsiangkao@linux.alibaba.com>
Sat, 5 Apr 2025 16:25:11 +0000 (00:25 +0800)
Currently, only rolling-hash deduplication can be used for
compressed data, and it is still single-threaded for now.

Before applying multi-threaded compression to that, let's allow
extent-based compressed data deduplication if `-Efragments` is on.

This feature will only work if multi-threaded compression is active.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20250403081403.2671077-3-hsiangkao@linux.alibaba.com
include/erofs/dedupe.h
lib/Makefile.am
lib/compress.c
lib/dedupe_ext.c [new file with mode: 0644]
mkfs/main.c

index 1af08e36fdd45e8d30139f2ab985fccd8aec01c4..ffb00a504bcece0e630a9c2ac0083030ba1d7964 100644 (file)
@@ -32,6 +32,14 @@ void z_erofs_dedupe_commit(bool drop);
 int z_erofs_dedupe_init(unsigned int wsiz);
 void z_erofs_dedupe_exit(void);
 
+int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
+                             u64 hash);
+erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+                       u8 *encoded, unsigned int size, bool raw, u64 *hash);
+void z_erofs_dedupe_ext_commit(bool drop);
+int z_erofs_dedupe_ext_init(void);
+void z_erofs_dedupe_ext_exit(void);
+
 #ifdef __cplusplus
 }
 #endif
index 9cddc92b1f83a2a9357b7bb61ddde47a8f31693a..bdc74ad0ec10c3efca3fdb00c3ddf61025b51cdc 100644 (file)
@@ -35,7 +35,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
                      namei.c data.c compress.c compressor.c zmap.c decompress.c \
                      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
                      fragments.c dedupe.c uuid_unparse.c uuid.c tar.c \
-                     block_list.c rebuild.c diskbuf.c bitops.c
+                     block_list.c rebuild.c diskbuf.c bitops.c dedupe_ext.c
 
 liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
 if ENABLE_LZ4
index 50d155e5eb9c6952db6ce44e2789ed50c19da603..cc7dce02b54b11da798bb45b40e1bd3dc1783e9d 100644 (file)
@@ -49,6 +49,7 @@ struct z_erofs_compress_ictx {                /* inode context */
        u32 tof_chksum;
        bool fix_dedupedfrag;
        bool fragemitted;
+       bool dedupe;
 
        /* fields for write indexes */
        u8 *metacur;
@@ -337,10 +338,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_compress_sctx *ctx)
                        ei->e.partial = true;
                        ei->e.length -= delta;
                }
-
-               /* fall back to noncompact indexes for deduplication */
-               inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B;
-               inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+               ctx->ictx->dedupe = true;
                erofs_sb_set_dedupe(sbi);
 
                sbi->saved_by_deduplication += dctx.e.plen;
@@ -1001,9 +999,25 @@ static void z_erofs_write_mapheader(struct erofs_inode *inode,
 static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx)
 {
        struct erofs_inode *inode = ctx->inode;
+       struct erofs_sb_info *sbi = inode->sbi;
        struct z_erofs_extent_item *ei, *n;
        void *metabuf;
 
+       if (!cfg.c_legacy_compress && !ctx->dedupe &&
+           inode->z_logical_clusterbits <= 14) {
+               if (inode->z_logical_clusterbits <= 12)
+                       inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+               inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
+       } else {
+               inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+       }
+
+       if (erofs_sb_has_big_pcluster(sbi)) {
+               inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+               if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
+                       inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+       }
+
        metabuf = malloc(BLK_ROUND_UP(inode->sbi, inode->i_size) *
                         sizeof(struct z_erofs_lcluster_index) +
                         Z_EROFS_LEGACY_MAP_HEADER_SIZE);
@@ -1170,6 +1184,7 @@ int erofs_commit_compressed_file(struct z_erofs_compress_ictx *ictx,
                ret = -ENOSPC;
                goto err_free_meta;
        }
+       z_erofs_dedupe_ext_commit(false);
        z_erofs_dedupe_commit(false);
 
        if (!ictx->fragemitted)
@@ -1345,8 +1360,11 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
 {
        struct z_erofs_extent_item *ei, *n;
        struct erofs_sb_info *sbi = ictx->inode->sbi;
+       bool dedupe_ext = cfg.c_fragments;
        erofs_off_t off = 0;
        int ret = 0, ret2;
+       erofs_blk_t dupb;
+       u64 hash;
 
        list_for_each_entry_safe(ei, n, &sctx->extents, list) {
                list_del(&ei->list);
@@ -1361,13 +1379,30 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
                /* skip write data but leave blkaddr for inline fallback */
                if (ei->e.inlined || !ei->e.plen)
                        continue;
+
+               if (dedupe_ext) {
+                       dupb = z_erofs_dedupe_ext_match(sbi, sctx->membuf + off,
+                                               ei->e.plen, ei->e.raw, &hash);
+                       if (dupb != EROFS_NULL_ADDR) {
+                               ei->e.pstart = dupb;
+                               sctx->pstart -= ei->e.plen;
+                               off += ei->e.plen;
+                               ictx->dedupe = true;
+                               erofs_sb_set_dedupe(sbi);
+                               sbi->saved_by_deduplication += ei->e.plen;
+                               erofs_dbg("Dedupe %u %scompressed data to %llu of %u bytes",
+                                         ei->e.length, ei->e.raw ? "un" : "",
+                                         ei->e.pstart | 0ULL, ei->e.plen);
+                               continue;
+                       }
+               }
                ret2 = erofs_dev_write(sbi, sctx->membuf + off, ei->e.pstart,
                                       ei->e.plen);
                off += ei->e.plen;
-               if (ret2) {
+               if (ret2)
                        ret = ret2;
-                       continue;
-               }
+               else if (dedupe_ext)
+                       z_erofs_dedupe_ext_insert(&ei->e, hash);
        }
        free(sctx->membuf);
        sctx->membuf = NULL;
@@ -1543,19 +1578,6 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
        /* initialize per-file compression setting */
        inode->z_advise = 0;
        inode->z_logical_clusterbits = sbi->blkszbits;
-       if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) {
-               if (inode->z_logical_clusterbits <= 12)
-                       inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
-               inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
-       } else {
-               inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
-       }
-
-       if (erofs_sb_has_big_pcluster(sbi)) {
-               inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
-               if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
-                       inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
-       }
        if (cfg.c_fragments && !cfg.c_dedupe)
                inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
 
@@ -1615,6 +1637,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
        init_list_head(&ictx->extents);
        ictx->fix_dedupedfrag = false;
        ictx->fragemitted = false;
+       ictx->dedupe = false;
 
        if (all_fragments && !inode->fragment_size) {
                ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tof_chksum);
diff --git a/lib/dedupe_ext.c b/lib/dedupe_ext.c
new file mode 100644 (file)
index 0000000..c2c5ca9
--- /dev/null
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/dedupe.h"
+#include "liberofs_xxhash.h"
+#include <stdlib.h>
+
+struct z_erofs_dedupe_ext_item {       /* one extent recorded for extent-based dedupe */
+       struct list_head list;  /* linkage inside one dupl_ext[] bucket */
+       struct z_erofs_dedupe_ext_item *revoke; /* next item on the revoke_list chain */
+       struct z_erofs_inmem_extent e;  /* by-value copy of the extent given to insert */
+       u64             xxh64;  /* hash given to insert; match compares it with xxh64() of the candidate */
+};
+
+static struct list_head dupl_ext[65536];       /* hash buckets, selected by hash & (ARRAY_SIZE - 1) */
+static struct z_erofs_dedupe_ext_item *revoke_list;    /* chain (via ->revoke) of items inserted since the last commit */
+
+/*
+ * Record extent @e under @hash so that later extents can be matched
+ * against it.  The extent is copied by value.
+ *
+ * Returns 0 on success or -ENOMEM if the item cannot be allocated.
+ */
+int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
+                             u64 hash)
+{
+       struct z_erofs_dedupe_ext_item *node = malloc(sizeof(*node));
+
+       if (!node)
+               return -ENOMEM;
+
+       node->xxh64 = hash;
+       node->e = *e;
+
+       /* the low bits of the hash pick the bucket */
+       list_add_tail(&node->list,
+                     &dupl_ext[hash & (ARRAY_SIZE(dupl_ext) - 1)]);
+
+       /* chain it so that a dropping commit can take it back out */
+       node->revoke = revoke_list;
+       revoke_list = node;
+       return 0;
+}
+
+/*
+ * Look for an already-recorded extent whose encoded data is identical to
+ * @encoded (@len bytes, with the same @raw flag).
+ *
+ * The xxh64 of @encoded is always stored to *@hash, whether or not a match
+ * is found.  The first recorded item with the same hash, length and raw
+ * flag is read back through erofs_dev_read() and compared byte by byte;
+ * the search ends at that candidate regardless of the outcome.
+ *
+ * Returns the recorded extent's pstart on a match, or EROFS_NULL_ADDR if
+ * there is no match (including allocation or read failure).
+ */
+erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+                                    u8 *encoded, unsigned int len,
+                                    bool raw, u64 *hash)
+{
+       struct z_erofs_dedupe_ext_item *cand;
+       struct list_head *bucket;
+       char *ondisk;
+       bool same;
+       u64 digest;
+       int err;
+
+       digest = xxh64(encoded, len, 0);
+       *hash = digest;
+       bucket = &dupl_ext[digest & (ARRAY_SIZE(dupl_ext) - 1)];
+
+       list_for_each_entry(cand, bucket, list) {
+               /* cheap filters first: full hash, length and raw flag */
+               if (cand->xxh64 != digest || cand->e.plen != len ||
+                   cand->e.raw != raw)
+                       continue;
+
+               ondisk = malloc(len);
+               if (!ondisk)
+                       break;
+
+               /* confirm against the data that was actually written */
+               err = erofs_dev_read(sbi, 0, ondisk, cand->e.pstart, len);
+               same = !(err < 0 || memcmp(ondisk, encoded, len));
+               free(ondisk);
+               if (same)
+                       return cand->e.pstart;
+               break;
+       }
+       return EROFS_NULL_ADDR;
+}
+
+/*
+ * Settle the items inserted since the previous commit.
+ *
+ * With @drop set, every item on the revoke chain is unlinked from its
+ * bucket and freed; otherwise the items stay in the table.  Either way the
+ * revoke chain is empty afterwards.
+ */
+void z_erofs_dedupe_ext_commit(bool drop)
+{
+       struct z_erofs_dedupe_ext_item *cur = revoke_list;
+
+       revoke_list = NULL;
+       if (!drop)
+               return;
+
+       while (cur) {
+               struct z_erofs_dedupe_ext_item *next = cur->revoke;
+
+               list_del(&cur->list);
+               free(cur);
+               cur = next;
+       }
+}
+
+/*
+ * Set up every bucket head of the extent dedupe table.
+ *
+ * Always returns 0.
+ */
+int z_erofs_dedupe_ext_init(void)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(dupl_ext); ++i)
+               init_list_head(&dupl_ext[i]);
+       return 0;
+}
+
+/*
+ * Tear down the extent dedupe table and free every recorded item.
+ *
+ * Does nothing if the first bucket head still has a NULL next pointer,
+ * which is taken to mean z_erofs_dedupe_ext_init() was never called.
+ */
+void z_erofs_dedupe_ext_exit(void)
+{
+       struct z_erofs_dedupe_ext_item *item, *n;
+       struct list_head *p;
+
+       if (!dupl_ext[0].next)
+               return;
+       /*
+        * Drop this table's own uncommitted items first.  This must be the
+        * _ext_ variant: it empties revoke_list, which would otherwise be
+        * left pointing at items freed by the loop below.
+        */
+       z_erofs_dedupe_ext_commit(true);
+       for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p) {
+               list_for_each_entry_safe(item, n, p, list) {
+                       list_del(&item->list);
+                       free(item);
+               }
+       }
+}
index b2396d016ac6447b7e5a57963b7c027ed381a5e3..00bd21a43a55827c0928c876ba02e6a83a555982 100644 (file)
@@ -1374,6 +1374,15 @@ int main(int argc, char **argv)
                }
        }
 
+       if (cfg.c_fragments) {
+               err = z_erofs_dedupe_ext_init();
+               if (err) {
+                       erofs_err("failed to initialize extent deduplication: %s",
+                                 erofs_strerror(err));
+                       goto exit;
+               }
+       }
+
        if (cfg.c_chunkbits) {
                err = erofs_blob_init(cfg.c_blobdev_path, 1 << cfg.c_chunkbits);
                if (err)
@@ -1486,6 +1495,7 @@ exit:
                erofs_iput(root);
        z_erofs_compress_exit();
        z_erofs_dedupe_exit();
+       z_erofs_dedupe_ext_exit();
        blklst = erofs_blocklist_close();
        if (blklst)
                fclose(blklst);