From: Gao Xiang Date: Thu, 3 Apr 2025 08:14:03 +0000 (+0800) Subject: erofs-utils: mkfs: implement extent-based deduplication X-Git-Tag: accepted/tizen/unified/20250610.081809~22 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cf04b8b78f09aef6a7f0b77306db451332848c13;p=platform%2Fupstream%2Ferofs-utils.git erofs-utils: mkfs: implement extent-based deduplication Currently, only rolling-hash deduplication could be used for compressed data, and it is still single-threaded for now. Before applying multi-threaded compression to that, let's allow extent-based compressed data deduplication if `-Efragments` is on. This feature will only work if multi-threaded compression is active. Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250403081403.2671077-3-hsiangkao@linux.alibaba.com --- diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h index 1af08e3..ffb00a5 100644 --- a/include/erofs/dedupe.h +++ b/include/erofs/dedupe.h @@ -32,6 +32,14 @@ void z_erofs_dedupe_commit(bool drop); int z_erofs_dedupe_init(unsigned int wsiz); void z_erofs_dedupe_exit(void); +int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e, + u64 hash); +erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi, + u8 *encoded, unsigned int size, bool raw, u64 *hash); +void z_erofs_dedupe_ext_commit(bool drop); +int z_erofs_dedupe_ext_init(void); +void z_erofs_dedupe_ext_exit(void); + #ifdef __cplusplus } #endif diff --git a/lib/Makefile.am b/lib/Makefile.am index 9cddc92..bdc74ad 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -35,7 +35,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \ namei.c data.c compress.c compressor.c zmap.c decompress.c \ compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \ fragments.c dedupe.c uuid_unparse.c uuid.c tar.c \ - block_list.c rebuild.c diskbuf.c bitops.c + block_list.c rebuild.c diskbuf.c bitops.c dedupe_ext.c liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include if ENABLE_LZ4 diff --git a/lib/compress.c b/lib/compress.c index 50d155e..cc7dce0 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -49,6 +49,7 @@ struct z_erofs_compress_ictx { /* inode context */ u32 tof_chksum; bool fix_dedupedfrag; bool fragemitted; + bool dedupe; /* fields for write indexes */ u8 *metacur; @@ -337,10 +338,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_compress_sctx *ctx) ei->e.partial = true; ei->e.length -= delta; } - - /* fall back to noncompact indexes for deduplication */ - inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B; - inode->datalayout = EROFS_INODE_COMPRESSED_FULL; + ctx->ictx->dedupe = true; erofs_sb_set_dedupe(sbi); sbi->saved_by_deduplication += dctx.e.plen; @@ -1001,9 +999,25 @@ static void z_erofs_write_mapheader(struct erofs_inode *inode, static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx) { struct erofs_inode *inode = ctx->inode; + struct erofs_sb_info *sbi = inode->sbi; struct z_erofs_extent_item *ei, *n; void *metabuf; + if (!cfg.c_legacy_compress && !ctx->dedupe && + inode->z_logical_clusterbits <= 14) { + if (inode->z_logical_clusterbits <= 12) + inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B; + inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT; + } else { + inode->datalayout = EROFS_INODE_COMPRESSED_FULL; + } + + if (erofs_sb_has_big_pcluster(sbi)) { + inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1; + if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT) + inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2; + } + metabuf = malloc(BLK_ROUND_UP(inode->sbi, inode->i_size) * sizeof(struct z_erofs_lcluster_index) + Z_EROFS_LEGACY_MAP_HEADER_SIZE); @@ -1170,6 +1184,7 @@ int erofs_commit_compressed_file(struct z_erofs_compress_ictx *ictx, ret = -ENOSPC; goto err_free_meta; } + z_erofs_dedupe_ext_commit(false); z_erofs_dedupe_commit(false); if (!ictx->fragemitted) @@ -1345,8 +1360,11 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx, { struct z_erofs_extent_item *ei, *n; struct erofs_sb_info *sbi = ictx->inode->sbi; + bool dedupe_ext = cfg.c_fragments; erofs_off_t off = 0; int ret = 0, ret2; + erofs_blk_t dupb; + u64 hash; list_for_each_entry_safe(ei, n, &sctx->extents, list) { list_del(&ei->list); @@ -1361,13 +1379,30 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx, /* skip write data but leave blkaddr for inline fallback */ if (ei->e.inlined || !ei->e.plen) continue; + + if (dedupe_ext) { + dupb = z_erofs_dedupe_ext_match(sbi, sctx->membuf + off, + ei->e.plen, ei->e.raw, &hash); + if (dupb != EROFS_NULL_ADDR) { + ei->e.pstart = dupb; + sctx->pstart -= ei->e.plen; + off += ei->e.plen; + ictx->dedupe = true; + erofs_sb_set_dedupe(sbi); + sbi->saved_by_deduplication += ei->e.plen; + erofs_dbg("Dedupe %u %scompressed data to %llu of %u bytes", + ei->e.length, ei->e.raw ? "un" : "", + ei->e.pstart | 0ULL, ei->e.plen); + continue; + } + } ret2 = erofs_dev_write(sbi, sctx->membuf + off, ei->e.pstart, ei->e.plen); off += ei->e.plen; - if (ret2) { + if (ret2) ret = ret2; - continue; - } + else if (dedupe_ext) + z_erofs_dedupe_ext_insert(&ei->e, hash); } free(sctx->membuf); sctx->membuf = NULL; @@ -1543,19 +1578,6 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos) /* initialize per-file compression setting */ inode->z_advise = 0; inode->z_logical_clusterbits = sbi->blkszbits; - if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) { - if (inode->z_logical_clusterbits <= 12) - inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B; - inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT; - } else { - inode->datalayout = EROFS_INODE_COMPRESSED_FULL; - } - - if (erofs_sb_has_big_pcluster(sbi)) { - inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1; - if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT) - inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2; - } if (cfg.c_fragments && !cfg.c_dedupe) inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER; @@ -1615,6 +1637,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos) init_list_head(&ictx->extents); ictx->fix_dedupedfrag = false; ictx->fragemitted = false; + ictx->dedupe = false; if (all_fragments && !inode->fragment_size) { ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tof_chksum); diff --git a/lib/dedupe_ext.c b/lib/dedupe_ext.c new file mode 100644 index 0000000..c2c5ca9 --- /dev/null +++ b/lib/dedupe_ext.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +#include "erofs/dedupe.h" +#include "liberofs_xxhash.h" +#include + +struct z_erofs_dedupe_ext_item { + struct list_head list; + struct z_erofs_dedupe_ext_item *revoke; + struct z_erofs_inmem_extent e; + u64 xxh64; +}; + +static struct list_head dupl_ext[65536]; +static struct z_erofs_dedupe_ext_item *revoke_list; + +int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e, + u64 hash) +{ + struct z_erofs_dedupe_ext_item *item; + struct list_head *p; + + item = malloc(sizeof(struct z_erofs_dedupe_ext_item)); + if (!item) + return -ENOMEM; + item->e = *e; + item->xxh64 = hash; + + p = &dupl_ext[hash & (ARRAY_SIZE(dupl_ext) - 1)]; + list_add_tail(&item->list, p); + item->revoke = revoke_list; + revoke_list = item; + return 0; +} + +erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi, + u8 *encoded, unsigned int len, + bool raw, u64 *hash) +{ + struct z_erofs_dedupe_ext_item *item; + struct list_head *p; + u64 _xxh64; + char *memb; + int ret; + + *hash = _xxh64 = xxh64(encoded, len, 0); + p = &dupl_ext[_xxh64 & (ARRAY_SIZE(dupl_ext) - 1)]; + list_for_each_entry(item, p, list) { + if (item->xxh64 == _xxh64 && item->e.plen == len && + item->e.raw == raw) { + memb = malloc(len); + if (!memb) + break; + ret = erofs_dev_read(sbi, 0, memb, item->e.pstart, len); + if (ret < 0 || memcmp(memb, encoded, len)) { + free(memb); + break; + } + free(memb); + return item->e.pstart; + } + } + return EROFS_NULL_ADDR; +} + +void z_erofs_dedupe_ext_commit(bool drop) +{ + if (drop) { + struct z_erofs_dedupe_ext_item *item, *n; + + for (item = revoke_list; item; item = n) { + n = item->revoke; + list_del(&item->list); + free(item); + } + } + revoke_list = NULL; +} + +int z_erofs_dedupe_ext_init(void) +{ + struct list_head *p; + + for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p) + init_list_head(p); + return 0; +} + +void z_erofs_dedupe_ext_exit(void) +{ + struct z_erofs_dedupe_ext_item *item, *n; + struct list_head *p; + + if (!dupl_ext[0].next) + return; + z_erofs_dedupe_commit(true); + for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p) { + list_for_each_entry_safe(item, n, p, list) { + list_del(&item->list); + free(item); + } + } +} diff --git a/mkfs/main.c b/mkfs/main.c index b2396d0..00bd21a 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -1374,6 +1374,15 @@ int main(int argc, char **argv) } } + if (cfg.c_fragments) { + err = z_erofs_dedupe_ext_init(); + if (err) { + erofs_err("failed to initialize extent deduplication: %s", + erofs_strerror(err)); + goto exit; + } + } + if (cfg.c_chunkbits) { err = erofs_blob_init(cfg.c_blobdev_path, 1 << cfg.c_chunkbits); if (err) @@ -1486,6 +1495,7 @@ exit: erofs_iput(root); z_erofs_compress_exit(); z_erofs_dedupe_exit(); + z_erofs_dedupe_ext_exit(); blklst = erofs_blocklist_close(); if (blklst) fclose(blklst);