u32 tof_chksum;
bool fix_dedupedfrag;
bool fragemitted;
+ bool dedupe;
/* fields for write indexes */
u8 *metacur;
ei->e.partial = true;
ei->e.length -= delta;
}
-
- /* fall back to noncompact indexes for deduplication */
- inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B;
- inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+ ctx->ictx->dedupe = true;
erofs_sb_set_dedupe(sbi);
sbi->saved_by_deduplication += dctx.e.plen;
static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx)
{
struct erofs_inode *inode = ctx->inode;
+ struct erofs_sb_info *sbi = inode->sbi;
struct z_erofs_extent_item *ei, *n;
void *metabuf;
+ if (!cfg.c_legacy_compress && !ctx->dedupe &&
+ inode->z_logical_clusterbits <= 14) {
+ if (inode->z_logical_clusterbits <= 12)
+ inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+ inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
+ } else {
+ inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+ }
+
+ if (erofs_sb_has_big_pcluster(sbi)) {
+ inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+ if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
+ inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+ }
+
metabuf = malloc(BLK_ROUND_UP(inode->sbi, inode->i_size) *
sizeof(struct z_erofs_lcluster_index) +
Z_EROFS_LEGACY_MAP_HEADER_SIZE);
ret = -ENOSPC;
goto err_free_meta;
}
+ z_erofs_dedupe_ext_commit(false);
z_erofs_dedupe_commit(false);
if (!ictx->fragemitted)
{
struct z_erofs_extent_item *ei, *n;
struct erofs_sb_info *sbi = ictx->inode->sbi;
+ bool dedupe_ext = cfg.c_fragments;
erofs_off_t off = 0;
int ret = 0, ret2;
+ erofs_blk_t dupb;
+ u64 hash;
list_for_each_entry_safe(ei, n, &sctx->extents, list) {
list_del(&ei->list);
/* skip write data but leave blkaddr for inline fallback */
if (ei->e.inlined || !ei->e.plen)
continue;
+
+ if (dedupe_ext) {
+ dupb = z_erofs_dedupe_ext_match(sbi, sctx->membuf + off,
+ ei->e.plen, ei->e.raw, &hash);
+ if (dupb != EROFS_NULL_ADDR) {
+ ei->e.pstart = dupb;
+ sctx->pstart -= ei->e.plen;
+ off += ei->e.plen;
+ ictx->dedupe = true;
+ erofs_sb_set_dedupe(sbi);
+ sbi->saved_by_deduplication += ei->e.plen;
+ erofs_dbg("Dedupe %u %scompressed data to %llu of %u bytes",
+ ei->e.length, ei->e.raw ? "un" : "",
+ ei->e.pstart | 0ULL, ei->e.plen);
+ continue;
+ }
+ }
ret2 = erofs_dev_write(sbi, sctx->membuf + off, ei->e.pstart,
ei->e.plen);
off += ei->e.plen;
- if (ret2) {
+ if (ret2)
ret = ret2;
- continue;
- }
+ else if (dedupe_ext)
+ z_erofs_dedupe_ext_insert(&ei->e, hash);
}
free(sctx->membuf);
sctx->membuf = NULL;
/* initialize per-file compression setting */
inode->z_advise = 0;
inode->z_logical_clusterbits = sbi->blkszbits;
- if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) {
- if (inode->z_logical_clusterbits <= 12)
- inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
- inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
- } else {
- inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
- }
-
- if (erofs_sb_has_big_pcluster(sbi)) {
- inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
- if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
- inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
- }
if (cfg.c_fragments && !cfg.c_dedupe)
inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
init_list_head(&ictx->extents);
ictx->fix_dedupedfrag = false;
ictx->fragemitted = false;
+ ictx->dedupe = false;
if (all_fragments && !inode->fragment_size) {
ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tof_chksum);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/dedupe.h"
+#include "liberofs_xxhash.h"
+#include <stdlib.h>
+
+struct z_erofs_dedupe_ext_item {
+ struct list_head list;
+ struct z_erofs_dedupe_ext_item *revoke;
+ struct z_erofs_inmem_extent e;
+ u64 xxh64;
+};
+
+static struct list_head dupl_ext[65536];
+static struct z_erofs_dedupe_ext_item *revoke_list;
+
+int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
+ u64 hash)
+{
+ struct z_erofs_dedupe_ext_item *item;
+ struct list_head *p;
+
+ item = malloc(sizeof(struct z_erofs_dedupe_ext_item));
+ if (!item)
+ return -ENOMEM;
+ item->e = *e;
+ item->xxh64 = hash;
+
+ p = &dupl_ext[hash & (ARRAY_SIZE(dupl_ext) - 1)];
+ list_add_tail(&item->list, p);
+ item->revoke = revoke_list;
+ revoke_list = item;
+ return 0;
+}
+
+erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+ u8 *encoded, unsigned int len,
+ bool raw, u64 *hash)
+{
+ struct z_erofs_dedupe_ext_item *item;
+ struct list_head *p;
+ u64 _xxh64;
+ char *memb;
+ int ret;
+
+ *hash = _xxh64 = xxh64(encoded, len, 0);
+ p = &dupl_ext[_xxh64 & (ARRAY_SIZE(dupl_ext) - 1)];
+ list_for_each_entry(item, p, list) {
+ if (item->xxh64 == _xxh64 && item->e.plen == len &&
+ item->e.raw == raw) {
+ memb = malloc(len);
+ if (!memb)
+ break;
+ ret = erofs_dev_read(sbi, 0, memb, item->e.pstart, len);
+ if (ret < 0 || memcmp(memb, encoded, len)) {
+ free(memb);
+ break;
+ }
+ free(memb);
+ return item->e.pstart;
+ }
+ }
+ return EROFS_NULL_ADDR;
+}
+
+void z_erofs_dedupe_ext_commit(bool drop)
+{
+ if (drop) {
+ struct z_erofs_dedupe_ext_item *item, *n;
+
+ for (item = revoke_list; item; item = n) {
+ n = item->revoke;
+ list_del(&item->list);
+ free(item);
+ }
+ }
+ revoke_list = NULL;
+}
+
+int z_erofs_dedupe_ext_init(void)
+{
+ struct list_head *p;
+
+ for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p)
+ init_list_head(p);
+ return 0;
+}
+
+void z_erofs_dedupe_ext_exit(void)
+{
+ struct z_erofs_dedupe_ext_item *item, *n;
+ struct list_head *p;
+
+ if (!dupl_ext[0].next)
+ return;
+ z_erofs_dedupe_commit(true);
+ for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p) {
+ list_for_each_entry_safe(item, n, p, list) {
+ list_del(&item->list);
+ free(item);
+ }
+ }
+}