erofs-utils: lib: generate compression indexes in memory first
authorYifan Zhao <zhaoyifan@sjtu.edu.cn>
Mon, 18 Dec 2023 14:57:10 +0000 (22:57 +0800)
committerGao Xiang <hsiangkao@linux.alibaba.com>
Sun, 14 Jan 2024 09:16:52 +0000 (17:16 +0800)
Currently, mkfs generates the on-disk indexes of each compressed extent
on the fly during compression, which is inflexible if we'd like to merge
sub-indexes of a file later for multi-threaded scenarios.

Let's generate on-disk indexes after the compression is completed.

Signed-off-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20231218145710.132164-3-hsiangkao@linux.alibaba.com
lib/compress.c

index eafbad148cb009712a81c6bb0f03c06e68adad20..8f61f920b53eaae56187063ce8268160c66b2093 100644 (file)
@@ -28,9 +28,15 @@ struct erofs_compress_cfg {
        bool enable;
 } erofs_ccfg[EROFS_MAX_COMPR_CFGS];
 
+struct z_erofs_extent_item {
+       struct list_head list;
+       struct z_erofs_inmem_extent e;
+};
+
 struct z_erofs_vle_compress_ctx {
        u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
-       struct z_erofs_inmem_extent e;  /* (lookahead) extent */
+       struct list_head extents;
+       struct z_erofs_extent_item *pivot;
 
        struct erofs_inode *inode;
        struct erofs_compress_cfg *ccfg;
@@ -65,20 +71,18 @@ static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
        ctx->metacur += sizeof(di);
 }
 
-static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
+static void z_erofs_write_extent(struct z_erofs_vle_compress_ctx *ctx,
+                                struct z_erofs_inmem_extent *e)
 {
        struct erofs_inode *inode = ctx->inode;
        struct erofs_sb_info *sbi = inode->sbi;
        unsigned int clusterofs = ctx->clusterofs;
-       unsigned int count = ctx->e.length;
+       unsigned int count = e->length;
        unsigned int d0 = 0, d1 = (clusterofs + count) / erofs_blksiz(sbi);
        struct z_erofs_lcluster_index di;
        unsigned int type, advise;
 
-       if (!count)
-               return;
-
-       ctx->e.length = 0;      /* mark as written first */
+       DBG_BUGON(!count);
        di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
 
        /* whether the tail-end (un)compressed block or not */
@@ -87,18 +91,18 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
                 * A lcluster cannot have three parts with the middle one which
                 * is well-compressed for !ztailpacking cases.
                 */
-               DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments);
-               DBG_BUGON(ctx->e.partial);
-               type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+               DBG_BUGON(!e->raw && !cfg.c_ztailpacking && !cfg.c_fragments);
+               DBG_BUGON(e->partial);
+               type = e->raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
                        Z_EROFS_LCLUSTER_TYPE_HEAD1;
                advise = type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
                di.di_advise = cpu_to_le16(advise);
 
                if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
-                   !ctx->e.compressedblks)
+                   !e->compressedblks)
                        di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
                else
-                       di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+                       di.di_u.blkaddr = cpu_to_le32(e->blkaddr);
                memcpy(ctx->metacur, &di, sizeof(di));
                ctx->metacur += sizeof(di);
 
@@ -112,7 +116,7 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
                /* XXX: big pcluster feature should be per-inode */
                if (d0 == 1 && erofs_sb_has_big_pcluster(sbi)) {
                        type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
-                       di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks |
+                       di.di_u.delta[0] = cpu_to_le16(e->compressedblks |
                                                       Z_EROFS_LI_D0_CBLKCNT);
                        di.di_u.delta[1] = cpu_to_le16(d1);
                } else if (d0) {
@@ -136,17 +140,17 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
                                di.di_u.delta[0] = cpu_to_le16(d0);
                        di.di_u.delta[1] = cpu_to_le16(d1);
                } else {
-                       type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+                       type = e->raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
                                Z_EROFS_LCLUSTER_TYPE_HEAD1;
 
                        if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
-                           !ctx->e.compressedblks)
+                           !e->compressedblks)
                                di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
                        else
-                               di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+                               di.di_u.blkaddr = cpu_to_le32(e->blkaddr);
 
-                       if (ctx->e.partial) {
-                               DBG_BUGON(ctx->e.raw);
+                       if (e->partial) {
+                               DBG_BUGON(e->raw);
                                advise |= Z_EROFS_LI_PARTIAL_REF;
                        }
                }
@@ -166,6 +170,20 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
        ctx->clusterofs = clusterofs + count;
 }
 
+static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
+{
+       struct z_erofs_extent_item *ei, *n;
+
+       ctx->clusterofs = 0;
+       list_for_each_entry_safe(ei, n, &ctx->extents, list) {
+               z_erofs_write_extent(ctx, &ei->e);
+
+               list_del(&ei->list);
+               free(ei);
+       }
+       z_erofs_write_indexes_final(ctx);
+}
+
 static bool z_erofs_need_refill(struct z_erofs_vle_compress_ctx *ctx)
 {
        const bool final = !ctx->remaining;
@@ -182,13 +200,25 @@ static bool z_erofs_need_refill(struct z_erofs_vle_compress_ctx *ctx)
        return true;
 }
 
+static void z_erofs_commit_extent(struct z_erofs_vle_compress_ctx *ctx,
+                                 struct z_erofs_extent_item *ei)
+{
+       list_add_tail(&ei->list, &ctx->extents);
+       ctx->clusterofs = (ctx->clusterofs + ei->e.length) &
+                       (erofs_blksiz(ctx->inode->sbi) - 1);
+
+}
+
 static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
                                   unsigned int *len)
 {
        struct erofs_inode *inode = ctx->inode;
        const unsigned int lclustermask = (1 << inode->z_logical_clusterbits) - 1;
        struct erofs_sb_info *sbi = inode->sbi;
-       int ret = 0;
+       struct z_erofs_extent_item *ei = ctx->pivot;
+
+       if (!ei)
+               return 0;
 
        /*
         * No need dedupe for packed inode since it is composed of
@@ -200,12 +230,12 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
        do {
                struct z_erofs_dedupe_ctx dctx = {
                        .start = ctx->queue + ctx->head - ({ int rc;
-                               if (ctx->e.length <= erofs_blksiz(sbi))
+                               if (ei->e.length <= erofs_blksiz(sbi))
                                        rc = 0;
-                               else if (ctx->e.length - erofs_blksiz(sbi) >= ctx->head)
+                               else if (ei->e.length - erofs_blksiz(sbi) >= ctx->head)
                                        rc = ctx->head;
                                else
-                                       rc = ctx->e.length - erofs_blksiz(sbi);
+                                       rc = ei->e.length - erofs_blksiz(sbi);
                                rc; }),
                        .end = ctx->queue + ctx->head + *len,
                        .cur = ctx->queue + ctx->head,
@@ -222,25 +252,31 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
                 * decompresssion could be done as another try in practice.
                 */
                if (dctx.e.compressedblks > 1 &&
-                   ((ctx->clusterofs + ctx->e.length - delta) & lclustermask) +
+                   ((ctx->clusterofs + ei->e.length - delta) & lclustermask) +
                        dctx.e.length < 2 * (lclustermask + 1))
                        break;
 
+               ctx->pivot = malloc(sizeof(struct z_erofs_extent_item));
+               if (!ctx->pivot) {
+                       z_erofs_commit_extent(ctx, ei);
+                       return -ENOMEM;
+               }
+
                if (delta) {
                        DBG_BUGON(delta < 0);
-                       DBG_BUGON(!ctx->e.length);
+                       DBG_BUGON(!ei->e.length);
 
                        /*
                         * For big pcluster dedupe, if we decide to shorten the
                         * previous big pcluster, make sure that the previous
                         * CBLKCNT is still kept.
                         */
-                       if (ctx->e.compressedblks > 1 &&
-                           (ctx->clusterofs & lclustermask) + ctx->e.length
+                       if (ei->e.compressedblks > 1 &&
+                           (ctx->clusterofs & lclustermask) + ei->e.length
                                - delta < 2 * (lclustermask + 1))
                                break;
-                       ctx->e.partial = true;
-                       ctx->e.length -= delta;
+                       ei->e.partial = true;
+                       ei->e.length -= delta;
                }
 
                /* fall back to noncompact indexes for deduplication */
@@ -253,39 +289,32 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
                erofs_dbg("Dedupe %u %scompressed data (delta %d) to %u of %u blocks",
                          dctx.e.length, dctx.e.raw ? "un" : "",
                          delta, dctx.e.blkaddr, dctx.e.compressedblks);
-               z_erofs_write_indexes(ctx);
-               ctx->e = dctx.e;
+
+               z_erofs_commit_extent(ctx, ei);
+               ei = ctx->pivot;
+               init_list_head(&ei->list);
+               ei->e = dctx.e;
+
                ctx->head += dctx.e.length - delta;
                DBG_BUGON(*len < dctx.e.length - delta);
                *len -= dctx.e.length - delta;
 
-               if (z_erofs_need_refill(ctx)) {
-                       ret = -EAGAIN;
-                       break;
-               }
+               if (z_erofs_need_refill(ctx))
+                       return 1;
        } while (*len);
-
 out:
-       z_erofs_write_indexes(ctx);
-       return ret;
+       z_erofs_commit_extent(ctx, ei);
+       ctx->pivot = NULL;
+       return 0;
 }
 
 static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
-                                    unsigned int *len, char *dst)
+                                    unsigned int len, char *dst)
 {
-       int ret;
        struct erofs_sb_info *sbi = ctx->inode->sbi;
-       unsigned int count, interlaced_offset, rightpart;
-
-       /* reset clusterofs to 0 if permitted */
-       if (!erofs_sb_has_lz4_0padding(sbi) && ctx->clusterofs &&
-           ctx->head >= ctx->clusterofs) {
-               ctx->head -= ctx->clusterofs;
-               *len += ctx->clusterofs;
-               ctx->clusterofs = 0;
-       }
-
-       count = min(erofs_blksiz(sbi), *len);
+       unsigned int count = min(erofs_blksiz(sbi), len);
+       unsigned int interlaced_offset, rightpart;
+       int ret;
 
        /* write interlaced uncompressed data if needed */
        if (ctx->inode->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
@@ -455,7 +484,8 @@ static int __z_erofs_compress_one(struct z_erofs_vle_compress_ctx *ctx,
                        may_inline = false;
                        may_packing = false;
 nocompression:
-                       ret = write_uncompressed_extent(ctx, &len, dst);
+                       /* TODO: reset clusterofs to 0 if permitted */
+                       ret = write_uncompressed_extent(ctx, len, dst);
                }
 
                if (ret < 0)
@@ -554,7 +584,6 @@ frag_packing:
 fix_dedupedfrag:
        DBG_BUGON(!inode->fragment_size);
        ctx->remaining += inode->fragment_size;
-       e->length = 0;
        ctx->fix_dedupedfrag = true;
        return 1;
 }
@@ -562,20 +591,32 @@ fix_dedupedfrag:
 static int z_erofs_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 {
        unsigned int len = ctx->tail - ctx->head;
-       int ret;
+       struct z_erofs_extent_item *ei;
 
        while (len) {
-               if (z_erofs_compress_dedupe(ctx, &len))
+               int ret = z_erofs_compress_dedupe(ctx, &len);
+
+               if (ret > 0)
                        break;
+               else if (ret < 0)
+                       return ret;
 
-               ret = __z_erofs_compress_one(ctx, &ctx->e);
+               DBG_BUGON(ctx->pivot);
+               ei = malloc(sizeof(*ei));
+               if (!ei)
+                       return -ENOMEM;
+
+               init_list_head(&ei->list);
+               ret = __z_erofs_compress_one(ctx, &ei->e);
                if (ret) {
+                       free(ei);
                        if (ret > 0)
                                break;          /* need more data */
                        return ret;
                }
 
-               len -= ctx->e.length;
+               len -= ei->e.length;
+               ctx->pivot = ei;
                if (ctx->fix_dedupedfrag && !ctx->fragemitted &&
                    z_erofs_fixup_deduped_fragment(ctx, len))
                        break;
@@ -939,7 +980,8 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
        ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
        ctx.head = ctx.tail = 0;
        ctx.clusterofs = 0;
-       ctx.e.length = 0;
+       ctx.pivot = NULL;
+       init_list_head(&ctx.extents);
        ctx.remaining = inode->i_size - inode->fragment_size;
        ctx.fix_dedupedfrag = false;
        ctx.fragemitted = false;
@@ -973,19 +1015,34 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
        DBG_BUGON(compressed_blocks < !!inode->idata_size);
        compressed_blocks -= !!inode->idata_size;
 
+       if (ctx.pivot) {
+               z_erofs_commit_extent(&ctx, ctx.pivot);
+               ctx.pivot = NULL;
+       }
+
        /* generate an extent for the deduplicated fragment */
        if (inode->fragment_size && !ctx.fragemitted) {
-               z_erofs_write_indexes(&ctx);
-               ctx.e.length = inode->fragment_size;
-               ctx.e.compressedblks = 0;
-               ctx.e.raw = false;
-               ctx.e.partial = false;
-               ctx.e.blkaddr = ctx.blkaddr;
+               struct z_erofs_extent_item *ei;
+
+               ei = malloc(sizeof(*ei));
+               if (!ei) {
+                       ret = -ENOMEM;
+                       goto err_free_idata;
+               }
+
+               ei->e = (struct z_erofs_inmem_extent) {
+                       .length = inode->fragment_size,
+                       .compressedblks = 0,
+                       .raw = false,
+                       .partial = false,
+                       .blkaddr = ctx.blkaddr,
+               };
+               init_list_head(&ei->list);
+               z_erofs_commit_extent(&ctx, ei);
        }
        z_erofs_fragments_commit(inode);
 
        z_erofs_write_indexes(&ctx);
-       z_erofs_write_indexes_final(&ctx);
        legacymetasize = ctx.metacur - compressmeta;
        /* estimate if data compression saves space or not */
        if (!inode->fragment_size &&