erofs-utils: mkfs: support data alignment
author: Gao Xiang <hsiangkao@linux.alibaba.com>
Thu, 19 Dec 2024 06:43:31 +0000 (14:43 +0800)
committer: Gao Xiang <hsiangkao@linux.alibaba.com>
Sat, 5 Apr 2025 16:25:21 +0000 (00:25 +0800)
The underlying block storage may work in a stripe-like manner to improve
performance and space efficiency.

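The EROFS on-disk layout is flexible enough for such use cases, so add a
`--dsunit=#` option to mkfs.erofs which aligns all data block addresses to
multiples of #.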

Cc: Changpeng Liu <changpeliu@tencent.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20241219064331.2223001-4-hsiangkao@linux.alibaba.com
include/erofs/cache.h
lib/cache.c
man/mkfs.erofs.1
mkfs/main.c

index d8559a88a9948ca82645a1f9cd87ce302d0bd605..6ff80aba9c5ea25e1cde991ac3e0393b79da5f5e 100644 (file)
@@ -67,6 +67,9 @@ struct erofs_bufmgr {
 
        /* last mapped buffer block to accelerate erofs_mapbh() */
        struct erofs_buffer_block *last_mapped_block;
+
+       /* align data block addresses to multiples of `dsunit` */
+       unsigned int dsunit;
 };
 
 static inline const int get_alignsize(struct erofs_sb_info *sbi, int type,
index cb0546654234be56bd1568d4ec23bd119f670d92..5b2cc452bacfc6482756066fddd4e322f5cb59fe 100644 (file)
@@ -86,7 +86,7 @@ static int __erofs_battach(struct erofs_buffer_block *bb,
                        + inline_ext > blkmask)
                return -ENOSPC;
 
-       oob = cmpsgn(roundup(boff, alignsize) + incr + inline_ext,
+       oob = cmpsgn(alignedoffset + incr + inline_ext,
                     bb->buffers.nblocks << sbi->blkszbits);
        if (oob >= 0) {
                /* the next buffer block should be NULL_ADDR all the time */
@@ -156,10 +156,13 @@ static int erofs_bfind_for_attach(struct erofs_bufmgr *bmgr,
        bb = NULL;
 
        /* try to find a most-fit mapped buffer block first */
-       if (size + inline_ext >= blksiz)
+       if (__erofs_unlikely(bmgr->dsunit > 1))
+               used_before = blksiz - alignsize;
+       else if (size + inline_ext >= blksiz)
                goto skip_mapped;
+       else
+               used_before = rounddown(blksiz - (size + inline_ext), alignsize);
 
-       used_before = rounddown(blksiz - (size + inline_ext), alignsize);
        for (; used_before; --used_before) {
                struct list_head *bt = bmgr->mapped_buckets[type] + used_before;
 
@@ -181,7 +184,7 @@ static int erofs_bfind_for_attach(struct erofs_bufmgr *bmgr,
                ret = __erofs_battach(cur, NULL, size, alignsize,
                                      inline_ext, true);
                if (ret < 0) {
-                       DBG_BUGON(1);
+                       DBG_BUGON(!(bmgr->dsunit > 1));
                        continue;
                }
 
@@ -324,10 +327,20 @@ struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
 static void __erofs_mapbh(struct erofs_buffer_block *bb)
 {
        struct erofs_bufmgr *bmgr = bb->buffers.fsprivate;
-       erofs_blk_t blkaddr;
+       erofs_blk_t blkaddr = bmgr->tail_blkaddr;
 
        if (bb->blkaddr == NULL_ADDR) {
-               bb->blkaddr = bmgr->tail_blkaddr;
+               bb->blkaddr = blkaddr;
+               if (__erofs_unlikely(bmgr->dsunit > 1) && bb->type == DATA) {
+                       struct erofs_buffer_block *pb = list_prev_entry(bb, list);
+
+                       bb->blkaddr = roundup(blkaddr, bmgr->dsunit);
+                       if (pb != &bmgr->blkh &&
+                           pb->blkaddr + pb->buffers.nblocks >= blkaddr) {
+                               DBG_BUGON(pb->blkaddr + pb->buffers.nblocks > blkaddr);
+                               pb->buffers.nblocks = bb->blkaddr - pb->blkaddr;
+                       }
+               }
                bmgr->last_mapped_block = bb;
                erofs_bupdate_mapped(bb);
        }
index 8960ca16125e9f613b2e12261daf17da0ff53963..ae8411d749420cf9cdc4cd26f1cbc2d89e7502dc 100644 (file)
@@ -166,6 +166,9 @@ the given primary algorithm, alternative algorithms can be specified with
 are extended regular expressions, matched against absolute paths within
 the output filesystem, with no leading /.
 .TP
+.BI "\-\-dsunit=" #
+Align all data block addresses to multiples of #.
+.TP
 .BI "\-\-exclude-path=" path
 Ignore file that matches the exact literal path.
 You may give multiple
index 00bd21a43a55827c0928c876ba02e6a83a555982..6d1a2de04f515b2e0049170af90d048a390d8623 100644 (file)
@@ -86,6 +86,7 @@ static struct option long_options[] = {
        {"all-time", no_argument, NULL, 526},
        {"sort", required_argument, NULL, 527},
        {"hard-dereference", no_argument, NULL, 528},
+       {"dsunit", required_argument, NULL, 529},
        {0, 0, 0, 0},
 };
 
@@ -166,6 +167,7 @@ static void usage(int argc, char **argv)
                "                       (X = data|rvsp; data=full data, rvsp=space is allocated\n"
                "                                       and filled with zeroes)\n"
                " --compress-hints=X    specify a file to configure per-file compression strategy\n"
+               " --dsunit=#            align all data block addresses to multiples of #\n"
                " --exclude-path=X      avoid including file X (X = exact literal path)\n"
                " --exclude-regex=X     avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -243,6 +245,7 @@ static unsigned int rebuild_src_count;
 static LIST_HEAD(rebuild_src_list);
 static u8 fixeduuid[16];
 static bool valid_fixeduuid;
+static unsigned int dsunit;
 
 static int erofs_mkfs_feat_set_legacy_compress(bool en, const char *val,
                                               unsigned int vallen)
@@ -876,6 +879,13 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
                case 528:
                        cfg.c_hard_dereference = true;
                        break;
+               case 529:
+                       dsunit = strtoul(optarg, &endptr, 0);
+                       if (*endptr != '\0') {
+                               erofs_err("invalid dsunit %s", optarg);
+                               return -EINVAL;
+                       }
+                       break;
                case 'V':
                        version();
                        exit(0);
@@ -1330,6 +1340,7 @@ int main(int argc, char **argv)
                }
                sb_bh = NULL;
        }
+       g_sbi.bmgr->dsunit = dsunit;
 
        /* Use the user-defined UUID or generate one for clean builds */
        if (valid_fixeduuid)