erofs-utils: mkfs: add libdeflate compressor support
author Gao Xiang <hsiangkao@linux.alibaba.com>
Mon, 10 Jul 2023 11:02:51 +0000 (19:02 +0800)
committer Gao Xiang <hsiangkao@linux.alibaba.com>
Thu, 20 Jul 2023 09:06:29 +0000 (17:06 +0800)
Eric suggested a "binary search + heuristics" approach that uses the
current libdeflate APIs to generate fixed-size output DEFLATE streams.

Compared to the previous built-in compressor, it generates smaller images
(which is expected, since the built-in one is roughly just a replacement
for the original zlib), yet the total compression time might increase
significantly, especially if users choose larger pclusters.

For example:
$ time mkfs.erofs -zdeflate,9 -C65536 enwik8.z enwik8
real    0m9.559s
user    0m9.453s
sys     0m0.069s

$ time mkfs.erofs -zlibdeflate,9 -C65536 enwik8.libdeflate.9.z enwik8
real    0m50.184s
user    0m50.082s
sys     0m0.074s

$ time mkfs/mkfs.erofs -zlibdeflate,6 -C65536 enwik8.libdeflate.6.z enwik8
real    0m23.428s
user    0m23.329s
sys     0m0.067s

37175296 enwik8.libdeflate.6.z
37142528 enwik8.z
36835328 enwik8.libdeflate.9.z

Anyway, let's use the current APIs for users who need smaller image
sizes for now.  Besides, EROFS also supports multiple per-file
algorithms in one image, so it can be used for specific files as well.

Suggested-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230710110251.89464-5-hsiangkao@linux.alibaba.com
configure.ac
lib/Makefile.am
lib/compress.c
lib/compressor.c
lib/compressor.h
lib/compressor_libdeflate.c [new file with mode: 0644]
mkfs/Makefile.am

index d6dc7afafcda82274c49b94757beed289872848b..ac0b0edb323d0a2351ea8ec1c1ec46e68412b3d7 100644 (file)
@@ -450,6 +450,7 @@ AM_CONDITIONAL([ENABLE_LZ4], [test "x${have_lz4}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LZ4HC], [test "x${have_lz4hc}" = "xyes"])
 AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"])
+AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"])
 
 if test "x$have_uuid" = "xyes"; then
   AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
index ae19b74cf577f434b25bca6d9da7d6ec89795d00..694888e8e821b7385f4d24fcd343d2090b47ae6f 100644 (file)
@@ -45,3 +45,6 @@ liberofs_la_SOURCES += compressor_liblzma.c
 endif
 
 liberofs_la_SOURCES += kite_deflate.c compressor_deflate.c
+if ENABLE_LIBDEFLATE
+liberofs_la_SOURCES += compressor_libdeflate.c
+endif
index 318b8de6d1cacb69813c12d903596533261319bc..6fb63cb9b07b277079d8b914b9f856282acbe1dc 100644 (file)
@@ -1026,7 +1026,7 @@ static int erofs_get_compress_algorithm_id(const char *name)
                return Z_EROFS_COMPRESSION_LZ4;
        if (!strcmp(name, "lzma"))
                return Z_EROFS_COMPRESSION_LZMA;
-       if (!strcmp(name, "deflate"))
+       if (!strcmp(name, "deflate") || !strcmp(name, "libdeflate"))
                return Z_EROFS_COMPRESSION_DEFLATE;
        return -ENOTSUP;
 }
index ca4d3645387974eb22a0c3ee25d429a278e4e67e..f81db5bbc26499141f8ff68b9b0931216d7e4f48 100644 (file)
@@ -21,6 +21,9 @@ static const struct erofs_compressor *compressors[] = {
                &erofs_compressor_lzma,
 #endif
                &erofs_compressor_deflate,
+#if HAVE_LIBDEFLATE
+               &erofs_compressor_libdeflate,
+#endif
 };
 
 int erofs_compress_destsize(const struct erofs_compress *c,
index c1eee20f8a65bf6bd265536087648dd0a985456f..f699fe7e8cbe30cb3c01025fa2e65448affa1d4b 100644 (file)
@@ -45,6 +45,7 @@ extern const struct erofs_compressor erofs_compressor_lz4;
 extern const struct erofs_compressor erofs_compressor_lz4hc;
 extern const struct erofs_compressor erofs_compressor_lzma;
 extern const struct erofs_compressor erofs_compressor_deflate;
+extern const struct erofs_compressor erofs_compressor_libdeflate;
 
 int erofs_compress_destsize(const struct erofs_compress *c,
                            const void *src, unsigned int *srcsize,
diff --git a/lib/compressor_libdeflate.c b/lib/compressor_libdeflate.c
new file mode 100644 (file)
index 0000000..2756dd8
--- /dev/null
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include <stdlib.h>
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include "erofs/config.h"
+#include <libdeflate.h>
+#include "compressor.h"
+
+/*
+ * Compress the longest prefix of @src whose DEFLATE stream fits into
+ * @dstsize bytes.  The prefix length is searched by combining a
+ * compression-ratio based estimate with bisection.
+ *
+ * @c:        compressor instance; c->private_data is handed to libdeflate
+ *            as the compressor and c->compression_level tunes the early exit
+ * @src:      input data
+ * @srcsize:  in: number of available input bytes;
+ *            out: number of input bytes covered by the returned stream
+ * @dst:      output buffer, at least @dstsize bytes
+ * @dstsize:  upper bound of the compressed size
+ *
+ * Returns the compressed size in bytes (0 if no tried prefix fits), or
+ * -ENOMEM if the scratch buffer cannot be allocated.
+ */
+static int libdeflate_compress_destsize(const struct erofs_compress *c,
+                                       const void *src, unsigned int *srcsize,
+                                       void *dst, unsigned int dstsize)
+{
+       /*
+        * Prefix size chosen by the previous call; seeds the next search.
+        * NOTE(review): being function-static, it is shared by every
+        * compressor instance and is not thread-safe; consider moving it
+        * into per-compressor state.
+        */
+       static size_t last_uncompressed_size = 0;
+       /* NOTE(review): the 9 spare bytes are presumably slack for libdeflate - confirm */
+       const size_t tmpsize = (size_t)dstsize + 9;
+       size_t l = 0; /* largest input that fits so far */
+       size_t l_csize = 0;
+       size_t r = (size_t)*srcsize + 1; /* smallest input that doesn't fit so far */
+       size_t m;
+       u8 *tmpbuf;
+
+       /*
+        * Keep the scratch buffer on the heap: its size follows the
+        * caller-supplied @dstsize, so a variable-length array could
+        * overrun the stack.
+        */
+       tmpbuf = malloc(tmpsize);
+       if (!tmpbuf)
+               return -ENOMEM;
+
+       if (last_uncompressed_size)
+               m = last_uncompressed_size * 15 / 16;
+       else
+               m = (size_t)dstsize * 4;
+       for (;;) {
+               unsigned long long est;
+               size_t csize;
+
+               /* clamp the probe between the bounds known so far */
+               m = max(m, l + 1);
+               m = min(m, r - 1);
+
+               csize = libdeflate_deflate_compress(c->private_data, src, m,
+                                                   tmpbuf, tmpsize);
+               if (csize > 0 && csize <= dstsize) {
+                       /* Fits */
+                       memcpy(dst, tmpbuf, csize);
+                       l = m;
+                       l_csize = csize;
+                       /*
+                        * Stop once the interval is exhausted or the output
+                        * is within a level-dependent margin of @dstsize.
+                        */
+                       if (r <= l + 1 || csize +
+                               (22 - 2*(int)c->compression_level) >= dstsize)
+                               break;
+                       /*
+                        * Estimate needed input prefix size based on current
+                        * compression ratio.  The product is widened so it
+                        * cannot overflow a 32-bit size_t; anything at or
+                        * beyond r would be clamped to r - 1 anyway.
+                        */
+                       est = (unsigned long long)dstsize * m / csize;
+                       m = est < r ? (size_t)est : r - 1;
+               } else {
+                       /* Doesn't fit */
+                       r = m;
+                       if (r <= l + 1)
+                               break;
+                       m = (l + r) / 2;
+               }
+       }
+
+       /*
+        * Since generic EROFS on-disk compressed data will be filled with
+        * leading 0s (but no more than one block, 4KB for example, even the
+        * whole pcluster is 128KB) if not filled, it will be used to identify
+        * the actual compressed length as well without taking more reserved
+        * compressed bytes or some extra metadata to record this.
+        *
+        * DEFLATE streams can also be used in this way, if it starts from a
+        * non-last stored block, flag an unused bit instead to avoid the zero
+        * byte. It's still a valid one according to the DEFLATE specification.
+        */
+       if (l_csize && !((u8 *)dst)[0])
+               ((u8 *)dst)[0] = 1 << (2 + 1);
+
+       free(tmpbuf);
+       *srcsize = l;
+       last_uncompressed_size = l;
+       return l_csize;
+}
+
+/*
+ * Release the libdeflate compressor attached to @c.
+ *
+ * Returns 0 on success, or -EINVAL if no compressor is attached.
+ */
+static int compressor_libdeflate_exit(struct erofs_compress *c)
+{
+       if (!c->private_data)
+               return -EINVAL;
+
+       libdeflate_free_compressor(c->private_data);
+       /* Drop the stale pointer so a repeated exit cannot free it twice. */
+       c->private_data = NULL;
+       return 0;
+}
+
+/*
+ * Bind @c to the libdeflate algorithm descriptor and start without any
+ * compressor attached; a warning about the experimental status is logged.
+ *
+ * Always returns 0.
+ */
+static int compressor_libdeflate_init(struct erofs_compress *c)
+{
+       /* nothing is allocated here, so there is no compressor yet */
+       c->private_data = NULL;
+       c->alg = &erofs_compressor_libdeflate;
+
+       erofs_warn("EXPERIMENTAL libdeflate compressor in use. Use at your own risk!");
+
+       return 0;
+}
+
+/*
+ * (Re)create the libdeflate compressor of @c for @compression_level.
+ *
+ * A negative @compression_level selects the libdeflate default level.
+ * Any compressor previously attached to @c is freed and replaced.
+ *
+ * Returns 0 on success, -EINVAL if the level exceeds the best supported
+ * level, or -ENOMEM if libdeflate cannot allocate the compressor.
+ */
+static int erofs_compressor_libdeflate_setlevel(struct erofs_compress *c,
+                                               int compression_level)
+{
+       /* use this compressor's own limits, not the built-in deflate ones */
+       if (compression_level < 0)
+               compression_level = erofs_compressor_libdeflate.default_level;
+       else if (compression_level > erofs_compressor_libdeflate.best_level)
+               return -EINVAL; /* reject early; keep the current compressor */
+
+       /* private_data may still be NULL here (e.g. right after init) */
+       libdeflate_free_compressor(c->private_data);
+       c->private_data = libdeflate_alloc_compressor(compression_level);
+       if (!c->private_data)
+               return -ENOMEM;
+       c->compression_level = compression_level;
+       return 0;
+}
+
+/*
+ * Descriptor of the libdeflate-backed compressor: its name, supported
+ * level settings and the callbacks implemented in this file.
+ */
+const struct erofs_compressor erofs_compressor_libdeflate = {
+       .name = "libdeflate",
+
+       /* compression level settings */
+       .best_level = 12,
+       .default_level = 1,
+
+       /* lifecycle and configuration callbacks */
+       .init = compressor_libdeflate_init,
+       .setlevel = erofs_compressor_libdeflate_setlevel,
+       .exit = compressor_libdeflate_exit,
+
+       /* fixed-size output compression */
+       .compress_destsize = libdeflate_compress_destsize,
+};
index a08dc53763115b52a8c656bf2b310ff282f45e77..603c2f3387efacd0df044c6db7752fe3e91d0f13 100644 (file)
@@ -6,4 +6,4 @@ AM_CPPFLAGS = ${libselinux_CFLAGS}
 mkfs_erofs_SOURCES = main.c
 mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
-       ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS}
+       ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${libdeflate_LIBS}