From: Gao Xiang
Date: Mon, 10 Jul 2023 11:02:51 +0000 (+0800)
Subject: erofs-utils: mkfs: add libdeflate compressor support
X-Git-Tag: v1.8~226
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=073633e09cb5d55c23fd07faec69a2f05b668d0d;p=platform%2Fupstream%2Ferofs-utils.git

erofs-utils: mkfs: add libdeflate compressor support

Eric suggests a "binary search + heuristics" way by using the current
libdeflate APIs to generate fixed-sized output DEFLATE streams.

Compared to the previous built-in one, it will generate smaller images
(which is expected since the built-in one is roughly just the original
zlib replacement), yet the total compression time might be amplified a
lot especially if some larger pclusters are used by users compared to
the built-in one.

For example:
$ time mkfs.erofs -zdeflate,9 -C65536 enwik8.z enwik8
real	0m9.559s
user	0m9.453s
sys	0m0.069s

$ time mkfs.erofs -zlibdeflate,9 -C65536 enwik8.libdeflate.9.z enwik8
real	0m50.184s
user	0m50.082s
sys	0m0.074s

$ time mkfs/mkfs.erofs -zlibdeflate,6 -C65536 enwik8.libdeflate.6.z enwik8
real	0m23.428s
user	0m23.329s
sys	0m0.067s

37175296 enwik8.libdeflate.6.z
37142528 enwik8.z
36835328 enwik8.libdeflate.9.z

Anyway, let's use the current APIs for users who need smaller image
sizes for now.  Besides, EROFS also supports multiple per-file
algorithms in one image, so it can be used for specific files as well.
Suggested-by: Eric Biggers
Signed-off-by: Gao Xiang
Link: https://lore.kernel.org/r/20230710110251.89464-5-hsiangkao@linux.alibaba.com
---
Changes from review:
 - restore the <libdeflate.h> include operand that was lost in this copy
 - setlevel: fall back to libdeflate's own default_level, and reject
   levels above best_level with -EINVAL before freeing the old compressor
 - exit: clear private_data after freeing it
 - add function comments (new-file hunk grows from 114 to 143 lines; the
   blob id on its index line predates these changes)

diff --git a/configure.ac b/configure.ac
index d6dc7af..ac0b0ed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -450,6 +450,7 @@ AM_CONDITIONAL([ENABLE_LZ4], [test "x${have_lz4}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LZ4HC], [test "x${have_lz4hc}" = "xyes"])
 AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"])
+AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"])
 
 if test "x$have_uuid" = "xyes"; then
   AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
diff --git a/lib/Makefile.am b/lib/Makefile.am
index ae19b74..694888e 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -45,3 +45,6 @@ liberofs_la_SOURCES += compressor_liblzma.c
 endif
 
 liberofs_la_SOURCES += kite_deflate.c compressor_deflate.c
+if ENABLE_LIBDEFLATE
+liberofs_la_SOURCES += compressor_libdeflate.c
+endif
diff --git a/lib/compress.c b/lib/compress.c
index 318b8de..6fb63cb 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -1026,7 +1026,7 @@ static int erofs_get_compress_algorithm_id(const char *name)
 		return Z_EROFS_COMPRESSION_LZ4;
 	if (!strcmp(name, "lzma"))
 		return Z_EROFS_COMPRESSION_LZMA;
-	if (!strcmp(name, "deflate"))
+	if (!strcmp(name, "deflate") || !strcmp(name, "libdeflate"))
 		return Z_EROFS_COMPRESSION_DEFLATE;
 	return -ENOTSUP;
 }
diff --git a/lib/compressor.c b/lib/compressor.c
index ca4d364..f81db5b 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -21,6 +21,9 @@ static const struct erofs_compressor *compressors[] = {
 		&erofs_compressor_lzma,
 #endif
 		&erofs_compressor_deflate,
+#if HAVE_LIBDEFLATE
+		&erofs_compressor_libdeflate,
+#endif
 };
 
 int erofs_compress_destsize(const struct erofs_compress *c,
diff --git a/lib/compressor.h b/lib/compressor.h
index c1eee20..f699fe7 100644
--- a/lib/compressor.h
+++ b/lib/compressor.h
@@ -45,6 +45,7 @@ extern const struct erofs_compressor erofs_compressor_lz4;
 extern const struct erofs_compressor erofs_compressor_lz4hc;
 extern const struct erofs_compressor erofs_compressor_lzma;
 extern const struct erofs_compressor erofs_compressor_deflate;
+extern const struct erofs_compressor erofs_compressor_libdeflate;
 
 int erofs_compress_destsize(const struct erofs_compress *c,
 			    const void *src, unsigned int *srcsize,
diff --git a/lib/compressor_libdeflate.c b/lib/compressor_libdeflate.c
new file mode 100644
index 0000000..2756dd8
--- /dev/null
+++ b/lib/compressor_libdeflate.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include "erofs/config.h"
+#include <libdeflate.h>
+#include "compressor.h"
+
+/*
+ * Compress the longest prefix of @src whose DEFLATE stream fits into
+ * @dstsize bytes. Each attempt compresses a whole candidate prefix, so the
+ * prefix length is found by a binary search that uses the compression ratio
+ * seen so far to pick the next candidate.
+ *
+ * On return, *@srcsize is the number of input bytes consumed. Returns the
+ * number of compressed bytes copied to @dst, or 0 if no attempt fitted.
+ */
+static int libdeflate_compress_destsize(const struct erofs_compress *c,
+				        const void *src, unsigned int *srcsize,
+				        void *dst, unsigned int dstsize)
+{
+	/* input size chosen by the previous call; seeds the first guess below */
+	static size_t last_uncompressed_size = 0;
+	size_t l = 0; /* largest input that fits so far */
+	size_t l_csize = 0;
+	size_t r = *srcsize + 1; /* smallest input that doesn't fit so far */
+	size_t m;
+	/* 9 bytes of slack; outputs longer than dstsize are rejected below */
+	u8 tmpbuf[dstsize + 9];
+
+	if (last_uncompressed_size)
+		m = last_uncompressed_size * 15 / 16;
+	else
+		m = dstsize * 4;
+	for (;;) {
+		size_t csize;
+
+		/* clamp the candidate to [l + 1, r - 1] */
+		m = max(m, l + 1);
+		m = min(m, r - 1);
+
+		csize = libdeflate_deflate_compress(c->private_data, src, m,
+						    tmpbuf, dstsize + 9);
+		/*printf("Tried %zu => %zu\n", m, csize);*/
+		if (csize > 0 && csize <= dstsize) {
+			/* Fits */
+			memcpy(dst, tmpbuf, csize);
+			l = m;
+			l_csize = csize;
+			/*
+			 * Stop when the range is exhausted or the output is
+			 * within a level-dependent margin of dstsize.
+			 */
+			if (r <= l + 1 || csize +
+				(22 - 2*(int)c->compression_level) >= dstsize)
+				break;
+			/*
+			 * Estimate needed input prefix size based on current
+			 * compression ratio.
+			 */
+			m = (dstsize * m) / csize;
+		} else {
+			/* Doesn't fit */
+			r = m;
+			if (r <= l + 1)
+				break;
+			m = (l + r) / 2;
+		}
+	}
+
+	/*
+	 * Since generic EROFS on-disk compressed data will be filled with
+	 * leading 0s (but no more than one block, 4KB for example, even the
+	 * whole pcluster is 128KB) if not filled, it will be used to identify
+	 * the actual compressed length as well without taking more reserved
+	 * compressed bytes or some extra metadata to record this.
+	 *
+	 * DEFLATE streams can also be used in this way, if it starts from a
+	 * non-last stored block, flag an unused bit instead to avoid the zero
+	 * byte. It's still a valid one according to the DEFLATE specification.
+	 */
+	if (l_csize && !((u8 *)dst)[0])
+		((u8 *)dst)[0] = 1 << (2 + 1);
+
+	/*printf("Choosing %zu => %zu\n", l, l_csize);*/
+	*srcsize = l;
+	last_uncompressed_size = l;
+	return l_csize;
+}
+
+/* Free the libdeflate compressor; returns -EINVAL if none was allocated. */
+static int compressor_libdeflate_exit(struct erofs_compress *c)
+{
+	if (!c->private_data)
+		return -EINVAL;
+
+	libdeflate_free_compressor(c->private_data);
+	c->private_data = NULL;
+	return 0;
+}
+
+/*
+ * Bind @c to the libdeflate backend. The libdeflate compressor itself is
+ * allocated by the setlevel callback.
+ */
+static int compressor_libdeflate_init(struct erofs_compress *c)
+{
+	c->alg = &erofs_compressor_libdeflate;
+	c->private_data = NULL;
+
+	erofs_warn("EXPERIMENTAL libdeflate compressor in use. Use at your own risk!");
+	return 0;
+}
+
+/*
+ * (Re)allocate the libdeflate compressor for @compression_level; a negative
+ * level selects this compressor's default_level. Returns 0 on success,
+ * -EINVAL for a level above best_level, or -ENOMEM if allocation fails.
+ */
+static int erofs_compressor_libdeflate_setlevel(struct erofs_compress *c,
+						int compression_level)
+{
+	if (compression_level < 0)
+		compression_level = erofs_compressor_libdeflate.default_level;
+	else if (compression_level > erofs_compressor_libdeflate.best_level)
+		return -EINVAL;
+
+	libdeflate_free_compressor(c->private_data);
+	c->private_data = libdeflate_alloc_compressor(compression_level);
+	if (!c->private_data)
+		return -ENOMEM;
+	c->compression_level = compression_level;
+	return 0;
+}
+
+const struct erofs_compressor erofs_compressor_libdeflate = {
+	.name = "libdeflate",
+	.default_level = 1,
+	.best_level = 12,
+	.init = compressor_libdeflate_init,
+	.exit = compressor_libdeflate_exit,
+	.setlevel = erofs_compressor_libdeflate_setlevel,
+	.compress_destsize = libdeflate_compress_destsize,
+};
diff --git a/mkfs/Makefile.am b/mkfs/Makefile.am
index a08dc53..603c2f3 100644
--- a/mkfs/Makefile.am
+++ b/mkfs/Makefile.am
@@ -6,4 +6,4 @@ AM_CPPFLAGS = ${libselinux_CFLAGS}
 mkfs_erofs_SOURCES = main.c
 mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
-	${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS}
+	${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${libdeflate_LIBS}