erofs-utils: support Intel Query Processing Library
author Gao Xiang <hsiangkao@linux.alibaba.com>
Wed, 5 Jun 2024 12:32:33 +0000 (20:32 +0800)
committer Gao Xiang <hsiangkao@linux.alibaba.com>
Thu, 6 Jun 2024 11:26:13 +0000 (19:26 +0800)
This adds preliminary Intel QPL [1] support in order to make use of the
built-in In-Memory Analytics Accelerator (IAA) [2], which is available
starting with Sapphire Rapids.

For the sake of simplicity, it only uses the synchronous APIs for now,
so performance for small compressed clusters can still be improved in
the future if needed.

[ QPL 1.5.0+ is strictly needed for pkg-config detection and
  it can be explicitly enabled by `--with-qpl`. ]

Here are some performance numbers for reference:

Processors: Intel(R) Xeon(R) Platinum 8475B (192 cores)
Memory:     512 GiB
Dataset:    enwik9 (1000000000) [3]

Single-threaded decompression:
 ______________________________________________________________
|                 |_ Cluster size _|_ Image size _|_ Time (s) _|
| LZ4             |     65536      |  391581696   |   0.364    |
| LZ4             |    1048576     |  373309440   |   0.376    |
| Intel QPL (IAA) |    1048576     |  374816768   |   0.386    |
| Intel QPL (IAA) |     65536      |  376057856   |   0.396    |
| Intel QPL (IAA) |      4096      |  399650816   |   0.675    |
| libdeflate (4k) |    1048576     |  374816768   |   1.862    |
| libdeflate (4k) |     65536      |  376057856   |   1.859    |
| libdeflate (4k) |      4096      |  399749120   |   2.203    |
| libdeflate      |    1048576     |  323457024   |   1.318    |
| libdeflate      |     65536      |  328712192   |   1.358    |
| libdeflate      |      4096      |  389943296   |   2.103    |
| Zstd            |      N/A       |  312548986   |   1.047    |
| Zstd (fast)     |      N/A       |  453096980   |   0.740    |
|_________________|________________|______________|____________|

LZ4 1.9.4: [ mkfs.erofs -zlz4hc,12 -C65536 ]
           [ mkfs.erofs -zlz4hc,12 -C1048576 ]
    time fsck/fsck.erofs --extract

QPL 1.5.0 (IAA) / libdeflate 1.20 (4k):
           [ mkfs.erofs -zdeflate,level=9,dictsize=4096 -C1048576 ]
           [ mkfs.erofs -zdeflate,level=9,dictsize=4096 -C65536 ]
           [ mkfs.erofs -zdeflate,level=9,dictsize=4096 -C4096 ]
    time fsck/fsck.erofs --extract

libdeflate 1.20:
           [ mkfs.erofs -zdeflate,level=9 -C1048576 ]
           [ mkfs.erofs -zdeflate,level=9 -C65536 ]
           [ mkfs.erofs -zdeflate,level=9 -C4096 ]
    time fsck/fsck.erofs --extract

Zstd 1.5.6: [ zstd -k ] [ zstd -k --fast ]
    time zstd -d -k -f -c --no-progress > /dev/null

[1] https://github.com/intel/qpl
[2] https://www.intel.com/content/www/us/en/products/docs/accelerator-engines/in-memory-analytics-accelerator.html
[3] https://www.mattmahoney.net/dc/textdata.html

Cc: "Feghali, Wajdi K" <wajdi.k.feghali@intel.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240605123233.3833332-1-hsiangkao@linux.alibaba.com
configure.ac
dump/Makefile.am
fsck/Makefile.am
fuse/Makefile.am
include/erofs/internal.h
lib/decompress.c
mkfs/Makefile.am

index 1989bca5a98897c2ae0697c824d4d3b82e8c75ff..cfbde433fcdff6eab01f7e18a4cc97ae70cf64ba 100644 (file)
@@ -143,6 +143,11 @@ AC_ARG_WITH(libzstd,
    [AS_HELP_STRING([--with-libzstd],
       [Enable and build with of libzstd support @<:@default=auto@:>@])])
 
+# Intel QPL support is opt-in: with_qpl is set to "no" unless --with-qpl is given
+AC_ARG_WITH(qpl,
+   [AS_HELP_STRING([--with-qpl],
+      [Enable and build with Intel QPL support @<:@default=disabled@:>@])], [],
+      [with_qpl="no"])
+
 AC_ARG_ENABLE(fuse,
    [AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])],
    [enable_fuse="$enableval"], [enable_fuse="no"])
@@ -504,6 +509,31 @@ AS_IF([test "x$with_libzstd" != "xno"], [
   ])
 ])
 
+# Configure Intel QPL
+#
+# have_qpl only becomes "yes" if pkg-config finds qpl >= 1.5.0 and the
+# header, library symbol and declaration checks below all succeed.
+# NOTE(review): if qpl/qpl.h cannot be found, have_qpl simply stays "no"
+# and no error is raised, even with --with-qpl=yes -- confirm this is intended.
+have_qpl="no"
+AS_IF([test "x$with_qpl" != "xno"], [
+  PKG_CHECK_MODULES([libqpl], [qpl >= 1.5.0], [
+    # Paranoia: don't trust the result reported by pkgconfig before trying out
+    saved_LIBS="$LIBS"
+    saved_CPPFLAGS=${CPPFLAGS}
+    CPPFLAGS="${libqpl_CFLAGS} ${CPPFLAGS}"
+    LIBS="${libqpl_LIBS} $LIBS"
+    AC_CHECK_HEADERS([qpl/qpl.h],[
+      AC_CHECK_LIB(qpl, qpl_execute_job, [], [
+        AC_MSG_ERROR([libqpl doesn't work properly])])
+      AC_CHECK_DECL(qpl_execute_job, [have_qpl="yes"],
+        [AC_MSG_ERROR([libqpl doesn't work properly])], [[
+#include <qpl/qpl.h>
+      ]])
+    ])
+    # Put back the flags which were saved before probing
+    LIBS="${saved_LIBS}"
+    CPPFLAGS="${saved_CPPFLAGS}"], [
+    # pkg-config lookup failed: fatal only if QPL was explicitly requested
+    AS_IF([test "x$with_qpl" = "xyes"], [
+      AC_MSG_ERROR([Cannot find proper libqpl])
+    ])
+  ])
+])
+
 # Enable 64-bit off_t
 CFLAGS+=" -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64"
 
@@ -525,6 +555,7 @@ AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LIBZSTD], [test "x${have_libzstd}" = "xyes"])
+AM_CONDITIONAL([ENABLE_QPL], [test "x${have_qpl}" = "xyes"])
 
 if test "x$have_uuid" = "xyes"; then
   AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
@@ -574,6 +605,12 @@ if test "x$have_libzstd" = "xyes"; then
   AC_DEFINE([HAVE_LIBZSTD], 1, [Define to 1 if libzstd is found])
 fi
 
+# Publish the QPL detection result: HAVE_QPL for the C sources and the
+# libqpl_LIBS / libqpl_CFLAGS substitutions for the Makefiles.
+if test "x$have_qpl" = "xyes"; then
+  AC_DEFINE([HAVE_QPL], 1, [Define to 1 if qpl is found])
+  AC_SUBST([libqpl_LIBS])
+  AC_SUBST([libqpl_CFLAGS])
+fi
+
 # Dump maximum block size
 AS_IF([test "x$erofs_cv_max_block_size" = "x"],
       [$erofs_cv_max_block_size = 4096], [])
index 09c483e4b93ba9d1e8a4c72f1fa744ade18f5798..2a4f67ae0921a19a44dde17c13f68587f9750268 100644 (file)
@@ -8,4 +8,4 @@ dump_erofs_SOURCES = main.c
 dump_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 dump_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
        ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
-       ${libzstd_LIBS}
+       ${libzstd_LIBS} ${libqpl_LIBS}
index 70eacc0d7cbda1d363278a887e433c0b57403b95..5bdee4d9f9b2342c987204c94d87f501143b0e20 100644 (file)
@@ -8,7 +8,7 @@ fsck_erofs_SOURCES = main.c
 fsck_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 fsck_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
        ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
-       ${libzstd_LIBS}
+       ${libzstd_LIBS} ${libqpl_LIBS}
 
 if ENABLE_FUZZING
 noinst_PROGRAMS   = fuzz_erofsfsck
@@ -17,5 +17,5 @@ fuzz_erofsfsck_CFLAGS = -Wall -I$(top_srcdir)/include -DFUZZING
 fuzz_erofsfsck_LDFLAGS = -fsanitize=address,fuzzer
 fuzz_erofsfsck_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
        ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
-       ${libzstd_LIBS}
+       ${libzstd_LIBS} ${libqpl_LIBS}
 endif
index 7eae5f65c19a686d988833736976b7c4317b6351..2fd9b6d92101987bf77246c92eb3edf9c29b06d6 100644 (file)
@@ -7,4 +7,5 @@ erofsfuse_SOURCES = main.c
 erofsfuse_CFLAGS = -Wall -I$(top_srcdir)/include
 erofsfuse_CFLAGS += ${libfuse2_CFLAGS} ${libfuse3_CFLAGS} ${libselinux_CFLAGS}
 erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse2_LIBS} ${libfuse3_LIBS} ${liblz4_LIBS} \
-       ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} ${libzstd_LIBS}
+       ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} ${libzstd_LIBS} \
+       ${libqpl_LIBS}
index d52bcc656521cbb1556268d366e28da34786d4a7..2067cb98a4380542f0abf9c5075c582e1180c6ea 100644 (file)
@@ -131,6 +131,7 @@ struct erofs_sb_info {
        pthread_t dfops_worker;
        struct erofs_mkfs_dfops *mkfs_dfops;
 #endif
+       bool useqpl;
 };
 
 /* make sure that any user of the erofs headers has atleast 64bit off_t type */
index 2842f51b595285ff036cf2286a8c8a2a6759aabb..1e22f9fde40ed527839489e8abb69dd19956308d 100644 (file)
@@ -77,6 +77,163 @@ out:
 }
 #endif
 
+#ifdef HAVE_QPL
+#include <qpl/qpl.h>
+
+/*
+ * A cacheable QPL job: a list link followed by the storage of the QPL job
+ * itself.  The storage is a flexible array member since its size is only
+ * known at run time (it is queried with qpl_get_job_size() on allocation).
+ */
+struct z_erofs_qpl_job {
+       struct z_erofs_qpl_job *next;   /* next idle job in the free list */
+       u8 job[];                       /* handed to QPL as a qpl_job */
+};
+/* head of the singly-linked list of idle, already initialized QPL jobs */
+static struct z_erofs_qpl_job *z_erofs_qpl_jobs;
+/*
+ * Number of jobs which may still be kept around for reuse.  It is seeded
+ * with the number of available processors, decremented for every newly
+ * allocated job and incremented on release.  It can drop below zero when
+ * more jobs are in flight than the quota allows, and the "<= 0" test on
+ * release depends on that, so it must be a signed type.
+ */
+static int z_erofs_qpl_reclaim_quot;
+#ifdef HAVE_PTHREAD_H
+/* protects z_erofs_qpl_jobs; needs a proper static initializer per POSIX */
+static pthread_mutex_t z_erofs_qpl_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+/*
+ * Parse the on-disk DEFLATE configuration and decide whether Intel QPL is
+ * usable for DEFLATE decompression on this filesystem.
+ *
+ * @sbi:  filesystem instance; sbi->useqpl is updated on success
+ * @dsb:  on-disk superblock (not used here)
+ * @data: raw configuration, interpreted as struct z_erofs_deflate_cfgs
+ * @size: number of bytes available at @data
+ *
+ * Returns 0 on success, or -EINVAL if @data is NULL or @size is too small.
+ */
+int z_erofs_load_deflate_config(struct erofs_sb_info *sbi,
+                               struct erofs_super_block *dsb, void *data, int size)
+{
+       struct z_erofs_deflate_cfgs *dfl = data;
+       static erofs_atomic_bool_t inited;
+
+       /*
+        * @size is signed: reject negative values explicitly, otherwise the
+        * comparison against sizeof() would convert them to huge unsigned
+        * numbers and let them through.
+        */
+       if (!dfl || size < 0 || (size_t)size < sizeof(*dfl)) {
+               erofs_err("invalid deflate cfgs, size=%d", size);
+               return -EINVAL;
+       }
+
+       /*
+        * In Intel QPL, decompression is supported for DEFLATE streams where
+        * the size of the history buffer is no more than 4 KiB, otherwise
+        * QPL_STS_BAD_DIST_ERR code is returned.
+        */
+       sbi->useqpl = (dfl->windowbits <= 12);
+       if (sbi->useqpl) {
+               /* only the first caller seeds the job reclaim quota */
+               if (!erofs_atomic_test_and_set(&inited))
+                       z_erofs_qpl_reclaim_quot = erofs_get_available_processors();
+               erofs_info("Intel QPL will be used for DEFLATE decompression");
+       }
+       return 0;
+}
+
+/*
+ * Get a QPL job for one decompression request.
+ *
+ * An idle job is taken from the free list if there is one; otherwise a new
+ * job is allocated and initialized, and the reclaim quota is decremented.
+ *
+ * Returns the job on success, ERR_PTR(-ENOMEM) if the allocation fails, or
+ * ERR_PTR(-EOPNOTSUPP) if QPL cannot report the job size or initialize it.
+ */
+static qpl_job *z_erofs_qpl_get_job(void)
+{
+       qpl_path_t execution_path = qpl_path_auto;
+       struct z_erofs_qpl_job *job;
+       uint32_t jobsize = 0;   /* qpl_get_job_size() fills in a uint32_t */
+       qpl_status status;
+
+       /* try to pop an idle job first */
+#ifdef HAVE_PTHREAD_H
+       pthread_mutex_lock(&z_erofs_qpl_mutex);
+#endif
+       job = z_erofs_qpl_jobs;
+       if (job)
+               z_erofs_qpl_jobs = job->next;
+#ifdef HAVE_PTHREAD_H
+       pthread_mutex_unlock(&z_erofs_qpl_mutex);
+#endif
+
+       if (!job) {
+               status = qpl_get_job_size(execution_path, &jobsize);
+               if (status != QPL_STS_OK) {
+                       erofs_err("failed to get job size: %d", status);
+                       return ERR_PTR(-EOPNOTSUPP);
+               }
+
+               /* list header plus the run-time sized QPL job storage */
+               job = malloc(jobsize + sizeof(struct z_erofs_qpl_job));
+               if (!job)
+                       return ERR_PTR(-ENOMEM);
+
+               status = qpl_init_job(execution_path, (qpl_job *)job->job);
+               if (status != QPL_STS_OK) {
+                       erofs_err("failed to initialize job: %d", status);
+                       /* the job is unusable and nobody else owns it */
+                       free(job);
+                       return ERR_PTR(-EOPNOTSUPP);
+               }
+               /* account the new job only once it is actually usable */
+               erofs_atomic_dec_return(&z_erofs_qpl_reclaim_quot);
+       }
+       return (qpl_job *)job->job;
+}
+
+static bool z_erofs_qpl_put_job(qpl_job *qjob)
+{
+       struct z_erofs_qpl_job *job =
+               container_of((void *)qjob, struct z_erofs_qpl_job, job);
+
+       if (erofs_atomic_inc_return(&z_erofs_qpl_reclaim_quot) <= 0) {
+               qpl_status status = qpl_fini_job(qjob);
+
+               free(job);
+               if (status != QPL_STS_OK)
+                       erofs_err("failed to finalize job: %d", status);
+               return status == QPL_STS_OK;
+       }
+#ifdef HAVE_PTHREAD_H
+       pthread_mutex_lock(&z_erofs_qpl_mutex);
+#endif
+       job->next = z_erofs_qpl_jobs;
+       z_erofs_qpl_jobs = job;
+#ifdef HAVE_PTHREAD_H
+       pthread_mutex_unlock(&z_erofs_qpl_mutex);
+#endif
+       return true;
+}
+
+/*
+ * Decompress one DEFLATE request with Intel QPL in a single synchronous job.
+ *
+ * The first `inputmargin' bytes of the input, as reported by
+ * z_erofs_fixup_insize(), are skipped.  If rq->decodedskip is non-zero, the
+ * data is decoded into a temporary buffer of rq->decodedlength bytes and
+ * only the part after the first rq->decodedskip bytes is copied to rq->out.
+ *
+ * Returns 0 on success, the error from z_erofs_qpl_get_job() if no job is
+ * available, -EFSCORRUPTED if the margin covers the whole input, -ENOMEM if
+ * the temporary buffer cannot be allocated, -EIO if QPL fails to decompress,
+ * or -EFAULT if the job cannot be released properly.
+ */
+static int z_erofs_decompress_qpl(struct z_erofs_decompress_req *rq)
+{
+       u8 *dest = (u8 *)rq->out;
+       u8 *src = (u8 *)rq->in;
+       u8 *buff = NULL;
+       unsigned int inputmargin;
+       qpl_status status;
+       qpl_job *job;
+       int ret;
+
+       job = z_erofs_qpl_get_job();
+       if (IS_ERR(job))
+               return PTR_ERR(job);
+
+       /*
+        * From here on the job is owned by this function, so every exit has
+        * to go through out_inflate_end to hand it back; otherwise the job
+        * (and its share of the reclaim quota) would be lost.
+        */
+       inputmargin = z_erofs_fixup_insize(src, rq->inputsize);
+       if (inputmargin >= rq->inputsize) {
+               ret = -EFSCORRUPTED;
+               goto out_inflate_end;
+       }
+
+       if (rq->decodedskip) {
+               buff = malloc(rq->decodedlength);
+               if (!buff) {
+                       ret = -ENOMEM;
+                       goto out_inflate_end;
+               }
+               dest = buff;
+       }
+
+       job->op            = qpl_op_decompress;
+       job->next_in_ptr   = src + inputmargin;
+       job->next_out_ptr  = dest;
+       job->available_in  = rq->inputsize - inputmargin;
+       job->available_out = rq->decodedlength;
+       /* the whole stream is submitted at once */
+       job->flags         = QPL_FLAG_FIRST | QPL_FLAG_LAST;
+       status = qpl_execute_job(job);
+       if (status != QPL_STS_OK) {
+               erofs_err("failed to decompress: %d", status);
+               ret = -EIO;
+               goto out_inflate_end;
+       }
+
+       if (rq->decodedskip)
+               memcpy(rq->out, dest + rq->decodedskip,
+                      rq->decodedlength - rq->decodedskip);
+       ret = 0;
+out_inflate_end:
+       /* a failure to release the job takes precedence over `ret' */
+       if (!z_erofs_qpl_put_job(job))
+               ret = -EFAULT;
+       free(buff);     /* free(NULL) is a no-op */
+       return ret;
+}
+#else
+/*
+ * Built without Intel QPL: there is nothing to set up for DEFLATE, so the
+ * configuration is not inspected and success is always reported.
+ */
+int z_erofs_load_deflate_config(struct erofs_sb_info *sbi,
+                               struct erofs_super_block *dsb, void *data, int size)
+{
+       return 0;
+}
+#endif
+
 #ifdef HAVE_LIBDEFLATE
 /* if libdeflate is available, use libdeflate instead. */
 #include <libdeflate.h>
@@ -372,6 +529,11 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq)
        if (rq->alg == Z_EROFS_COMPRESSION_LZMA)
                return z_erofs_decompress_lzma(rq);
 #endif
+#ifdef HAVE_QPL
+       if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE && rq->sbi->useqpl)
+               if (!z_erofs_decompress_qpl(rq))
+                       return 0;
+#endif
 #if defined(HAVE_ZLIB) || defined(HAVE_LIBDEFLATE)
        if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE)
                return z_erofs_decompress_deflate(rq);
@@ -416,7 +578,10 @@ int z_erofs_parse_cfgs(struct erofs_sb_info *sbi, struct erofs_super_block *dsb)
                        break;
                }
 
-               ret = 0;
+               if (alg == Z_EROFS_COMPRESSION_DEFLATE)
+                       ret = z_erofs_load_deflate_config(sbi, dsb, data, size);
+               else
+                       ret = 0;
                free(data);
                if (ret)
                        break;
index af97e3939a79a01dbb985d8a267a84687db00a7b..6354712b2191cc1fa3b4c0381e05834e266fd72e 100644 (file)
@@ -7,4 +7,4 @@ mkfs_erofs_SOURCES = main.c
 mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
        ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} \
-       ${libdeflate_LIBS} ${libzstd_LIBS}
+       ${libdeflate_LIBS} ${libzstd_LIBS} ${libqpl_LIBS}