[PATCH v2 2/2] erofs-utils: support Intel Query Processing Library
Gao Xiang
hsiangkao at linux.alibaba.com
Wed Jun 5 22:32:33 AEST 2024
This adds the preliminary Intel QPL [1] support to enable built-in
In-Memory Analytics Accelerator [2] started from Sapphire Rapids.
It just leverages the synchronous APIs for the sake of simplicity
for now, thus performance for small compressed clusters can still
be improved in the future if needed anyway.
[ QPL 1.5.0+ is strictly needed for pkg-config detection and
it can be explicitly enabled by `--with-qpl`. ]
Here are some performance numbers for reference:
Processors: Intel(R) Xeon(R) Platinum 8475B (192 cores)
Memory: 512 GiB
Single-threaded decompression:
_____________________________________________________________
| | cluster size | image size | time (s) |
| LZ4 | 65536 | 391581696 | 0.364 |
| LZ4 | 1048576 | 373309440 | 0.376 |
| Intel QPL (IAA) | 1048576 | 374816768 | 0.386 |
| Intel QPL (IAA) | 65536 | 376057856 | 0.396 |
| Intel QPL (IAA) | 4096 | 399650816 | 0.675 |
| libdeflate (4k) | 1048576 | 374816768 | 1.862 |
| libdeflate (4k) | 65536 | 376057856 | 1.859 |
| libdeflate (4k) | 4096 | 399749120 | 2.203 |
| libdeflate | 1048576 | 323457024 | 1.318 |
| libdeflate | 65536 | 328712192 | 1.358 |
| libdeflate | 4096 | 389943296 | 2.103 |
| Zstd | N/A | 312548986 | 1.047 |
| Zstd (fast) | N/A | 453096980 | 0.740 |
|_________________|________________|______________|____________|
LZ4 1.9.4: [ mkfs.erofs -zlz4hc,12 -C65536 ]
[ mkfs.erofs -zlz4hc,12 -C1048576 ]
time fsck/fsck.erofs --extract
QPL 1.5.0 (IAA) / libdeflate 1.20 (4k):
[ mkfs.erofs -zdeflate,level=9,dictsize=4096 -C1048576 ]
[ mkfs.erofs -zdeflate,level=9,dictsize=4096 -C65536 ]
[ mkfs.erofs -zdeflate,level=9,dictsize=4096 -C4096 ]
time fsck/fsck.erofs --extract
libdeflate 1.20:
[ mkfs.erofs -zdeflate,level=9 -C1048576 ]
[ mkfs.erofs -zdeflate,level=9 -C65536 ]
[ mkfs.erofs -zdeflate,level=9 -C4096 ]
time fsck/fsck.erofs --extract
Zstd 1.5.6: [ zstd -k ] [ zstd -k --fast ]
time zstd -d -k -f -c --no-progress > /dev/null
[1] https://github.com/intel/qpl
[2] https://www.intel.com/content/www/us/en/products/docs/accelerator-engines/in-memory-analytics-accelerator.html
Cc: "Feghali, Wajdi K" <wajdi.k.feghali at intel.com>
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
changes since v1:
- Fix a wrong image size number in commit message;
- Minor configure.ac fix.
configure.ac | 37 +++++++++
dump/Makefile.am | 2 +-
fsck/Makefile.am | 4 +-
fuse/Makefile.am | 3 +-
include/erofs/internal.h | 1 +
lib/decompress.c | 167 ++++++++++++++++++++++++++++++++++++++-
mkfs/Makefile.am | 2 +-
7 files changed, 210 insertions(+), 6 deletions(-)
diff --git a/configure.ac b/configure.ac
index 1989bca..cfbde43 100644
--- a/configure.ac
+++ b/configure.ac
@@ -143,6 +143,11 @@ AC_ARG_WITH(libzstd,
[AS_HELP_STRING([--with-libzstd],
[Enable and build with of libzstd support @<:@default=auto@:>@])])
+AC_ARG_WITH(qpl,
+ [AS_HELP_STRING([--with-qpl],
+ [Enable and build with Intel QPL support @<:@default=disabled@:>@])], [],
+ [with_qpl="no"])
+
AC_ARG_ENABLE(fuse,
[AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])],
[enable_fuse="$enableval"], [enable_fuse="no"])
@@ -504,6 +509,31 @@ AS_IF([test "x$with_libzstd" != "xno"], [
])
])
+# Configure Intel QPL
+have_qpl="no"
+AS_IF([test "x$with_qpl" != "xno"], [
+ PKG_CHECK_MODULES([libqpl], [qpl >= 1.5.0], [
+ # Paranoia: don't trust the result reported by pkgconfig before trying out
+ saved_LIBS="$LIBS"
+ saved_CPPFLAGS=${CPPFLAGS}
+ CPPFLAGS="${libqpl_CFLAGS} ${CPPFLAGS}"
+ LIBS="${libqpl_LIBS} $LIBS"
+ AC_CHECK_HEADERS([qpl/qpl.h],[
+ AC_CHECK_LIB(qpl, qpl_execute_job, [], [
+ AC_MSG_ERROR([libqpl doesn't work properly])])
+ AC_CHECK_DECL(qpl_execute_job, [have_qpl="yes"],
+ [AC_MSG_ERROR([libqpl doesn't work properly])], [[
+#include <qpl/qpl.h>
+ ]])
+ ])
+ LIBS="${saved_LIBS}"
+ CPPFLAGS="${saved_CPPFLAGS}"], [
+ AS_IF([test "x$with_qpl" = "xyes"], [
+ AC_MSG_ERROR([Cannot find proper libqpl])
+ ])
+ ])
+])
+
# Enable 64-bit off_t
CFLAGS+=" -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64"
@@ -525,6 +555,7 @@ AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"])
AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"])
AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"])
AM_CONDITIONAL([ENABLE_LIBZSTD], [test "x${have_libzstd}" = "xyes"])
+AM_CONDITIONAL([ENABLE_QPL], [test "x${have_qpl}" = "xyes"])
if test "x$have_uuid" = "xyes"; then
AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
@@ -574,6 +605,12 @@ if test "x$have_libzstd" = "xyes"; then
AC_DEFINE([HAVE_LIBZSTD], 1, [Define to 1 if libzstd is found])
fi
+if test "x$have_qpl" = "xyes"; then
+ AC_DEFINE([HAVE_QPL], 1, [Define to 1 if qpl is found])
+ AC_SUBST([libqpl_LIBS])
+ AC_SUBST([libqpl_CFLAGS])
+fi
+
# Dump maximum block size
AS_IF([test "x$erofs_cv_max_block_size" = "x"],
[$erofs_cv_max_block_size = 4096], [])
diff --git a/dump/Makefile.am b/dump/Makefile.am
index 09c483e..2a4f67a 100644
--- a/dump/Makefile.am
+++ b/dump/Makefile.am
@@ -8,4 +8,4 @@ dump_erofs_SOURCES = main.c
dump_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
dump_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
- ${libzstd_LIBS}
+ ${libzstd_LIBS} ${libqpl_LIBS}
diff --git a/fsck/Makefile.am b/fsck/Makefile.am
index 70eacc0..5bdee4d 100644
--- a/fsck/Makefile.am
+++ b/fsck/Makefile.am
@@ -8,7 +8,7 @@ fsck_erofs_SOURCES = main.c
fsck_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
fsck_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
- ${libzstd_LIBS}
+ ${libzstd_LIBS} ${libqpl_LIBS}
if ENABLE_FUZZING
noinst_PROGRAMS = fuzz_erofsfsck
@@ -17,5 +17,5 @@ fuzz_erofsfsck_CFLAGS = -Wall -I$(top_srcdir)/include -DFUZZING
fuzz_erofsfsck_LDFLAGS = -fsanitize=address,fuzzer
fuzz_erofsfsck_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
- ${libzstd_LIBS}
+ ${libzstd_LIBS} ${libqpl_LIBS}
endif
diff --git a/fuse/Makefile.am b/fuse/Makefile.am
index 7eae5f6..2fd9b6d 100644
--- a/fuse/Makefile.am
+++ b/fuse/Makefile.am
@@ -7,4 +7,5 @@ erofsfuse_SOURCES = main.c
erofsfuse_CFLAGS = -Wall -I$(top_srcdir)/include
erofsfuse_CFLAGS += ${libfuse2_CFLAGS} ${libfuse3_CFLAGS} ${libselinux_CFLAGS}
erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse2_LIBS} ${libfuse3_LIBS} ${liblz4_LIBS} \
- ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} ${libzstd_LIBS}
+ ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} ${libzstd_LIBS} \
+ ${libqpl_LIBS}
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index d52bcc6..2067cb9 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -131,6 +131,7 @@ struct erofs_sb_info {
pthread_t dfops_worker;
struct erofs_mkfs_dfops *mkfs_dfops;
#endif
+ bool useqpl;
};
/* make sure that any user of the erofs headers has atleast 64bit off_t type */
diff --git a/lib/decompress.c b/lib/decompress.c
index 2842f51..1e22f9f 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -77,6 +77,163 @@ out:
}
#endif
+#ifdef HAVE_QPL
+#include <qpl/qpl.h>
+
+struct z_erofs_qpl_job {
+ struct z_erofs_qpl_job *next;
+ u8 job[];
+};
+static struct z_erofs_qpl_job *z_erofs_qpl_jobs;
+static unsigned int z_erofs_qpl_reclaim_quot;
+#ifdef HAVE_PTHREAD_H
+static pthread_mutex_t z_erofs_qpl_mutex;
+#endif
+
+int z_erofs_load_deflate_config(struct erofs_sb_info *sbi,
+ struct erofs_super_block *dsb, void *data, int size)
+{
+ struct z_erofs_deflate_cfgs *dfl = data;
+ static erofs_atomic_bool_t inited;
+
+ if (!dfl || size < sizeof(struct z_erofs_deflate_cfgs)) {
+ erofs_err("invalid deflate cfgs, size=%u", size);
+ return -EINVAL;
+ }
+
+ /*
+ * In Intel QPL, decompression is supported for DEFLATE streams where
+ * the size of the history buffer is no more than 4 KiB, otherwise
+ * QPL_STS_BAD_DIST_ERR code is returned.
+ */
+ sbi->useqpl = (dfl->windowbits <= 12);
+ if (sbi->useqpl) {
+ if (!erofs_atomic_test_and_set(&inited))
+ z_erofs_qpl_reclaim_quot = erofs_get_available_processors();
+ erofs_info("Intel QPL will be used for DEFLATE decompression");
+ }
+ return 0;
+}
+
+static qpl_job *z_erofs_qpl_get_job(void)
+{
+ qpl_path_t execution_path = qpl_path_auto;
+ struct z_erofs_qpl_job *job;
+ int32_t jobsize = 0;
+ qpl_status status;
+
+#ifdef HAVE_PTHREAD_H
+ pthread_mutex_lock(&z_erofs_qpl_mutex);
+#endif
+ job = z_erofs_qpl_jobs;
+ if (job)
+ z_erofs_qpl_jobs = job->next;
+#ifdef HAVE_PTHREAD_H
+ pthread_mutex_unlock(&z_erofs_qpl_mutex);
+#endif
+
+ if (!job) {
+ status = qpl_get_job_size(execution_path, &jobsize);
+ if (status != QPL_STS_OK) {
+ erofs_err("failed to get job size: %d", status);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ job = malloc(jobsize + sizeof(struct z_erofs_qpl_job));
+ if (!job)
+ return ERR_PTR(-ENOMEM);
+
+ status = qpl_init_job(execution_path, (qpl_job *)job->job);
+ if (status != QPL_STS_OK) {
+ erofs_err("failed to initialize job: %d", status);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ erofs_atomic_dec_return(&z_erofs_qpl_reclaim_quot);
+ }
+ return (qpl_job *)job->job;
+}
+
+static bool z_erofs_qpl_put_job(qpl_job *qjob)
+{
+ struct z_erofs_qpl_job *job =
+ container_of((void *)qjob, struct z_erofs_qpl_job, job);
+
+ if (erofs_atomic_inc_return(&z_erofs_qpl_reclaim_quot) <= 0) {
+ qpl_status status = qpl_fini_job(qjob);
+
+ free(job);
+ if (status != QPL_STS_OK)
+ erofs_err("failed to finalize job: %d", status);
+ return status == QPL_STS_OK;
+ }
+#ifdef HAVE_PTHREAD_H
+ pthread_mutex_lock(&z_erofs_qpl_mutex);
+#endif
+ job->next = z_erofs_qpl_jobs;
+ z_erofs_qpl_jobs = job;
+#ifdef HAVE_PTHREAD_H
+ pthread_mutex_unlock(&z_erofs_qpl_mutex);
+#endif
+ return true;
+}
+
+static int z_erofs_decompress_qpl(struct z_erofs_decompress_req *rq)
+{
+ u8 *dest = (u8 *)rq->out;
+ u8 *src = (u8 *)rq->in;
+ u8 *buff = NULL;
+ unsigned int inputmargin;
+ qpl_status status;
+ qpl_job *job;
+ int ret;
+
+ job = z_erofs_qpl_get_job();
+ if (IS_ERR(job))
+ return PTR_ERR(job);
+
+ inputmargin = z_erofs_fixup_insize(src, rq->inputsize);
+ if (inputmargin >= rq->inputsize)
+ return -EFSCORRUPTED;
+
+ if (rq->decodedskip) {
+ buff = malloc(rq->decodedlength);
+ if (!buff)
+ return -ENOMEM;
+ dest = buff;
+ }
+
+ job->op = qpl_op_decompress;
+ job->next_in_ptr = src + inputmargin;
+ job->next_out_ptr = dest;
+ job->available_in = rq->inputsize - inputmargin;
+ job->available_out = rq->decodedlength;
+ job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST;
+ status = qpl_execute_job(job);
+ if (status != QPL_STS_OK) {
+ erofs_err("failed to decompress: %d", status);
+ ret = -EIO;
+ goto out_inflate_end;
+ }
+
+ if (rq->decodedskip)
+ memcpy(rq->out, dest + rq->decodedskip,
+ rq->decodedlength - rq->decodedskip);
+ ret = 0;
+out_inflate_end:
+ if (!z_erofs_qpl_put_job(job))
+ ret = -EFAULT;
+ if (buff)
+ free(buff);
+ return ret;
+}
+#else
+int z_erofs_load_deflate_config(struct erofs_sb_info *sbi,
+ struct erofs_super_block *dsb, void *data, int size)
+{
+ return 0;
+}
+#endif
+
#ifdef HAVE_LIBDEFLATE
/* if libdeflate is available, use libdeflate instead. */
#include <libdeflate.h>
@@ -372,6 +529,11 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq)
if (rq->alg == Z_EROFS_COMPRESSION_LZMA)
return z_erofs_decompress_lzma(rq);
#endif
+#ifdef HAVE_QPL
+ if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE && rq->sbi->useqpl)
+ if (!z_erofs_decompress_qpl(rq))
+ return 0;
+#endif
#if defined(HAVE_ZLIB) || defined(HAVE_LIBDEFLATE)
if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE)
return z_erofs_decompress_deflate(rq);
@@ -416,7 +578,10 @@ int z_erofs_parse_cfgs(struct erofs_sb_info *sbi, struct erofs_super_block *dsb)
break;
}
- ret = 0;
+ if (alg == Z_EROFS_COMPRESSION_DEFLATE)
+ ret = z_erofs_load_deflate_config(sbi, dsb, data, size);
+ else
+ ret = 0;
free(data);
if (ret)
break;
diff --git a/mkfs/Makefile.am b/mkfs/Makefile.am
index af97e39..6354712 100644
--- a/mkfs/Makefile.am
+++ b/mkfs/Makefile.am
@@ -7,4 +7,4 @@ mkfs_erofs_SOURCES = main.c
mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} \
- ${libdeflate_LIBS} ${libzstd_LIBS}
+ ${libdeflate_LIBS} ${libzstd_LIBS} ${libqpl_LIBS}
--
2.39.3
More information about the Linux-erofs
mailing list