[PATCH v3] erofs-utils: add preliminary zstd support [x]
Gao Xiang
hsiangkao at linux.alibaba.com
Wed May 15 15:16:41 AEST 2024
This patch adds only preliminary Zstandard support to erofs-utils, since
Zstandard currently doesn't officially support fixed-sized output
compression. As a result, mkfs may take more time to finish, but at
least it works.
The built-in zstd compressor for erofs-utils is still a slow work in
progress, so it will apparently take more effort.
[ TODO: Later I intend to add, as an option, another way to generate
fixed-sized input pclusters as a temporary measure for relatively large
pcluster sizes, since it will have only a minor impact on the results. ]
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
changes since v2:
- use ZSTD_compress2(), since only it honors the previously applied
parameters.
configure.ac | 35 ++++++++++
dump/Makefile.am | 3 +-
fsck/Makefile.am | 6 +-
fuse/Makefile.am | 2 +-
include/erofs_fs.h | 10 +++
lib/Makefile.am | 3 +
lib/compress.c | 24 +++++++
lib/compressor.c | 8 +++
lib/compressor.h | 1 +
lib/compressor_libzstd.c | 143 +++++++++++++++++++++++++++++++++++++++
lib/decompress.c | 67 ++++++++++++++++++
mkfs/Makefile.am | 2 +-
12 files changed, 299 insertions(+), 5 deletions(-)
create mode 100644 lib/compressor_libzstd.c
diff --git a/configure.ac b/configure.ac
index 4a940a8..1560f84 100644
--- a/configure.ac
+++ b/configure.ac
@@ -139,6 +139,10 @@ AC_ARG_WITH(libdeflate,
[Enable and build with libdeflate inflate support @<:@default=disabled@:>@])], [],
[with_libdeflate="no"])
+# libzstd support is probed automatically unless --without-libzstd is given
+AC_ARG_WITH(libzstd,
+   [AS_HELP_STRING([--with-libzstd],
+      [Enable and build with libzstd support @<:@default=auto@:>@])])
+
AC_ARG_ENABLE(fuse,
[AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])],
[enable_fuse="$enableval"], [enable_fuse="no"])
@@ -474,6 +478,32 @@ AS_IF([test "x$with_libdeflate" != "xno"], [
LIBS="${saved_LIBS}"
CPPFLAGS="${saved_CPPFLAGS}"], [have_libdeflate="no"])
+# Configure libzstd
+# have_libzstd is set to "yes" only when pkg-config reports libzstd >= 1.4.0,
+# zstd.h is usable, and ZSTD_compress2 both links and is declared there.
+# If pkg-config cannot find the library, configure aborts only when
+# --with-libzstd=yes was given explicitly; otherwise zstd stays disabled.
+# ZSTD_getFrameContentSize is probed separately as an optional function.
+have_libzstd="no"
+AS_IF([test "x$with_libzstd" != "xno"], [
+ PKG_CHECK_MODULES([libzstd], [libzstd >= 1.4.0], [
+ # Paranoia: don't trust the result reported by pkgconfig before trying out
+ saved_LIBS="$LIBS"
+ saved_CPPFLAGS=${CPPFLAGS}
+ CPPFLAGS="${libzstd_CFLAGS} ${CPPFLAGS}"
+ LIBS="${libzstd_LIBS} $LIBS"
+ AC_CHECK_HEADERS([zstd.h],[
+ AC_CHECK_LIB(zstd, ZSTD_compress2, [], [
+ AC_MSG_ERROR([libzstd doesn't work properly])])
+ AC_CHECK_DECL(ZSTD_compress2, [have_libzstd="yes"],
+ [AC_MSG_ERROR([libzstd doesn't work properly])], [[
+#include <zstd.h>
+ ]])
+ AC_CHECK_FUNCS([ZSTD_getFrameContentSize])
+ ])
+ LIBS="${saved_LIBS}"
+ CPPFLAGS="${saved_CPPFLAGS}"], [
+ AS_IF([test "x$with_libzstd" = "xyes"], [
+ AC_MSG_ERROR([Cannot find proper libzstd])
+ ])
+ ])
+])
+
# Enable 64-bit off_t
CFLAGS+=" -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64"
@@ -494,6 +524,7 @@ AM_CONDITIONAL([ENABLE_LZ4HC], [test "x${have_lz4hc}" = "xyes"])
AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"])
AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"])
AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"])
+AM_CONDITIONAL([ENABLE_LIBZSTD], [test "x${have_libzstd}" = "xyes"])
if test "x$have_uuid" = "xyes"; then
AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
@@ -539,6 +570,10 @@ if test "x$have_libdeflate" = "xyes"; then
AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define to 1 if libdeflate is found])
fi
+if test "x$have_libzstd" = "xyes"; then
+ AC_DEFINE([HAVE_LIBZSTD], 1, [Define to 1 if libzstd is found])
+fi
+
# Dump maximum block size
AS_IF([test "x$erofs_cv_max_block_size" = "x"],
[$erofs_cv_max_block_size = 4096], [])
diff --git a/dump/Makefile.am b/dump/Makefile.am
index aed20c2..09c483e 100644
--- a/dump/Makefile.am
+++ b/dump/Makefile.am
@@ -7,4 +7,5 @@ AM_CPPFLAGS = ${libuuid_CFLAGS}
dump_erofs_SOURCES = main.c
dump_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
dump_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
- ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
+ ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
+ ${libzstd_LIBS}
diff --git a/fsck/Makefile.am b/fsck/Makefile.am
index d024405..70eacc0 100644
--- a/fsck/Makefile.am
+++ b/fsck/Makefile.am
@@ -7,7 +7,8 @@ AM_CPPFLAGS = ${libuuid_CFLAGS}
fsck_erofs_SOURCES = main.c
fsck_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
fsck_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
- ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
+ ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
+ ${libzstd_LIBS}
if ENABLE_FUZZING
noinst_PROGRAMS = fuzz_erofsfsck
@@ -15,5 +16,6 @@ fuzz_erofsfsck_SOURCES = main.c
fuzz_erofsfsck_CFLAGS = -Wall -I$(top_srcdir)/include -DFUZZING
fuzz_erofsfsck_LDFLAGS = -fsanitize=address,fuzzer
fuzz_erofsfsck_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
- ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
+ ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
+ ${libzstd_LIBS}
endif
diff --git a/fuse/Makefile.am b/fuse/Makefile.am
index c63efcd..7eae5f6 100644
--- a/fuse/Makefile.am
+++ b/fuse/Makefile.am
@@ -7,4 +7,4 @@ erofsfuse_SOURCES = main.c
erofsfuse_CFLAGS = -Wall -I$(top_srcdir)/include
erofsfuse_CFLAGS += ${libfuse2_CFLAGS} ${libfuse3_CFLAGS} ${libselinux_CFLAGS}
erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse2_LIBS} ${libfuse3_LIBS} ${liblz4_LIBS} \
- ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
+ ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} ${libzstd_LIBS}
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index eba6c26..907f3d8 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -304,6 +304,7 @@ enum {
Z_EROFS_COMPRESSION_LZ4 = 0,
Z_EROFS_COMPRESSION_LZMA = 1,
Z_EROFS_COMPRESSION_DEFLATE = 2,
+ Z_EROFS_COMPRESSION_ZSTD = 3,
Z_EROFS_COMPRESSION_MAX
};
#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1)
@@ -330,6 +331,15 @@ struct z_erofs_deflate_cfgs {
u8 reserved[5];
} __packed;
+/*
+ * zstd compression configuration record.
+ * 6 bytes (+ length field = 8 bytes)
+ */
+struct z_erofs_zstd_cfgs {
+ /* NOTE(review): meaning not defined in this patch; the mkfs writer leaves it zero */
+ u8 format;
+ u8 windowlog; /* windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN(10) */
+ /* padding up to 6 bytes; the mkfs writer in this patch leaves it zero */
+ u8 reserved[4];
+} __packed;
+
+#define Z_EROFS_ZSTD_MAX_DICT_SIZE Z_EROFS_PCLUSTER_MAX_SIZE
+
/*
* bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
* e.g. for 4k logical cluster size, 4B if compacted 2B is off;
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b3bea74..2cb4cab 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -53,6 +53,9 @@ liberofs_la_SOURCES += kite_deflate.c compressor_deflate.c
if ENABLE_LIBDEFLATE
liberofs_la_SOURCES += compressor_libdeflate.c
endif
+if ENABLE_LIBZSTD
+liberofs_la_SOURCES += compressor_libzstd.c
+endif
if ENABLE_EROFS_MT
liberofs_la_LDFLAGS = -lpthread
liberofs_la_SOURCES += workqueue.c
diff --git a/lib/compress.c b/lib/compress.c
index e3e4c21..f783236 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -1655,6 +1655,30 @@ static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
sizeof(zalg));
bh->op = &erofs_drop_directly_bhops;
}
+#ifdef HAVE_LIBZSTD
+ if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_ZSTD)) {
+ struct {
+ __le16 size;
+ struct z_erofs_zstd_cfgs z;
+ } __packed zalg = {
+ .size = cpu_to_le16(sizeof(struct z_erofs_zstd_cfgs)),
+ .z = {
+ .windowlog =
+ ilog2(max_dict_size[Z_EROFS_COMPRESSION_ZSTD]) - 10,
+ }
+ };
+
+ bh = erofs_battach(bh, META, sizeof(zalg));
+ if (IS_ERR(bh)) {
+ DBG_BUGON(1);
+ return PTR_ERR(bh);
+ }
+ erofs_mapbh(bh->block);
+ ret = dev_write(sbi, &zalg, erofs_btell(bh, false),
+ sizeof(zalg));
+ bh->op = &erofs_drop_directly_bhops;
+ }
+#endif
return ret;
}
diff --git a/lib/compressor.c b/lib/compressor.c
index 175259e..24c99ac 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -37,6 +37,14 @@ static const struct erofs_algorithm erofs_algs[] = {
{ "libdeflate", &erofs_compressor_libdeflate,
Z_EROFS_COMPRESSION_DEFLATE, true },
#endif
+
+ { "zstd",
+#ifdef HAVE_LIBZSTD
+ &erofs_compressor_libzstd,
+#else
+ NULL,
+#endif
+ Z_EROFS_COMPRESSION_ZSTD, false },
};
int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c)
diff --git a/lib/compressor.h b/lib/compressor.h
index 96f2d21..59d525d 100644
--- a/lib/compressor.h
+++ b/lib/compressor.h
@@ -53,6 +53,7 @@ extern const struct erofs_compressor erofs_compressor_lz4hc;
extern const struct erofs_compressor erofs_compressor_lzma;
extern const struct erofs_compressor erofs_compressor_deflate;
extern const struct erofs_compressor erofs_compressor_libdeflate;
+extern const struct erofs_compressor erofs_compressor_libzstd;
int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c);
int erofs_compress_destsize(const struct erofs_compress *c,
diff --git a/lib/compressor_libzstd.c b/lib/compressor_libzstd.c
new file mode 100644
index 0000000..223806e
--- /dev/null
+++ b/lib/compressor_libzstd.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include "erofs/config.h"
+#include <zstd.h>
+#include <zstd_errors.h>
+#include <alloca.h>
+#include "compressor.h"
+#include "erofs/atomic.h"
+
+/*
+ * Find the longest prefix of @src whose zstd-compressed form fits into
+ * @dstsize bytes, by repeatedly compressing trial prefixes.
+ *
+ * @c:       compressor handle; c->private_data is used as the ZSTD_CCtx
+ * @src:     input data
+ * @srcsize: in: number of input bytes available;
+ *           out: length of the input prefix that was actually chosen
+ * @dst:     output buffer; receives the compressed bytes of each fitting trial
+ * @dstsize: capacity of @dst in bytes
+ *
+ * Returns the compressed size of the chosen prefix (0 if no trial fit), or
+ * -EFAULT if zstd reports an error other than "destination too small".
+ */
+static int libzstd_compress_destsize(const struct erofs_compress *c,
+ const void *src, unsigned int *srcsize,
+ void *dst, unsigned int dstsize)
+{
+ ZSTD_CCtx *cctx = c->private_data;
+ size_t l = 0; /* largest input that fits so far */
+ size_t l_csize = 0;
+ size_t r = *srcsize + 1; /* smallest input that doesn't fit so far */
+ size_t m;
+ /*
+  * Scratch output buffer on the stack, 32 bytes larger than @dstsize.
+  * NOTE(review): the slack presumably lets slightly oversized results come
+  * back as a size instead of an error - confirm.
+  */
+ u8 *fitblk_buffer = alloca(dstsize + 32);
+
+ /* first trial length: four times the output budget */
+ m = dstsize * 4;
+ for (;;) {
+ size_t csize;
+
+ /* keep the trial length strictly between l and r */
+ m = max(m, l + 1);
+ m = min(m, r - 1);
+
+ csize = ZSTD_compress2(cctx, fitblk_buffer,
+ dstsize + 32, src, m);
+ if (ZSTD_isError(csize)) {
+ /* "destination too small" just means this trial doesn't fit */
+ if (ZSTD_getErrorCode(csize) == ZSTD_error_dstSize_tooSmall)
+ goto doesnt_fit;
+ return -EFAULT;
+ }
+
+ if (csize > 0 && csize <= dstsize) {
+ /* Fits */
+ memcpy(dst, fitblk_buffer, csize);
+ l = m;
+ l_csize = csize;
+ /* stop once the bounds meet or the output is within one byte of full */
+ if (r <= l + 1 || csize + 1 >= dstsize)
+ break;
+ /*
+ * Estimate needed input prefix size based on current
+ * compression ratio.
+ */
+ m = (dstsize * m) / csize;
+ } else {
+doesnt_fit:
+ /* Doesn't fit */
+ r = m;
+ if (r <= l + 1)
+ break;
+ /* bisect between the known-good and known-bad lengths */
+ m = (l + r) / 2;
+ }
+ }
+ *srcsize = l;
+ return l_csize;
+}
+
+/*
+ * Release the zstd compression context held in c->private_data.
+ *
+ * Returns 0 on success, or -EINVAL if @c holds no context.
+ */
+static int compressor_libzstd_exit(struct erofs_compress *c)
+{
+	if (!c->private_data)
+		return -EINVAL;
+	ZSTD_freeCCtx(c->private_data);
+	/*
+	 * Clear the pointer: compressor_libzstd_init() frees whatever
+	 * private_data holds, so a stale value would be freed twice.
+	 */
+	c->private_data = NULL;
+	return 0;
+}
+
+/*
+ * Record the requested zstd compression level in @c.
+ *
+ * Levels above erofs_compressor_libzstd.best_level are rejected with
+ * -EINVAL; any other value is stored as-is and 0 is returned.
+ */
+static int erofs_compressor_libzstd_setlevel(struct erofs_compress *c,
+					     int compression_level)
+{
+	if (compression_level <= erofs_compressor_libzstd.best_level) {
+		c->compression_level = compression_level;
+		return 0;
+	}
+
+	erofs_err("invalid compression level %d", compression_level);
+	return -EINVAL;
+}
+
+/*
+ * Choose and validate the dictionary (window) size for @c.
+ *
+ * @dict_size: requested size in bytes. 0 selects a default: the compressor's
+ *             default_dictsize if that is non-zero, otherwise eight times
+ *             cfg.c_mkfs_pclustersize_max, capped at
+ *             Z_EROFS_ZSTD_MAX_DICT_SIZE and rounded down to a power of two.
+ *
+ * Returns 0 and stores the size in c->dict_size on success, or -EINVAL if
+ * the size is not a power of two or exceeds Z_EROFS_ZSTD_MAX_DICT_SIZE.
+ */
+static int erofs_compressor_libzstd_setdictsize(struct erofs_compress *c,
+						u32 dict_size)
+{
+	if (!dict_size) {
+		if (erofs_compressor_libzstd.default_dictsize) {
+			dict_size = erofs_compressor_libzstd.default_dictsize;
+		} else {
+			dict_size = min_t(u32, Z_EROFS_ZSTD_MAX_DICT_SIZE,
+					  cfg.c_mkfs_pclustersize_max << 3);
+			dict_size = 1U << ilog2(dict_size);
+		}
+	}
+	/*
+	 * Shift an unsigned 1: the power-of-two test runs before the range
+	 * check, and a signed "1 << 31" would be undefined for a
+	 * caller-supplied size of 2^31 or more.
+	 */
+	if (dict_size != 1U << ilog2(dict_size) ||
+	    dict_size > Z_EROFS_ZSTD_MAX_DICT_SIZE) {
+		erofs_err("invalid dictionary size %u", dict_size);
+		return -EINVAL;
+	}
+	c->dict_size = dict_size;
+	return 0;
+}
+
+/*
+ * (Re)create the zstd compression context for @c and apply the compression
+ * level and window log taken from c->compression_level and c->dict_size.
+ *
+ * Any context already stored in c->private_data is released first. On
+ * failure c->private_data is left NULL and nothing is leaked.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated, or
+ * -EINVAL if zstd rejects one of the parameters.
+ */
+static int compressor_libzstd_init(struct erofs_compress *c)
+{
+	static erofs_atomic_bool_t __warnonce;
+	ZSTD_CCtx *cctx;
+	size_t err;
+
+	/* drop the previous context and forget it before anything can fail */
+	ZSTD_freeCCtx(c->private_data);
+	c->private_data = NULL;
+
+	cctx = ZSTD_createCCtx();
+	if (!cctx)
+		return -ENOMEM;
+
+	err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, c->compression_level);
+	if (ZSTD_isError(err)) {
+		erofs_err("failed to set compression level: %s",
+			  ZSTD_getErrorName(err));
+		goto err_free;
+	}
+	err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, ilog2(c->dict_size));
+	if (ZSTD_isError(err)) {
+		erofs_err("failed to set window log: %s", ZSTD_getErrorName(err));
+		goto err_free;
+	}
+	c->private_data = cctx;
+
+	/* print the experimental-status notice only once per process */
+	if (!erofs_atomic_test_and_set(&__warnonce)) {
+		erofs_warn("EXPERIMENTAL libzstd compressor in use. Note that `fitblk` isn't supported by upstream zstd for now.");
+		erofs_warn("Therefore it will takes more time in order to get the optimal result.");
+		erofs_info("You could clarify further needs in zstd repository <https://github.com/facebook/zstd/issues> for reference too.");
+	}
+	return 0;
+
+err_free:
+	/* the half-configured context is not stored anywhere else */
+	ZSTD_freeCCtx(cctx);
+	return -EINVAL;
+}
+
+/*
+ * Compressor descriptor for the libzstd backend.
+ * default_dictsize is not set here, so setdictsize() derives the default
+ * dictionary size from the configured maximum pcluster size instead.
+ */
+const struct erofs_compressor erofs_compressor_libzstd = {
+ .default_level = ZSTD_CLEVEL_DEFAULT,
+ /* upper bound enforced by setlevel(); NOTE(review): presumably zstd's maximum level - confirm */
+ .best_level = 22,
+ .max_dictsize = Z_EROFS_ZSTD_MAX_DICT_SIZE,
+ .init = compressor_libzstd_init,
+ .exit = compressor_libzstd_exit,
+ .setlevel = erofs_compressor_libzstd_setlevel,
+ .setdictsize = erofs_compressor_libzstd_setdictsize,
+ .compress_destsize = libzstd_compress_destsize,
+};
diff --git a/lib/decompress.c b/lib/decompress.c
index fe8a40c..58ce7e5 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -9,6 +9,69 @@
#include "erofs/err.h"
#include "erofs/print.h"
+#ifdef HAVE_LIBZSTD
+#include <zstd.h>
+#include <zstd_errors.h>
+
+/*
+ * Decompress the zstd frame described by @rq (also a very preliminary
+ * userspace version).
+ *
+ * Leading zero bytes in rq->in (at most one block) are skipped as input
+ * margin. The whole frame is decoded; when rq->decodedskip is set or the
+ * frame holds more than rq->decodedlength bytes, decoding goes through a
+ * temporary buffer and only bytes [decodedskip, decodedlength) are copied
+ * to rq->out.
+ *
+ * Returns the number of bytes decoded from the frame on success,
+ * -EFSCORRUPTED if the input is all padding, the frame size is unknown, or
+ * the frame is shorter than rq->decodedlength, -ENOMEM if the temporary
+ * buffer cannot be allocated, or -EIO if zstd fails or decodes an
+ * unexpected length.
+ * NOTE(review): success returns a positive byte count rather than 0 -
+ * confirm callers only test for negative values.
+ */
+static int z_erofs_decompress_zstd(struct z_erofs_decompress_req *rq)
+{
+	struct erofs_sb_info *sbi = rq->sbi;
+	int ret = 0;
+	char *dest = rq->out;
+	char *src = rq->in;
+	char *buff = NULL;
+	unsigned int inputmargin = 0;
+	unsigned long long total;
+	size_t dsize;
+
+	/* skip zero padding in front of the frame, up to one full block */
+	while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+		if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
+			break;
+
+	if (inputmargin >= rq->inputsize)
+		return -EFSCORRUPTED;
+
+#ifdef HAVE_ZSTD_GETFRAMECONTENTSIZE
+	total = ZSTD_getFrameContentSize(src + inputmargin,
+					 rq->inputsize - inputmargin);
+	if (total == ZSTD_CONTENTSIZE_UNKNOWN ||
+	    total == ZSTD_CONTENTSIZE_ERROR)
+		return -EFSCORRUPTED;
+#else
+	total = ZSTD_getDecompressedSize(src + inputmargin,
+					 rq->inputsize - inputmargin);
+#endif
+	/*
+	 * The frame must hold at least the requested bytes; otherwise the
+	 * final memcpy() would read past the end of the decoded data.
+	 */
+	if (total < rq->decodedlength)
+		return -EFSCORRUPTED;
+
+	if (rq->decodedskip || total != rq->decodedlength) {
+		buff = malloc(total);
+		if (!buff)
+			return -ENOMEM;
+		dest = buff;
+	}
+
+	/* keep the size_t result so error codes are not squeezed through int */
+	dsize = ZSTD_decompress(dest, total,
+				src + inputmargin, rq->inputsize - inputmargin);
+	if (ZSTD_isError(dsize)) {
+		erofs_err("ZSTD decompress failed %d: %s", ZSTD_getErrorCode(dsize),
+			  ZSTD_getErrorName(dsize));
+		ret = -EIO;
+		goto out;
+	}
+
+	if (dsize != total) {
+		erofs_err("ZSTD decompress length mismatch %zu, expected %llu",
+			  dsize, total);
+		ret = -EIO;
+		goto out;
+	}
+	ret = (int)dsize;
+
+	/* buff is set exactly when a partial copy into rq->out is needed */
+	if (buff)
+		memcpy(rq->out, dest + rq->decodedskip,
+		       rq->decodedlength - rq->decodedskip);
+out:
+	free(buff);
+	return ret;
+}
+#endif
+
#ifdef HAVE_LIBDEFLATE
/* if libdeflate is available, use libdeflate instead. */
#include <libdeflate.h>
@@ -322,6 +385,10 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq)
#if defined(HAVE_ZLIB) || defined(HAVE_LIBDEFLATE)
if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE)
return z_erofs_decompress_deflate(rq);
+#endif
+#ifdef HAVE_LIBZSTD
+ if (rq->alg == Z_EROFS_COMPRESSION_ZSTD)
+ return z_erofs_decompress_zstd(rq);
#endif
return -EOPNOTSUPP;
}
diff --git a/mkfs/Makefile.am b/mkfs/Makefile.am
index dd75485..af97e39 100644
--- a/mkfs/Makefile.am
+++ b/mkfs/Makefile.am
@@ -7,4 +7,4 @@ mkfs_erofs_SOURCES = main.c
mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} \
- ${libdeflate_LIBS}
+ ${libdeflate_LIBS} ${libzstd_LIBS}
--
2.39.3
More information about the Linux-erofs
mailing list