[PATCH v3 0/3] erofs-utils: mkfs: allow to specify dictionary size for compression algorithms

Yifan Zhao zhaoyifan at sjtu.edu.cn
Fri Jan 19 23:46:40 AEDT 2024


This patchset allows specifying the dictionary size for compression algorithms.

Changes since v2:
 - do not touch kite_deflate.c
 - do not add unneeded macro definitions unrelated to the on-disk format
 - rework the hacky way of getting max_dict_size in z_erofs_build_compr_cfgs()
 - some renames, reorganization and code style fixes

Yifan Zhao (3):
  erofs-utils: mkfs: merge erofs_compressor_setlevel() into
    erofs_compressor_init()
  erofs-utils: mkfs: allow to specify dictionary size for compression
    algorithms
  erofs-utils: mkfs: reorganize logic in erofs_compressor_init()

 include/erofs/config.h      |  10 ++--
 lib/compress.c              |  37 +++++++------
 lib/compress_hints.c        |   2 +-
 lib/compressor.c            |  45 +++++++++++-----
 lib/compressor.h            |   9 ++--
 lib/compressor_deflate.c    |  40 ++++++++++----
 lib/compressor_libdeflate.c |  14 +++--
 lib/compressor_liblzma.c    |  54 ++++++++++++-------
 lib/compressor_lz4.c        |   2 -
 lib/compressor_lz4hc.c      |   5 +-
 lib/config.c                |   4 +-
 lib/inode.c                 |   2 +-
 mkfs/main.c                 | 104 ++++++++++++++++++++++++++++--------
 13 files changed, 229 insertions(+), 99 deletions(-)

Interdiff against v2:
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 7dbfd1e..eecf575 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -34,7 +34,7 @@ enum {
 
 #define EROFS_MAX_COMPR_CFGS		64
 
-struct erofs_compr_cfg {
+struct erofs_compr_opts {
 	char *alg;
 	int level;
 	u32 dict_size;
@@ -70,7 +70,7 @@ struct erofs_configure {
 	char *c_src_path;
 	char *c_blobdev_path;
 	char *c_compress_hints_file;
-	struct erofs_compr_cfg c_compr_opts[EROFS_MAX_COMPR_CFGS];
+	struct erofs_compr_opts c_compr_opts[EROFS_MAX_COMPR_CFGS];
 	char c_force_inodeversion;
 	char c_force_chunkformat;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 72f0ca6..eba6c26 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -322,7 +322,6 @@ struct z_erofs_lzma_cfgs {
 	u8 reserved[8];
 } __packed;
 
-#define Z_EROFS_LZMA_DEFAULT_DICT_SIZE	(8 * Z_EROFS_PCLUSTER_MAX_SIZE)
 #define Z_EROFS_LZMA_MAX_DICT_SIZE	(8 * Z_EROFS_PCLUSTER_MAX_SIZE)
 
 /* 6 bytes (+ length field = 8 bytes) */
@@ -331,9 +330,6 @@ struct z_erofs_deflate_cfgs {
 	u8 reserved[5];
 } __packed;
 
-#define Z_EROFS_DEFLATE_DEFULT_DICT_SIZE	(1U << 15)
-#define Z_EROFS_DEFLATE_MAX_DICT_SIZE		(1U << 15)
-
 /*
  * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
  *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
diff --git a/lib/compress.c b/lib/compress.c
index b7ee9ec..ea9d00d 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -1124,11 +1124,10 @@ err_free_meta:
 
 static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
 				    struct erofs_buffer_head *sb_bh,
-				    struct erofs_compress_cfg *ccfg)
+				    u32 *max_dict_size)
 {
 	struct erofs_buffer_head *bh = sb_bh;
-	int i, ret = 0;
-	u32 dict_size = 0;
+	int ret = 0;
 
 	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) {
 		struct {
@@ -1155,22 +1154,15 @@ static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
 	}
 #ifdef HAVE_LIBLZMA
 	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) {
-		for (i = 0; i < EROFS_MAX_COMPR_CFGS; i++) {
-			if (ccfg[i].enable &&
-			    ccfg[i].algorithmtype == Z_EROFS_COMPRESSION_LZMA) {
-				dict_size = ccfg[i].handle.dict_size;
-				break;
-			}
-		}
-		DBG_BUGON(!dict_size);
-
 		struct {
 			__le16 size;
 			struct z_erofs_lzma_cfgs lzma;
 		} __packed lzmaalg = {
 			.size = cpu_to_le16(sizeof(struct z_erofs_lzma_cfgs)),
 			.lzma = {
-				.dict_size = cpu_to_le32(dict_size),
+				.dict_size = cpu_to_le32(
+					max_dict_size
+						[Z_EROFS_COMPRESSION_LZMA]),
 			}
 		};
 
@@ -1186,23 +1178,15 @@ static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
 	}
 #endif
 	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_DEFLATE)) {
-		for (i = 0; i < EROFS_MAX_COMPR_CFGS; i++) {
-			if (ccfg[i].enable &&
-			    ccfg[i].algorithmtype == Z_EROFS_COMPRESSION_DEFLATE) {
-				dict_size = ccfg[i].handle.dict_size;
-				break;
-			}
-		}
-		DBG_BUGON(!dict_size);
-
 		struct {
 			__le16 size;
 			struct z_erofs_deflate_cfgs z;
 		} __packed zalg = {
 			.size = cpu_to_le16(sizeof(struct z_erofs_deflate_cfgs)),
 			.z = {
-				.windowbits =
-					cpu_to_le32(ilog2(dict_size)),
+				.windowbits = cpu_to_le32(ilog2(
+					max_dict_size
+						[Z_EROFS_COMPRESSION_DEFLATE])),
 			}
 		};
 
@@ -1222,6 +1206,7 @@ static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
 int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh)
 {
 	int i, ret;
+	u32 max_dict_size[Z_EROFS_COMPRESSION_MAX];
 
 	for (i = 0; cfg.c_compr_opts[i].alg; ++i) {
 		struct erofs_compress *c = &erofs_ccfg[i].handle;
@@ -1238,6 +1223,9 @@ int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *s
 		sbi->available_compr_algs |= 1 << erofs_ccfg[i].algorithmtype;
 		if (erofs_ccfg[i].algorithmtype != Z_EROFS_COMPRESSION_LZ4)
 			erofs_sb_set_compr_cfgs(sbi);
+		max_dict_size[erofs_ccfg[i].algorithmtype] =
+			max(max_dict_size[erofs_ccfg[i].algorithmtype],
+			    c->dict_size);
 	}
 
 	/*
@@ -1271,7 +1259,7 @@ int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *s
 	}
 
 	if (erofs_sb_has_compr_cfgs(sbi))
-		return z_erofs_build_compr_cfgs(sbi, sb_bh, erofs_ccfg);
+		return z_erofs_build_compr_cfgs(sbi, sb_bh, max_dict_size);
 	return 0;
 }
 
diff --git a/lib/compressor.c b/lib/compressor.c
index 27b4077..290746e 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -102,26 +102,28 @@ int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c,
 
 		if (erofs_algs[i].c->setlevel) {
 			ret = erofs_algs[i].c->setlevel(c, compression_level);
-			if (ret)
-				return ret;
-		} else {
-			if (compression_level >= 0)
-				erofs_warn(
-					"compression level %d is ignored for %s",
+			if (ret) {
+				erofs_err("failed to set compression level %d for %s",
 					  compression_level, alg_name);
-			c->compression_level = 0;
+				return ret;
+			}
+		} else if (compression_level >= 0) {
+			erofs_err("compression level is not supported for %s",
+				  alg_name);
+			return -EINVAL;
 		}
 
 		if (erofs_algs[i].c->setdictsize) {
 			ret = erofs_algs[i].c->setdictsize(c, dict_size);
-			if (ret)
-				return ret;
-		} else {
-			if (dict_size)
-				erofs_warn(
-					"dictionary size %u is ignored for %s",
+			if (ret) {
+				erofs_err("failed to set dict size %u for %s",
 					  dict_size, alg_name);
-			c->dict_size = 0;
+				return ret;
+			}
+		} else if (dict_size) {
+			erofs_err("dict size is not supported for %s",
+				  alg_name);
+			return -EINVAL;
 		}
 
 		ret = erofs_algs[i].c->init(c);
diff --git a/lib/compressor_deflate.c b/lib/compressor_deflate.c
index d9f8a91..479ad56 100644
--- a/lib/compressor_deflate.c
+++ b/lib/compressor_deflate.c
@@ -84,8 +84,8 @@ static int erofs_compressor_deflate_setdictsize(struct erofs_compress *c,
 const struct erofs_compressor erofs_compressor_deflate = {
 	.default_level = 1,
 	.best_level = 9,
-	.default_dictsize = Z_EROFS_DEFLATE_DEFULT_DICT_SIZE,
-	.max_dictsize = Z_EROFS_DEFLATE_MAX_DICT_SIZE,
+	.default_dictsize = 1 << 15,
+	.max_dictsize = 1 << 15,
 	.init = compressor_deflate_init,
 	.exit = compressor_deflate_exit,
 	.setlevel = erofs_compressor_deflate_setlevel,
diff --git a/lib/compressor_liblzma.c b/lib/compressor_liblzma.c
index 0203a5c..b8ae29c 100644
--- a/lib/compressor_liblzma.c
+++ b/lib/compressor_liblzma.c
@@ -73,7 +73,8 @@ static int erofs_compressor_liblzma_setdictsize(struct erofs_compress *c,
 	if (dict_size == 0)
 		dict_size = erofs_compressor_lzma.default_dictsize;
 
-	if (dict_size > erofs_compressor_lzma.max_dictsize) {
+	if (dict_size > erofs_compressor_lzma.max_dictsize ||
+	    dict_size < 4096) {
 		erofs_err("invalid dict size %u", dict_size);
 		return -EINVAL;
 	}
@@ -111,7 +112,7 @@ static int erofs_compressor_liblzma_init(struct erofs_compress *c)
 const struct erofs_compressor erofs_compressor_lzma = {
 	.default_level = LZMA_PRESET_DEFAULT,
 	.best_level = 109,
-	.default_dictsize = Z_EROFS_LZMA_DEFAULT_DICT_SIZE,
+	.default_dictsize = 8 * Z_EROFS_PCLUSTER_MAX_SIZE,
 	.max_dictsize = Z_EROFS_LZMA_MAX_DICT_SIZE,
 	.init = erofs_compressor_liblzma_init,
 	.exit = erofs_compressor_liblzma_exit,
diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c
index 2357f76..8667954 100644
--- a/lib/kite_deflate.c
+++ b/lib/kite_deflate.c
@@ -5,7 +5,6 @@
  * Copyright (C) 2023, Alibaba Cloud
  * Copyright (C) 2023, Gao Xiang <xiang at kernel.org>
  */
-#include "erofs/internal.h"
 #include "erofs/defs.h"
 #include "erofs/print.h"
 #include <errno.h>
@@ -23,7 +22,7 @@ unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
 #define kite_dbg(x, ...)
 #endif
 
-#define kHistorySize32		Z_EROFS_DEFLATE_DEFULT_DICT_SIZE
+#define kHistorySize32		(1U << 15)
 
 #define kNumLenSymbols32	256
 #define kNumLenSymbolsMax	kNumLenSymbols32
diff --git a/mkfs/main.c b/mkfs/main.c
index 8a9c3cc..acb2108 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -310,43 +310,31 @@ handle_fragment:
 	return 0;
 }
 
-static int mkfs_parse_compress_algs(char *algs)
+static int mkfs_parse_one_compress_alg(char *alg,
+				       struct erofs_compr_opts *copts)
 {
-	unsigned int i;
-	char *s;
-
-	for (s = strtok(algs, ":"), i = 0; s; s = strtok(NULL, ":"), ++i) {
 	char *p, *q, *opt, *endptr;
-		struct erofs_compr_cfg *ccfg;
 
-		if (i >= EROFS_MAX_COMPR_CFGS - 1) {
-			erofs_err("too many algorithm types");
-			return -EINVAL;
-		}
+	copts->level = -1;
+	copts->dict_size = 0;
 
-		ccfg = cfg.c_compr_opts + i;
-
-		ccfg->level = -1;
-		ccfg->dict_size = 0;
-
-		p = strchr(s, ',');
+	p = strchr(alg, ',');
 	if (p) {
-			ccfg->alg = strndup(s, p - s);
+		copts->alg = strndup(alg, p - alg);
 
-			/* backward compatibility */
+		/* support old '-zlzma,9' form */
 		if (isdigit(*(p + 1))) {
-				ccfg->level = strtol(p + 1, &endptr, 10);
+			copts->level = strtol(p + 1, &endptr, 10);
 			if (*endptr && *endptr != ',') {
-					erofs_err(
-						"invalid compression level %s",
+				erofs_err("invalid compression level %s",
 					  p + 1);
 				return -EINVAL;
 			}
-				continue;
+			return 0;
 		}
 	} else {
-			ccfg->alg = strdup(s);
-			continue;
+		copts->alg = strdup(alg);
+		return 0;
 	}
 
 	opt = p + 1;
@@ -357,26 +345,20 @@ static int mkfs_parse_compress_algs(char *algs)
 
 		if ((p = strstr(opt, "level="))) {
 			p += strlen("level=");
-				ccfg->level = strtol(p, &endptr, 10);
-				if ((endptr == p) ||
-				    (*endptr && *endptr != ',')) {
-					erofs_err(
-						"invalid compression level %s",
-						p);
+			copts->level = strtol(p, &endptr, 10);
+			if ((endptr == p) || (*endptr && *endptr != ',')) {
+				erofs_err("invalid compression level %s", p);
 				return -EINVAL;
 			}
 		} else if ((p = strstr(opt, "dictsize="))) {
 			p += strlen("dictsize=");
-				ccfg->dict_size = strtoul(p, &endptr, 10);
+			copts->dict_size = strtoul(p, &endptr, 10);
 			if (*endptr == 'k' || *endptr == 'K')
-					ccfg->dict_size <<= 10;
+				copts->dict_size <<= 10;
 			else if (*endptr == 'm' || *endptr == 'M')
-					ccfg->dict_size <<= 20;
-				else if ((endptr == p) ||
-					 (*endptr && *endptr != ',')) {
-					erofs_err(
-						"invalid compression dictsize %s",
-						p);
+				copts->dict_size <<= 20;
+			else if ((endptr == p) || (*endptr && *endptr != ',')) {
+				erofs_err("invalid compression dictsize %s", p);
 				return -EINVAL;
 			}
 		} else {
@@ -384,11 +366,27 @@ static int mkfs_parse_compress_algs(char *algs)
 			return -EINVAL;
 		}
 
-			if (q)
-				opt = q + 1;
-			else
-				opt = NULL;
+		opt = q ? q + 1 : NULL;
 	}
+
+	return 0;
+}
+
+static int mkfs_parse_compress_algs(char *algs)
+{
+	unsigned int i;
+	char *s;
+	int ret;
+
+	for (s = strtok(algs, ":"), i = 0; s; s = strtok(NULL, ":"), ++i) {
+		if (i >= EROFS_MAX_COMPR_CFGS - 1) {
+			erofs_err("too many algorithm types");
+			return -EINVAL;
+		}
+
+		ret = mkfs_parse_one_compress_alg(s, &cfg.c_compr_opts[i]);
+		if (ret)
+			return ret;
 	}
 	return 0;
 }
-- 
2.43.0



More information about the Linux-erofs mailing list