[PATCH 2/3] erofs-utils: mkfs: allow to specify dictionary size for compression algorithms

Gao Xiang hsiangkao at linux.alibaba.com
Wed Jan 17 21:56:22 AEDT 2024



On 2024/1/7 02:11, Yifan Zhao wrote:
> Currently, the dictionary size for compression algorithms is fixed. This
> patch allows to specify it with -zX,dictsize=<dictsize> options in
> mkfs command line.

..patch allows to specify different ones with new -zX,dictsize=<dictsize>
options.

> 
> This patch also changes the way to specify compression levels. Now, the
> compression level is specified with -zX,level=<level> options and could
> be specified together with dictsize. The old -zX,<level> way is still

The old -zX,<level> form is still...

> supported for compatibility.
> 
> Suggested-by: Gao Xiang <hsiangkao at linux.alibaba.com>
> Signed-off-by: Yifan Zhao <zhaoyifan at sjtu.edu.cn>
> ---
>   include/erofs/config.h   |  10 ++--
>   include/erofs_fs.h       |   4 ++
>   lib/compress.c           |  45 +++++++++++++----
>   lib/compress_hints.c     |   2 +-
>   lib/compressor.c         |  17 ++++++-
>   lib/compressor.h         |   6 ++-
>   lib/compressor_deflate.c |  26 ++++++++--
>   lib/compressor_liblzma.c |  33 +++++++-----
>   lib/config.c             |   4 +-
>   lib/inode.c              |   2 +-
>   lib/kite_deflate.c       |   3 +-
>   mkfs/main.c              | 106 ++++++++++++++++++++++++++++++++-------
>   12 files changed, 203 insertions(+), 55 deletions(-)
> 
> diff --git a/include/erofs/config.h b/include/erofs/config.h
> index 89fe522..7dbfd1e 100644
> --- a/include/erofs/config.h
> +++ b/include/erofs/config.h
> @@ -34,6 +34,12 @@ enum {
>   
>   #define EROFS_MAX_COMPR_CFGS		64
>   
> +struct erofs_compr_cfg {

better to rename as `struct erofs_compr_opts`

> +	char *alg;
> +	int level;
> +	u32 dict_size;
> +};
> +
>   struct erofs_configure {
>   	const char *c_version;
>   	int c_dbg_lvl;
> @@ -64,8 +70,7 @@ struct erofs_configure {
>   	char *c_src_path;
>   	char *c_blobdev_path;
>   	char *c_compress_hints_file;
> -	char *c_compr_alg[EROFS_MAX_COMPR_CFGS];
> -	int c_compr_level[EROFS_MAX_COMPR_CFGS];
> +	struct erofs_compr_cfg c_compr_opts[EROFS_MAX_COMPR_CFGS];
>   	char c_force_inodeversion;
>   	char c_force_chunkformat;
>   	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
> @@ -73,7 +78,6 @@ struct erofs_configure {
>   
>   	u32 c_pclusterblks_max, c_pclusterblks_def, c_pclusterblks_packed;
>   	u32 c_max_decompressed_extent_bytes;
> -	u32 c_dict_size;
>   	u64 c_unix_timestamp;
>   	u32 c_uid, c_gid;
>   	const char *mount_point;
> diff --git a/include/erofs_fs.h b/include/erofs_fs.h
> index eba6c26..72f0ca6 100644
> --- a/include/erofs_fs.h
> +++ b/include/erofs_fs.h
> @@ -322,6 +322,7 @@ struct z_erofs_lzma_cfgs {
>   	u8 reserved[8];
>   } __packed;
>   
> +#define Z_EROFS_LZMA_DEFAULT_DICT_SIZE	(8 * Z_EROFS_PCLUSTER_MAX_SIZE)

No need to introduce this since it's not a part of
on-disk format.

It's enough to just update .default_dictsize.

>   #define Z_EROFS_LZMA_MAX_DICT_SIZE	(8 * Z_EROFS_PCLUSTER_MAX_SIZE)
>   
>   /* 6 bytes (+ length field = 8 bytes) */
> @@ -330,6 +331,9 @@ struct z_erofs_deflate_cfgs {
>   	u8 reserved[5];
>   } __packed;
>   
> +#define Z_EROFS_DEFLATE_DEFULT_DICT_SIZE	(1U << 15)

Same here.

> +#define Z_EROFS_DEFLATE_MAX_DICT_SIZE		(1U << 15)

According to DEFLATE specification, it is a hardcoded number
too, so avoid introducing another
Z_EROFS_DEFLATE_MAX_DICT_SIZE here.

> +
>   /*
>    * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
>    *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
> diff --git a/lib/compress.c b/lib/compress.c
> index 216aeb4..6f074fd 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -1116,10 +1116,12 @@ err_free_meta:
>   }
>   
>   static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
> -				    struct erofs_buffer_head *sb_bh)
> +				    struct erofs_buffer_head *sb_bh,
> +				    struct erofs_compress_cfg *ccfg)
>   {
>   	struct erofs_buffer_head *bh = sb_bh;
> -	int ret = 0;
> +	int i, ret = 0;
> +	u32 dict_size = 0;
>   
>   	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) {
>   		struct {
> @@ -1146,13 +1148,22 @@ static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
>   	}
>   #ifdef HAVE_LIBLZMA
>   	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) {
> +		for (i = 0; i < EROFS_MAX_COMPR_CFGS; i++) {
> +			if (ccfg[i].enable &&
> +			    ccfg[i].algorithmtype == Z_EROFS_COMPRESSION_LZMA) {
> +				dict_size = ccfg[i].handle.dict_size;
> +				break;
> +			}
> +		}

That is... too hacky to treat like this.

Perhaps just
u32 max_dict_size[Z_EROFS_COMPRESSION_MAX];

for (i = 0; i < EROFS_MAX_COMPR_CFGS; i++) {
	if (ccfg[i].enable &&
	    max_dict_size[ccfg[i].algorithmtype] < ccfg[i].dict_size)
		max_dict_size[ccfg[i].algorithmtype] = ccfg[i].dict_size;
}

I don't know though, since different ccfg[i] can have
different levels.
But we need the max dict size though.

> +		DBG_BUGON(!dict_size);
> +
>   		struct {
>   			__le16 size;
>   			struct z_erofs_lzma_cfgs lzma;
>   		} __packed lzmaalg = {
>   			.size = cpu_to_le16(sizeof(struct z_erofs_lzma_cfgs)),
>   			.lzma = {
> -				.dict_size = cpu_to_le32(cfg.c_dict_size),
> +				.dict_size = cpu_to_le32(dict_size),
>   			}
>   		};
>   
> @@ -1168,6 +1179,15 @@ static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
>   	}
>   #endif
>   	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_DEFLATE)) {
> +		for (i = 0; i < EROFS_MAX_COMPR_CFGS; i++) {
> +			if (ccfg[i].enable &&
> +			    ccfg[i].algorithmtype == Z_EROFS_COMPRESSION_DEFLATE) {
> +				dict_size = ccfg[i].handle.dict_size;
> +				break;
> +			}
> +		}

Same here.

> +		DBG_BUGON(!dict_size);
> +
>   		struct {
>   			__le16 size;
>   			struct z_erofs_deflate_cfgs z;
> @@ -1175,7 +1195,7 @@ static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
>   			.size = cpu_to_le16(sizeof(struct z_erofs_deflate_cfgs)),
>   			.z = {
>   				.windowbits =
> -					cpu_to_le32(ilog2(cfg.c_dict_size)),
> +					cpu_to_le32(ilog2(dict_size)),
>   			}
>   		};
>   
> @@ -1196,10 +1216,12 @@ int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *s
>   {
>   	int i, ret;
>   
> -	for (i = 0; cfg.c_compr_alg[i]; ++i) {
> +	for (i = 0; cfg.c_compr_opts[i].alg; ++i) {
>   		struct erofs_compress *c = &erofs_ccfg[i].handle;
>   
> -		ret = erofs_compressor_init(sbi, c, cfg.c_compr_alg[i], cfg.c_compr_level[i]);
> +		ret = erofs_compressor_init(sbi, c, cfg.c_compr_opts[i].alg,
> +					    cfg.c_compr_opts[i].level,
> +					    cfg.c_compr_opts[i].dict_size);
>   		if (ret)
>   			return ret;
>   
> @@ -1215,11 +1237,12 @@ int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *s
>   	 * if primary algorithm is empty (e.g. compression off),
>   	 * clear 0PADDING feature for old kernel compatibility.
>   	 */
> -	if (!cfg.c_compr_alg[0] ||
> -	    (cfg.c_legacy_compress && !strncmp(cfg.c_compr_alg[0], "lz4", 3)))
> +	if (!cfg.c_compr_opts[0].alg ||
> +	    (cfg.c_legacy_compress &&
> +	     !strncmp(cfg.c_compr_opts[0].alg, "lz4", 3)))
>   		erofs_sb_clear_lz4_0padding(sbi);
>   
> -	if (!cfg.c_compr_alg[0])
> +	if (!cfg.c_compr_opts[0].alg)
>   		return 0;
>   
>   	/*
> @@ -1241,7 +1264,7 @@ int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *s
>   	}
>   
>   	if (erofs_sb_has_compr_cfgs(sbi))
> -		return z_erofs_build_compr_cfgs(sbi, sb_bh);
> +		return z_erofs_build_compr_cfgs(sbi, sb_bh, erofs_ccfg);
>   	return 0;
>   }
>   
> @@ -1249,7 +1272,7 @@ int z_erofs_compress_exit(void)
>   {
>   	int i, ret;
>   
> -	for (i = 0; cfg.c_compr_alg[i]; ++i) {
> +	for (i = 0; cfg.c_compr_opts[i].alg; ++i) {
>   		ret = erofs_compressor_exit(&erofs_ccfg[i].handle);
>   		if (ret)
>   			return ret;
> diff --git a/lib/compress_hints.c b/lib/compress_hints.c
> index afc9f8f..8b78f80 100644
> --- a/lib/compress_hints.c
> +++ b/lib/compress_hints.c
> @@ -125,7 +125,7 @@ int erofs_load_compress_hints(struct erofs_sb_info *sbi)
>   		} else {
>   			ccfg = atoi(alg);
>   			if (ccfg >= EROFS_MAX_COMPR_CFGS ||
> -			    !cfg.c_compr_alg[ccfg]) {
> +			    !cfg.c_compr_opts[ccfg].alg) {
>   				erofs_err("invalid compressing configuration \"%s\" at line %u",
>   					  alg, line);
>   				ret = -EINVAL;
> diff --git a/lib/compressor.c b/lib/compressor.c
> index 7ec51c2..5061417 100644
> --- a/lib/compressor.c
> +++ b/lib/compressor.c
> @@ -78,7 +78,7 @@ int erofs_compress_destsize(const struct erofs_compress *c,
>   }
>   
>   int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c,
> -			  char *alg_name, int compression_level)
> +			  char *alg_name, int compression_level, u32 dict_size)
>   {
>   	int ret, i;
>   
> @@ -106,6 +106,8 @@ int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c,
>   
>   		if (erofs_algs[i].c->setlevel) {
>   			ret = erofs_algs[i].c->setlevel(c, compression_level);
> +			if (ret)
> +				return ret;
>   		} else {
>   			if (compression_level >= 0)
>   				erofs_warn(
> @@ -113,6 +115,19 @@ int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c,
>   					compression_level, alg_name);
>   			c->compression_level = 0;
>   		}
> +
> +		if (erofs_algs[i].c->setdictsize) {
> +			ret = erofs_algs[i].c->setdictsize(c, dict_size);
> +			if (ret)
> +				return ret;
> +		} else {
> +			if (dict_size)
> +				erofs_warn(

Avoid a seperate line if it's just the message exceeds 80-char, I mean
				erofs_warn("specifing a dictionary size %u for %s is unsupported, ignored"
					   dict_size, alg_name);
				...

> +					"dictionary size %u is ignored for %s",
> +					dict_size, alg_name);
> +			c->dict_size = 0;
> +		}
> +
>   		if (!ret) {
>   			c->alg = &erofs_algs[i];
>   			return 0;
> diff --git a/lib/compressor.h b/lib/compressor.h
> index ec5485d..d8ccf2e 100644
> --- a/lib/compressor.h
> +++ b/lib/compressor.h
> @@ -14,10 +14,13 @@ struct erofs_compress;
>   struct erofs_compressor {
>   	int default_level;
>   	int best_level;
> +	u32 default_dictsize;
> +	u32 max_dictsize;
>   
>   	int (*init)(struct erofs_compress *c);
>   	int (*exit)(struct erofs_compress *c);
>   	int (*setlevel)(struct erofs_compress *c, int compression_level);
> +	int (*setdictsize)(struct erofs_compress *c, u32 dict_size);
>   
>   	int (*compress_destsize)(const struct erofs_compress *c,
>   				 const void *src, unsigned int *srcsize,
> @@ -39,6 +42,7 @@ struct erofs_compress {
>   
>   	unsigned int compress_threshold;
>   	unsigned int compression_level;
> +	unsigned int dict_size;
>   
>   	void *private_data;
>   };
> @@ -56,7 +60,7 @@ int erofs_compress_destsize(const struct erofs_compress *c,
>   			    void *dst, unsigned int dstsize);
>   
>   int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c,
> -			  char *alg_name, int compression_level);
> +			  char *alg_name, int compression_level, u32 dict_size);
>   int erofs_compressor_exit(struct erofs_compress *c);
>   
>   #endif
> diff --git a/lib/compressor_deflate.c b/lib/compressor_deflate.c
> index 4e5902e..c95e53e 100644
> --- a/lib/compressor_deflate.c
> +++ b/lib/compressor_deflate.c
> @@ -46,6 +46,16 @@ static int compressor_deflate_init(struct erofs_compress *c)
>   
>   static int erofs_compressor_deflate_setlevel(struct erofs_compress *c,
>   					     int compression_level)
> +{
> +	if (compression_level < 0)
> +		compression_level = erofs_compressor_deflate.default_level;
> +
> +	c->compression_level = compression_level;
> +	return 0;
> +}
> +
> +static int erofs_compressor_deflate_setdictsize(struct erofs_compress *c,
> +						u32 dict_size)
>   {
>   	void *s;
>   
> @@ -54,23 +64,31 @@ static int erofs_compressor_deflate_setlevel(struct erofs_compress *c,
>   		c->private_data = NULL;
>   	}
>   
> -	if (compression_level < 0)
> -		compression_level = erofs_compressor_deflate.default_level;
> +	if (dict_size > erofs_compressor_deflate.max_dictsize) {
> +		erofs_err("dict size %u is too large", dict_size);
> +		return -EINVAL;
> +	}
> +
> +	if (dict_size == 0)
> +		dict_size = erofs_compressor_deflate.default_dictsize;
>   
> -	s = kite_deflate_init(compression_level, cfg.c_dict_size);
> +	s = kite_deflate_init(c->compression_level, dict_size);
>   	if (IS_ERR(s))
>   		return PTR_ERR(s);
>   
>   	c->private_data = s;
> -	c->compression_level = compression_level;
> +	c->dict_size = dict_size;
>   	return 0;
>   }
>   
>   const struct erofs_compressor erofs_compressor_deflate = {
>   	.default_level = 1,
>   	.best_level = 9,
> +	.default_dictsize = Z_EROFS_DEFLATE_DEFULT_DICT_SIZE,
> +	.max_dictsize = Z_EROFS_DEFLATE_MAX_DICT_SIZE,
>   	.init = compressor_deflate_init,
>   	.exit = compressor_deflate_exit,
>   	.setlevel = erofs_compressor_deflate_setlevel,
> +	.setdictsize = erofs_compressor_deflate_setdictsize,
>   	.compress_destsize = deflate_compress_destsize,
>   };
> diff --git a/lib/compressor_liblzma.c b/lib/compressor_liblzma.c
> index 0ed6f23..5c7f830 100644
> --- a/lib/compressor_liblzma.c
> +++ b/lib/compressor_liblzma.c
> @@ -68,22 +68,28 @@ static int erofs_compressor_liblzma_setlevel(struct erofs_compress *c,
>   	if (lzma_lzma_preset(&ctx->opt, preset))
>   		return -EINVAL;
>   
> -	/* XXX: temporary hack */
> -	if (cfg.c_dict_size) {
> -		if (cfg.c_dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE) {
> -			erofs_err("dict size %u is too large", cfg.c_dict_size);
> -			return -EINVAL;
> -		}
> -		ctx->opt.dict_size = cfg.c_dict_size;
> -	} else {
> -		if (ctx->opt.dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE)
> -			ctx->opt.dict_size = Z_EROFS_LZMA_MAX_DICT_SIZE;
> -		cfg.c_dict_size = ctx->opt.dict_size;
> -	}
>   	c->compression_level = compression_level;
>   	return 0;
>   }
>   
> +static int erofs_compressor_liblzma_setdictsize(struct erofs_compress *c,
> +						u32 dict_size)
> +{
> +	struct erofs_liblzma_context *ctx = c->private_data;
> +
> +	if (dict_size > erofs_compressor_lzma.max_dictsize) {
> +		erofs_err("dict size %u is too large", dict_size);
> +		return -EINVAL;
> +	}
> +
> +	if (dict_size == 0)
> +		dict_size = erofs_compressor_lzma.default_dictsize;
> +
> +	ctx->opt.dict_size = dict_size;
> +	c->dict_size = dict_size;
> +	return 0;
> +}
> +
>   static int erofs_compressor_liblzma_init(struct erofs_compress *c)
>   {
>   	struct erofs_liblzma_context *ctx;
> @@ -101,9 +107,12 @@ static int erofs_compressor_liblzma_init(struct erofs_compress *c)
>   const struct erofs_compressor erofs_compressor_lzma = {
>   	.default_level = LZMA_PRESET_DEFAULT,
>   	.best_level = 109,
> +	.default_dictsize = Z_EROFS_LZMA_DEFAULT_DICT_SIZE,
> +	.max_dictsize = Z_EROFS_LZMA_MAX_DICT_SIZE,
>   	.init = erofs_compressor_liblzma_init,
>   	.exit = erofs_compressor_liblzma_exit,
>   	.setlevel = erofs_compressor_liblzma_setlevel,
> +	.setdictsize = erofs_compressor_liblzma_setdictsize,
>   	.compress_destsize = erofs_liblzma_compress_destsize,
>   };
>   #endif
> diff --git a/lib/config.c b/lib/config.c
> index 0cddc95..d3e8e53 100644
> --- a/lib/config.c
> +++ b/lib/config.c
> @@ -58,8 +58,8 @@ void erofs_exit_configure(void)
>   		free(cfg.c_img_path);
>   	if (cfg.c_src_path)
>   		free(cfg.c_src_path);
> -	for (i = 0; i < EROFS_MAX_COMPR_CFGS && cfg.c_compr_alg[i]; i++)
> -		free(cfg.c_compr_alg[i]);
> +	for (i = 0; i < EROFS_MAX_COMPR_CFGS && cfg.c_compr_opts[i].alg; i++)
> +		free(cfg.c_compr_opts[i].alg);
>   }
>   
>   static unsigned int fullpath_prefix;	/* root directory prefix length */
> diff --git a/lib/inode.c b/lib/inode.c
> index bcdb4b8..c6424c0 100644
> --- a/lib/inode.c
> +++ b/lib/inode.c
> @@ -492,7 +492,7 @@ int erofs_write_file(struct erofs_inode *inode, int fd, u64 fpos)
>   		return erofs_blob_write_chunked_file(inode, fd, fpos);
>   	}
>   
> -	if (cfg.c_compr_alg[0] && erofs_file_is_compressible(inode)) {
> +	if (cfg.c_compr_opts[0].alg && erofs_file_is_compressible(inode)) {
>   		ret = erofs_write_compressed_file(inode, fd);
>   		if (!ret || ret != -ENOSPC)
>   			return ret;
> diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c
> index 8667954..2357f76 100644
> --- a/lib/kite_deflate.c
> +++ b/lib/kite_deflate.c
> @@ -5,6 +5,7 @@
>    * Copyright (C) 2023, Alibaba Cloud
>    * Copyright (C) 2023, Gao Xiang <xiang at kernel.org>
>    */
> +#include "erofs/internal.h"
>   #include "erofs/defs.h"
>   #include "erofs/print.h"
>   #include <errno.h>
> @@ -22,7 +23,7 @@ unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
>   #define kite_dbg(x, ...)
>   #endif
>   
> -#define kHistorySize32		(1U << 15)
> +#define kHistorySize32		Z_EROFS_DEFLATE_DEFULT_DICT_SIZE

No, please don't touch this file (kite_deflate.c).

>   
>   #define kNumLenSymbols32	256
>   #define kNumLenSymbolsMax	kNumLenSymbols32
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 0517849..702eb4b 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -5,6 +5,7 @@
>    * Created by Li Guifu <bluce.liguifu at huawei.com>
>    */
>   #define _GNU_SOURCE
> +#include <ctype.h>
>   #include <time.h>
>   #include <sys/time.h>
>   #include <stdlib.h>
> @@ -108,24 +109,29 @@ static void usage(int argc, char **argv)
>   		" -b#                   set block size to # (# = page size by default)\n"
>   		" -d<0-9>               set output verbosity; 0=quiet, 9=verbose (default=%i)\n"
>   		" -x#                   set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
> -		" -zX[,Y][:...]         X=compressor (Y=compression level, optional)\n"
> -		"                       alternative compressors can be separated by colons(:)\n"
> -		"                       supported compressors and their level ranges are:\n",
> +		" -zX[,level=Y]         X=compressor (Y=compression level, Z=dictionary size, optional)\n"
> +		"    [,dictsize=Z]      alternative compressors can be separated by colons(:)\n"
> +		"    [:...]             supported compressors and their option ranges are:\n",
>   		argv[0], EROFS_WARN);
>   	while ((s = z_erofs_list_available_compressors(&i)) != NULL) {
> -		printf("                           %s", s->name);
> +		char * const spaces = "                         ";
> +
> +		printf("%s%s\n", spaces, s->name);
>   		if (s->c->setlevel) {
>   			if (!strcmp(s->name, "lzma"))
>   				/* A little kludge to show the range as disjointed
>   				 * "0-9,100-109" instead of a continuous "0-109", and to
>   				 * state what those two subranges respectively mean.  */
> -				printf("[<0-9,100-109>]\t0-9=normal, 100-109=extreme (default=%i)",
> -				       s->c->default_level);
> +				printf("%s  [,level=<0-9,100-109>]\t0-9=normal, 100-109=extreme (default=%i)\n",
> +				       spaces, s->c->default_level);
>   			else
> -				printf("[,<0-%i>]\t(default=%i)",
> -				       s->c->best_level, s->c->default_level);
> +				printf("%s  [,level=<0-%i>]\t\t(default=%i)\n",
> +				       spaces, s->c->best_level, s->c->default_level);
> +		}
> +		if (s->c->setdictsize) {
> +			printf("%s  [,dictsize=<dictsize>]\t(default=%u, max=%u)\n",
> +			       spaces, s->c->default_dictsize, s->c->max_dictsize);
>   		}
> -		putchar('\n');
>   	}
>   	printf(
>   		" -C#                   specify the size of compress physical cluster in bytes\n"
> @@ -310,20 +316,84 @@ static int mkfs_parse_compress_algs(char *algs)
>   	char *s;
>   
>   	for (s = strtok(algs, ":"), i = 0; s; s = strtok(NULL, ":"), ++i) {
> -		const char *lv;
> +		char *p, *q, *opt, *endptr;
> +		struct erofs_compr_cfg *ccfg;
>   
>   		if (i >= EROFS_MAX_COMPR_CFGS - 1) {
>   			erofs_err("too many algorithm types");
>   			return -EINVAL;
>   		}
>   
> -		lv = strchr(s, ',');
> -		if (lv) {
> -			cfg.c_compr_level[i] = atoi(lv + 1);
> -			cfg.c_compr_alg[i] = strndup(s, lv - s);

Let's introduce another helper function to deal
with the following logic.

Thanks,
Gao Xiang

> +		ccfg = cfg.c_compr_opts + i;
> +
> +		ccfg->level = -1;
> +		ccfg->dict_size = 0;
> +
> +		p = strchr(s, ',');
> +		if (p) {
> +			ccfg->alg = strndup(s, p - s);
> +
> +			/* backward compatibility */
> +			if (isdigit(*(p + 1))) {
> +				ccfg->level = strtol(p + 1, &endptr, 10);
> +				if (*endptr && *endptr != ',') {
> +					erofs_err(
> +						"invalid compression level %s",
> +						p + 1);
> +					return -EINVAL;
> +				}
> +				continue;
> +			}
>   		} else {
> -			cfg.c_compr_level[i] = -1;
> -			cfg.c_compr_alg[i] = strdup(s);
> +			ccfg->alg = strdup(s);
> +			continue;
> +		}
> +
> +		opt = p + 1;
> +		while (opt) {
> +			q = strchr(opt, ',');
> +			if (q)
> +				*q = '\0';
> +
> +			if ((p = strstr(opt, "level="))) {
> +				p += strlen("level=");
> +				ccfg->level = strtol(p, &endptr, 10);
> +				if (*endptr && *endptr != ',') {
> +					erofs_err(
> +						"invalid compression level %s",
> +						p);
> +					return -EINVAL;
> +				}
> +			} else if ((p = strstr(opt, "dictsize="))) {
> +				p += strlen("dictsize=");
> +				ccfg->dict_size = strtoul(p, &endptr, 10);
> +				switch (*endptr) {
> +				case ',':
> +				case '\0':
> +					break;
> +				case 'K':
> +				case 'k':
> +					ccfg->dict_size <<= 10;
> +					break;
> +				case 'M':
> +				case 'm':
> +					ccfg->dict_size <<= 20;
> +					break;
> +				default:
> +					erofs_err(
> +						"invalid compression dictsize %s",
> +						p);
> +					return -EINVAL;
> +				}
> +			} else {
> +				erofs_err("invalid compression option %s", opt);
> +				return -EINVAL;
> +			}
> +
> +			if (q)
> +				opt = q + 1;
> +			else
> +				opt = NULL;
>   		}
>   	}
>   	return 0;
> @@ -692,7 +762,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>   		cfg.c_showprogress = false;
>   	}
>   
> -	if (cfg.c_compr_alg[0] && erofs_blksiz(&sbi) != getpagesize())
> +	if (cfg.c_compr_opts[0].alg && erofs_blksiz(&sbi) != getpagesize())
>   		erofs_warn("Please note that subpage blocksize with compression isn't yet supported in kernel. "
>   			   "This compressed image will only work with bs = ps = %u bytes",
>   			   erofs_blksiz(&sbi));
> @@ -1119,7 +1189,7 @@ int main(int argc, char **argv)
>   	}
>   
>   	if (cfg.c_dedupe) {
> -		if (!cfg.c_compr_alg[0]) {
> +		if (!cfg.c_compr_opts[0].alg) {
>   			erofs_err("Compression is not enabled.  Turn on chunk-based data deduplication instead.");
>   			cfg.c_chunkbits = sbi.blkszbits;
>   		} else {


More information about the Linux-erofs mailing list