[PATCH 2/2] erofs-utils: support multiple algorithms in an image

Gao Xiang hsiangkao at linux.alibaba.com
Wed Feb 22 20:30:10 AEDT 2023


Some binaries in an image may be kept for archival purposes only,
so their runtime performance is not the top priority at all.

Therefore, it'd be better to use another algorithm with a higher
compression ratio and (even) pack the whole file into the packed
inode entirely (this will be enabled later).

In order to use alternative algorithms, just specify two or more
compression configurations together, separated by ':', like below:
   -zlzma:lz4hc,12:lzma,9 -C 32768

Although mkfs still chooses the first one by default, you could try
to write a compress-hints file like below:
   4096  1 .*\.so$
   32768 2 .*\.txt$
   4096    sbin/.*$
   16384 0 .*

So that ".so" files will use "lz4hc,12" compression with 4k pclusters,
".txt" files will use "lzma,9" compression with 32k pclusters, files
in "/sbin" will use the default "lzma" compression with 4k pclusters,
and other files will use "lzma" compression with 16k pclusters.

Note that the largest pcluster size should be specified with "-C"
option, otherwise all larger pclusters will be limited.

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 include/erofs/compress_hints.h |  1 +
 include/erofs/config.h         |  6 ++-
 lib/compress.c                 | 81 ++++++++++++++++++++--------------
 lib/compress_hints.c           | 57 ++++++++++++++++--------
 lib/config.c                   |  1 -
 lib/inode.c                    |  2 +-
 mkfs/main.c                    | 43 +++++++++++++-----
 7 files changed, 124 insertions(+), 67 deletions(-)

diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
index 659c5b6..d836f22 100644
--- a/include/erofs/compress_hints.h
+++ b/include/erofs/compress_hints.h
@@ -20,6 +20,7 @@ struct erofs_compress_hints {
 
 	regex_t reg;
 	unsigned int physical_clusterblks;
+	unsigned char algorithmtype;
 };
 
 bool z_erofs_apply_compress_hints(struct erofs_inode *inode);
diff --git a/include/erofs/config.h b/include/erofs/config.h
index a93ab25..17db98c 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -32,6 +32,8 @@ enum {
 	TIMESTAMP_CLAMPING,
 };
 
+#define EROFS_MAX_COMPR_CFG		64
+
 struct erofs_configure {
 	const char *c_version;
 	int c_dbg_lvl;
@@ -57,8 +59,8 @@ struct erofs_configure {
 	char *c_src_path;
 	char *c_blobdev_path;
 	char *c_compress_hints_file;
-	char *c_compr_alg_master;
-	int c_compr_level_master;
+	char *c_compr_alg[EROFS_MAX_COMPR_CFG];
+	int c_compr_level[EROFS_MAX_COMPR_CFG];
 	char c_force_inodeversion;
 	char c_force_chunkformat;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 2987b10..012a90c 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -21,14 +21,20 @@
 #include "erofs/compress_hints.h"
 #include "erofs/fragments.h"
 
-static struct erofs_compress compresshandle;
-static unsigned int algorithmtype[2];
+/* compressing configuration specified by users */
+struct erofs_compress_cfg {
+	struct erofs_compress handle;
+	unsigned int algorithmtype;
+	bool enable;
+} erofs_ccfg[EROFS_MAX_COMPR_CFG];
 
 struct z_erofs_vle_compress_ctx {
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
 	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
 
 	struct erofs_inode *inode;
+	struct erofs_compress_cfg *ccfg;
+
 	u8 *metacur;
 	unsigned int head, tail;
 	erofs_off_t remaining;
@@ -318,7 +324,8 @@ static int z_erofs_fill_inline_data(struct erofs_inode *inode, void *data,
 	return len;
 }
 
-static void tryrecompress_trailing(void *in, unsigned int *insize,
+static void tryrecompress_trailing(struct erofs_compress *ec,
+				   void *in, unsigned int *insize,
 				   void *out, int *compressedsize)
 {
 	static char tmp[Z_EROFS_PCLUSTER_MAX_SIZE];
@@ -330,8 +337,7 @@ static void tryrecompress_trailing(void *in, unsigned int *insize,
 		return;
 
 	count = *insize;
-	ret = erofs_compress_destsize(&compresshandle,
-				      in, &count, (void *)tmp,
+	ret = erofs_compress_destsize(ec, in, &count, (void *)tmp,
 				      rounddown(ret, EROFS_BLKSIZ), false);
 	if (ret <= 0 || ret + (*insize - count) >=
 			roundup(*compressedsize, EROFS_BLKSIZ))
@@ -375,7 +381,7 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
 	struct erofs_inode *inode = ctx->inode;
 	char *const dst = dstbuf + EROFS_BLKSIZ;
-	struct erofs_compress *const h = &compresshandle;
+	struct erofs_compress *const h = &ctx->ccfg->handle;
 	unsigned int len = ctx->tail - ctx->head;
 	bool is_packed_inode = erofs_is_packed_inode(inode);
 	bool final = !ctx->remaining;
@@ -491,7 +497,7 @@ frag_packing:
 			}
 
 			if (may_inline && len == ctx->e.length)
-				tryrecompress_trailing(ctx->queue + ctx->head,
+				tryrecompress_trailing(h, ctx->queue + ctx->head,
 						&ctx->e.length, dst, &ret);
 
 			tailused = ret & (EROFS_BLKSIZ - 1);
@@ -853,8 +859,10 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 	}
 	if (cfg.c_fragments && !cfg.c_dedupe)
 		inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
-	inode->z_algorithmtype[0] = algorithmtype[0];
-	inode->z_algorithmtype[1] = algorithmtype[1];
+
+	ctx.ccfg = &erofs_ccfg[inode->z_algorithmtype[0]];
+	inode->z_algorithmtype[0] = ctx.ccfg[0].algorithmtype;
+	inode->z_algorithmtype[1] = 0;
 	inode->z_logical_clusterbits = LOG_BLOCK_SIZE;
 
 	inode->idata_size = 0;
@@ -1050,36 +1058,39 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
 
 int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 {
-	/* initialize for primary compression algorithm */
-	int ret = erofs_compressor_init(&compresshandle,
-					cfg.c_compr_alg_master);
+	int i, ret;
 
-	if (ret)
-		return ret;
+	for (i = 0; cfg.c_compr_alg[i]; ++i) {
+		ret = erofs_compressor_init(&erofs_ccfg[i].handle,
+					     cfg.c_compr_alg[i]);
+		if (ret)
+			return ret;
+
+		ret = erofs_compressor_setlevel(&erofs_ccfg[i].handle,
+						cfg.c_compr_level[i]);
+		if (ret)
+			return ret;
+
+		ret = erofs_get_compress_algorithm_id(cfg.c_compr_alg[i]);
+		if (ret < 0)
+			return ret;
+		erofs_ccfg[i].algorithmtype = ret;
+		sbi.available_compr_algs |= 1 << ret;
+		if (ret != Z_EROFS_COMPRESSION_LZ4)
+			erofs_sb_set_compr_cfgs();
+	}
 
 	/*
 	 * if primary algorithm is empty (e.g. compression off),
 	 * clear 0PADDING feature for old kernel compatibility.
 	 */
-	if (!cfg.c_compr_alg_master ||
-	    (cfg.c_legacy_compress && !strcmp(cfg.c_compr_alg_master, "lz4")))
+	if (!cfg.c_compr_alg[0] ||
+	    (cfg.c_legacy_compress && !strncmp(cfg.c_compr_alg[0], "lz4", 3)))
 		erofs_sb_clear_lz4_0padding();
 
-	if (!cfg.c_compr_alg_master)
+	if (!cfg.c_compr_alg[0])
 		return 0;
 
-	ret = erofs_compressor_setlevel(&compresshandle,
-					cfg.c_compr_level_master);
-	if (ret)
-		return ret;
-
-	/* figure out primary algorithm */
-	ret = erofs_get_compress_algorithm_id(cfg.c_compr_alg_master);
-	if (ret < 0)
-		return ret;
-
-	algorithmtype[0] = ret;	/* primary algorithm (head 0) */
-	algorithmtype[1] = 0;	/* secondary algorithm (head 1) */
 	/*
 	 * if big pcluster is enabled, an extra CBLKCNT lcluster index needs
 	 * to be loaded in order to get those compressed block counts.
@@ -1098,9 +1109,6 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 		return -EINVAL;
 	}
 
-	if (ret != Z_EROFS_COMPRESSION_LZ4)
-		erofs_sb_set_compr_cfgs();
-
 	if (erofs_sb_has_compr_cfgs()) {
 		sbi.available_compr_algs |= 1 << ret;
 		return z_erofs_build_compr_cfgs(sb_bh);
@@ -1110,5 +1118,12 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 
 int z_erofs_compress_exit(void)
 {
-	return erofs_compressor_exit(&compresshandle);
+	int i, ret;
+
+	for (i = 0; cfg.c_compr_alg[i]; ++i) {
+		ret = erofs_compressor_exit(&erofs_ccfg[i].handle);
+		if (ret)
+			return ret;
+	}
+	return 0;
 }
diff --git a/lib/compress_hints.c b/lib/compress_hints.c
index 3e5c8c8..017695f 100644
--- a/lib/compress_hints.c
+++ b/lib/compress_hints.c
@@ -20,57 +20,60 @@ static void dump_regerror(int errcode, const char *s, const regex_t *preg)
 	erofs_err("invalid regex %s (%s)\n", s, str);
 }
 
-static int erofs_insert_compress_hints(const char *s, unsigned int blks)
+/* algorithmtype is actually ccfg # here */
+static int erofs_insert_compress_hints(const char *s, unsigned int blks,
+				       unsigned int algorithmtype)
 {
-	struct erofs_compress_hints *r;
+	struct erofs_compress_hints *ch;
 	int ret;
 
-	r = malloc(sizeof(struct erofs_compress_hints));
-	if (!r)
+	ch = malloc(sizeof(struct erofs_compress_hints));
+	if (!ch)
 		return -ENOMEM;
 
-	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	ret = regcomp(&ch->reg, s, REG_EXTENDED|REG_NOSUB);
 	if (ret) {
-		dump_regerror(ret, s, &r->reg);
-		goto err_out;
+		dump_regerror(ret, s, &ch->reg);
+		free(ch);
+		return ret;
 	}
-	r->physical_clusterblks = blks;
+	ch->physical_clusterblks = blks;
+	ch->algorithmtype = algorithmtype;
 
-	list_add_tail(&r->list, &compress_hints_head);
+	list_add_tail(&ch->list, &compress_hints_head);
 	erofs_info("compress hint %s (%u) is inserted", s, blks);
 	return ret;
-
-err_out:
-	free(r);
-	return ret;
 }
 
 bool z_erofs_apply_compress_hints(struct erofs_inode *inode)
 {
 	const char *s;
 	struct erofs_compress_hints *r;
-	unsigned int pclusterblks;
+	unsigned int pclusterblks, algorithmtype;
 
 	if (inode->z_physical_clusterblks)
 		return true;
 
 	s = erofs_fspath(inode->i_srcpath);
 	pclusterblks = cfg.c_pclusterblks_def;
+	algorithmtype = 0;
 
 	list_for_each_entry(r, &compress_hints_head, list) {
 		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
 
 		if (!ret) {
 			pclusterblks = r->physical_clusterblks;
+			algorithmtype = r->algorithmtype;
 			break;
 		}
 		if (ret != REG_NOMATCH)
 			dump_regerror(ret, s, &r->reg);
 	}
 	inode->z_physical_clusterblks = pclusterblks;
+	inode->z_algorithmtype[0] = algorithmtype;
 
 	/* pclusterblks is 0 means this file shouldn't be compressed */
-	return !!pclusterblks;
+	return pclusterblks != 0;
 }
 
 void erofs_cleanup_compress_hints(void)
@@ -98,20 +101,38 @@ int erofs_load_compress_hints(void)
 		return -errno;
 
 	for (line = 1; fgets(buf, sizeof(buf), f); ++line) {
-		unsigned int pclustersize;
-		char *pattern;
+		unsigned int pclustersize, ccfg;
+		char *alg, *pattern;
 
 		if (*buf == '#' || *buf == '\n')
 			continue;
 
 		pclustersize = atoi(strtok(buf, "\t "));
+		alg = strtok(NULL, "\n\t ");
 		pattern = strtok(NULL, "\n");
+		if (!pattern) {
+			pattern = alg;
+			alg = NULL;
+		}
 		if (!pattern || *pattern == '\0') {
 			erofs_err("cannot find a match pattern at line %u",
 				  line);
 			ret = -EINVAL;
 			goto out;
 		}
+		if (!alg || *alg == '\0') {
+			ccfg = 0;
+		} else {
+			ccfg = atoi(alg);
+			if (ccfg >= EROFS_MAX_COMPR_CFG ||
+			    !cfg.c_compr_alg[ccfg]) {
+				erofs_err("invalid compressing configuration \"%s\" at line %u",
+					  alg, line);
+				ret = -EINVAL;
+				goto out;
+			}
+		}
+
 		if (pclustersize % EROFS_BLKSIZ) {
 			erofs_warn("invalid physical clustersize %u, "
 				   "use default pclusterblks %u",
@@ -119,7 +140,7 @@ int erofs_load_compress_hints(void)
 			continue;
 		}
 		erofs_insert_compress_hints(pattern,
-					    pclustersize / EROFS_BLKSIZ);
+					    pclustersize / EROFS_BLKSIZ, ccfg);
 
 		if (pclustersize > max_pclustersize)
 			max_pclustersize = pclustersize;
diff --git a/lib/config.c b/lib/config.c
index 20200be..a3235c8 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -25,7 +25,6 @@ void erofs_init_configure(void)
 	cfg.c_version  = PACKAGE_VERSION;
 	cfg.c_dry_run  = false;
 	cfg.c_ignore_mtime = false;
-	cfg.c_compr_level_master = -1;
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
diff --git a/lib/inode.c b/lib/inode.c
index 6fd28a2..8364451 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -424,7 +424,7 @@ static int erofs_write_file(struct erofs_inode *inode)
 		return erofs_blob_write_chunked_file(inode);
 	}
 
-	if (cfg.c_compr_alg_master && erofs_file_is_compressible(inode)) {
+	if (cfg.c_compr_alg[0] && erofs_file_is_compressible(inode)) {
 		fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
 		if (fd < 0)
 			return -errno;
diff --git a/mkfs/main.c b/mkfs/main.c
index 979b763..bb3628f 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -232,6 +232,31 @@ static int parse_extended_opts(const char *opts)
 	return 0;
 }
 
+static int mkfs_parse_compress_algs(char *algs)
+{
+	unsigned int i;
+	char *s;
+
+	for (s = strtok(algs, ":"), i = 0; s; s = strtok(NULL, ":"), ++i) {
+		const char *lv;
+
+		if (i >= EROFS_MAX_COMPR_CFG - 1) {
+			erofs_err("too many algorithm types");
+			return -EINVAL;
+		}
+
+		lv = strchr(s, ',');
+		if (lv) {
+			cfg.c_compr_level[i] = atoi(lv + 1);
+			cfg.c_compr_alg[i] = strndup(s, lv - s);
+		} else {
+			cfg.c_compr_level[i] = -1;
+			cfg.c_compr_alg[i] = strdup(s);
+		}
+	}
+	return 0;
+}
+
 static int mkfs_parse_options_cfg(int argc, char *argv[])
 {
 	char *endptr;
@@ -243,19 +268,13 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 		switch (opt) {
 		case 'z':
 			if (!optarg) {
-				cfg.c_compr_alg_master = "(default)";
+				cfg.c_compr_alg[0] = "(default)";
+				cfg.c_compr_level[0] = -1;
 				break;
 			}
-			/* get specified compression level */
-			for (i = 0; optarg[i] != '\0'; ++i) {
-				if (optarg[i] == ',') {
-					cfg.c_compr_level_master =
-						atoi(optarg + i + 1);
-					optarg[i] = '\0';
-					break;
-				}
-			}
-			cfg.c_compr_alg_master = strndup(optarg, i);
+			i = mkfs_parse_compress_algs(optarg);
+			if (i)
+				return i;
 			break;
 
 		case 'd':
@@ -749,7 +768,7 @@ int main(int argc, char **argv)
 	}
 
 	if (cfg.c_dedupe) {
-		if (!cfg.c_compr_alg_master) {
+		if (!cfg.c_compr_alg[0]) {
 			erofs_err("Compression is not enabled.  Turn on chunk-based data deduplication instead.");
 			cfg.c_chunkbits = LOG_BLOCK_SIZE;
 		} else {
-- 
2.36.1



More information about the Linux-erofs mailing list