[PATCH v3] erofs-utils: support per-inode compress pcluster

Gao Xiang xiang at kernel.org
Mon Sep 6 03:59:30 AEST 2021


On Wed, Aug 25, 2021 at 11:35:23AM +0800, Huang Jianan via Linux-erofs wrote:
> Add an option to configure per-inode compression strategy. Each line
> of the file should be in the following form:
> 
> <Regular-expression> <pcluster-in-bytes>
> 
> When pcluster is 0, it means that the file shouldn't be compressed.
> 
> Signed-off-by: Huang Jianan <huangjianan at oppo.com>
> ---
> changes since v2:
>  - change compress_rule to compress_hints for better understanding. (Gao Xiang)
>  - use default "-C" value when input physical clustersize is invalid. (Gao Xiang)
>  - change the val of WITH_ANDROID option to a separated patch. (Gao Xiang)
> 
> changes since v1:
>  - rename c_pclusterblks to c_physical_clusterblks and place it in union.
>  - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster() since
>    it's per-inode compression strategy.
> 

Hi Jianan,

I sorted out a version this weekend (e.g. it bumps up the max pclustersize
if needed and updates the man page). Would you mind confirming it on your
side as well?

Also, it'd be better to add some functional test cases to cover this
if you have extra time.

Thanks,
Gao Xiang

>From 0e675d679c8732bd39699e5a9b1b6d9d742fb728 Mon Sep 17 00:00:00 2001
From: Huang Jianan <huangjianan at oppo.com>
Date: Wed, 25 Aug 2021 11:35:23 +0800
Subject: [PATCH v4] erofs-utils: support per-inode compress pcluster

Add an option to configure per-inode compression strategy.

Each line of the file should be in the following form:
<pcluster-in-bytes> <match-pattern>

Note that <match-pattern> can be a regular expression.
If the pcluster size is 0, it means that files shouldn't be compressed.

Signed-off-by: Huang Jianan <huangjianan at oppo.com>
Signed-off-by: Gao Xiang <xiang at kernel.org>
---
 include/erofs/compress_hints.h |  23 ++++++
 include/erofs/config.h         |   3 +-
 include/erofs/internal.h       |   1 +
 lib/Makefile.am                |   5 +-
 lib/compress.c                 |  24 ++++---
 lib/compress_hints.c           | 128 +++++++++++++++++++++++++++++++++
 lib/config.c                   |   3 +-
 lib/inode.c                    |   4 ++
 man/mkfs.erofs.1               |  11 +++
 mkfs/main.c                    |  19 ++++-
 10 files changed, 205 insertions(+), 16 deletions(-)
 create mode 100644 include/erofs/compress_hints.h
 create mode 100644 lib/compress_hints.c

diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
new file mode 100644
index 000000000000..a5772c72b1c4
--- /dev/null
+++ b/include/erofs/compress_hints.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan at oppo.com>
+ */
+#ifndef __EROFS_COMPRESS_HINTS_H
+#define __EROFS_COMPRESS_HINTS_H
+
+#include "erofs/internal.h"
+#include <sys/types.h>
+#include <regex.h>
+
+struct erofs_compress_hints {
+	struct list_head list;
+
+	regex_t reg;
+	unsigned int physical_clusterblks;
+};
+
+bool z_erofs_apply_compress_hints(struct erofs_inode *inode);
+void erofs_cleanup_compress_hints(void);
+int erofs_load_compress_hints(void);
+#endif
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 95fc23e79e26..d5d9b5a751c0 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -50,13 +50,14 @@ struct erofs_configure {
 	/* related arguments for mkfs.erofs */
 	char *c_img_path;
 	char *c_src_path;
+	char *c_compress_hints_file;
 	char *c_compr_alg_master;
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
 	int c_inline_xattr_tolerance;
 
-	u32 c_physical_clusterblks;
+	u32 c_pclusterblks_max, c_pclusterblks_def;
 	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index b939155ac951..f5eacea5d4d7 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -164,6 +164,7 @@ struct erofs_inode {
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
 			uint8_t  z_logical_clusterbits;
+			uint8_t  z_physical_clusterblks;
 		};
 	};
 #ifdef WITH_ANDROID
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b5127c439e43..5a33e297c194 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -16,11 +16,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/trace.h \
-      $(top_srcdir)/include/erofs/xattr.h
+      $(top_srcdir)/include/erofs/xattr.h \
+      $(top_srcdir)/include/erofs/compress_hints.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_hints.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index 6df30ea564a3..2806a7edfcb6 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -17,6 +17,7 @@
 #include "erofs/compress.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 static struct erofs_compress compresshandle;
 static int compressionlevel;
@@ -89,8 +90,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 	}
 
 	do {
-		/* XXX: big pcluster feature should be per-inode */
-		if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
+		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
 			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
 					Z_EROFS_VLE_DI_D0_CBLKCNT);
@@ -149,14 +149,18 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 	return count;
 }
 
-/* TODO: apply per-(sub)file strategies here */
 static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 {
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
-		return 1 + rand() % cfg.c_physical_clusterblks;
+		return 1 + rand() % cfg.c_pclusterblks_max;
 #endif
-	return cfg.c_physical_clusterblks;
+	if (cfg.c_compress_hints_file) {
+		z_erofs_apply_compress_hints(inode);
+		DBG_BUGON(!inode->z_physical_clusterblks);
+		return inode->z_physical_clusterblks;
+	}
+	return cfg.c_pclusterblks_def;
 }
 
 static int vle_compress_one(struct erofs_inode *inode,
@@ -493,7 +497,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
 	}
 
-	if (cfg.c_physical_clusterblks > 1) {
+	if (erofs_sb_has_big_pcluster()) {
 		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
@@ -603,7 +607,7 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
 			.lz4 = {
 				.max_distance =
 					cpu_to_le16(sbi.lz4_max_distance),
-				.max_pclusterblks = cfg.c_physical_clusterblks,
+				.max_pclusterblks = cfg.c_pclusterblks_max,
 			}
 		};
 
@@ -655,11 +659,11 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 	 * if big pcluster is enabled, an extra CBLKCNT lcluster index needs
 	 * to be loaded in order to get those compressed block counts.
 	 */
-	if (cfg.c_physical_clusterblks > 1) {
-		if (cfg.c_physical_clusterblks >
+	if (cfg.c_pclusterblks_max > 1) {
+		if (cfg.c_pclusterblks_max >
 		    Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
 			erofs_err("unsupported clusterblks %u (too large)",
-				  cfg.c_physical_clusterblks);
+				  cfg.c_pclusterblks_max);
 			return -EINVAL;
 		}
 		erofs_sb_set_big_pcluster();
diff --git a/lib/compress_hints.c b/lib/compress_hints.c
new file mode 100644
index 000000000000..81a8ac9ef04f
--- /dev/null
+++ b/lib/compress_hints.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * Created by Huang Jianan <huangjianan at oppo.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "erofs/err.h"
+#include "erofs/list.h"
+#include "erofs/print.h"
+#include "erofs/compress_hints.h"
+
+static LIST_HEAD(compress_hints_head);
+
+static void dump_regerror(int errcode, const char *s, const regex_t *preg)
+{
+	char str[512];
+
+	regerror(errcode, preg, str, sizeof(str));
+	erofs_err("invalid regex %s (%s)\n", s, str);
+}
+
+static int erofs_insert_compress_hints(const char *s, unsigned int blks)
+{
+	struct erofs_compress_hints *r;
+	int ret;
+
+	r = malloc(sizeof(struct erofs_compress_hints));
+	if (!r)
+		return -ENOMEM;
+
+	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	if (ret) {
+		dump_regerror(ret, s, &r->reg);
+		goto err_out;
+	}
+	r->physical_clusterblks = blks;
+
+	list_add_tail(&r->list, &compress_hints_head);
+	erofs_info("compress hint %s (%u) is inserted", s, blks);
+	return ret;
+
+err_out:
+	free(r);
+	return ret;
+}
+
+bool z_erofs_apply_compress_hints(struct erofs_inode *inode)
+{
+	const char *s;
+	struct erofs_compress_hints *r;
+	unsigned int pclusterblks;
+
+	if (inode->z_physical_clusterblks)
+		return true;
+
+	s = erofs_fspath(inode->i_srcpath);
+	pclusterblks = cfg.c_pclusterblks_def;
+
+	list_for_each_entry(r, &compress_hints_head, list) {
+		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
+
+		if (!ret) {
+			pclusterblks = r->physical_clusterblks;
+			break;
+		}
+		if (ret != REG_NOMATCH)
+			dump_regerror(ret, s, &r->reg);
+	}
+	inode->z_physical_clusterblks = pclusterblks;
+
+	/* pclusterblks is 0 means this file shouldn't be compressed */
+	return !!pclusterblks;
+}
+
+void erofs_cleanup_compress_hints(void)
+{
+	struct erofs_compress_hints *r, *n;
+
+	list_for_each_entry_safe(r, n, &compress_hints_head, list) {
+		list_del(&r->list);
+		free(r);
+	}
+}
+
+int erofs_load_compress_hints(void)
+{
+	char buf[PATH_MAX + 100];
+	FILE *f;
+	unsigned int line, max_pclustersize = 0;
+
+	if (!cfg.c_compress_hints_file)
+		return 0;
+
+	f = fopen(cfg.c_compress_hints_file, "r");
+	if (!f)
+		return -errno;
+
+	for (line = 1; fgets(buf, sizeof(buf), f); ++line) {
+		unsigned int pclustersize;
+		char *pattern;
+
+		pclustersize = atoi(strtok(buf, "\t "));
+		pattern = strtok(NULL, "\n");
+		if (!pattern || *pattern == '\0') {
+			erofs_err("cannot find a match pattern at line %u",
+				  line);
+			return -EINVAL;
+		}
+		if (pclustersize % EROFS_BLKSIZ) {
+			erofs_warn("invalid physical clustersize %u, "
+				   "use default pclusterblks %u",
+				   pclustersize, cfg.c_pclusterblks_def);
+			continue;
+		}
+		erofs_insert_compress_hints(pattern,
+					    pclustersize / EROFS_BLKSIZ);
+
+		if (pclustersize > max_pclustersize)
+			max_pclustersize = pclustersize;
+	}
+	fclose(f);
+	if (cfg.c_pclusterblks_max * EROFS_BLKSIZ < max_pclustersize) {
+		cfg.c_pclusterblks_max = max_pclustersize / EROFS_BLKSIZ;
+		erofs_warn("update max pclusterblks to %u", cfg.c_pclusterblks_max);
+	}
+	return 0;
+}
diff --git a/lib/config.c b/lib/config.c
index 4757dbbfdd4c..cc2aa7d0112f 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -24,7 +24,8 @@ void erofs_init_configure(void)
 	cfg.c_unix_timestamp = -1;
 	cfg.c_uid = -1;
 	cfg.c_gid = -1;
-	cfg.c_physical_clusterblks = 1;
+	cfg.c_pclusterblks_max = 1;
+	cfg.c_pclusterblks_def = 1;
 	cfg.c_max_decompressed_extent_bytes = -1;
 }
 
diff --git a/lib/inode.c b/lib/inode.c
index 6024e8c593dd..5bad75e1c550 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -23,6 +23,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -327,6 +328,8 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 /* rules to decide whether a file could be compressed or not */
 static bool erofs_file_is_compressible(struct erofs_inode *inode)
 {
+	if (cfg.c_compress_hints_file)
+		return z_erofs_apply_compress_hints(inode);
 	return true;
 }
 
@@ -849,6 +852,7 @@ static struct erofs_inode *erofs_new_inode(void)
 
 	inode->bh = inode->bh_inline = inode->bh_data = NULL;
 	inode->idata = NULL;
+	inode->z_physical_clusterblks = 0;
 	return inode;
 }
 
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index bc0a10be72a1..1446cb56db30 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -88,6 +88,17 @@ Display this help and exit.
 .TP
 .B \-\-max-extent-bytes #
 Specify maximum decompressed extent size # in bytes.
+.TP
+.BI "\-\-compress-hints " file
+If the optional
+.BI "\-\-compress-hints " file
+argument is given,
+.B mkfs.erofs
+uses it to apply the per-file compression strategy. Each line is defined by
+tokens separated by spaces in the following form:
+.RS 1.2i
+<pcluster-in-bytes> <match-pattern>
+.RE
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee at gmail.com>,
 Miao Xie <miaoxie at huawei.com> and Gao Xiang <xiang at kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index 40ca94ff8db9..addefcefea38 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -21,6 +21,7 @@
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
+#include "erofs/compress_hints.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -42,6 +43,7 @@ static struct option long_options[] = {
 	{"random-pclusterblks", no_argument, NULL, 8},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
+	{"compress-hints", required_argument, NULL, 10},
 #ifdef WITH_ANDROID
 	{"mount-point", required_argument, NULL, 512},
 	{"product-out", required_argument, NULL, 513},
@@ -87,6 +89,7 @@ static void usage(void)
 	      " --all-root            make all files owned by root\n"
 	      " --help                display this help and exit\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+	      " --compress-hints=X    specify a file to configure per-file compression strategy\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
 #endif
@@ -95,7 +98,7 @@ static void usage(void)
 	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
-	      " --block-list-file=X    X=block_list file\n"
+	      " --block-list-file=X   X=block_list file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -286,6 +289,9 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 10:
+			cfg.c_compress_hints_file = optarg;
+			break;
 #ifdef WITH_ANDROID
 		case 512:
 			cfg.mount_point = optarg;
@@ -312,7 +318,8 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 					  optarg);
 				return -EINVAL;
 			}
-			cfg.c_physical_clusterblks = i / EROFS_BLKSIZ;
+			cfg.c_pclusterblks_max = i / EROFS_BLKSIZ;
+			cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
 			break;
 
 		case 1:
@@ -578,6 +585,13 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	err = erofs_load_compress_hints();
+	if (err) {
+		erofs_err("Failed to load compress hints %s",
+			  cfg.c_compress_hints_file);
+		goto exit;
+	}
+
 	err = z_erofs_compress_init(sb_bh);
 	if (err) {
 		erofs_err("Failed to initialize compressor: %s",
@@ -626,6 +640,7 @@ exit:
 	erofs_droid_blocklist_fclose();
 #endif
 	dev_close();
+	erofs_cleanup_compress_hints();
 	erofs_cleanup_exclude_rules();
 	erofs_exit_configure();
 
-- 
2.20.1



More information about the Linux-erofs mailing list