[PATCH v3] erofs-utils: support per-inode compress pcluster
Huang Jianan
huangjianan at oppo.com
Mon Sep 6 19:38:43 AEST 2021
On 2021/9/6 1:59, Gao Xiang wrote:
> On Wed, Aug 25, 2021 at 11:35:23AM +0800, Huang Jianan via Linux-erofs wrote:
>> Add an option to configure per-inode compression strategy. Each line
>> of the file should be in the following form:
>>
>> <Regular-expression> <pcluster-in-bytes>
>>
>> When pcluster is 0, it means that the file shouldn't be compressed.
>>
>> Signed-off-by: Huang Jianan <huangjianan at oppo.com>
>> ---
>> changes since v2:
>> - change compress_rule to compress_hints for better understanding. (Gao Xiang)
>> - use default "-C" value when input physical clustersize is invalid. (Gao Xiang)
>> - move the WITH_ANDROID option value change into a separate patch. (Gao Xiang)
>>
>> changes since v1:
>> - rename c_pclusterblks to c_physical_clusterblks and place it in union.
>> - change cfg.c_physical_clusterblks > 1 to erofs_sb_has_big_pcluster() since
>> it's per-inode compression strategy.
>>
> Hi Jianan,
>
> I sorted out a version this weekend (e.g. bump up max pclustersize if
> needed and update the man page), would you mind confirming on your side
> as well?
Hi Xiang,
Thanks for your modifications; they look good to me.
> Also, it'd be better to add some functional test cases to cover this
> if you have extra time:
Ok, should I use the experimental-tests branch now?
Thanks,
Jianan
> Thanks,
> Gao Xiang
>
> From 0e675d679c8732bd39699e5a9b1b6d9d742fb728 Mon Sep 17 00:00:00 2001
> From: Huang Jianan <huangjianan at oppo.com>
> Date: Wed, 25 Aug 2021 11:35:23 +0800
> Subject: [PATCH v4] erofs-utils: support per-inode compress pcluster
>
> Add an option to configure per-inode compression strategy.
>
> Each line of the file should be in the following form:
> <pcluster-in-bytes> <match-pattern>
>
> Note that <match-pattern> can be a regular expression.
> If pcluster size is 0, it means that files shouldn't be compressed.
>
> Signed-off-by: Huang Jianan <huangjianan at oppo.com>
> Signed-off-by: Gao Xiang <xiang at kernel.org>
> ---
> include/erofs/compress_hints.h | 23 ++++++
> include/erofs/config.h | 3 +-
> include/erofs/internal.h | 1 +
> lib/Makefile.am | 5 +-
> lib/compress.c | 24 ++++---
> lib/compress_hints.c | 128 +++++++++++++++++++++++++++++++++
> lib/config.c | 3 +-
> lib/inode.c | 4 ++
> man/mkfs.erofs.1 | 11 +++
> mkfs/main.c | 19 ++++-
> 10 files changed, 205 insertions(+), 16 deletions(-)
> create mode 100644 include/erofs/compress_hints.h
> create mode 100644 lib/compress_hints.c
>
> diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
> new file mode 100644
> index 000000000000..a5772c72b1c4
> --- /dev/null
> +++ b/include/erofs/compress_hints.h
> @@ -0,0 +1,23 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*
> + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
> + * Created by Huang Jianan <huangjianan at oppo.com>
> + */
> +#ifndef __EROFS_COMPRESS_HINTS_H
> +#define __EROFS_COMPRESS_HINTS_H
> +
> +#include "erofs/internal.h"
> +#include <sys/types.h>
> +#include <regex.h>
> +
> +struct erofs_compress_hints {
> + struct list_head list;
> +
> + regex_t reg;
> + unsigned int physical_clusterblks;
> +};
> +
> +bool z_erofs_apply_compress_hints(struct erofs_inode *inode);
> +void erofs_cleanup_compress_hints(void);
> +int erofs_load_compress_hints(void);
> +#endif
> diff --git a/include/erofs/config.h b/include/erofs/config.h
> index 95fc23e79e26..d5d9b5a751c0 100644
> --- a/include/erofs/config.h
> +++ b/include/erofs/config.h
> @@ -50,13 +50,14 @@ struct erofs_configure {
> /* related arguments for mkfs.erofs */
> char *c_img_path;
> char *c_src_path;
> + char *c_compress_hints_file;
> char *c_compr_alg_master;
> int c_compr_level_master;
> int c_force_inodeversion;
> /* < 0, xattr disabled and INT_MAX, always use inline xattrs */
> int c_inline_xattr_tolerance;
>
> - u32 c_physical_clusterblks;
> + u32 c_pclusterblks_max, c_pclusterblks_def;
> u32 c_max_decompressed_extent_bytes;
> u64 c_unix_timestamp;
> u32 c_uid, c_gid;
> diff --git a/include/erofs/internal.h b/include/erofs/internal.h
> index b939155ac951..f5eacea5d4d7 100644
> --- a/include/erofs/internal.h
> +++ b/include/erofs/internal.h
> @@ -164,6 +164,7 @@ struct erofs_inode {
> uint16_t z_advise;
> uint8_t z_algorithmtype[2];
> uint8_t z_logical_clusterbits;
> + uint8_t z_physical_clusterblks;
> };
> };
> #ifdef WITH_ANDROID
> diff --git a/lib/Makefile.am b/lib/Makefile.am
> index b5127c439e43..5a33e297c194 100644
> --- a/lib/Makefile.am
> +++ b/lib/Makefile.am
> @@ -16,11 +16,12 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
> $(top_srcdir)/include/erofs/list.h \
> $(top_srcdir)/include/erofs/print.h \
> $(top_srcdir)/include/erofs/trace.h \
> - $(top_srcdir)/include/erofs/xattr.h
> + $(top_srcdir)/include/erofs/xattr.h \
> + $(top_srcdir)/include/erofs/compress_hints.h
>
> noinst_HEADERS += compressor.h
> liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
> - namei.c data.c compress.c compressor.c zmap.c decompress.c
> + namei.c data.c compress.c compressor.c zmap.c decompress.c compress_hints.c
> liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
> if ENABLE_LZ4
> liberofs_la_CFLAGS += ${LZ4_CFLAGS}
> diff --git a/lib/compress.c b/lib/compress.c
> index 6df30ea564a3..2806a7edfcb6 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -17,6 +17,7 @@
> #include "erofs/compress.h"
> #include "compressor.h"
> #include "erofs/block_list.h"
> +#include "erofs/compress_hints.h"
>
> static struct erofs_compress compresshandle;
> static int compressionlevel;
> @@ -89,8 +90,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
> }
>
> do {
> - /* XXX: big pcluster feature should be per-inode */
> - if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
> + if (d0 == 1 && erofs_sb_has_big_pcluster()) {
> type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
> di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
> Z_EROFS_VLE_DI_D0_CBLKCNT);
> @@ -149,14 +149,18 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
> return count;
> }
>
> -/* TODO: apply per-(sub)file strategies here */
> static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
> {
> #ifndef NDEBUG
> if (cfg.c_random_pclusterblks)
> - return 1 + rand() % cfg.c_physical_clusterblks;
> + return 1 + rand() % cfg.c_pclusterblks_max;
> #endif
> - return cfg.c_physical_clusterblks;
> + if (cfg.c_compress_hints_file) {
> + z_erofs_apply_compress_hints(inode);
> + DBG_BUGON(!inode->z_physical_clusterblks);
> + return inode->z_physical_clusterblks;
> + }
> + return cfg.c_pclusterblks_def;
> }
>
> static int vle_compress_one(struct erofs_inode *inode,
> @@ -493,7 +497,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
> inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
> }
>
> - if (cfg.c_physical_clusterblks > 1) {
> + if (erofs_sb_has_big_pcluster()) {
> inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
> if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
> inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
> @@ -603,7 +607,7 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
> .lz4 = {
> .max_distance =
> cpu_to_le16(sbi.lz4_max_distance),
> - .max_pclusterblks = cfg.c_physical_clusterblks,
> + .max_pclusterblks = cfg.c_pclusterblks_max,
> }
> };
>
> @@ -655,11 +659,11 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
> * if big pcluster is enabled, an extra CBLKCNT lcluster index needs
> * to be loaded in order to get those compressed block counts.
> */
> - if (cfg.c_physical_clusterblks > 1) {
> - if (cfg.c_physical_clusterblks >
> + if (cfg.c_pclusterblks_max > 1) {
> + if (cfg.c_pclusterblks_max >
> Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
> erofs_err("unsupported clusterblks %u (too large)",
> - cfg.c_physical_clusterblks);
> + cfg.c_pclusterblks_max);
> return -EINVAL;
> }
> erofs_sb_set_big_pcluster();
> diff --git a/lib/compress_hints.c b/lib/compress_hints.c
> new file mode 100644
> index 000000000000..81a8ac9ef04f
> --- /dev/null
> +++ b/lib/compress_hints.c
> @@ -0,0 +1,128 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
> + * Created by Huang Jianan <huangjianan at oppo.com>
> + */
> +#include <string.h>
> +#include <stdlib.h>
> +#include "erofs/err.h"
> +#include "erofs/list.h"
> +#include "erofs/print.h"
> +#include "erofs/compress_hints.h"
> +
> +static LIST_HEAD(compress_hints_head);
> +
> +static void dump_regerror(int errcode, const char *s, const regex_t *preg)
> +{
> + char str[512];
> +
> + regerror(errcode, preg, str, sizeof(str));
> + erofs_err("invalid regex %s (%s)\n", s, str);
> +}
> +
> +static int erofs_insert_compress_hints(const char *s, unsigned int blks)
> +{
> + struct erofs_compress_hints *r;
> + int ret;
> +
> + r = malloc(sizeof(struct erofs_compress_hints));
> + if (!r)
> + return -ENOMEM;
> +
> + ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
> + if (ret) {
> + dump_regerror(ret, s, &r->reg);
> + goto err_out;
> + }
> + r->physical_clusterblks = blks;
> +
> + list_add_tail(&r->list, &compress_hints_head);
> + erofs_info("compress hint %s (%u) is inserted", s, blks);
> + return ret;
> +
> +err_out:
> + free(r);
> + return ret;
> +}
> +
> +bool z_erofs_apply_compress_hints(struct erofs_inode *inode)
> +{
> + const char *s;
> + struct erofs_compress_hints *r;
> + unsigned int pclusterblks;
> +
> + if (inode->z_physical_clusterblks)
> + return true;
> +
> + s = erofs_fspath(inode->i_srcpath);
> + pclusterblks = cfg.c_pclusterblks_def;
> +
> + list_for_each_entry(r, &compress_hints_head, list) {
> + int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
> +
> + if (!ret) {
> + pclusterblks = r->physical_clusterblks;
> + break;
> + }
> + if (ret != REG_NOMATCH)
> + dump_regerror(ret, s, &r->reg);
> + }
> + inode->z_physical_clusterblks = pclusterblks;
> +
> + /* pclusterblks is 0 means this file shouldn't be compressed */
> + return !!pclusterblks;
> +}
> +
> +void erofs_cleanup_compress_hints(void)
> +{
> + struct erofs_compress_hints *r, *n;
> +
> + list_for_each_entry_safe(r, n, &compress_hints_head, list) {
> + list_del(&r->list);
> + free(r);
> + }
> +}
> +
> +int erofs_load_compress_hints(void)
> +{
> + char buf[PATH_MAX + 100];
> + FILE *f;
> + unsigned int line, max_pclustersize = 0;
> +
> + if (!cfg.c_compress_hints_file)
> + return 0;
> +
> + f = fopen(cfg.c_compress_hints_file, "r");
> + if (!f)
> + return -errno;
> +
> + for (line = 1; fgets(buf, sizeof(buf), f); ++line) {
> + unsigned int pclustersize;
> + char *pattern;
> +
> + pclustersize = atoi(strtok(buf, "\t "));
> + pattern = strtok(NULL, "\n");
> + if (!pattern || *pattern == '\0') {
> + erofs_err("cannot find a match pattern at line %u",
> + line);
> + return -EINVAL;
> + }
> + if (pclustersize % EROFS_BLKSIZ) {
> + erofs_warn("invalid physical clustersize %u, "
> + "use default pclusterblks %u",
> + pclustersize, cfg.c_pclusterblks_def);
> + continue;
> + }
> + erofs_insert_compress_hints(pattern,
> + pclustersize / EROFS_BLKSIZ);
> +
> + if (pclustersize > max_pclustersize)
> + max_pclustersize = pclustersize;
> + }
> + fclose(f);
> + if (cfg.c_pclusterblks_max * EROFS_BLKSIZ < max_pclustersize) {
> + cfg.c_pclusterblks_max = max_pclustersize / EROFS_BLKSIZ;
> + erofs_warn("update max pclusterblks to %u", cfg.c_pclusterblks_max);
> + }
> + return 0;
> +}
> diff --git a/lib/config.c b/lib/config.c
> index 4757dbbfdd4c..cc2aa7d0112f 100644
> --- a/lib/config.c
> +++ b/lib/config.c
> @@ -24,7 +24,8 @@ void erofs_init_configure(void)
> cfg.c_unix_timestamp = -1;
> cfg.c_uid = -1;
> cfg.c_gid = -1;
> - cfg.c_physical_clusterblks = 1;
> + cfg.c_pclusterblks_max = 1;
> + cfg.c_pclusterblks_def = 1;
> cfg.c_max_decompressed_extent_bytes = -1;
> }
>
> diff --git a/lib/inode.c b/lib/inode.c
> index 6024e8c593dd..5bad75e1c550 100644
> --- a/lib/inode.c
> +++ b/lib/inode.c
> @@ -23,6 +23,7 @@
> #include "erofs/xattr.h"
> #include "erofs/exclude.h"
> #include "erofs/block_list.h"
> +#include "erofs/compress_hints.h"
>
> #define S_SHIFT 12
> static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
> @@ -327,6 +328,8 @@ static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
> /* rules to decide whether a file could be compressed or not */
> static bool erofs_file_is_compressible(struct erofs_inode *inode)
> {
> + if (cfg.c_compress_hints_file)
> + return z_erofs_apply_compress_hints(inode);
> return true;
> }
>
> @@ -849,6 +852,7 @@ static struct erofs_inode *erofs_new_inode(void)
>
> inode->bh = inode->bh_inline = inode->bh_data = NULL;
> inode->idata = NULL;
> + inode->z_physical_clusterblks = 0;
> return inode;
> }
>
> diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
> index bc0a10be72a1..1446cb56db30 100644
> --- a/man/mkfs.erofs.1
> +++ b/man/mkfs.erofs.1
> @@ -88,6 +88,17 @@ Display this help and exit.
> .TP
> .B \-\-max-extent-bytes #
> Specify maximum decompressed extent size # in bytes.
> +.TP
> +.BI "\-\-compress-hints " file
> +If the optional
> +.BI "\-\-compress-hints " file
> +argument is given,
> +.B mkfs.erofs
> +uses it to apply the per-file compression strategy. Each line is defined by
> +tokens separated by spaces in the following form:
> +.RS 1.2i
> +<pcluster-in-bytes> <match-pattern>
> +.RE
> .SH AUTHOR
> This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee at gmail.com>,
> Miao Xie <miaoxie at huawei.com> and Gao Xiang <xiang at kernel.org> with
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 40ca94ff8db9..addefcefea38 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -21,6 +21,7 @@
> #include "erofs/xattr.h"
> #include "erofs/exclude.h"
> #include "erofs/block_list.h"
> +#include "erofs/compress_hints.h"
>
> #ifdef HAVE_LIBUUID
> #include <uuid.h>
> @@ -42,6 +43,7 @@ static struct option long_options[] = {
> {"random-pclusterblks", no_argument, NULL, 8},
> #endif
> {"max-extent-bytes", required_argument, NULL, 9},
> + {"compress-hints", required_argument, NULL, 10},
> #ifdef WITH_ANDROID
> {"mount-point", required_argument, NULL, 512},
> {"product-out", required_argument, NULL, 513},
> @@ -87,6 +89,7 @@ static void usage(void)
> " --all-root make all files owned by root\n"
> " --help display this help and exit\n"
> " --max-extent-bytes=# set maximum decompressed extent size # in bytes\n"
> + " --compress-hints=X specify a file to configure per-file compression strategy\n"
> #ifndef NDEBUG
> " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
> #endif
> @@ -95,7 +98,7 @@ static void usage(void)
> " --mount-point=X X=prefix of target fs path (default: /)\n"
> " --product-out=X X=product_out directory\n"
> " --fs-config-file=X X=fs_config file\n"
> - " --block-list-file=X X=block_list file\n"
> + " --block-list-file=X X=block_list file\n"
> #endif
> "\nAvailable compressors are: ", stderr);
> print_available_compressors(stderr, ", ");
> @@ -286,6 +289,9 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
> return -EINVAL;
> }
> break;
> + case 10:
> + cfg.c_compress_hints_file = optarg;
> + break;
> #ifdef WITH_ANDROID
> case 512:
> cfg.mount_point = optarg;
> @@ -312,7 +318,8 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
> optarg);
> return -EINVAL;
> }
> - cfg.c_physical_clusterblks = i / EROFS_BLKSIZ;
> + cfg.c_pclusterblks_max = i / EROFS_BLKSIZ;
> + cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
> break;
>
> case 1:
> @@ -578,6 +585,13 @@ int main(int argc, char **argv)
> goto exit;
> }
>
> + err = erofs_load_compress_hints();
> + if (err) {
> + erofs_err("Failed to load compress hints %s",
> + cfg.c_compress_hints_file);
> + goto exit;
> + }
> +
> err = z_erofs_compress_init(sb_bh);
> if (err) {
> erofs_err("Failed to initialize compressor: %s",
> @@ -626,6 +640,7 @@ exit:
> erofs_droid_blocklist_fclose();
> #endif
> dev_close();
> + erofs_cleanup_compress_hints();
> erofs_cleanup_exclude_rules();
> erofs_exit_configure();
>
More information about the Linux-erofs
mailing list