[PATCH v4] erofs-utils: introduce segment compression

Gao Xiang hsiangkao at gmx.com
Fri Jun 19 09:05:06 AEST 2020


Hi Guifu,

On Fri, Jun 19, 2020 at 12:26:57AM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression, which separates files into several logical
> units (segments), each of which is compressed independently.
>
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
>
> Signed-off-by: Li Guifu <bluce.lee at aliyun.com>
> ---
> Changes since v3, suggested by Gao Xiang <hsiangkao at gmx.com>:
>  - add 'S#' parameter to customize the compression segment size
>  - move limit logic to size decrease
>
>  include/erofs/config.h |  1 +
>  lib/compress.c         |  8 ++++++--
>  lib/config.c           |  1 +
>  mkfs/main.c            | 16 +++++++++++++++-

Just a quick response for now; I will test it later.

First, you might need to update the manpage as well.

>  4 files changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/include/erofs/config.h b/include/erofs/config.h
> index 2f09749..9125c1e 100644
> --- a/include/erofs/config.h
> +++ b/include/erofs/config.h
> @@ -36,6 +36,7 @@ struct erofs_configure {
>  	char *c_src_path;
>  	char *c_compr_alg_master;
>  	int c_compr_level_master;

u64 c_compr_segsize;

> +	unsigned int c_compr_seg_size;	/* max segment compress size */
>  	int c_force_inodeversion;
>  	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
>  	int c_inline_xattr_tolerance;
> diff --git a/lib/compress.c b/lib/compress.c
> index 6cc68ed..eb024aa 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
>
>  	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
>  	u16 clusterofs;
> +	unsigned int comprlimits;

How about the name "segavail"?

u64 segavail;


>  };
>
>  #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
> @@ -163,8 +164,7 @@ static int vle_compress_one(struct erofs_inode *inode,
>  				goto nocompression;
>  			break;
>  		}

I think we might add "if (segavail < EROFS_BLKSIZ) goto nocompression;"
here, since it seems better.

> -
> -		count = len;
> +		count = min(len, ctx->comprlimits);
>  		ret = erofs_compress_destsize(h, compressionlevel,
>  					      ctx->queue + ctx->head,
>  					      &count, dst, EROFS_BLKSIZ);
> @@ -202,6 +202,9 @@ nocompression:
>
>  		++ctx->blkaddr;
>  		len -= count;
> +		ctx->comprlimits -= count;
> +		if (!ctx->comprlimits)
> +			ctx->comprlimits = cfg.c_compr_seg_size;
>
>  		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
>  			const unsigned int qh_aligned =
> @@ -422,6 +425,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>  	ctx.head = ctx.tail = 0;
>  	ctx.clusterofs = 0;
>  	remaining = inode->i_size;
> +	ctx.comprlimits = cfg.c_compr_seg_size;
>
>  	while (remaining) {
>  		const u64 readcount = min_t(u64, remaining,
> diff --git a/lib/config.c b/lib/config.c
> index da0c260..1c39403 100644
> --- a/lib/config.c
> +++ b/lib/config.c
> @@ -23,6 +23,7 @@ void erofs_init_configure(void)
>  	cfg.c_force_inodeversion = 0;
>  	cfg.c_inline_xattr_tolerance = 2;
>  	cfg.c_unix_timestamp = -1;
> +	cfg.c_compr_seg_size = 1024U * EROFS_BLKSIZ;

We don't need that limit by default, so
cfg.c_compr_segsize = -1ULL;

>  }
>
>  void erofs_show_config(void)
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 94bf1e6..036d818 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -61,6 +61,7 @@ static void usage(void)
>  	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
>  	      " -EX[,...]         X=extended options\n"
>  	      " -T#               set a fixed UNIX timestamp # to all files\n"
> +	      " -S#               set the max input stream size # to one compress\n"
>  	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
>  	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
>  #ifdef HAVE_LIBSELINUX
> @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>  	char *endptr;
>  	int opt, i;
>
> -	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
> +	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
>  				 long_options, NULL)) != -1) {
>  		switch (opt) {
>  		case 'z':
> @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>  				return -EINVAL;
>  			}
>  			break;
> +		case 'S':
> +			cfg.c_compr_seg_size = strtol(optarg, &endptr, 0);
> +			if (*endptr != '\0') {
> +				erofs_err("invalid compress segment size %s",
> +					  optarg);
> +				return -EINVAL;
> +			}
> +			if (cfg.c_compr_seg_size % EROFS_BLKSIZ != 0) {
> +				erofs_err("segment size:%u should be align to %u",
> +					  cfg.c_compr_seg_size, EROFS_BLKSIZ);
> +				return -EINVAL;
> +			}

if (!cfg.c_compr_segsize)
	cfg.c_compr_segsize = -1ULL;
else if (cfg.c_compr_segsize % EROFS_BLKSIZ) {
	erofs_err("segmentsize %u should be aligned with blocksize %u",
		  cfg.c_compr_segsize, EROFS_BLKSIZ);
	return -EINVAL;
}

Thanks,
Gao Xiang



More information about the Linux-erofs mailing list