[PATCH v11] erofs-utils: introduce segment compression

Gao Xiang hsiangkao at aol.com
Thu Oct 8 02:04:22 AEDT 2020


Guifu, some progress on this as well?

On Mon, Jul 06, 2020 at 02:20:50AM +0800, Gao Xiang via Linux-erofs wrote:
> Hi Guifu,
> 
> On Sun, Jul 05, 2020 at 04:32:30PM +0800, Li Guifu via Linux-erofs wrote:
> > Support segment compression which separates files into several logical
> > units (segments) and each segment is compressed independently.
> > 
> > Advantages:
> >  - more friendly for data differencing;
> >  - it can also be used for parallel compression in the same file later.
> > 
> > Signed-off-by: Li Guifu <bluce.lee at aliyun.com>
> > ---
> > Changes from v10
> > - change variable uncomprofs to clusterofs, which is only used
> >   when writing an uncompressed block
> 
> Could you please test the following patch if you're available?
> Does it work?
> 
> From 0436ed04717853351e13d68db6f170f60e25fc12 Mon Sep 17 00:00:00 2001
> From: Li Guifu <bluce.lee at aliyun.com>
> Date: Sun, 5 Jul 2020 16:32:30 +0800
> Subject: [PATCH v12] erofs-utils: introduce segment compression
> 
> Support segment compression which separates files into several logical
> units (segments) and each segment is compressed independently.
> 
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
> 
> Signed-off-by: Li Guifu <bluce.lee at aliyun.com>
> Signed-off-by: Gao Xiang <hsiangkao at redhat.com>
> ---
>  include/erofs/config.h |  2 ++
>  lib/compress.c         | 38 ++++++++++++++++++++++++++++++--------
>  lib/config.c           |  1 +
>  man/mkfs.erofs.1       |  4 ++++
>  mkfs/main.c            | 12 +++++++++++-
>  5 files changed, 48 insertions(+), 9 deletions(-)
> 
> diff --git a/include/erofs/config.h b/include/erofs/config.h
> index 2f09749..b149633 100644
> --- a/include/erofs/config.h
> +++ b/include/erofs/config.h
> @@ -35,6 +35,8 @@ struct erofs_configure {
>  	char *c_img_path;
>  	char *c_src_path;
>  	char *c_compr_alg_master;
> +	u64 c_compr_segsize;
> +
>  	int c_compr_level_master;
>  	int c_force_inodeversion;
>  	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
> diff --git a/lib/compress.c b/lib/compress.c
> index 6cc68ed..4216fa7 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
>  
>  	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
>  	u16 clusterofs;
> +	u64 segavail;
>  };
>  
>  #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
> @@ -124,24 +125,33 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
>  
>  	/* reset clusterofs to 0 if permitted */
>  	if (!erofs_sb_has_lz4_0padding() &&
> -	    ctx->head >= ctx->clusterofs) {
> +	    ctx->clusterofs && ctx->head >= ctx->clusterofs) {
>  		ctx->head -= ctx->clusterofs;
>  		*len += ctx->clusterofs;
> +
> +		ctx->segavail += ctx->clusterofs;
> +		DBG_BUGON(ctx->segavail > cfg.c_compr_segsize);
> +
> +		DBG_BUGON(ctx->segavail < EROFS_BLKSIZ);
> +		/* so only *len will be the candidate instead of segavail */
> +		count = *len;
> +
>  		ctx->clusterofs = 0;
> +	} else {
> +		count = min_t(u64, ctx->segavail, *len);
>  	}
>  
> -	/* write uncompressed data */
> -	count = min(EROFS_BLKSIZ, *len);
> +	if (count > EROFS_BLKSIZ)
> +		count = EROFS_BLKSIZ;
>  
> +	/* fill zero if the uncompressed block isn't full */
>  	memcpy(dst, ctx->queue + ctx->head, count);
>  	memset(dst + count, 0, EROFS_BLKSIZ - count);
>  
>  	erofs_dbg("Writing %u uncompressed data to block %u",
>  		  count, ctx->blkaddr);
>  	ret = blk_write(dst, ctx->blkaddr, 1);
> -	if (ret)
> -		return ret;
> -	return count;
> +	return ret ? ret : count;
>  }
>  
>  static int vle_compress_one(struct erofs_inode *inode,
> @@ -158,13 +168,20 @@ static int vle_compress_one(struct erofs_inode *inode,
>  	while (len) {
>  		bool raw;
>  
> +		if (ctx->segavail <= EROFS_BLKSIZ) {
> +			if (len < ctx->segavail && !final)
> +				break;
> +
> +			goto nocompression;
> +		}
> +
>  		if (len <= EROFS_BLKSIZ) {
>  			if (final)
>  				goto nocompression;
>  			break;
>  		}
>  
> -		count = len;
> +		count = min_t(u64, len, ctx->segavail);
>  		ret = erofs_compress_destsize(h, compressionlevel,
>  					      ctx->queue + ctx->head,
>  					      &count, dst, EROFS_BLKSIZ);
> @@ -195,8 +212,12 @@ nocompression:
>  				return ret;
>  			raw = false;
>  		}
> -
>  		ctx->head += count;
> +		DBG_BUGON(ctx->segavail < count);
> +		ctx->segavail -= count;
> +		if (!ctx->segavail)
> +			ctx->segavail = cfg.c_compr_segsize;
> +
>  		/* write compression indexes for this blkaddr */
>  		vle_write_indexes(ctx, count, raw);
>  
> @@ -421,6 +442,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>  	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
>  	ctx.head = ctx.tail = 0;
>  	ctx.clusterofs = 0;
> +	ctx.segavail = cfg.c_compr_segsize;
>  	remaining = inode->i_size;
>  
>  	while (remaining) {
> diff --git a/lib/config.c b/lib/config.c
> index da0c260..9d4bea1 100644
> --- a/lib/config.c
> +++ b/lib/config.c
> @@ -23,6 +23,7 @@ void erofs_init_configure(void)
>  	cfg.c_force_inodeversion = 0;
>  	cfg.c_inline_xattr_tolerance = 2;
>  	cfg.c_unix_timestamp = -1;
> +	cfg.c_compr_segsize = -1;
>  }
>  
>  void erofs_show_config(void)
> diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
> index 891c5a8..8d0fc10 100644
> --- a/man/mkfs.erofs.1
> +++ b/man/mkfs.erofs.1
> @@ -48,6 +48,10 @@ Forcely generate compact inodes (32-byte inodes) to output.
>  Forcely generate extended inodes (64-byte inodes) to output.
>  .RE
>  .TP
> +.BI "\-S " #
> +Set maximum blocks for each individual compress segment.
> +The default is 0 (disabled).
> +.TP
>  .BI "\-T " #
>  Set all files to the given UNIX timestamp. Reproducible builds requires setting
>  all to a specific one.
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 94bf1e6..0265ae9 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -61,6 +61,7 @@ static void usage(void)
>  	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
>  	      " -EX[,...]         X=extended options\n"
>  	      " -T#               set a fixed UNIX timestamp # to all files\n"
> +	      " -S#               Set maximum blocks for each individual compress segment\n"
>  	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
>  	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
>  #ifdef HAVE_LIBSELINUX
> @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>  	char *endptr;
>  	int opt, i;
>  
> -	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
> +	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
>  				 long_options, NULL)) != -1) {
>  		switch (opt) {
>  		case 'z':
> @@ -188,6 +189,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>  				return -EINVAL;
>  			}
>  			break;
> +		case 'S':
> +			i = strtoll(optarg, &endptr, 0);
> +			if (*endptr != '\0') {
> +				erofs_err("invalid blocks per compress segment %s",
> +					  optarg);
> +				return -EINVAL;
> +			}
> +			cfg.c_compr_segsize = i ? blknr_to_addr(i) : -1;
> +			break;
>  		case 2:
>  			opt = erofs_parse_exclude_path(optarg, false);
>  			if (opt) {
> -- 
> 2.24.0
> 
> 
> 


More information about the Linux-erofs mailing list