[PATCH v11] erofs-utils: introduce segment compression

Gao Xiang hsiangkao at aol.com
Mon Jul 6 04:20:50 AEST 2020


Hi Guifu,

On Sun, Jul 05, 2020 at 04:32:30PM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression, which separates files into several logical
> units (segments), each of which is compressed independently.
> 
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
> 
> Signed-off-by: Li Guifu <bluce.lee at aliyun.com>
> ---
> Changes from v10
> - change variable uncomprofs to clusterofs, which is only used
>   when writing an uncompressed block

Could you please test the following patch if you're available?
Does it work?

From 0436ed04717853351e13d68db6f170f60e25fc12 Mon Sep 17 00:00:00 2001
From: Li Guifu <bluce.lee at aliyun.com>
Date: Sun, 5 Jul 2020 16:32:30 +0800
Subject: [PATCH v12] erofs-utils: introduce segment compression

Support segment compression, which separates files into several logical
units (segments), each of which is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee at aliyun.com>
Signed-off-by: Gao Xiang <hsiangkao at redhat.com>
---
 include/erofs/config.h |  2 ++
 lib/compress.c         | 38 ++++++++++++++++++++++++++++++--------
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 12 +++++++++++-
 5 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..b149633 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,8 @@ struct erofs_configure {
 	char *c_img_path;
 	char *c_src_path;
 	char *c_compr_alg_master;
+	u64 c_compr_segsize;
+
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..4216fa7 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -124,24 +125,33 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
 
 	/* reset clusterofs to 0 if permitted */
 	if (!erofs_sb_has_lz4_0padding() &&
-	    ctx->head >= ctx->clusterofs) {
+	    ctx->clusterofs && ctx->head >= ctx->clusterofs) {
 		ctx->head -= ctx->clusterofs;
 		*len += ctx->clusterofs;
+
+		ctx->segavail += ctx->clusterofs;
+		DBG_BUGON(ctx->segavail > cfg.c_compr_segsize);
+
+		DBG_BUGON(ctx->segavail < EROFS_BLKSIZ);
+		/* so only *len will be the candidate instead of segavail */
+		count = *len;
+
 		ctx->clusterofs = 0;
+	} else {
+		count = min_t(u64, ctx->segavail, *len);
 	}
 
-	/* write uncompressed data */
-	count = min(EROFS_BLKSIZ, *len);
+	if (count > EROFS_BLKSIZ)
+		count = EROFS_BLKSIZ;
 
+	/* fill zero if the uncompressed block isn't full */
 	memcpy(dst, ctx->queue + ctx->head, count);
 	memset(dst + count, 0, EROFS_BLKSIZ - count);
 
 	erofs_dbg("Writing %u uncompressed data to block %u",
 		  count, ctx->blkaddr);
 	ret = blk_write(dst, ctx->blkaddr, 1);
-	if (ret)
-		return ret;
-	return count;
+	return ret ? ret : count;
 }
 
 static int vle_compress_one(struct erofs_inode *inode,
@@ -158,13 +168,20 @@ static int vle_compress_one(struct erofs_inode *inode,
 	while (len) {
 		bool raw;
 
+		if (ctx->segavail <= EROFS_BLKSIZ) {
+			if (len < ctx->segavail && !final)
+				break;
+
+			goto nocompression;
+		}
+
 		if (len <= EROFS_BLKSIZ) {
 			if (final)
 				goto nocompression;
 			break;
 		}
 
-		count = len;
+		count = min_t(u64, len, ctx->segavail);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -195,8 +212,12 @@ nocompression:
 				return ret;
 			raw = false;
 		}
-
 		ctx->head += count;
+		DBG_BUGON(ctx->segavail < count);
+		ctx->segavail -= count;
+		if (!ctx->segavail)
+			ctx->segavail = cfg.c_compr_segsize;
+
 		/* write compression indexes for this blkaddr */
 		vle_write_indexes(ctx, count, raw);
 
@@ -421,6 +442,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
+	ctx.segavail = cfg.c_compr_segsize;
 	remaining = inode->i_size;
 
 	while (remaining) {
diff --git a/lib/config.c b/lib/config.c
index da0c260..9d4bea1 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_segsize = -1;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..8d0fc10 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -48,6 +48,10 @@ Forcely generate compact inodes (32-byte inodes) to output.
 Forcely generate extended inodes (64-byte inodes) to output.
 .RE
 .TP
+.BI "\-S " #
+Set the maximum number of blocks for each individual compression segment.
+The default is 0 (disabled).
+.TP
 .BI "\-T " #
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..0265ae9 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               Set maximum blocks for each individual compress segment\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			i = strtoll(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid blocks per compress segment %s",
+					  optarg);
+				return -EINVAL;
+			}
+			cfg.c_compr_segsize = i ? blknr_to_addr(i) : -1;
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.24.0





More information about the Linux-erofs mailing list