[PATCH v6] erofs-utils: introduce segment limits compression
Li Guifu
bluce.lee at aliyun.com
Sun Jun 21 20:51:47 AEST 2020
Support segment compression, which separates files into several logical
units (segments), each of which is compressed independently.
Advantages:
- more friendly for data differencing;
- it can also be used for parallel compression in the same file later.
Signed-off-by: Li Guifu <bluce.lee at aliyun.com>
---
Changes since v3, suggested by Gao Xiang <hsiangkao at gmx.com>:
- add a `limits` variable to pass the size limit to write_uncompressed_block
- make comments more readable
include/erofs/config.h | 1 +
lib/compress.c | 29 +++++++++++++++++++++--------
lib/config.c | 1 +
man/mkfs.erofs.1 | 4 ++++
mkfs/main.c | 18 +++++++++++++++++-
5 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..e5f1bfb 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,7 @@ struct erofs_configure {
char *c_img_path;
char *c_src_path;
char *c_compr_alg_master;
+ u64 c_compr_seg_size;
int c_compr_level_master;
int c_force_inodeversion;
/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..8a79895 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
erofs_blk_t blkaddr; /* pointing to the next blkaddr */
u16 clusterofs;
+ u64 segavail;
};
#define Z_EROFS_LEGACY_MAP_HEADER_SIZE \
@@ -116,23 +117,21 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
}
static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
- unsigned int *len,
+ unsigned int *len, unsigned int limits,
char *dst)
{
int ret;
- unsigned int count;
+ unsigned int count = min(limits, *len); /* write uncompressed data */
/* reset clusterofs to 0 if permitted */
if (!erofs_sb_has_lz4_0padding() &&
ctx->head >= ctx->clusterofs) {
ctx->head -= ctx->clusterofs;
*len += ctx->clusterofs;
+ count += ctx->clusterofs;
ctx->clusterofs = 0;
}
- /* write uncompressed data */
- count = min(EROFS_BLKSIZ, *len);
-
memcpy(dst, ctx->queue + ctx->head, count);
memset(dst + count, 0, EROFS_BLKSIZ - count);
@@ -157,14 +156,22 @@ static int vle_compress_one(struct erofs_inode *inode,
while (len) {
bool raw;
+ unsigned int limits = EROFS_BLKSIZ;
+
+ if (ctx->segavail <= EROFS_BLKSIZ) {
+ if (len < ctx->segavail && !final)
+ break;
+
+ limits = ctx->segavail;
+ goto nocompression;
+ }
if (len <= EROFS_BLKSIZ) {
if (final)
goto nocompression;
break;
}
-
- count = len;
+ count = min_t(u64, len, ctx->segavail);
ret = erofs_compress_destsize(h, compressionlevel,
ctx->queue + ctx->head,
&count, dst, EROFS_BLKSIZ);
@@ -175,7 +182,7 @@ static int vle_compress_one(struct erofs_inode *inode,
erofs_strerror(ret));
}
nocompression:
- ret = write_uncompressed_block(ctx, &len, dst);
+ ret = write_uncompressed_block(ctx, &len, limits, dst);
if (ret < 0)
return ret;
count = ret;
@@ -203,6 +210,11 @@ nocompression:
++ctx->blkaddr;
len -= count;
+ if (count >= ctx->segavail)
+ ctx->segavail = cfg.c_compr_seg_size;
+ else
+ ctx->segavail -= count;
+
if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
const unsigned int qh_aligned =
round_down(ctx->head, EROFS_BLKSIZ);
@@ -422,6 +434,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
ctx.head = ctx.tail = 0;
ctx.clusterofs = 0;
remaining = inode->i_size;
+ ctx.segavail = cfg.c_compr_seg_size;
while (remaining) {
const u64 readcount = min_t(u64, remaining,
diff --git a/lib/config.c b/lib/config.c
index da0c260..fbb2914 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
cfg.c_force_inodeversion = 0;
cfg.c_inline_xattr_tolerance = 2;
cfg.c_unix_timestamp = -1;
+ cfg.c_compr_seg_size = -1UL;
}
void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..0b613e4 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output.
Set all files to the given UNIX timestamp. Reproducible builds requires setting
all to a specific one.
.TP
+.BI "\-S " #
+Set max input stream size for each individual segment (disabled if 0).
+The default value is 0. It should be aligned with blocksize.
+.TP
.BI "\-\-exclude-path=" path
Ignore file that matches the exact literal path.
You may give multiple `--exclude-path' options.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..0e26f4f 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
" -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
" -EX[,...] X=extended options\n"
" -T# set a fixed UNIX timestamp # to all files\n"
+ " -S# Set max input stream size # for each individual segment\n"
" --exclude-path=X avoid including file X (X = exact literal path)\n"
" --exclude-regex=X avoid including files that match X (X = regular expression)\n"
#ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
char *endptr;
int opt, i;
- while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+ while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
long_options, NULL)) != -1) {
switch (opt) {
case 'z':
@@ -188,6 +189,21 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
return -EINVAL;
}
break;
+ case 'S':
+ cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0);
+ if (*endptr != '\0') {
+ erofs_err("invalid compress segment size %s",
+ optarg);
+ return -EINVAL;
+ }
+ if (!cfg.c_compr_seg_size) {
+ cfg.c_compr_seg_size = -1UL;
+ } else if (cfg.c_compr_seg_size % EROFS_BLKSIZ) {
+ erofs_err("segment size:%"PRIu64" should be align to %u",
+ cfg.c_compr_seg_size, EROFS_BLKSIZ);
+ return -EINVAL;
+ }
+ break;
case 2:
opt = erofs_parse_exclude_path(optarg, false);
if (opt) {
--
2.17.1
More information about the Linux-erofs
mailing list