[PATCH 10/13] erofs-utils: introduce compression for regular files

Gao Xiang hsiangkao at aol.com
Fri May 31 10:50:44 AEST 2019


From: Gao Xiang <gaoxiang25 at huawei.com>

Yay! Let's try to transparent file compression
for all regular files if necessary.

Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
 include/erofs/compress.h |  25 ++++
 include/erofs/config.h   |   2 +
 include/erofs/internal.h |   1 +
 lib/Makefile.am          |   2 +-
 lib/compress.c           | 303 +++++++++++++++++++++++++++++++++++++++
 lib/config.c             |   1 +
 lib/inode.c              |  18 ++-
 mkfs/main.c              |  21 +++
 8 files changed, 369 insertions(+), 4 deletions(-)
 create mode 100644 include/erofs/compress.h
 create mode 100644 lib/compress.c

diff --git a/include/erofs/compress.h b/include/erofs/compress.h
new file mode 100644
index 0000000..e0abb8f
--- /dev/null
+++ b/include/erofs/compress.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * erofs_utils/include/erofs/compress.h
+ *
+ * Copyright (C) 2019 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25 at huawei.com>
+ */
+#ifndef __EROFS_COMPRESS_H
+#define __EROFS_COMPRESS_H
+
+#include "internal.h"
+
+/* workaround for an upstream lz4 compression issue, which can crash us */
+/* #define EROFS_CONFIG_COMPR_MAX_SZ        (1024 * 1024) */
+#define EROFS_CONFIG_COMPR_MAX_SZ           (900  * 1024)
+#define EROFS_CONFIG_COMPR_MIN_SZ           (32   * 1024)
+
+int erofs_write_compressed_file(struct erofs_inode *inode);
+
+int z_erofs_compress_init(void);
+int z_erofs_compress_exit(void);
+
+#endif
+
diff --git a/include/erofs/config.h b/include/erofs/config.h
index e31732b..177017b 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -19,6 +19,8 @@ struct erofs_configure {
 	/* related arguments for mkfs.erofs */
 	char *c_img_path;
 	char *c_src_path;
+	char *c_compr_alg_master;
+	int c_compr_level_master;
 };
 
 extern struct erofs_configure cfg;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index dfa6173..78da589 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -94,6 +94,7 @@ struct erofs_inode {
 	struct erofs_buffer_head *bh_inline, *bh_data;
 
 	void *idata;
+	void *compressmeta;
 };
 
 #define IS_ROOT(x)	((x) == (x)->i_parent)
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 17dc9e1..dea82f7 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -2,7 +2,7 @@
 # Makefile.am
 
 noinst_LTLIBRARIES = liberofs.la
-liberofs_la_SOURCES = config.c io.c cache.c inode.c compressor.c
+liberofs_la_SOURCES = config.c io.c cache.c inode.c compress.c compressor.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
new file mode 100644
index 0000000..205a4f2
--- /dev/null
+++ b/lib/compress.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs_utils/lib/compress.c
+ *
+ * Copyright (C) 2018-2019 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Miao Xie <miaoxie at huawei.com>
+ * with heavy changes by Gao Xiang <gaoxiang25 at huawei.com>
+ */
+#define _LARGEFILE64_SOURCE
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "erofs/print.h"
+#include "erofs/io.h"
+#include "erofs/cache.h"
+#include "erofs/compress.h"
+#include "compressor.h"
+
+static struct erofs_compress compresshandle;
+static int compressionlevel;
+
+struct z_erofs_vle_compress_ctx {
+	u8 *metacur;
+
+	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
+	unsigned int head, tail;
+
+	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
+	u16 clusterofs;
+};
+
+static unsigned int get_vle_compress_metasize(erofs_off_t filesize)
+{
+	const unsigned int indexsize = BLK_ROUND_UP(filesize) *
+		sizeof(struct z_erofs_vle_decompressed_index);
+
+	return sizeof(struct erofs_extent_header) + indexsize;
+}
+
+static void vle_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
+{
+	const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN;
+	struct z_erofs_vle_decompressed_index di;
+
+	if (!ctx->clusterofs)
+		return;
+
+	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
+	di.di_u.blkaddr = 0;
+	di.di_advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+
+	memcpy(ctx->metacur, &di, sizeof(di));
+	ctx->metacur += sizeof(di);
+}
+
+static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
+			      unsigned int count, bool raw)
+{
+	unsigned int d0 = 0, d1 = (ctx->clusterofs + count) / EROFS_BLKSIZ;
+	unsigned int clusterofs = ctx->clusterofs;
+	struct z_erofs_vle_decompressed_index di;
+	unsigned int type;
+	__le16 advise;
+
+	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
+
+	/* whether the tail-end (un)compressed block or not */
+	if (clusterofs + count < EROFS_BLKSIZ) {
+		type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
+			Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
+		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+
+		di.di_advise = advise;
+		di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+		memcpy(ctx->metacur, &di, sizeof(di));
+		ctx->metacur += sizeof(di);
+
+		/* don't add the final index if the tail-end block exists */
+		ctx->clusterofs = 0;
+		return;
+	}
+
+	do {
+		if (d0) {
+			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
+
+			di.di_u.delta[0] = cpu_to_le16(d0);
+			di.di_u.delta[1] = cpu_to_le16(d1);
+		} else {
+			type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
+				Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
+			di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+		}
+		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+		di.di_advise = advise;
+
+		memcpy(ctx->metacur, &di, sizeof(di));
+		ctx->metacur += sizeof(di);
+
+		count -= EROFS_BLKSIZ - clusterofs;
+		clusterofs = 0;
+
+		++d0;
+		--d1;
+	} while (clusterofs + count >= EROFS_BLKSIZ);
+
+	ctx->clusterofs = clusterofs + count;
+}
+
+static int vle_compress_one(struct erofs_inode *inode,
+			    struct z_erofs_vle_compress_ctx *ctx,
+			    bool final)
+{
+	struct erofs_compress *const h = &compresshandle;
+	unsigned int len = ctx->tail - ctx->head;
+	unsigned int count;
+	int ret;
+	char dst[EROFS_BLKSIZ];
+
+	while (len) {
+		bool raw;
+
+		if (len <= EROFS_BLKSIZ) {
+			if (final)
+				goto nocompression;
+			break;
+		}
+
+		count = len;
+		ret = erofs_compress_destsize(h, compressionlevel,
+					      ctx->queue + ctx->head,
+					      &count, dst, EROFS_BLKSIZ);
+		if (ret) {
+			if (ret != -EAGAIN) {
+				erofs_err("failed to compress %s: %s",
+					  inode->i_srcpath,
+					  erofs_strerror(ret));
+			}
+nocompression:
+			/* fix up clusterofs to 0 if possable */
+			if (ctx->head >= ctx->clusterofs) {
+				ctx->head -= ctx->clusterofs;
+				len += ctx->clusterofs;
+				ctx->clusterofs = 0;
+			}
+
+			/* write uncompressed data */
+			count = min(EROFS_BLKSIZ, len);
+
+			memcpy(dst, ctx->queue + ctx->head, count);
+			memset(dst + count, 0, EROFS_BLKSIZ - count);
+
+			erofs_dbg("Writing %u uncompressed data to block %u",
+				  count, ctx->blkaddr);
+
+			ret = blk_write(dst, ctx->blkaddr, 1);
+			if (ret)
+				return ret;
+			raw = true;
+		} else {
+			/* write compressed data */
+			erofs_dbg("Writing %u compressed data to block %u",
+				  count, ctx->blkaddr);
+
+			ret = blk_write(dst, ctx->blkaddr, 1);
+			if (ret)
+				return ret;
+			raw = false;
+		}
+
+		ctx->head += count;
+		/* write compression indexes for this blkaddr */
+		vle_write_indexes(ctx, count, raw);
+
+		++ctx->blkaddr;
+		len -= count;
+
+		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
+			const uint qh_aligned = round_down(ctx->head, EROFS_BLKSIZ);
+			const uint qh_after = ctx->head - qh_aligned;
+
+			memmove(ctx->queue, ctx->queue + qh_aligned,
+				len + qh_after);
+			ctx->head = qh_after;
+			ctx->tail = qh_after + len;
+			break;
+		}
+	}
+	return 0;
+}
+
+int erofs_write_compressed_file(struct erofs_inode *inode)
+{
+	const unsigned int metasize = get_vle_compress_metasize(inode->i_size);
+	struct erofs_buffer_head *bh;
+	struct z_erofs_vle_compress_ctx ctx;
+	erofs_off_t remaining;
+	erofs_blk_t blkaddr;
+
+	int ret, fd;
+	u8 *compressmeta = malloc(metasize);
+
+	if (!compressmeta)
+		return -ENOMEM;
+
+	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
+	if (fd < 0) {
+		ret = -errno;
+		goto err_free;
+	}
+
+	/* allocate main data buffer */
+	bh = erofs_balloc(DATA, 0, 0, 0);
+	if (IS_ERR(bh)) {
+		ret = PTR_ERR(bh);
+		goto err_close;
+	}
+
+	blkaddr = erofs_mapbh(bh->block, true);	/* start_blkaddr */
+	ctx.blkaddr = blkaddr;
+	ctx.metacur = compressmeta + sizeof(struct erofs_extent_header);
+	ctx.head = ctx.tail = 0;
+	ctx.clusterofs = 0;
+	remaining = inode->i_size;
+
+	while (remaining) {
+		const uint readcount = min_t(uint, remaining,
+					     sizeof(ctx.queue) - ctx.tail);
+
+		ret = read(fd, ctx.queue + ctx.tail, readcount);
+		if (ret != readcount) {
+			ret = -errno;
+			goto err_bdrop;
+		}
+		remaining -= readcount;
+		ctx.tail += readcount;
+
+		/* do one compress round */
+		ret = vle_compress_one(inode, &ctx, false);
+		if (ret)
+			goto err_bdrop;
+	}
+
+	/* do the final round */
+	ret = vle_compress_one(inode, &ctx, true);
+	if (ret)
+		goto err_bdrop;
+
+	/* fall back to no compression mode */
+	if (ctx.blkaddr - blkaddr >= BLK_ROUND_UP(inode->i_size)) {
+		ret = -ENOSPC;
+		goto err_bdrop;
+	}
+
+	vle_write_indexes_final(&ctx);
+
+	close(fd);
+	ret = erofs_bh_balloon(bh, blknr_to_addr(ctx.blkaddr - blkaddr));
+	DBG_BUGON(ret);
+
+	erofs_info("compressed %s (%lu bytes) into %u blocks",
+		   inode->i_srcpath, inode->i_size, ctx.blkaddr - blkaddr);
+
+	/*
+	 * TODO: need to move erofs_bdrop to erofs_write_tail_end
+	 *       when both mkfs & kernel support compression inline.
+	 */
+	erofs_bdrop(bh, false);
+	inode->compressmeta = compressmeta;
+	inode->idata_size = 0;
+	inode->extent_isize = metasize;
+	inode->data_mapping_mode = EROFS_INODE_LAYOUT_COMPRESSION;
+	return 0;
+
+err_bdrop:
+	erofs_bdrop(bh, true);	/* revoke buffer */
+err_close:
+	close(fd);
+err_free:
+	free(compressmeta);
+	return ret;
+}
+
+int z_erofs_compress_init(void)
+{
+	/* initialize for primary compression algorithm */
+	int ret = erofs_compressor_init(&compresshandle,
+					cfg.c_compr_alg_master);
+	if (ret)
+		return ret;
+
+	compressionlevel = cfg.c_compr_level_master < 0 ?
+		compresshandle.alg->default_level :
+		cfg.c_compr_level_master;
+	return 0;
+}
+
+int z_erofs_compress_exit(void)
+{
+	return erofs_compressor_exit(&compresshandle);
+}
+
diff --git a/lib/config.c b/lib/config.c
index 6ff8e4d..3312c9b 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -18,6 +18,7 @@ void erofs_init_configure(void)
 	cfg.c_dbg_lvl  = 0;
 	cfg.c_version  = PACKAGE_VERSION;
 	cfg.c_dry_run  = false;
+	cfg.c_compr_level_master = -1;
 }
 
 void erofs_show_config(void)
diff --git a/lib/inode.c b/lib/inode.c
index d3e5ed8..b7fcf0e 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -17,6 +17,7 @@
 #include "erofs/inode.h"
 #include "erofs/cache.h"
 #include "erofs/io.h"
+#include "erofs/compress.h"
 
 struct erofs_sb_info sbi;
 
@@ -286,8 +287,11 @@ int erofs_write_file(struct erofs_inode *inode)
 	unsigned int nblocks, i;
 	int ret, fd;
 
-	if (erofs_file_is_compressible(inode)) {
-		/* to be implemented */
+	if (cfg.c_compr_alg_master && erofs_file_is_compressible(inode)) {
+		ret = erofs_write_compressed_file(inode);
+
+		if (!ret || ret != -ENOSPC)
+			return ret;
 	}
 
 	/* fallback to all data uncompressed */
@@ -340,7 +344,7 @@ fail:
 static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
 {
 	struct erofs_inode *const inode = bh->fsprivate;
-	const erofs_off_t off = erofs_btell(bh, false);
+	erofs_off_t off = erofs_btell(bh, false);
 
 	/* let's support v1 currently */
 	struct erofs_inode_v1 v1 = {0};
@@ -379,6 +383,14 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
 	if (ret)
 		return false;
 
+	if (inode->extent_isize) {
+		/* write compression metadata */
+		off = Z_EROFS_VLE_EXTENT_ALIGN(off + inode->inode_isize);
+		ret = dev_write(inode->compressmeta, off, inode->extent_isize);
+		if (ret)
+			return false;
+	}
+
 	erofs_iput(inode);
 	return erofs_bh_flush_generic_end(bh);
 }
diff --git a/mkfs/main.c b/mkfs/main.c
index 1ed15d2..ce64ece 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -17,6 +17,7 @@
 #include "erofs/cache.h"
 #include "erofs/inode.h"
 #include "erofs/io.h"
+#include "erofs/compress.h"
 
 #define EROFS_SUPER_END (EROFS_SUPER_OFFSET + sizeof(struct erofs_super_block))
 
@@ -25,6 +26,7 @@ static void usage(char *execpath)
 	fprintf(stderr, "%s %s\n", basename(execpath), cfg.c_version);
 	fprintf(stderr, "usage: [options] FILE DIRECTORY\n\n");
 	fprintf(stderr, "Generate erofs image from DIRECTORY to FILE, and [options] are:\n");
+	fprintf(stderr, " -zX[,Y]  X=compressor (Y=compression level, optional)\n");
 	fprintf(stderr, " -d#      set output message level to # (maximum 9)\n");
 }
 
@@ -44,6 +46,23 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 
 	while ((opt = getopt(argc, argv, "d:z:")) != -1) {
 		switch (opt) {
+		case 'z':
+			if (!optarg) {
+				cfg.c_compr_alg_master = "(default)";
+				break;
+			}
+			/* get specified compression level */
+			for (i = 0; optarg[i] != '\0'; ++i) {
+				if (optarg[i] == ',') {
+					cfg.c_compr_level_master =
+						atoi(optarg + i + 1);
+					optarg[i] = '\0';
+					break;
+				}
+			}
+			cfg.c_compr_alg_master = strndup(optarg, i);
+			break;
+
 		case 'd':
 			cfg.c_dbg_lvl = parse_num_from_str(optarg);
 			break;
@@ -148,6 +167,7 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	z_erofs_compress_init();
 	erofs_inode_manager_init();
 
 	root_inode = erofs_mkfs_build_tree_from_path(NULL, cfg.c_src_path);
@@ -167,6 +187,7 @@ int main(int argc, char **argv)
 	if (!erofs_bflush(NULL))
 		err = -EIO;
 exit:
+	z_erofs_compress_exit();
 	dev_close();
 	erofs_exit_configure();
 
-- 
2.17.1



More information about the Linux-erofs mailing list