[PATCH 2/2] erofs-utils: mkfs: support directory compression

Gao Xiang hsiangkao at linux.alibaba.com
Fri Oct 17 22:20:02 AEDT 2025


Although directories can be compressed using various approaches
supported by regular inodes, it is difficult to predict the total
compressed size due to a lack of (parent) NIDs or child NIDs at
that time.

To simplify this, dump entire directory data into the packed
inode (all-fragments) as a first step.  Therefore, Linux 6.1+ is
required for directory compression for now.

 _____________________________________________________________________
|__ Testset __|_________|_______ Command line _________|____ Size ____|
|             | Vanilla | -zlzma -Efragments -C1048576 |  2553946112  |
|             |         |______________________________|_ [2436 MiB] _|
|             |         | [..] -m4096                  |  2524413952  |
| Fedora KIWI |_________|______________________________|_ [2408 MiB] _|
|             | After   | [..] --zD                    |  2542051328  |
|             |         |______________________________|_ [2425 MiB] _|
|             |         | [..] -m4096 --zD             |  2521423872  |
|_____________|_________|______________________________|_ [2405 MiB] _|
|             | Vanilla | -zlzma -Efragments -C1048576 |   4837376    |
|   OpenWrt   |_________|______________________________|_ [4724 KiB] _|
|             | After   | [..] -m4096 --zD             |   4730880    |
|_____________|_________|______________________________|_ [4620 KiB] _|

`-m4096`  Enable inode metadata compression in 4K pclusters.
`--zD`    Enable directory data compression.

Note that incremental builds are still unsupported for compressed
directories, but this can be implemented later.

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 include/erofs/importer.h |  1 +
 lib/compress.c           | 45 +++++++++++++++++++++++++++++++++++++++-
 lib/fragments.c          |  8 +++++--
 lib/importer.c           | 17 +++++++--------
 lib/inode.c              | 31 ++++++++++++++++++++-------
 lib/liberofs_compress.h  |  4 ++++
 mkfs/main.c              |  8 +++++++
 7 files changed, 93 insertions(+), 21 deletions(-)

diff --git a/include/erofs/importer.h b/include/erofs/importer.h
index 48fe47e..60f81d6 100644
--- a/include/erofs/importer.h
+++ b/include/erofs/importer.h
@@ -48,6 +48,7 @@ struct erofs_importer_params {
 	bool dedupe;
 	bool fragments;
 	bool all_fragments;
+	bool compress_dir;
 	char fragdedupe;
 };
 
diff --git a/lib/compress.c b/lib/compress.c
index 97cb791..1a68841 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -1341,7 +1341,7 @@ int erofs_commit_compressed_file(struct z_erofs_compress_ictx *ictx,
 		if (ret)
 			goto err_free_idata;
 		inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
-		erofs_sb_set_fragments(inode->sbi);
+		erofs_sb_set_fragments(sbi);
 	}
 
 	/* fall back to no compression mode */
@@ -1980,6 +1980,49 @@ out:
 	return ret;
 }
 
+int erofs_begin_compress_dir(struct erofs_importer *im,
+			     struct erofs_inode *inode)
+
+{
+	if (!im->params->compress_dir ||
+	    inode->i_size < Z_EROFS_LEGACY_MAP_HEADER_SIZE)
+		return -ENOSPC;
+
+	inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+	erofs_sb_set_fragments(inode->sbi);
+	inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+	inode->extent_isize = Z_EROFS_LEGACY_MAP_HEADER_SIZE;
+	inode->compressmeta = NULL;
+	return 0;
+}
+
+int erofs_write_compress_dir(struct erofs_inode *inode, struct erofs_vfile *vf)
+{
+	void *compressmeta;
+	int err;
+
+	if (inode->datalayout != EROFS_INODE_COMPRESSED_FULL ||
+	    inode->extent_isize < Z_EROFS_LEGACY_MAP_HEADER_SIZE) {
+		DBG_BUGON(1);
+		return -EINVAL;
+	}
+
+	err = erofs_pack_file_from_fd(inode, vf, 0, ~0U);
+	if (err || !inode->fragment_size)
+		return err;
+	err = erofs_fragment_commit(inode, ~0);
+	if (err)
+		return err;
+
+	compressmeta = calloc(1, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
+	if (!compressmeta)
+		return -ENOMEM;
+	*(__le64 *)compressmeta =
+		cpu_to_le64(inode->fragmentoff | 1ULL << 63);
+	inode->compressmeta = compressmeta;
+	return 0;
+}
+
 static int z_erofs_build_compr_cfgs(struct erofs_importer *im,
 				    u32 *max_dict_size)
 {
diff --git a/lib/fragments.c b/lib/fragments.c
index 15092e1..0f07e33 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -257,11 +257,13 @@ int erofs_pack_file_from_fd(struct erofs_inode *inode,
 	if (memblock == MAP_FAILED || !memblock) {
 		erofs_off_t remaining = inode->i_size;
 		struct erofs_vfile vout = { .fd = epi->fd };
+		bool noseek = vf->ops && !vf->ops->pread;
 		off_t pos = fpos;
 
 		do {
 			sz = min_t(u64, remaining, UINT_MAX);
-			rc = erofs_io_sendfile(&vout, vf, &pos, sz);
+			rc = erofs_io_sendfile(&vout, vf,
+					       noseek ? NULL : &pos, sz);
 			if (rc <= 0)
 				break;
 			remaining -= rc;
@@ -372,10 +374,12 @@ int erofs_flush_packed_inode(struct erofs_importer *im)
 	struct erofs_inode *inode;
 
 	if (!epi || !erofs_sb_has_fragments(sbi))
-		return -EINVAL;
+		return 0;
 
 	if (lseek(epi->fd, 0, SEEK_CUR) <= 0)
 		return 0;
+
+	erofs_update_progressinfo("Processing packed data ...");
 	inode = erofs_mkfs_build_special_from_fd(im, epi->fd,
 						 EROFS_PACKED_INODE);
 	sbi->packed_nid = erofs_lookupnid(inode);
diff --git a/lib/importer.c b/lib/importer.c
index ad4bed8..c73dde2 100644
--- a/lib/importer.c
+++ b/lib/importer.c
@@ -57,12 +57,14 @@ int erofs_importer_init(struct erofs_importer *im)
 	if (err)
 		goto out_err;
 
-	if (params->fragments || cfg.c_extra_ea_name_prefixes) {
+	if (params->fragments || cfg.c_extra_ea_name_prefixes ||
+	    params->compress_dir) {
 		subsys = "packedfile";
 		if (!params->pclusterblks_packed)
 			params->pclusterblks_packed = params->pclusterblks_def;
 
-		err = erofs_packedfile_init(sbi, params->fragments);
+		err = erofs_packedfile_init(sbi, params->fragments ||
+						params->compress_dir);
 		if (err)
 			goto out_err;
 	}
@@ -90,7 +92,6 @@ out_err:
 
 int erofs_importer_flush_all(struct erofs_importer *im)
 {
-	const struct erofs_importer_params *params = im->params;
 	struct erofs_sb_info *sbi = im->sbi;
 	unsigned int fsalignblks;
 	int err;
@@ -102,13 +103,9 @@ int erofs_importer_flush_all(struct erofs_importer *im)
 			return err;
 	}
 
-	if ((params->fragments || cfg.c_extra_ea_name_prefixes) &&
-	    erofs_sb_has_fragments(sbi)) {
-		erofs_update_progressinfo("Handling packed data ...");
-		err = erofs_flush_packed_inode(im);
-		if (err)
-			return err;
-	}
+	err = erofs_flush_packed_inode(im);
+	if (err)
+		return err;
 
 	fsalignblks = im->params->fsalignblks ?
 		roundup_pow_of_two(im->params->fsalignblks) : 1;
diff --git a/lib/inode.c b/lib/inode.c
index 7587248..f9b5ee9 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -344,12 +344,7 @@ static int erofs_prepare_dir_file(struct erofs_importer *im,
 		return -EFAULT;
 	}
 	dir->i_size = d_size;
-
-	/* no compression for all dirs */
-	dir->datalayout = EROFS_INODE_FLAT_INLINE;
-
-	/* it will be used in erofs_prepare_inode_buffer */
-	dir->idata_size = d_size % erofs_blksiz(sbi);
+	dir->datalayout = EROFS_INODE_DATALAYOUT_MAX;
 	return 0;
 }
 
@@ -703,12 +698,16 @@ static int erofs_write_dir_file(struct erofs_inode *dir)
 	struct erofs_vfile *vf;
 	int err;
 
-	DBG_BUGON(dir->idata_size != (dir->i_size & (bsz - 1)));
 	vf = erofs_dirwriter_open(dir);
 	if (IS_ERR(vf))
 		return PTR_ERR(vf);
 
-	err = erofs_write_unencoded_data(dir, vf, 0, true);
+	if (erofs_inode_is_data_compressed(dir->datalayout)) {
+		err = erofs_write_compress_dir(dir, vf);
+	} else {
+		DBG_BUGON(dir->idata_size != (dir->i_size & (bsz - 1)));
+		err = erofs_write_unencoded_data(dir, vf, 0, true);
+	}
 	erofs_io_close(vf);
 	return err;
 }
@@ -1510,6 +1509,22 @@ static int erofs_mkfs_jobfn(struct erofs_importer *im,
 		return erofs_mkfs_handle_nondirectory(im, &item->u.ndir);
 
 	if (item->type == EROFS_MKFS_JOB_DIR) {
+		unsigned int bsz = erofs_blksiz(inode->sbi);
+
+		if (inode->datalayout == EROFS_INODE_DATALAYOUT_MAX) {
+			inode->datalayout = EROFS_INODE_FLAT_INLINE;
+
+			ret = erofs_begin_compress_dir(im, inode);
+			if (ret && ret != -ENOSPC)
+				return ret;
+		} else {
+			DBG_BUGON(inode->datalayout != EROFS_INODE_FLAT_PLAIN);
+		}
+
+		/* it will be used in erofs_prepare_inode_buffer */
+		if (inode->datalayout == EROFS_INODE_FLAT_INLINE)
+			inode->idata_size = inode->i_size & (bsz - 1);
+
 		ret = erofs_prepare_inode_buffer(im, inode);
 		if (ret)
 			return ret;
diff --git a/lib/liberofs_compress.h b/lib/liberofs_compress.h
index 8b39735..4b9dd42 100644
--- a/lib/liberofs_compress.h
+++ b/lib/liberofs_compress.h
@@ -22,6 +22,10 @@ void erofs_bind_compressed_file_with_fd(struct z_erofs_compress_ictx *ictx,
 int erofs_begin_compressed_file(struct z_erofs_compress_ictx *ictx);
 int erofs_write_compressed_file(struct z_erofs_compress_ictx *ictx);
 
+int erofs_begin_compress_dir(struct erofs_importer *im,
+			     struct erofs_inode *inode);
+int erofs_write_compress_dir(struct erofs_inode *inode, struct erofs_vfile *vf);
+
 int z_erofs_compress_init(struct erofs_importer *im);
 int z_erofs_compress_exit(struct erofs_sb_info *sbi);
 
diff --git a/mkfs/main.c b/mkfs/main.c
index 11e3032..f1ea7df 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -100,6 +100,7 @@ static struct option long_options[] = {
 #ifdef OCIEROFS_ENABLED
 	{"oci", optional_argument, NULL, 534},
 #endif
+	{"zD", optional_argument, NULL, 536},
 	{0, 0, 0, 0},
 };
 
@@ -174,6 +175,7 @@ static void usage(int argc, char **argv)
 		"    --all-time         the timestamp is also applied to all files (default)\n"
 		"    --mkfs-time        the timestamp is applied as build time only\n"
 		" -UX                   use a given filesystem UUID\n"
+		" --zD[=<0|1>]          specify directory compression: 0=disable [default], 1=enable\n"
 		" --all-root            make all files owned by root\n"
 #ifdef EROFS_MT_ENABLED
 		" --async-queue-limit=# specify the maximum number of entries in the multi-threaded job queue\n"
@@ -1404,6 +1406,12 @@ static int mkfs_parse_options_cfg(struct erofs_importer_params *params,
 				mkfs_aws_zinfo_file = strdup(optarg);
 			tarerofs_decoder = EROFS_IOS_DECODER_GZRAN;
 			break;
+		case 536:
+			if (!optarg || strcmp(optarg, "1"))
+				params->compress_dir = true;
+			else
+				params->compress_dir = false;
+			break;
 		case 'V':
 			version();
 			exit(0);
-- 
2.43.5



More information about the Linux-erofs mailing list