[PATCH 7/7] erofs-utils: support encoded extents

Gao Xiang hsiangkao at linux.alibaba.com
Mon Mar 10 20:25:08 AEDT 2025


Use encoded extents if 48bit is set and metadata is smaller for big
pclusters.

For Zstd, since it doesn't natively support fixed-sized output
compression, switch to using fixed-sized input compression if
`--max-extent-bytes=` is specified and no more than `-C`. Later we
might introduce a simplified option for users too.

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 include/erofs/internal.h |   1 +
 include/erofs_fs.h       |   3 +-
 lib/compress.c           | 257 ++++++++++++++++++++++++++++++---------
 lib/compressor.c         |  11 ++
 lib/compressor.h         |   6 +
 lib/compressor_libzstd.c |  17 +++
 6 files changed, 235 insertions(+), 60 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 227e830..7a21044 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -269,6 +269,7 @@ struct erofs_inode {
 				unsigned int z_idataoff;
 				erofs_off_t fragmentoff;
 			};
+			unsigned int z_extents;
 #define z_idata_size	idata_size
 		};
 	};
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index ce319d7..77af967 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -413,8 +413,9 @@ struct z_erofs_lcluster_index {
 	} di_u;
 };
 
+#define Z_EROFS_MAP_HEADER_START(end)	round_up(end, 8)
 #define Z_EROFS_MAP_HEADER_END(end)	\
-	(round_up(end, 8) + sizeof(struct z_erofs_map_header))
+	(Z_EROFS_MAP_HEADER_START(end) + sizeof(struct z_erofs_map_header))
 #define Z_EROFS_FULL_INDEX_START(end)	(Z_EROFS_MAP_HEADER_END(end) + 8)
 
 #define Z_EROFS_EXTENT_PLEN_PARTIAL	BIT(27)
diff --git a/lib/compress.c b/lib/compress.c
index 98288d4..0a8f893 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -49,6 +49,8 @@ struct z_erofs_compress_ictx {		/* inode context */
 	u32 tof_chksum;
 	bool fix_dedupedfrag;
 	bool fragemitted;
+	bool dedupe;
+	bool data_unaligned;
 
 	/* fields for write indexes */
 	u8 *metacur;
@@ -78,13 +80,12 @@ struct z_erofs_compress_sctx {		/* segment context */
 	unsigned int head, tail;
 
 	unsigned int pclustersize;
-	erofs_off_t pstart;
+	erofs_off_t pstart, poff;
 	u16 clusterofs;
 
 	int seg_idx;
 
 	void *membuf;
-	erofs_off_t memoff;
 };
 
 #ifdef EROFS_MT_ENABLED
@@ -336,10 +337,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_compress_sctx *ctx)
 			ei->e.partial = true;
 			ei->e.length -= delta;
 		}
-
-		/* fall back to noncompact indexes for deduplication */
-		inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B;
-		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+		ctx->ictx->dedupe = true;
 		erofs_sb_set_dedupe(sbi);
 
 		sbi->saved_by_deduplication += dctx.e.plen;
@@ -389,8 +387,7 @@ static int write_uncompressed_block(struct z_erofs_compress_sctx *ctx,
 	if (ctx->membuf) {
 		erofs_dbg("Writing %u uncompressed data of %s", count,
 			  inode->i_srcpath);
-		memcpy(ctx->membuf + ctx->memoff, dst, erofs_blksiz(sbi));
-		ctx->memoff += erofs_blksiz(sbi);
+		memcpy(ctx->membuf + ctx->poff, dst, erofs_blksiz(sbi));
 	} else {
 		erofs_dbg("Writing %u uncompressed data to %llu", count,
 			  ctx->pstart | 0ULL);
@@ -398,6 +395,7 @@ static int write_uncompressed_block(struct z_erofs_compress_sctx *ctx,
 		if (ret)
 			return ret;
 	}
+	ctx->poff += erofs_blksiz(sbi);
 	return count;
 }
 
@@ -555,7 +553,9 @@ static int __z_erofs_compress_one(struct z_erofs_compress_sctx *ctx,
 	bool is_packed_inode = erofs_is_packed_inode(inode);
 	bool tsg = (ctx->seg_idx + 1 >= ictx->seg_num), final = !ctx->remaining;
 	bool may_packing = (cfg.c_fragments && tsg && final && !is_packed_inode);
-	bool may_inline = (cfg.c_ztailpacking && tsg && final && !may_packing);
+	bool data_unaligned = ictx->data_unaligned;
+	bool may_inline = (cfg.c_ztailpacking && !data_unaligned && tsg &&
+			   final && !may_packing);
 	unsigned int compressedsize;
 	int ret;
 
@@ -579,21 +579,32 @@ static int __z_erofs_compress_one(struct z_erofs_compress_sctx *ctx,
 	}
 
 	e->length = min(len, cfg.c_max_decompressed_extent_bytes);
-	ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
-				      &e->length, dst, ctx->pclustersize);
-	if (ret <= 0) {
+	if (data_unaligned) {
+		ret = erofs_compress(h, ctx->queue + ctx->head, e->length,
+				     dst, ctx->pclustersize);
+		if (ret == -EOPNOTSUPP) {
+			data_unaligned = false;
+			goto retry_aligned;
+		}
+	} else {
+retry_aligned:
+		ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
+					      &e->length, dst, ctx->pclustersize);
+	}
+
+	if (ret > 0) {
+		compressedsize = ret;
+		/* even compressed size is smaller, there is no real gain */
+		if (!data_unaligned && !(may_inline && e->length == len && ret < blksz))
+			ret = roundup(ret, blksz);
+	} else if (ret != -ENOSPC) {
 		erofs_err("failed to compress %s: %s", inode->i_srcpath,
 			  erofs_strerror(ret));
 		return ret;
 	}
 
-	compressedsize = ret;
-	/* even compressed size is smaller, there is no real gain */
-	if (!(may_inline && e->length == len && ret < blksz))
-		ret = roundup(ret, blksz);
-
 	/* check if there is enough gain to keep the compressed data */
-	if (ret * h->compress_threshold / 100 >= e->length) {
+	if (ret < 0 || ret * h->compress_threshold / 100 >= e->length) {
 		if (may_inline && len < blksz) {
 			ret = z_erofs_fill_inline_data(inode,
 					ctx->queue + ctx->head, len, true);
@@ -652,7 +663,7 @@ frag_packing:
 		e->plen = blksz;
 		e->raw = false;
 	} else {
-		unsigned int tailused, padding;
+		unsigned int padding;
 
 		/*
 		 * If there's space left for the last round when deduping
@@ -660,7 +671,7 @@ frag_packing:
 		 * more to check whether it can be filled up.  Fix the fragment
 		 * if succeeds.  Otherwise, just drop it and go on packing.
 		 */
-		if (may_packing && len == e->length &&
+		if (!data_unaligned && may_packing && len == e->length &&
 		    (compressedsize & (blksz - 1)) &&
 		    ctx->tail < Z_EROFS_COMPR_QUEUE_SZ) {
 			ctx->pclustersize = roundup(compressedsize, blksz);
@@ -676,13 +687,12 @@ frag_packing:
 				return ret;
 		}
 
-		e->plen = round_up(compressedsize, blksz);
+		if (data_unaligned)
+			e->plen = compressedsize;
+		else
+			e->plen = round_up(compressedsize, blksz);
 		DBG_BUGON(e->plen >= e->length);
-
-		padding = 0;
-		tailused = compressedsize & (blksz - 1);
-		if (tailused)
-			padding = blksz - tailused;
+		padding = e->plen - compressedsize;
 
 		/* zero out garbage trailing data for non-0padding */
 		if (!erofs_sb_has_lz4_0padding(sbi)) {
@@ -695,9 +705,7 @@ frag_packing:
 			erofs_dbg("Writing %u compressed data of %u bytes of %s",
 				  e->length, e->plen, inode->i_srcpath);
 
-			memcpy(ctx->membuf + ctx->memoff,
-			       dst - padding, e->plen);
-			ctx->memoff += e->plen;
+			memcpy(ctx->membuf + ctx->poff, dst - padding, e->plen);
 		} else {
 			erofs_dbg("Writing %u compressed data to %llu of %u bytes",
 				  e->length, ctx->pstart, e->plen);
@@ -707,6 +715,7 @@ frag_packing:
 			if (ret)
 				return ret;
 		}
+		ctx->poff += e->plen;
 		e->raw = false;
 		may_inline = false;
 		may_packing = false;
@@ -979,30 +988,171 @@ static void z_erofs_write_mapheader(struct erofs_inode *inode,
 				    void *compressmeta)
 {
 	struct erofs_sb_info *sbi = inode->sbi;
-	struct z_erofs_map_header h = {
-		.h_advise = cpu_to_le16(inode->z_advise),
-		.h_algorithmtype = inode->z_algorithmtype[1] << 4 |
-				   inode->z_algorithmtype[0],
-		/* lclustersize */
-		.h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits,
-	};
+	struct z_erofs_map_header h;
 
-	if (inode->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
-		h.h_fragmentoff = cpu_to_le32(inode->fragmentoff);
-	else
-		h.h_idata_size = cpu_to_le16(inode->idata_size);
+	if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
+	    (inode->z_advise & Z_EROFS_ADVISE_EXTENTS)) {
+		int recsz = z_erofs_extent_recsize(inode->z_advise);
+
+		if (recsz > offsetof(struct z_erofs_extent, pstart_hi)) {
+			h = (struct z_erofs_map_header) {
+				.h_advise = cpu_to_le16(inode->z_advise),
+				.h_extents_lo = cpu_to_le32(inode->z_extents),
+			};
+		} else {
+			DBG_BUGON(inode->z_logical_clusterbits < sbi->blkszbits);
+			h = (struct z_erofs_map_header) {
+				.h_advise = cpu_to_le16(inode->z_advise),
+				.h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits,
+			};
+		}
+	} else {
+		h = (struct z_erofs_map_header) {
+			.h_advise = cpu_to_le16(inode->z_advise),
+			.h_algorithmtype = inode->z_algorithmtype[1] << 4 |
+					   inode->z_algorithmtype[0],
+			/* lclustersize */
+			.h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits,
+		};
+		if (inode->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
+			h.h_fragmentoff = cpu_to_le32(inode->fragmentoff);
+		else
+			h.h_idata_size = cpu_to_le16(inode->idata_size);
 
-	memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
+		memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
+	}
 	/* write out map header */
 	memcpy(compressmeta, &h, sizeof(struct z_erofs_map_header));
 }
 
+#define EROFS_FULL_INDEXES_SZ(inode)	\
+	(BLK_ROUND_UP(inode->sbi, inode->i_size) * \
+	 sizeof(struct z_erofs_lcluster_index) + Z_EROFS_LEGACY_MAP_HEADER_SIZE)
+
+static void *z_erofs_write_extents(struct z_erofs_compress_ictx *ctx)
+{
+	struct erofs_inode *inode = ctx->inode;
+	struct erofs_sb_info *sbi = inode->sbi;
+	struct z_erofs_extent_item *ei, *n;
+	unsigned int lclusterbits, nexts;
+	bool pstart_hi = false, unaligned_data = false;
+	erofs_off_t pstart, pend, lstart;
+	unsigned int recsz, metasz, moff;
+	void *metabuf;
+
+	ei = list_first_entry(&ctx->extents, struct z_erofs_extent_item,
+			      list);
+	lclusterbits = max_t(u8, ilog2(ei->e.length - 1) + 1, sbi->blkszbits);
+	pend = pstart = ei->e.pstart;
+	nexts = 0;
+	list_for_each_entry(ei, &ctx->extents, list) {
+		pstart_hi |= (ei->e.pstart > UINT32_MAX);
+		if ((ei->e.pstart | ei->e.plen) & ((1U << sbi->blkszbits) - 1))
+			unaligned_data = true;
+		if (pend != ei->e.pstart)
+			pend = EROFS_NULL_ADDR;
+		else
+			pend += ei->e.plen;
+		if (ei->e.length != 1 << lclusterbits) {
+			if (ei->list.next != &ctx->extents ||
+			    ei->e.length > 1 << lclusterbits)
+				lclusterbits = 0;
+		}
+		++nexts;
+	}
+
+	recsz = inode->i_size > UINT32_MAX ? 32 : 16;
+	if (lclusterbits) {
+		if (pend != EROFS_NULL_ADDR)
+			recsz = 4;
+		else if (recsz <= 16 && !pstart_hi)
+			recsz = 8;
+	}
+
+	moff = Z_EROFS_MAP_HEADER_END(inode->inode_isize + inode->xattr_isize);
+	moff = round_up(moff, recsz) -
+		Z_EROFS_MAP_HEADER_START(inode->inode_isize + inode->xattr_isize);
+	metasz = moff + recsz * nexts + 8 * (recsz <= 4);
+	if (!unaligned_data && metasz > EROFS_FULL_INDEXES_SZ(inode))
+		return ERR_PTR(-EAGAIN);
+
+	metabuf = malloc(metasz);
+	if (!metabuf)
+		return ERR_PTR(-ENOMEM);
+	inode->z_logical_clusterbits = lclusterbits;
+	inode->z_extents = nexts;
+	ctx->metacur = metabuf + moff;
+	if (recsz <= 4) {
+		*(__le64 *)ctx->metacur	= cpu_to_le64(pstart);
+		ctx->metacur += sizeof(__le64);
+	}
+
+	nexts = 0;
+	lstart = 0;
+	list_for_each_entry_safe(ei, n, &ctx->extents, list) {
+		struct z_erofs_extent de;
+		u32 fmt, plen;
+
+		plen = ei->e.plen;
+		if (!plen) {
+			plen = inode->fragmentoff;
+			ei->e.pstart = inode->fragmentoff >> 32;
+		} else {
+			fmt = ei->e.raw ? 0 : inode->z_algorithmtype[0] + 1;
+			plen |= fmt << Z_EROFS_EXTENT_PLEN_FMT_BIT;
+			if (ei->e.partial)
+				plen |= Z_EROFS_EXTENT_PLEN_PARTIAL;
+		}
+		de = (struct z_erofs_extent) {
+			.plen = cpu_to_le32(plen),
+			.pstart_lo = cpu_to_le32(ei->e.pstart),
+			.lstart_lo = cpu_to_le32(lstart),
+			.pstart_hi = cpu_to_le32(ei->e.pstart >> 32),
+			.lstart_hi = cpu_to_le32(lstart >> 32),
+		};
+		memcpy(ctx->metacur, &de, recsz);
+		ctx->metacur += recsz;
+		lstart += ei->e.length;
+		list_del(&ei->list);
+		free(ei);
+	}
+	inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+	inode->z_advise |= Z_EROFS_ADVISE_EXTENTS |
+		((ilog2(recsz) - 2) << Z_EROFS_ADVISE_EXTRECSZ_BIT);
+	return metabuf;
+}
+
 static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx)
 {
 	struct erofs_inode *inode = ctx->inode;
+	struct erofs_sb_info *sbi = inode->sbi;
 	struct z_erofs_extent_item *ei, *n;
 	void *metabuf;
 
+	if (erofs_sb_has_48bit(sbi)) {
+		metabuf = z_erofs_write_extents(ctx);
+		if (metabuf != ERR_PTR(-EAGAIN)) {
+			if (IS_ERR(metabuf))
+				return metabuf;
+			goto out;
+		}
+	}
+
+	if (!cfg.c_legacy_compress && !ctx->dedupe &&
+	    inode->z_logical_clusterbits <= 14) {
+		if (inode->z_logical_clusterbits <= 12)
+			inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+		inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
+	} else {
+		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+	}
+
+	if (erofs_sb_has_big_pcluster(sbi)) {
+		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+		if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
+			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+	}
+
 	metabuf = malloc(BLK_ROUND_UP(inode->sbi, inode->i_size) *
 			 sizeof(struct z_erofs_lcluster_index) +
 			 Z_EROFS_LEGACY_MAP_HEADER_SIZE);
@@ -1018,6 +1168,7 @@ static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx)
 		free(ei);
 	}
 	z_erofs_fini_full_indexes(ctx);
+out:
 	z_erofs_write_mapheader(inode, metabuf);
 	return metabuf;
 }
@@ -1075,6 +1226,7 @@ int z_erofs_compress_segment(struct z_erofs_compress_sctx *ctx,
 	int fd = ictx->fd;
 
 	ctx->pstart = pstart;
+	ctx->poff = 0;
 	while (ctx->remaining) {
 		const u64 rx = min_t(u64, ctx->remaining,
 				     Z_EROFS_COMPR_QUEUE_SZ - ctx->tail);
@@ -1310,8 +1462,6 @@ void z_erofs_mt_workfn(struct erofs_work *work, void *tlsp)
 		ret = -ENOMEM;
 		goto out;
 	}
-	sctx->memoff = 0;
-
 	ret = z_erofs_compress_segment(sctx, sctx->seg_idx * cfg.c_mkfs_segment_size,
 				       EROFS_NULL_ADDR);
 
@@ -1480,22 +1630,6 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
 	/* initialize per-file compression setting */
 	inode->z_advise = 0;
 	inode->z_logical_clusterbits = sbi->blkszbits;
-	if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) {
-		if (inode->z_logical_clusterbits <= 12)
-			inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
-		inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
-	} else {
-		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
-	}
-
-	if (erofs_sb_has_big_pcluster(sbi)) {
-		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
-		if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
-			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
-	}
-	if (cfg.c_fragments && !cfg.c_dedupe)
-		inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
-
 #ifndef NDEBUG
 	if (cfg.c_random_algorithms) {
 		while (1) {
@@ -1530,6 +1664,11 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
 	ictx->ccfg = &erofs_ccfg[inode->z_algorithmtype[0]];
 	inode->z_algorithmtype[0] = ictx->ccfg->algorithmtype;
 	inode->z_algorithmtype[1] = 0;
+	ictx->data_unaligned = erofs_sb_has_48bit(sbi) &&
+		cfg.c_max_decompressed_extent_bytes <=
+			z_erofs_get_max_pclustersize(inode);
+	if (cfg.c_fragments && !cfg.c_dedupe && !ictx->data_unaligned)
+		inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
 
 	/*
 	 * Handle tails in advance to avoid writing duplicated
diff --git a/lib/compressor.c b/lib/compressor.c
index 41f49ff..6d8c1c2 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -85,6 +85,17 @@ int erofs_compress_destsize(const struct erofs_compress *c,
 	return c->alg->c->compress_destsize(c, src, srcsize, dst, dstsize);
 }
 
+int erofs_compress(const struct erofs_compress *c,
+		   const void *src, unsigned int srcsize,
+		   void *dst, unsigned int dstcapacity)
+{
+	DBG_BUGON(!c->alg);
+	if (!c->alg->c->compress)
+		return -EOPNOTSUPP;
+
+	return c->alg->c->compress(c, src, srcsize, dst, dstcapacity);
+}
+
 int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c,
 			  char *alg_name, int compression_level, u32 dict_size)
 {
diff --git a/lib/compressor.h b/lib/compressor.h
index 8d322d5..ea2d03d 100644
--- a/lib/compressor.h
+++ b/lib/compressor.h
@@ -26,6 +26,9 @@ struct erofs_compressor {
 	int (*compress_destsize)(const struct erofs_compress *c,
 				 const void *src, unsigned int *srcsize,
 				 void *dst, unsigned int dstsize);
+	int (*compress)(const struct erofs_compress *c,
+			const void *src, unsigned int srcsize,
+			void *dst, unsigned dstcapacity);
 };
 
 struct erofs_algorithm {
@@ -60,6 +63,9 @@ int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c);
 int erofs_compress_destsize(const struct erofs_compress *c,
 			    const void *src, unsigned int *srcsize,
 			    void *dst, unsigned int dstsize);
+int erofs_compress(const struct erofs_compress *c,
+		   const void *src, unsigned int srcsize,
+		   void *dst, unsigned int dstcapacity);
 
 int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c,
 			  char *alg_name, int compression_level, u32 dict_size);
diff --git a/lib/compressor_libzstd.c b/lib/compressor_libzstd.c
index 223806e..feacb85 100644
--- a/lib/compressor_libzstd.c
+++ b/lib/compressor_libzstd.c
@@ -8,6 +8,22 @@
 #include "compressor.h"
 #include "erofs/atomic.h"
 
+static int libzstd_compress(const struct erofs_compress *c,
+			    const void *src, unsigned int srcsize,
+			    void *dst, unsigned dstcapacity)
+{
+	ZSTD_CCtx *cctx = c->private_data;
+	size_t csize;
+
+	csize = ZSTD_compress2(cctx, dst, dstcapacity, src, srcsize);
+	if (ZSTD_isError(csize)) {
+		if (ZSTD_getErrorCode(csize) == ZSTD_error_dstSize_tooSmall)
+			return -ENOSPC;
+		return -EFAULT;
+	}
+	return csize;
+}
+
 static int libzstd_compress_destsize(const struct erofs_compress *c,
 				     const void *src, unsigned int *srcsize,
 				     void *dst, unsigned int dstsize)
@@ -139,5 +155,6 @@ const struct erofs_compressor erofs_compressor_libzstd = {
 	.exit = compressor_libzstd_exit,
 	.setlevel = erofs_compressor_libzstd_setlevel,
 	.setdictsize = erofs_compressor_libzstd_setdictsize,
+	.compress = libzstd_compress,
 	.compress_destsize = libzstd_compress_destsize,
 };
-- 
2.43.5



More information about the Linux-erofs mailing list