[PATCH] erofs-utils: mkfs: fix extent-based deduplication

Gao Xiang hsiangkao at linux.alibaba.com
Sat Jul 12 03:43:24 AEST 2025


- `pstart` should be `erofs_off_t` instead of `erofs_blk_t`; otherwise,
  its upper 32 bits may be truncated;

- If compression falls back to an uncompressed inode due to
  insufficient space savings, dedupe-ext records should also be revoked.

Reported-by: Axel Fontaine <axel at axelfontaine.com>
Fixes: cf04b8b78f09 ("erofs-utils: mkfs: implement extent-based deduplication")
Closes: https://github.com/erofs/erofs-utils/issues/23
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 include/erofs/dedupe.h |  2 +-
 lib/compress.c         | 15 +++++++++------
 lib/dedupe_ext.c       | 21 +++++++++++----------
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h
index ffb00a50..f9caa611 100644
--- a/include/erofs/dedupe.h
+++ b/include/erofs/dedupe.h
@@ -34,7 +34,7 @@ void z_erofs_dedupe_exit(void);
 
 int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
 			      u64 hash);
-erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+erofs_off_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
 			u8 *encoded, unsigned int size, bool raw, u64 *hash);
 void z_erofs_dedupe_ext_commit(bool drop);
 int z_erofs_dedupe_ext_init(void);
diff --git a/lib/compress.c b/lib/compress.c
index 6f65993c..b16f5d1d 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -403,7 +403,7 @@ static int write_uncompressed_block(struct z_erofs_compress_sctx *ctx,
 	memcpy(dst, ctx->queue + ctx->head + rightpart, count - rightpart);
 
 	if (ctx->membuf) {
-		erofs_dbg("Writing %u uncompressed data of %s", count,
+		erofs_dbg("Recording %u uncompressed data of %s", count,
 			  inode->i_srcpath);
 		memcpy(ctx->membuf + ctx->memoff, dst, erofs_blksiz(sbi));
 		ctx->memoff += erofs_blksiz(sbi);
@@ -705,7 +705,7 @@ frag_packing:
 
 		/* write compressed data */
 		if (ctx->membuf) {
-			erofs_dbg("Writing %u compressed data of %u bytes of %s",
+			erofs_dbg("Recording %u compressed data of %u bytes of %s",
 				  e->length, e->plen, inode->i_srcpath);
 
 			memcpy(ctx->membuf + ctx->memoff,
@@ -1207,6 +1207,7 @@ int erofs_commit_compressed_file(struct z_erofs_compress_ictx *ictx,
 	/* estimate if data compression saves space or not */
 	if (!inode->fragment_size && ptotal + inode->idata_size +
 	    legacymetasize >= inode->i_size) {
+		z_erofs_dedupe_ext_commit(true);
 		z_erofs_dedupe_commit(true);
 		ret = -ENOSPC;
 		goto err_free_meta;
@@ -1411,7 +1412,7 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
 	bool dedupe_ext = cfg.c_fragments;
 	erofs_off_t off = 0;
 	int ret = 0, ret2;
-	erofs_blk_t dupb;
+	erofs_off_t dpo;
 	u64 hash;
 
 	list_for_each_entry_safe(ei, n, &sctx->extents, list) {
@@ -1429,10 +1430,10 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
 			continue;
 
 		if (dedupe_ext) {
-			dupb = z_erofs_dedupe_ext_match(sbi, sctx->membuf + off,
+			dpo = z_erofs_dedupe_ext_match(sbi, sctx->membuf + off,
 						ei->e.plen, ei->e.raw, &hash);
-			if (dupb != EROFS_NULL_ADDR) {
-				ei->e.pstart = dupb;
+			if (dpo) {
+				ei->e.pstart = dpo;
 				sctx->pstart -= ei->e.plen;
 				off += ei->e.plen;
 				ictx->dedupe = true;
@@ -1444,6 +1445,8 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
 				continue;
 			}
 		}
+		erofs_dbg("Writing %u %scompressed data of %s to %llu", ei->e.length,
+			  ei->e.raw ? "un" : "", ictx->inode->i_srcpath, ei->e.pstart);
 		ret2 = erofs_dev_write(sbi, sctx->membuf + off, ei->e.pstart,
 				       ei->e.plen);
 		off += ei->e.plen;
diff --git a/lib/dedupe_ext.c b/lib/dedupe_ext.c
index c2c5ca94..d7a9b737 100644
--- a/lib/dedupe_ext.c
+++ b/lib/dedupe_ext.c
@@ -32,14 +32,14 @@ int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
 	return 0;
 }
 
-erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+erofs_off_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
 				     u8 *encoded, unsigned int len,
 				     bool raw, u64 *hash)
 {
 	struct z_erofs_dedupe_ext_item *item;
 	struct list_head *p;
 	u64 _xxh64;
-	char *memb;
+	char *memb = NULL;
 	int ret;
 
 	*hash = _xxh64 = xxh64(encoded, len, 0);
@@ -47,19 +47,20 @@ erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
 	list_for_each_entry(item, p, list) {
 		if (item->xxh64 == _xxh64 && item->e.plen == len &&
 		    item->e.raw == raw) {
-			memb = malloc(len);
-			if (!memb)
-				break;
-			ret = erofs_dev_read(sbi, 0, memb, item->e.pstart, len);
-			if (ret < 0 || memcmp(memb, encoded, len)) {
-				free(memb);
-				break;
+			if (!memb) {
+				memb = malloc(len);
+				if (!memb)
+					break;
 			}
+			ret = erofs_dev_read(sbi, 0, memb, item->e.pstart, len);
+			if (ret < 0 || memcmp(memb, encoded, len))
+				continue;
 			free(memb);
 			return item->e.pstart;
 		}
 	}
-	return EROFS_NULL_ADDR;
+	free(memb);
+	return 0;
 }
 
 void z_erofs_dedupe_ext_commit(bool drop)
-- 
2.43.5



More information about the Linux-erofs mailing list