[PATCH 3/3] erofs-utils: mkfs: implement extent-based deduplication

Gao Xiang hsiangkao at linux.alibaba.com
Thu Apr 3 19:14:03 AEDT 2025


Currently, only rolling-hash deduplication could be used for
compressed data, and it is still single-threaded for now.

Before applying multi-threaded compression to that, let's allow
extent-based compressed data deduplication if `-Efragments` is on.

This feature will only work if multi-threaded compression is active.

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 include/erofs/dedupe.h |   8 ++++
 lib/Makefile.am        |   2 +-
 lib/compress.c         |  63 +++++++++++++++++---------
 lib/dedupe_ext.c       | 100 +++++++++++++++++++++++++++++++++++++++++
 mkfs/main.c            |  10 +++++
 5 files changed, 162 insertions(+), 21 deletions(-)
 create mode 100644 lib/dedupe_ext.c

diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h
index 1af08e3..ffb00a5 100644
--- a/include/erofs/dedupe.h
+++ b/include/erofs/dedupe.h
@@ -32,6 +32,14 @@ void z_erofs_dedupe_commit(bool drop);
 int z_erofs_dedupe_init(unsigned int wsiz);
 void z_erofs_dedupe_exit(void);
 
+int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
+			      u64 hash);
+erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+			u8 *encoded, unsigned int size, bool raw, u64 *hash);
+void z_erofs_dedupe_ext_commit(bool drop);
+int z_erofs_dedupe_ext_init(void);
+void z_erofs_dedupe_ext_exit(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 9cddc92..bdc74ad 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -35,7 +35,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
 		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
 		      fragments.c dedupe.c uuid_unparse.c uuid.c tar.c \
-		      block_list.c rebuild.c diskbuf.c bitops.c
+		      block_list.c rebuild.c diskbuf.c bitops.c dedupe_ext.c
 
 liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
 if ENABLE_LZ4
diff --git a/lib/compress.c b/lib/compress.c
index 50d155e..cc7dce0 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -49,6 +49,7 @@ struct z_erofs_compress_ictx {		/* inode context */
 	u32 tof_chksum;
 	bool fix_dedupedfrag;
 	bool fragemitted;
+	bool dedupe;
 
 	/* fields for write indexes */
 	u8 *metacur;
@@ -337,10 +338,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_compress_sctx *ctx)
 			ei->e.partial = true;
 			ei->e.length -= delta;
 		}
-
-		/* fall back to noncompact indexes for deduplication */
-		inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B;
-		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+		ctx->ictx->dedupe = true;
 		erofs_sb_set_dedupe(sbi);
 
 		sbi->saved_by_deduplication += dctx.e.plen;
@@ -1001,9 +999,25 @@ static void z_erofs_write_mapheader(struct erofs_inode *inode,
 static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx)
 {
 	struct erofs_inode *inode = ctx->inode;
+	struct erofs_sb_info *sbi = inode->sbi;
 	struct z_erofs_extent_item *ei, *n;
 	void *metabuf;
 
+	if (!cfg.c_legacy_compress && !ctx->dedupe &&
+	    inode->z_logical_clusterbits <= 14) {
+		if (inode->z_logical_clusterbits <= 12)
+			inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+		inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
+	} else {
+		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+	}
+
+	if (erofs_sb_has_big_pcluster(sbi)) {
+		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+		if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
+			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+	}
+
 	metabuf = malloc(BLK_ROUND_UP(inode->sbi, inode->i_size) *
 			 sizeof(struct z_erofs_lcluster_index) +
 			 Z_EROFS_LEGACY_MAP_HEADER_SIZE);
@@ -1170,6 +1184,7 @@ int erofs_commit_compressed_file(struct z_erofs_compress_ictx *ictx,
 		ret = -ENOSPC;
 		goto err_free_meta;
 	}
+	z_erofs_dedupe_ext_commit(false);
 	z_erofs_dedupe_commit(false);
 
 	if (!ictx->fragemitted)
@@ -1345,8 +1360,11 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
 {
 	struct z_erofs_extent_item *ei, *n;
 	struct erofs_sb_info *sbi = ictx->inode->sbi;
+	bool dedupe_ext = cfg.c_fragments;
 	erofs_off_t off = 0;
 	int ret = 0, ret2;
+	erofs_blk_t dupb;
+	u64 hash;
 
 	list_for_each_entry_safe(ei, n, &sctx->extents, list) {
 		list_del(&ei->list);
@@ -1361,13 +1379,30 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
 		/* skip write data but leave blkaddr for inline fallback */
 		if (ei->e.inlined || !ei->e.plen)
 			continue;
+
+		if (dedupe_ext) {
+			dupb = z_erofs_dedupe_ext_match(sbi, sctx->membuf + off,
+						ei->e.plen, ei->e.raw, &hash);
+			if (dupb != EROFS_NULL_ADDR) {
+				ei->e.pstart = dupb;
+				sctx->pstart -= ei->e.plen;
+				off += ei->e.plen;
+				ictx->dedupe = true;
+				erofs_sb_set_dedupe(sbi);
+				sbi->saved_by_deduplication += ei->e.plen;
+				erofs_dbg("Dedupe %u %scompressed data to %llu of %u bytes",
+					  ei->e.length, ei->e.raw ? "un" : "",
+					  ei->e.pstart | 0ULL, ei->e.plen);
+				continue;
+			}
+		}
 		ret2 = erofs_dev_write(sbi, sctx->membuf + off, ei->e.pstart,
 				       ei->e.plen);
 		off += ei->e.plen;
-		if (ret2) {
+		if (ret2)
 			ret = ret2;
-			continue;
-		}
+		else if (dedupe_ext)
+			z_erofs_dedupe_ext_insert(&ei->e, hash);
 	}
 	free(sctx->membuf);
 	sctx->membuf = NULL;
@@ -1543,19 +1578,6 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
 	/* initialize per-file compression setting */
 	inode->z_advise = 0;
 	inode->z_logical_clusterbits = sbi->blkszbits;
-	if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) {
-		if (inode->z_logical_clusterbits <= 12)
-			inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
-		inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
-	} else {
-		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
-	}
-
-	if (erofs_sb_has_big_pcluster(sbi)) {
-		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
-		if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
-			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
-	}
 	if (cfg.c_fragments && !cfg.c_dedupe)
 		inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
 
@@ -1615,6 +1637,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
 	init_list_head(&ictx->extents);
 	ictx->fix_dedupedfrag = false;
 	ictx->fragemitted = false;
+	ictx->dedupe = false;
 
 	if (all_fragments && !inode->fragment_size) {
 		ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tof_chksum);
diff --git a/lib/dedupe_ext.c b/lib/dedupe_ext.c
new file mode 100644
index 0000000..b6603e6
--- /dev/null
+++ b/lib/dedupe_ext.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/dedupe.h"
+#include "liberofs_xxhash.h"
+#include <stdlib.h>
+
+struct z_erofs_dedupe_ext_item {
+	struct list_head list;
+	struct z_erofs_dedupe_ext_item *revoke;
+	struct z_erofs_inmem_extent e;
+	u64		xxh64;
+};
+
+static struct list_head dupl_ext[65536];
+static struct z_erofs_dedupe_ext_item *revoke_list;
+
+int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
+			      u64 hash)
+{
+	struct z_erofs_dedupe_ext_item *item;
+	struct list_head *p;
+
+	item = malloc(sizeof(struct z_erofs_dedupe_ext_item));
+	if (!item)
+		return -ENOMEM;
+	item->e = *e;
+	item->xxh64 = hash;
+
+	p = &dupl_ext[hash & (ARRAY_SIZE(dupl_ext) - 1)];
+	list_add_tail(&item->list, p);
+	item->revoke = revoke_list;
+	revoke_list = item;
+	return 0;
+}
+
+erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+				     u8 *encoded, unsigned int len,
+				     bool raw, u64 *hash)
+{
+	struct z_erofs_dedupe_ext_item *item;
+	struct list_head *p;
+	u64 _xxh64;
+	char *memb;
+	int ret;
+
+	*hash = _xxh64 = xxh64(encoded, len, 0);
+	p = &dupl_ext[_xxh64 & (ARRAY_SIZE(dupl_ext) - 1)];
+	list_for_each_entry(item, p, list) {
+		if (item->xxh64 == _xxh64 && item->e.plen == len &&
+		    item->e.raw == raw) {
+			memb = malloc(len);
+			if (!memb)
+				break;
+			ret = erofs_dev_read(sbi, 0, memb, item->e.pstart, len);
+			free(memb);
+			if (ret < 0 || memcmp(memb, encoded, len))
+				break;
+			return item->e.pstart;
+		}
+	}
+	return EROFS_NULL_ADDR;
+}
+
+void z_erofs_dedupe_ext_commit(bool drop)
+{
+	if (drop) {
+		struct z_erofs_dedupe_ext_item *item, *n;
+
+		for (item = revoke_list; item; item = n) {
+			n = item->revoke;
+			list_del(&item->list);
+			free(item);
+		}
+	}
+	revoke_list = NULL;
+}
+
+int z_erofs_dedupe_ext_init(void)
+{
+	struct list_head *p;
+
+	for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p)
+		init_list_head(p);
+	return 0;
+}
+
+void z_erofs_dedupe_ext_exit(void)
+{
+	struct z_erofs_dedupe_ext_item *item, *n;
+	struct list_head *p;
+
+	if (!dupl_ext[0].next)
+		return;
+	z_erofs_dedupe_commit(true);
+	for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p) {
+		list_for_each_entry_safe(item, n, p, list) {
+			list_del(&item->list);
+			free(item);
+		}
+	}
+}
diff --git a/mkfs/main.c b/mkfs/main.c
index b2396d0..00bd21a 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -1374,6 +1374,15 @@ int main(int argc, char **argv)
 		}
 	}
 
+	if (cfg.c_fragments) {
+		err = z_erofs_dedupe_ext_init();
+		if (err) {
+			erofs_err("failed to initialize extent deduplication: %s",
+				  erofs_strerror(err));
+			goto exit;
+		}
+	}
+
 	if (cfg.c_chunkbits) {
 		err = erofs_blob_init(cfg.c_blobdev_path, 1 << cfg.c_chunkbits);
 		if (err)
@@ -1486,6 +1495,7 @@ exit:
 		erofs_iput(root);
 	z_erofs_compress_exit();
 	z_erofs_dedupe_exit();
+	z_erofs_dedupe_ext_exit();
 	blklst = erofs_blocklist_close();
 	if (blklst)
 		fclose(blklst);
-- 
2.43.5



More information about the Linux-erofs mailing list