[PATCH 3/3] erofs-utils: mkfs: implement extent-based deduplication
Gao Xiang
hsiangkao at linux.alibaba.com
Thu Apr 3 19:14:03 AEDT 2025
Currently, only rolling-hash deduplication could be used for
compressed data, and it is still single-threaded for now.
Before applying multi-threaded compression to that, let's allow
extent-based compressed data deduplication if `-Efragments` is on.
This feature will only work if multi-threaded compression is active.
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
include/erofs/dedupe.h | 8 ++++
lib/Makefile.am | 2 +-
lib/compress.c | 63 +++++++++++++++++---------
lib/dedupe_ext.c | 100 +++++++++++++++++++++++++++++++++++++++++
mkfs/main.c | 10 +++++
5 files changed, 162 insertions(+), 21 deletions(-)
create mode 100644 lib/dedupe_ext.c
diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h
index 1af08e3..ffb00a5 100644
--- a/include/erofs/dedupe.h
+++ b/include/erofs/dedupe.h
@@ -32,6 +32,14 @@ void z_erofs_dedupe_commit(bool drop);
int z_erofs_dedupe_init(unsigned int wsiz);
void z_erofs_dedupe_exit(void);
+int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
+ u64 hash);
+erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+ u8 *encoded, unsigned int size, bool raw, u64 *hash);
+void z_erofs_dedupe_ext_commit(bool drop);
+int z_erofs_dedupe_ext_init(void);
+void z_erofs_dedupe_ext_exit(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 9cddc92..bdc74ad 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -35,7 +35,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
fragments.c dedupe.c uuid_unparse.c uuid.c tar.c \
- block_list.c rebuild.c diskbuf.c bitops.c
+ block_list.c rebuild.c diskbuf.c bitops.c dedupe_ext.c
liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
if ENABLE_LZ4
diff --git a/lib/compress.c b/lib/compress.c
index 50d155e..cc7dce0 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -49,6 +49,7 @@ struct z_erofs_compress_ictx { /* inode context */
u32 tof_chksum;
bool fix_dedupedfrag;
bool fragemitted;
+ bool dedupe;
/* fields for write indexes */
u8 *metacur;
@@ -337,10 +338,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_compress_sctx *ctx)
ei->e.partial = true;
ei->e.length -= delta;
}
-
- /* fall back to noncompact indexes for deduplication */
- inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B;
- inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+ ctx->ictx->dedupe = true;
erofs_sb_set_dedupe(sbi);
sbi->saved_by_deduplication += dctx.e.plen;
@@ -1001,9 +999,25 @@ static void z_erofs_write_mapheader(struct erofs_inode *inode,
static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx)
{
struct erofs_inode *inode = ctx->inode;
+ struct erofs_sb_info *sbi = inode->sbi;
struct z_erofs_extent_item *ei, *n;
void *metabuf;
+ if (!cfg.c_legacy_compress && !ctx->dedupe &&
+ inode->z_logical_clusterbits <= 14) {
+ if (inode->z_logical_clusterbits <= 12)
+ inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+ inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
+ } else {
+ inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+ }
+
+ if (erofs_sb_has_big_pcluster(sbi)) {
+ inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+ if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
+ inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+ }
+
metabuf = malloc(BLK_ROUND_UP(inode->sbi, inode->i_size) *
sizeof(struct z_erofs_lcluster_index) +
Z_EROFS_LEGACY_MAP_HEADER_SIZE);
@@ -1170,6 +1184,7 @@ int erofs_commit_compressed_file(struct z_erofs_compress_ictx *ictx,
ret = -ENOSPC;
goto err_free_meta;
}
+ z_erofs_dedupe_ext_commit(false);
z_erofs_dedupe_commit(false);
if (!ictx->fragemitted)
@@ -1345,8 +1360,11 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
{
struct z_erofs_extent_item *ei, *n;
struct erofs_sb_info *sbi = ictx->inode->sbi;
+ bool dedupe_ext = cfg.c_fragments;
erofs_off_t off = 0;
int ret = 0, ret2;
+ erofs_blk_t dupb;
+ u64 hash;
list_for_each_entry_safe(ei, n, &sctx->extents, list) {
list_del(&ei->list);
@@ -1361,13 +1379,30 @@ int z_erofs_merge_segment(struct z_erofs_compress_ictx *ictx,
/* skip write data but leave blkaddr for inline fallback */
if (ei->e.inlined || !ei->e.plen)
continue;
+
+ if (dedupe_ext) {
+ dupb = z_erofs_dedupe_ext_match(sbi, sctx->membuf + off,
+ ei->e.plen, ei->e.raw, &hash);
+ if (dupb != EROFS_NULL_ADDR) {
+ ei->e.pstart = dupb;
+ sctx->pstart -= ei->e.plen;
+ off += ei->e.plen;
+ ictx->dedupe = true;
+ erofs_sb_set_dedupe(sbi);
+ sbi->saved_by_deduplication += ei->e.plen;
+ erofs_dbg("Dedupe %u %scompressed data to %llu of %u bytes",
+ ei->e.length, ei->e.raw ? "un" : "",
+ ei->e.pstart | 0ULL, ei->e.plen);
+ continue;
+ }
+ }
ret2 = erofs_dev_write(sbi, sctx->membuf + off, ei->e.pstart,
ei->e.plen);
off += ei->e.plen;
- if (ret2) {
+ if (ret2)
ret = ret2;
- continue;
- }
+ else if (dedupe_ext)
+ z_erofs_dedupe_ext_insert(&ei->e, hash);
}
free(sctx->membuf);
sctx->membuf = NULL;
@@ -1543,19 +1578,6 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
/* initialize per-file compression setting */
inode->z_advise = 0;
inode->z_logical_clusterbits = sbi->blkszbits;
- if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) {
- if (inode->z_logical_clusterbits <= 12)
- inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
- inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
- } else {
- inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
- }
-
- if (erofs_sb_has_big_pcluster(sbi)) {
- inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
- if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
- inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
- }
if (cfg.c_fragments && !cfg.c_dedupe)
inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
@@ -1615,6 +1637,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
init_list_head(&ictx->extents);
ictx->fix_dedupedfrag = false;
ictx->fragemitted = false;
+ ictx->dedupe = false;
if (all_fragments && !inode->fragment_size) {
ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tof_chksum);
diff --git a/lib/dedupe_ext.c b/lib/dedupe_ext.c
new file mode 100644
index 0000000..b6603e6
--- /dev/null
+++ b/lib/dedupe_ext.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/dedupe.h"
+#include "liberofs_xxhash.h"
+#include <stdlib.h>
+
+struct z_erofs_dedupe_ext_item {
+ struct list_head list;
+ struct z_erofs_dedupe_ext_item *revoke;
+ struct z_erofs_inmem_extent e;
+ u64 xxh64;
+};
+
+static struct list_head dupl_ext[65536];
+static struct z_erofs_dedupe_ext_item *revoke_list;
+
+int z_erofs_dedupe_ext_insert(struct z_erofs_inmem_extent *e,
+ u64 hash)
+{
+ struct z_erofs_dedupe_ext_item *item;
+ struct list_head *p;
+
+ item = malloc(sizeof(struct z_erofs_dedupe_ext_item));
+ if (!item)
+ return -ENOMEM;
+ item->e = *e;
+ item->xxh64 = hash;
+
+ p = &dupl_ext[hash & (ARRAY_SIZE(dupl_ext) - 1)];
+ list_add_tail(&item->list, p);
+ item->revoke = revoke_list;
+ revoke_list = item;
+ return 0;
+}
+
+erofs_blk_t z_erofs_dedupe_ext_match(struct erofs_sb_info *sbi,
+ u8 *encoded, unsigned int len,
+ bool raw, u64 *hash)
+{
+ struct z_erofs_dedupe_ext_item *item;
+ struct list_head *p;
+ u64 _xxh64;
+ char *memb;
+ int ret;
+
+ *hash = _xxh64 = xxh64(encoded, len, 0);
+ p = &dupl_ext[_xxh64 & (ARRAY_SIZE(dupl_ext) - 1)];
+ list_for_each_entry(item, p, list) {
+ if (item->xxh64 == _xxh64 && item->e.plen == len &&
+ item->e.raw == raw) {
+ memb = malloc(len);
+ if (!memb)
+ break;
+ ret = erofs_dev_read(sbi, 0, memb, item->e.pstart, len);
+ free(memb);
+ if (ret < 0 || memcmp(memb, encoded, len))
+ break;
+ return item->e.pstart;
+ }
+ }
+ return EROFS_NULL_ADDR;
+}
+
+void z_erofs_dedupe_ext_commit(bool drop)
+{
+ if (drop) {
+ struct z_erofs_dedupe_ext_item *item, *n;
+
+ for (item = revoke_list; item; item = n) {
+ n = item->revoke;
+ list_del(&item->list);
+ free(item);
+ }
+ }
+ revoke_list = NULL;
+}
+
+int z_erofs_dedupe_ext_init(void)
+{
+ struct list_head *p;
+
+ for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p)
+ init_list_head(p);
+ return 0;
+}
+
+void z_erofs_dedupe_ext_exit(void)
+{
+ struct z_erofs_dedupe_ext_item *item, *n;
+ struct list_head *p;
+
+ if (!dupl_ext[0].next)
+ return;
+ z_erofs_dedupe_commit(true);
+ for (p = dupl_ext; p < dupl_ext + ARRAY_SIZE(dupl_ext); ++p) {
+ list_for_each_entry_safe(item, n, p, list) {
+ list_del(&item->list);
+ free(item);
+ }
+ }
+}
diff --git a/mkfs/main.c b/mkfs/main.c
index b2396d0..00bd21a 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -1374,6 +1374,15 @@ int main(int argc, char **argv)
}
}
+ if (cfg.c_fragments) {
+ err = z_erofs_dedupe_ext_init();
+ if (err) {
+ erofs_err("failed to initialize extent deduplication: %s",
+ erofs_strerror(err));
+ goto exit;
+ }
+ }
+
if (cfg.c_chunkbits) {
err = erofs_blob_init(cfg.c_blobdev_path, 1 << cfg.c_chunkbits);
if (err)
@@ -1486,6 +1495,7 @@ exit:
erofs_iput(root);
z_erofs_compress_exit();
z_erofs_dedupe_exit();
+ z_erofs_dedupe_ext_exit();
blklst = erofs_blocklist_close();
if (blklst)
fclose(blklst);
--
2.43.5
More information about the Linux-erofs
mailing list