[PATCH] erofs-utils: mkfs: fix inefficient fragment deduplication

Gao Xiang hsiangkao at linux.alibaba.com
Wed Jan 22 01:34:00 AEDT 2025


Currently, long fragment comparisons could cause suboptimal results,
leading to final image sizes still larger than expected:
 _________________________________________________________________________
|______ Testset _____|        Vanilla          |          After           | Command Line
|  CoreOS [1]        |   802107392 (765 MiB)   |   687501312 (656 MiB)    | -zlzma,6 -Eall-fragments,fragdedupe=inode -C131072
|____________________|__ 771715072 (736 MiB) __|__ 658485248 (628 MiB)  __| -zlzma,6 -Eall-fragments,fragdedupe=inode -C1048576
|  Fedora KIWI [2]   |_ 2584076288 (2465 MiB) _|_ 2550837248 (2433 MiB) __| -zlzma,6 -Eall-fragments,fragdedupe=inode -C1048576
|____________________|_ 2843598848 (2712 MiB) _|_ 2810359808 (2681 MiB) __| (Fedora-KDE-Desktop-Live-Rawhide.0.x86_64.iso)

Almost all images that use `-Eall-fragments` could benefit from this.

[1] https://builds.coreos.fedoraproject.org/prod/streams/stable/builds/41.20241215.3.0/x86_64/fedora-coreos-41.20241215.3.0-live.x86_64.iso
[2] https://pagure.io/fedora-kiwi-descriptions.git
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 lib/compress.c  |   3 +-
 lib/fragments.c | 123 ++++++++++++++++++++++--------------------------
 2 files changed, 57 insertions(+), 69 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index eb3190d..5c9c051 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -1534,7 +1534,8 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
 
 		if (cfg.c_fragdedupe == FRAGDEDUPE_INODE &&
 		    inode->fragment_size < inode->i_size) {
-			erofs_dbg("Discard the sub-inode tail fragment @ nid %llu", inode->nid);
+			erofs_dbg("Discard the sub-inode tail fragment of %s",
+				  inode->i_srcpath);
 			inode->fragment_size = 0;
 		}
 	}
diff --git a/lib/fragments.c b/lib/fragments.c
index f662cbc..32ac6f5 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -33,6 +33,7 @@ struct erofs_fragment_dedupe_item {
 	u8			data[];
 };
 
+#define EROFS_FRAGMENT_INMEM_SZ_MAX	EROFS_CONFIG_COMPR_MAX_SZ
 #define EROFS_TOF_HASHLEN		16
 
 #define FRAGMENT_HASHSIZE		65536
@@ -62,96 +63,82 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
 	struct erofs_packed_inode *epi = inode->sbi->packedinode;
 	struct erofs_fragment_dedupe_item *cur, *di = NULL;
 	struct list_head *head = &epi->hash[FRAGMENT_HASH(crc)];
+	unsigned int s1, e1;
+	erofs_off_t deduped;
 	u8 *data;
-	unsigned int length, e2, deduped;
-	erofs_off_t pos;
 	int ret;
 
 	if (list_empty(head))
 		return 0;
 
-	/* XXX: no need to read so much for smaller? */
-	if (inode->i_size < EROFS_CONFIG_COMPR_MAX_SZ)
-		length = inode->i_size;
-	else
-		length = EROFS_CONFIG_COMPR_MAX_SZ;
-
-	data = malloc(length);
+	s1 = min_t(u64, EROFS_FRAGMENT_INMEM_SZ_MAX, inode->i_size);
+	data = malloc(s1);
 	if (!data)
 		return -ENOMEM;
 
-	if (erofs_lseek64(fd, inode->i_size - length, SEEK_SET) < 0) {
-		ret = -errno;
-		goto out;
-	}
-
-	ret = read(fd, data, length);
-	if (ret != length) {
-		ret = -errno;
-		goto out;
+	ret = pread(fd, data, s1, inode->i_size - s1);
+	if (ret != s1) {
+		free(data);
+		return -errno;
 	}
-
-	DBG_BUGON(length <= EROFS_TOF_HASHLEN);
-	e2 = length - EROFS_TOF_HASHLEN;
+	e1 = s1 - EROFS_TOF_HASHLEN;
 	deduped = 0;
-
 	list_for_each_entry(cur, head, list) {
-		unsigned int e1, mn, i = 0;
+		unsigned int e2, mn;
+		erofs_off_t i, pos;
 
 		DBG_BUGON(cur->length <= EROFS_TOF_HASHLEN);
-		e1 = cur->length - EROFS_TOF_HASHLEN;
+		e2 = cur->length - EROFS_TOF_HASHLEN;
 
-		if (memcmp(cur->data + e1, data + e2, EROFS_TOF_HASHLEN))
+		if (memcmp(data + e1, cur->data + e2, EROFS_TOF_HASHLEN))
 			continue;
 
+		i = 0;
 		mn = min(e1, e2);
-		while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1])
+		while (i < mn && cur->data[e2 - i - 1] == data[e1 - i - 1])
 			++i;
 
-		if (!di || i + EROFS_TOF_HASHLEN > deduped) {
-			deduped = i + EROFS_TOF_HASHLEN;
-			di = cur;
+		i += EROFS_TOF_HASHLEN;
+		if (i >= s1) {		/* full short match */
+			DBG_BUGON(i > s1);
+			pos = cur->pos + cur->length - s1;
+			while (i < inode->i_size && pos) {
+				char buf[2][16384];
+				unsigned int sz;
+
+				sz = min_t(u64, pos, sizeof(buf[0]));
+				sz = min_t(u64, sz, inode->i_size - i);
+				if (pread(fileno(epi->file), buf[0], sz,
+					  pos - sz) != sz)
+					break;
+				if (pread(fd, buf[1], sz,
+					  inode->i_size - i - sz) != sz)
+					break;
 
-			/* full match */
-			if (i == e2)
-				break;
+				if (memcmp(buf[0], buf[1], sz))
+					break;
+				pos -= sz;
+				i += sz;
+			}
 		}
-	}
-	if (!di)
-		goto out;
-
-	DBG_BUGON(di->length < deduped);
-	pos = di->pos + di->length - deduped;
-	/* let's read more to dedupe as long as we can */
-	if (deduped == di->length) {
-		fflush(epi->file);
-
-		while(deduped < inode->i_size && pos) {
-			char buf[2][16384];
-			unsigned int sz = min_t(unsigned int, pos,
-						sizeof(buf[0]));
-
-			if (pread(fileno(epi->file), buf[0], sz,
-				  pos - sz) != sz)
-				break;
-			if (pread(fd, buf[1], sz,
-				  inode->i_size - deduped - sz) != sz)
-				break;
 
-			if (memcmp(buf[0], buf[1], sz))
-				break;
-			pos -= sz;
-			deduped += sz;
-		}
+		if (i <= deduped)
+			continue;
+		di = cur;
+		deduped = i;
+		if (deduped == inode->i_size)
+			break;
 	}
-	inode->fragment_size = deduped;
-	inode->fragmentoff = pos;
 
-	erofs_dbg("Dedupe %llu tail data at %llu", inode->fragment_size | 0ULL,
-		  inode->fragmentoff | 0ULL);
-out:
 	free(data);
-	return ret;
+	if (deduped) {
+		DBG_BUGON(!di);
+		inode->fragment_size = deduped;
+		inode->fragmentoff = di->pos + di->length - deduped;
+		erofs_dbg("Dedupe %llu tail data at %llu",
+			  inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL);
+	}
+	return 0;
 }
 
 int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc)
@@ -186,10 +173,10 @@ static int z_erofs_fragments_dedupe_insert(struct list_head *hash, void *data,
 
 	if (len <= EROFS_TOF_HASHLEN)
 		return 0;
-	if (len > EROFS_CONFIG_COMPR_MAX_SZ) {
-		data += len - EROFS_CONFIG_COMPR_MAX_SZ;
-		pos += len - EROFS_CONFIG_COMPR_MAX_SZ;
-		len = EROFS_CONFIG_COMPR_MAX_SZ;
+	if (len > EROFS_FRAGMENT_INMEM_SZ_MAX) {
+		data += len - EROFS_FRAGMENT_INMEM_SZ_MAX;
+		pos += len - EROFS_FRAGMENT_INMEM_SZ_MAX;
+		len = EROFS_FRAGMENT_INMEM_SZ_MAX;
 	}
 	di = malloc(sizeof(*di) + len);
 	if (!di)
-- 
2.43.5



More information about the Linux-erofs mailing list