[PATCH 5/5] erofs-utils: dedupe more in the packed inode if possible

Gao Xiang hsiangkao at linux.alibaba.com
Sun Mar 5 06:58:12 AEDT 2023


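In addition to just comparing the in-memory data, read back the data
already written to the packed file and compare it against the input
file, so that more tail data can be deduplicated.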

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 lib/compress.c  |  3 ++-
 lib/fragments.c | 25 ++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index f38c795..afa3bf7 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -371,7 +371,8 @@ static bool z_erofs_fixup_deduped_fragment(struct z_erofs_vle_compress_ctx *ctx,
 		  inode->fragment_size, inode->fragmentoff | 0ULL);
 
 	/* it's the end */
-	ctx->head += newsize;
+	DBG_BUGON(ctx->tail - ctx->head + ctx->remaining != newsize);
+	ctx->head = ctx->tail;
 	ctx->remaining = 0;
 	return true;
 }
diff --git a/lib/fragments.c b/lib/fragments.c
index 30e9ba6..0366c82 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -55,6 +55,7 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
 	struct list_head *head;
 	u8 *data;
 	unsigned int length, e2, deduped;
+	erofs_off_t pos;
 	int ret;
 
 	head = &dupli_frags[FRAGMENT_HASH(crc)];
@@ -112,9 +113,31 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
 		goto out;
 
 	DBG_BUGON(di->length < deduped);
+	pos = di->pos + di->length - deduped;
+	/* let's read more to dedupe as long as we can */
+	if (deduped == di->length) {
+		fflush(packedfile);
+
+		while(deduped < inode->i_size && pos) {
+			char buf[2][16384];
+			unsigned int sz = min_t(unsigned int, pos,
+						sizeof(buf[0]));
+
+			if (pread(fileno(packedfile), buf[0], sz,
+				  pos - sz) != sz)
+				break;
+			if (pread(fd, buf[1], sz,
+				  inode->i_size - deduped - sz) != sz)
+				break;
 
+			if (memcmp(buf[0], buf[1], sz))
+				break;
+			pos -= sz;
+			deduped += sz;
+		}
+	}
 	inode->fragment_size = deduped;
-	inode->fragmentoff = di->pos + di->length - deduped;
+	inode->fragmentoff = pos;
 
 	erofs_dbg("Dedupe %u tail data at %llu", inode->fragment_size,
 		  inode->fragmentoff | 0ULL);
-- 
2.24.4



More information about the Linux-erofs mailing list