[PATCH v6] erofs-utils: mkfs: support fragment deduplication

Gao Xiang hsiangkao at linux.alibaba.com
Wed Dec 7 18:32:02 AEDT 2022


On Wed, Dec 07, 2022 at 01:47:43PM +0800, Yue Hu wrote:
> From: Yue Hu <huyue2 at coolpad.com>
> 
> Previously, there was no fragment deduplication when this feature was
> introduced.  Let's support it now.
> 
> Fragments are deduplicated before compression, so that duplicated
> parts will not be written into the packed inode.
> 
> Suggested-by: Gao Xiang <hsiangkao at linux.alibaba.com>
> Signed-off-by: Yue Hu <huyue2 at coolpad.com>


I'd like to submit it with a minor update:

diff --git a/lib/compress.c b/lib/compress.c
index 6b79096..b205aa6 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -349,8 +349,8 @@ static bool z_erofs_fixup_deduped_fragment(struct z_erofs_vle_compress_ctx *ctx,
 	inode->fragmentoff += inode->fragment_size - newsize;
 	inode->fragment_size = newsize;

-	erofs_dbg("Reducing fragment size to %u at %lu",
-		  inode->fragment_size, inode->fragmentoff);
+	erofs_dbg("Reducing fragment size to %u at %llu",
+		  inode->fragment_size, inode->fragmentoff | 0ULL);

 	/* it's the end */
 	ctx->head += newsize;
diff --git a/lib/fragments.c b/lib/fragments.c
index e855467..e69ae47 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -22,10 +22,10 @@ struct erofs_fragment_dedupe_item {

 #define EROFS_TOF_HASHLEN		16

-#define FRAGMENT_HASHTABLE_SIZE		65536
-#define FRAGMENT_HASH(crc)		(crc & (FRAGMENT_HASHTABLE_SIZE - 1))
+#define FRAGMENT_HASHSIZE		65536
+#define FRAGMENT_HASH(c)		((c) & (FRAGMENT_HASHSIZE - 1))

-static struct list_head dupli_frags[FRAGMENT_HASHTABLE_SIZE];
+static struct list_head dupli_frags[FRAGMENT_HASHSIZE];

 static FILE *packedfile;
 const char *frags_packedname = "packed_file";
@@ -98,8 +98,8 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
 	inode->fragment_size = di->nr_dup;
 	inode->fragmentoff = di->pos + di->length - di->nr_dup;

-	erofs_dbg("Dedupe %u fragment data at %lu", inode->fragment_size,
-		  inode->fragmentoff);
+	erofs_dbg("Dedupe %u tail data at %llu", inode->fragment_size,
+		  inode->fragmentoff | 0ULL);
 out:
 	free(data);
 	return ret;
@@ -156,7 +156,7 @@ static void z_erofs_fragments_dedupe_init(void)
 {
 	unsigned int i;

-	for (i = 0; i < FRAGMENT_HASHTABLE_SIZE; ++i)
+	for (i = 0; i < FRAGMENT_HASHSIZE; ++i)
 		init_list_head(&dupli_frags[i]);
 }

@@ -166,12 +166,9 @@ static void z_erofs_fragments_dedupe_exit(void)
 	struct list_head *head;
 	unsigned int i;

-	for (i = 0; i < FRAGMENT_HASHTABLE_SIZE; ++i) {
+	for (i = 0; i < FRAGMENT_HASHSIZE; ++i) {
 		head = &dupli_frags[i];

-		if (list_empty(head))
-			continue;
-
 		list_for_each_entry_safe(di, n, head, list)
 			free(di);
 	}



More information about the Linux-erofs mailing list