[PATCH 1/5] erofs-utils: lib: introduce z_erofs_fragments_tofh()

Gao Xiang hsiangkao at linux.alibaba.com
Tue Jun 3 13:56:53 AEST 2025


Introduce a separate z_erofs_fragments_tofh() to get the tail hash in
order to prepare for the upcoming multi-threaded fragment improvement.

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 include/erofs/fragments.h |  3 +-
 lib/compress.c            | 38 ++++++++++----------
 lib/fragments.c           | 75 ++++++++++++++++++---------------------
 3 files changed, 57 insertions(+), 59 deletions(-)

diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
index a57b63c..75f1055 100644
--- a/include/erofs/fragments.h
+++ b/include/erofs/fragments.h
@@ -15,7 +15,8 @@ extern "C"
 extern const char *erofs_frags_packedname;
 #define EROFS_PACKED_INODE	erofs_frags_packedname
 
-int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc);
+u32 z_erofs_fragments_tofh(struct erofs_inode *inode, int fd, erofs_off_t fpos);
+int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh);
 
 int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc);
 int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
diff --git a/lib/compress.c b/lib/compress.c
index d046112..a260dc4 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -46,7 +46,7 @@ struct z_erofs_compress_ictx {		/* inode context */
 	int fd;
 	u64 fpos;
 
-	u32 tof_chksum;
+	u32 tofh;
 	bool fix_dedupedfrag;
 	bool fragemitted;
 	bool dedupe;
@@ -626,7 +626,7 @@ nocompression:
 		   (!inode->fragment_size || ictx->fix_dedupedfrag)) {
 frag_packing:
 		ret = z_erofs_pack_fragments(inode, ctx->queue + ctx->head,
-					     len, ictx->tof_chksum);
+					     len, ictx->tofh);
 		if (ret < 0)
 			return ret;
 		e->plen = 0;	/* indicate a fragment */
@@ -1103,7 +1103,7 @@ int z_erofs_compress_segment(struct z_erofs_compress_sctx *ctx,
 	DBG_BUGON(offset != -1 && frag && inode->fragment_size);
 	if (offset != -1 && frag && !inode->fragment_size &&
 	    cfg.c_fragdedupe != FRAGDEDUPE_OFF) {
-		ret = z_erofs_fragments_dedupe(inode, fd, &ictx->tof_chksum);
+		ret = z_erofs_fragments_dedupe(inode, fd, ictx->tofh);
 		if (ret < 0)
 			return ret;
 		if (inode->fragment_size > ctx->remaining)
@@ -1622,21 +1622,23 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
 	inode->z_algorithmtype[0] = ictx->ccfg->algorithmtype;
 	inode->z_algorithmtype[1] = 0;
 
-	/*
-	 * Handle tails in advance to avoid writing duplicated
-	 * parts into the packed inode.
-	 */
-	if (cfg.c_fragments && !erofs_is_packed_inode(inode) &&
-	    ictx == &g_ictx && cfg.c_fragdedupe != FRAGDEDUPE_OFF) {
-		ret = z_erofs_fragments_dedupe(inode, fd, &ictx->tof_chksum);
-		if (ret < 0)
-			goto err_free_ictx;
+	if (cfg.c_fragments && !erofs_is_packed_inode(inode)) {
+		ictx->tofh = z_erofs_fragments_tofh(inode, fd, fpos);
+		if (ictx == &g_ictx && cfg.c_fragdedupe != FRAGDEDUPE_OFF) {
+			/*
+			 * Handle tails in advance to avoid writing duplicated
+			 * parts into the packed inode.
+			 */
+			ret = z_erofs_fragments_dedupe(inode, fd, ictx->tofh);
+			if (ret < 0)
+				goto err_free_ictx;
 
-		if (cfg.c_fragdedupe == FRAGDEDUPE_INODE &&
-		    inode->fragment_size < inode->i_size) {
-			erofs_dbg("Discard the sub-inode tail fragment of %s",
-				  inode->i_srcpath);
-			inode->fragment_size = 0;
+			if (cfg.c_fragdedupe == FRAGDEDUPE_INODE &&
+			    inode->fragment_size < inode->i_size) {
+				erofs_dbg("Discard the sub-inode tail fragment of %s",
+					  inode->i_srcpath);
+				inode->fragment_size = 0;
+			}
 		}
 	}
 	ictx->inode = inode;
@@ -1647,7 +1649,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
 	ictx->dedupe = false;
 
 	if (all_fragments && !inode->fragment_size) {
-		ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tof_chksum);
+		ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tofh);
 		if (ret)
 			goto err_free_idata;
 	}
diff --git a/lib/fragments.c b/lib/fragments.c
index 9dfe0e3..9f5f1f9 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -3,9 +3,6 @@
  * Copyright (C), 2022, Coolpad Group Limited.
  * Created by Yue Hu <huyue2 at coolpad.com>
  */
-#ifndef _LARGEFILE64_SOURCE
-#define _LARGEFILE64_SOURCE
-#endif
 #ifndef _FILE_OFFSET_BITS
 #define _FILE_OFFSET_BITS 64
 #endif
@@ -49,23 +46,39 @@ struct erofs_packed_inode {
 
 const char *erofs_frags_packedname = "packed_file";
 
-#ifndef HAVE_LSEEK64
-#define erofs_lseek64 lseek
-#else
-#define erofs_lseek64 lseek64
-#endif
+/* Hash the last EROFS_TOF_HASHLEN bytes of the file data starting at @fpos. */
+u32 z_erofs_fragments_tofh(struct erofs_inode *inode, int fd, erofs_off_t fpos)
+{
+	u8 data_to_hash[EROFS_TOF_HASHLEN];
+	u32 hash;
+	int ret;
+
+	/* ~0U means "no tail hash": the file is too short or unreadable */
+	if (inode->i_size <= EROFS_TOF_HASHLEN)
+		return ~0U;
+	ret = pread(fd, data_to_hash, EROFS_TOF_HASHLEN,
+		    fpos + inode->i_size - EROFS_TOF_HASHLEN);
+	if (ret != EROFS_TOF_HASHLEN) {
+		DBG_BUGON(ret >= 0);	/* short reads are unexpected */
+		return ~0U;	/* a u32 cannot carry -errno to callers */
+	}
+	hash = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN);
+	/* keep computed hashes distinct from the ~0U sentinel */
+	return hash != ~0U ? hash : 0;
+}
 
-static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
-					 u32 crc)
+int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh)
 {
 	struct erofs_packed_inode *epi = inode->sbi->packedinode;
 	struct erofs_fragment_dedupe_item *cur, *di = NULL;
-	struct list_head *head = &epi->hash[FRAGMENT_HASH(crc)];
+	struct list_head *head = &epi->hash[FRAGMENT_HASH(tofh)];
 	unsigned int s1, e1;
 	erofs_off_t deduped;
 	u8 *data;
 	int ret;
 
+	if (inode->i_size <= EROFS_TOF_HASHLEN)
+		return 0;
 	if (list_empty(head))
 		return 0;
 
@@ -138,27 +151,13 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
 	return 0;
 }
 
-int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc)
-{
-	u8 data_to_hash[EROFS_TOF_HASHLEN];
-	int ret;
-
-	if (inode->i_size <= EROFS_TOF_HASHLEN)
-		return 0;
-
-	ret = pread(fd, data_to_hash, EROFS_TOF_HASHLEN,
-		    inode->i_size - EROFS_TOF_HASHLEN);
-	if (ret != EROFS_TOF_HASHLEN)
-		return -errno;
-
-	*tofcrc = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN);
-	return z_erofs_fragments_dedupe_find(inode, fd, *tofcrc);
-}
-
-static int z_erofs_fragments_dedupe_insert(struct list_head *hash, void *data,
-					   unsigned int len, erofs_off_t pos)
+static int z_erofs_fragments_dedupe_insert(struct erofs_inode *inode,
+					   void *data, u32 tofh)
 {
+	struct erofs_packed_inode *epi = inode->sbi->packedinode;
 	struct erofs_fragment_dedupe_item *di;
+	erofs_off_t len = inode->fragment_size;
+	erofs_off_t pos = inode->fragmentoff;
 
 	if (len <= EROFS_TOF_HASHLEN)
 		return 0;
@@ -172,14 +171,13 @@ static int z_erofs_fragments_dedupe_insert(struct list_head *hash, void *data,
 		return -ENOMEM;
 
 	memcpy(di->data, data, len);
-	di->length = len;
 	di->pos = pos;
-
-	list_add_tail(&di->list, hash);
+	di->length = len;
+	list_add_tail(&di->list, &epi->hash[FRAGMENT_HASH(tofh)]);
 	return 0;
 }
 
-int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc)
+int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofh)
 {
 	struct erofs_packed_inode *epi = inode->sbi->packedinode;
 	s64 offset, rc;
@@ -240,9 +238,7 @@ int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc)
 		  inode->i_srcpath);
 
 	if (memblock)
-		rc = z_erofs_fragments_dedupe_insert(
-			&epi->hash[FRAGMENT_HASH(tofcrc)], memblock,
-			inode->fragment_size, inode->fragmentoff);
+		rc = z_erofs_fragments_dedupe_insert(inode, memblock, tofh);
 	else
 		rc = 0;
 out:
@@ -256,7 +252,7 @@ out:
 }
 
 int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
-			   unsigned int len, u32 tofcrc)
+			   unsigned int len, u32 tofh)
 {
 	struct erofs_packed_inode *epi = inode->sbi->packedinode;
 	s64 offset = lseek(epi->fd, 0, SEEK_CUR);
@@ -279,8 +275,7 @@ int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
 		  inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL,
 		  inode->i_srcpath);
 
-	ret = z_erofs_fragments_dedupe_insert(&epi->hash[FRAGMENT_HASH(tofcrc)],
-					      data, len, inode->fragmentoff);
+	ret = z_erofs_fragments_dedupe_insert(inode, data, tofh);
 	if (ret)
 		return ret;
 	return len;
-- 
2.43.5



More information about the Linux-erofs mailing list