[PATCH 1/5] erofs-utils: lib: introduce z_erofs_fragments_tofh()
Gao Xiang
hsiangkao at linux.alibaba.com
Tue Jun 3 13:56:53 AEST 2025
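Introduce a separate z_erofs_fragments_tofh() helper that computes the
tail hash, i.e. the CRC32C of the last EROFS_TOF_HASHLEN bytes of a file,
in preparation for the upcoming multi-threaded fragment improvement.
The tail is now read relative to `fpos`, and ~0U is returned for files
that are not longer than EROFS_TOF_HASHLEN (a computed hash of ~0U is
remapped to 0).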
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
include/erofs/fragments.h | 3 +-
lib/compress.c | 38 ++++++++++----------
lib/fragments.c | 75 ++++++++++++++++++---------------------
3 files changed, 57 insertions(+), 59 deletions(-)
diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
index a57b63c..75f1055 100644
--- a/include/erofs/fragments.h
+++ b/include/erofs/fragments.h
@@ -15,7 +15,8 @@ extern "C"
extern const char *erofs_frags_packedname;
#define EROFS_PACKED_INODE erofs_frags_packedname
-int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc);
+u32 z_erofs_fragments_tofh(struct erofs_inode *inode, int fd, erofs_off_t fpos);
+int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh);
int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc);
int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
diff --git a/lib/compress.c b/lib/compress.c
index d046112..a260dc4 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -46,7 +46,7 @@ struct z_erofs_compress_ictx { /* inode context */
int fd;
u64 fpos;
- u32 tof_chksum;
+ u32 tofh;
bool fix_dedupedfrag;
bool fragemitted;
bool dedupe;
@@ -626,7 +626,7 @@ nocompression:
(!inode->fragment_size || ictx->fix_dedupedfrag)) {
frag_packing:
ret = z_erofs_pack_fragments(inode, ctx->queue + ctx->head,
- len, ictx->tof_chksum);
+ len, ictx->tofh);
if (ret < 0)
return ret;
e->plen = 0; /* indicate a fragment */
@@ -1103,7 +1103,7 @@ int z_erofs_compress_segment(struct z_erofs_compress_sctx *ctx,
DBG_BUGON(offset != -1 && frag && inode->fragment_size);
if (offset != -1 && frag && !inode->fragment_size &&
cfg.c_fragdedupe != FRAGDEDUPE_OFF) {
- ret = z_erofs_fragments_dedupe(inode, fd, &ictx->tof_chksum);
+ ret = z_erofs_fragments_dedupe(inode, fd, ictx->tofh);
if (ret < 0)
return ret;
if (inode->fragment_size > ctx->remaining)
@@ -1622,21 +1622,23 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
inode->z_algorithmtype[0] = ictx->ccfg->algorithmtype;
inode->z_algorithmtype[1] = 0;
- /*
- * Handle tails in advance to avoid writing duplicated
- * parts into the packed inode.
- */
- if (cfg.c_fragments && !erofs_is_packed_inode(inode) &&
- ictx == &g_ictx && cfg.c_fragdedupe != FRAGDEDUPE_OFF) {
- ret = z_erofs_fragments_dedupe(inode, fd, &ictx->tof_chksum);
- if (ret < 0)
- goto err_free_ictx;
+ if (cfg.c_fragments && !erofs_is_packed_inode(inode)) {
+ ictx->tofh = z_erofs_fragments_tofh(inode, fd, fpos);
+ if (ictx == &g_ictx && cfg.c_fragdedupe != FRAGDEDUPE_OFF) {
+ /*
+ * Handle tails in advance to avoid writing duplicated
+ * parts into the packed inode.
+ */
+ ret = z_erofs_fragments_dedupe(inode, fd, ictx->tofh);
+ if (ret < 0)
+ goto err_free_ictx;
- if (cfg.c_fragdedupe == FRAGDEDUPE_INODE &&
- inode->fragment_size < inode->i_size) {
- erofs_dbg("Discard the sub-inode tail fragment of %s",
- inode->i_srcpath);
- inode->fragment_size = 0;
+ if (cfg.c_fragdedupe == FRAGDEDUPE_INODE &&
+ inode->fragment_size < inode->i_size) {
+ erofs_dbg("Discard the sub-inode tail fragment of %s",
+ inode->i_srcpath);
+ inode->fragment_size = 0;
+ }
}
}
ictx->inode = inode;
@@ -1647,7 +1649,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
ictx->dedupe = false;
if (all_fragments && !inode->fragment_size) {
- ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tof_chksum);
+ ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tofh);
if (ret)
goto err_free_idata;
}
diff --git a/lib/fragments.c b/lib/fragments.c
index 9dfe0e3..9f5f1f9 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -3,9 +3,6 @@
* Copyright (C), 2022, Coolpad Group Limited.
* Created by Yue Hu <huyue2 at coolpad.com>
*/
-#ifndef _LARGEFILE64_SOURCE
-#define _LARGEFILE64_SOURCE
-#endif
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif
@@ -49,23 +46,39 @@ struct erofs_packed_inode {
const char *erofs_frags_packedname = "packed_file";
-#ifndef HAVE_LSEEK64
-#define erofs_lseek64 lseek
-#else
-#define erofs_lseek64 lseek64
-#endif
+u32 z_erofs_fragments_tofh(struct erofs_inode *inode, int fd, erofs_off_t fpos)
+{
+ u8 data_to_hash[EROFS_TOF_HASHLEN];
+ u32 hash;
+ int ret;
+
+ if (inode->i_size <= EROFS_TOF_HASHLEN)
+ return ~0U;
+
+ ret = pread(fd, data_to_hash, EROFS_TOF_HASHLEN,
+ fpos + inode->i_size - EROFS_TOF_HASHLEN);
+ if (ret < 0)
+ return -errno;
+ if (ret != EROFS_TOF_HASHLEN) {
+ DBG_BUGON(1);
+ return -EIO;
+ }
+ hash = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN);
+ return hash != ~0U ? hash : 0;
+}
-static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
- u32 crc)
+int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh)
{
struct erofs_packed_inode *epi = inode->sbi->packedinode;
struct erofs_fragment_dedupe_item *cur, *di = NULL;
- struct list_head *head = &epi->hash[FRAGMENT_HASH(crc)];
+ struct list_head *head = &epi->hash[FRAGMENT_HASH(tofh)];
unsigned int s1, e1;
erofs_off_t deduped;
u8 *data;
int ret;
+ if (inode->i_size <= EROFS_TOF_HASHLEN)
+ return 0;
if (list_empty(head))
return 0;
@@ -138,27 +151,13 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
return 0;
}
-int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc)
-{
- u8 data_to_hash[EROFS_TOF_HASHLEN];
- int ret;
-
- if (inode->i_size <= EROFS_TOF_HASHLEN)
- return 0;
-
- ret = pread(fd, data_to_hash, EROFS_TOF_HASHLEN,
- inode->i_size - EROFS_TOF_HASHLEN);
- if (ret != EROFS_TOF_HASHLEN)
- return -errno;
-
- *tofcrc = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN);
- return z_erofs_fragments_dedupe_find(inode, fd, *tofcrc);
-}
-
-static int z_erofs_fragments_dedupe_insert(struct list_head *hash, void *data,
- unsigned int len, erofs_off_t pos)
+static int z_erofs_fragments_dedupe_insert(struct erofs_inode *inode,
+ void *data, u32 tofh)
{
+ struct erofs_packed_inode *epi = inode->sbi->packedinode;
struct erofs_fragment_dedupe_item *di;
+ erofs_off_t len = inode->fragment_size;
+ erofs_off_t pos = inode->fragmentoff;
if (len <= EROFS_TOF_HASHLEN)
return 0;
@@ -172,14 +171,13 @@ static int z_erofs_fragments_dedupe_insert(struct list_head *hash, void *data,
return -ENOMEM;
memcpy(di->data, data, len);
- di->length = len;
di->pos = pos;
-
- list_add_tail(&di->list, hash);
+ di->length = len;
+ list_add_tail(&di->list, &epi->hash[FRAGMENT_HASH(tofh)]);
return 0;
}
-int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc)
+int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofh)
{
struct erofs_packed_inode *epi = inode->sbi->packedinode;
s64 offset, rc;
@@ -240,9 +238,7 @@ int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc)
inode->i_srcpath);
if (memblock)
- rc = z_erofs_fragments_dedupe_insert(
- &epi->hash[FRAGMENT_HASH(tofcrc)], memblock,
- inode->fragment_size, inode->fragmentoff);
+ rc = z_erofs_fragments_dedupe_insert(inode, memblock, tofh);
else
rc = 0;
out:
@@ -256,7 +252,7 @@ out:
}
int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
- unsigned int len, u32 tofcrc)
+ unsigned int len, u32 tofh)
{
struct erofs_packed_inode *epi = inode->sbi->packedinode;
s64 offset = lseek(epi->fd, 0, SEEK_CUR);
@@ -279,8 +275,7 @@ int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL,
inode->i_srcpath);
- ret = z_erofs_fragments_dedupe_insert(&epi->hash[FRAGMENT_HASH(tofcrc)],
- data, len, inode->fragmentoff);
+ ret = z_erofs_fragments_dedupe_insert(inode, data, tofh);
if (ret)
return ret;
return len;
--
2.43.5
More information about the Linux-erofs
mailing list