[PATCH] erofs-utils: mkfs: fix inefficient fragment deduplication
Gao Xiang
hsiangkao at linux.alibaba.com
Wed Jan 22 01:34:00 AEDT 2025
Currently, long fragment comparisons could cause suboptimal results,
leading to final image sizes still larger than expected:
_________________________________________________________________________
|______ Testset _____| Vanilla | After | Command Line
| CoreOS [1] | 802107392 (765 MiB) | 687501312 (656 MiB) | -zlzma,6 -Eall-fragments,fragdedupe=inode -C131072
|____________________|__ 771715072 (736 MiB) __|__ 658485248 (628 MiB) __| -zlzma,6 -Eall-fragments,fragdedupe=inode -C1048576
| Fedora KIWI [2] |_ 2584076288 (2465 MiB) _|_ 2550837248 (2433 MiB) __| -zlzma,6 -Eall-fragments,fragdedupe=inode -C1048576
|____________________|_ 2843598848 (2712 MiB) _|_ 2810359808 (2681 MiB) __| (Fedora-KDE-Desktop-Live-Rawhide.0.x86_64.iso)
Almost all images that use `-Eall-fragments` could benefit from this.
[1] https://builds.coreos.fedoraproject.org/prod/streams/stable/builds/41.20241215.3.0/x86_64/fedora-coreos-41.20241215.3.0-live.x86_64.iso
[2] https://pagure.io/fedora-kiwi-descriptions.git
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
lib/compress.c | 3 +-
lib/fragments.c | 123 ++++++++++++++++++++++--------------------------
2 files changed, 57 insertions(+), 69 deletions(-)
diff --git a/lib/compress.c b/lib/compress.c
index eb3190d..5c9c051 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -1534,7 +1534,8 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos)
if (cfg.c_fragdedupe == FRAGDEDUPE_INODE &&
inode->fragment_size < inode->i_size) {
- erofs_dbg("Discard the sub-inode tail fragment @ nid %llu", inode->nid);
+ erofs_dbg("Discard the sub-inode tail fragment of %s",
+ inode->i_srcpath);
inode->fragment_size = 0;
}
}
diff --git a/lib/fragments.c b/lib/fragments.c
index f662cbc..32ac6f5 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -33,6 +33,7 @@ struct erofs_fragment_dedupe_item {
u8 data[];
};
+#define EROFS_FRAGMENT_INMEM_SZ_MAX EROFS_CONFIG_COMPR_MAX_SZ
#define EROFS_TOF_HASHLEN 16
#define FRAGMENT_HASHSIZE 65536
@@ -62,96 +63,82 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
struct erofs_packed_inode *epi = inode->sbi->packedinode;
struct erofs_fragment_dedupe_item *cur, *di = NULL;
struct list_head *head = &epi->hash[FRAGMENT_HASH(crc)];
+ unsigned int s1, e1;
+ erofs_off_t deduped;
u8 *data;
- unsigned int length, e2, deduped;
- erofs_off_t pos;
int ret;
if (list_empty(head))
return 0;
- /* XXX: no need to read so much for smaller? */
- if (inode->i_size < EROFS_CONFIG_COMPR_MAX_SZ)
- length = inode->i_size;
- else
- length = EROFS_CONFIG_COMPR_MAX_SZ;
-
- data = malloc(length);
+ s1 = min_t(u64, EROFS_FRAGMENT_INMEM_SZ_MAX, inode->i_size);
+ data = malloc(s1);
if (!data)
return -ENOMEM;
- if (erofs_lseek64(fd, inode->i_size - length, SEEK_SET) < 0) {
- ret = -errno;
- goto out;
- }
-
- ret = read(fd, data, length);
- if (ret != length) {
- ret = -errno;
- goto out;
+ ret = pread(fd, data, s1, inode->i_size - s1);
+ if (ret != s1) {
+ free(data);
+ return -errno;
}
-
- DBG_BUGON(length <= EROFS_TOF_HASHLEN);
- e2 = length - EROFS_TOF_HASHLEN;
+ e1 = s1 - EROFS_TOF_HASHLEN;
deduped = 0;
-
list_for_each_entry(cur, head, list) {
- unsigned int e1, mn, i = 0;
+ unsigned int e2, mn;
+ erofs_off_t i, pos;
DBG_BUGON(cur->length <= EROFS_TOF_HASHLEN);
- e1 = cur->length - EROFS_TOF_HASHLEN;
+ e2 = cur->length - EROFS_TOF_HASHLEN;
- if (memcmp(cur->data + e1, data + e2, EROFS_TOF_HASHLEN))
+ if (memcmp(data + e1, cur->data + e2, EROFS_TOF_HASHLEN))
continue;
+ i = 0;
mn = min(e1, e2);
- while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1])
+ while (i < mn && cur->data[e2 - i - 1] == data[e1 - i - 1])
++i;
- if (!di || i + EROFS_TOF_HASHLEN > deduped) {
- deduped = i + EROFS_TOF_HASHLEN;
- di = cur;
+ i += EROFS_TOF_HASHLEN;
+ if (i >= s1) { /* full short match */
+ DBG_BUGON(i > s1);
+ pos = cur->pos + cur->length - s1;
+ while (i < inode->i_size && pos) {
+ char buf[2][16384];
+ unsigned int sz;
+
+ sz = min_t(u64, pos, sizeof(buf[0]));
+ sz = min_t(u64, sz, inode->i_size - i);
+ if (pread(fileno(epi->file), buf[0], sz,
+ pos - sz) != sz)
+ break;
+ if (pread(fd, buf[1], sz,
+ inode->i_size - i - sz) != sz)
+ break;
- /* full match */
- if (i == e2)
- break;
+ if (memcmp(buf[0], buf[1], sz))
+ break;
+ pos -= sz;
+ i += sz;
+ }
}
- }
- if (!di)
- goto out;
-
- DBG_BUGON(di->length < deduped);
- pos = di->pos + di->length - deduped;
- /* let's read more to dedupe as long as we can */
- if (deduped == di->length) {
- fflush(epi->file);
-
- while(deduped < inode->i_size && pos) {
- char buf[2][16384];
- unsigned int sz = min_t(unsigned int, pos,
- sizeof(buf[0]));
-
- if (pread(fileno(epi->file), buf[0], sz,
- pos - sz) != sz)
- break;
- if (pread(fd, buf[1], sz,
- inode->i_size - deduped - sz) != sz)
- break;
- if (memcmp(buf[0], buf[1], sz))
- break;
- pos -= sz;
- deduped += sz;
- }
+ if (i <= deduped)
+ continue;
+ di = cur;
+ deduped = i;
+ if (deduped == inode->i_size)
+ break;
}
- inode->fragment_size = deduped;
- inode->fragmentoff = pos;
- erofs_dbg("Dedupe %llu tail data at %llu", inode->fragment_size | 0ULL,
- inode->fragmentoff | 0ULL);
-out:
free(data);
- return ret;
+ if (deduped) {
+ DBG_BUGON(!di);
+ inode->fragment_size = deduped;
+ inode->fragmentoff = di->pos + di->length - deduped;
+ erofs_dbg("Dedupe %llu tail data at %llu",
+ inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL);
+ }
+ return 0;
}
int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc)
@@ -186,10 +173,10 @@ static int z_erofs_fragments_dedupe_insert(struct list_head *hash, void *data,
if (len <= EROFS_TOF_HASHLEN)
return 0;
- if (len > EROFS_CONFIG_COMPR_MAX_SZ) {
- data += len - EROFS_CONFIG_COMPR_MAX_SZ;
- pos += len - EROFS_CONFIG_COMPR_MAX_SZ;
- len = EROFS_CONFIG_COMPR_MAX_SZ;
+ if (len > EROFS_FRAGMENT_INMEM_SZ_MAX) {
+ data += len - EROFS_FRAGMENT_INMEM_SZ_MAX;
+ pos += len - EROFS_FRAGMENT_INMEM_SZ_MAX;
+ len = EROFS_FRAGMENT_INMEM_SZ_MAX;
}
di = malloc(sizeof(*di) + len);
if (!di)
--
2.43.5
More information about the Linux-erofs
mailing list