[PATCH 3/3] erofs-utils: introduce fragment cache
Gao Xiang
hsiangkao at linux.alibaba.com
Fri Jan 17 18:46:02 AEDT 2025
Unlike Yiyan's previous version [1], this patch just uses
a tmpfile to keep all decompressed data for fragments.
Dataset: linux 5.4.140
mkfs.erofs command line:
mkfs.erofs -zlzma -C131072 -T0 -Eall-fragments,fragdedupe=inode foo.erofs <dir>
Test command line:
hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "fsck/fsck.erofs --extract foo.erofs"
Vanilla:
Time (mean ± σ): 362.309 s ± 0.406 s [User: 360.298 s, System: 0.956 s]
After:
Time (mean ± σ): 20.880 s ± 0.026 s [User: 19.751 s, System: 1.058 s]
[1] https://lore.kernel.org/r/20231023071528.1912105-1-lyy0627@sjtu.edu.cn
Cc: Li Yiyan <lyy0627 at sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
fsck/main.c | 12 ++-
fuse/main.c | 16 +++-
include/erofs/fragments.h | 3 +
lib/data.c | 14 +--
lib/fragments.c | 192 ++++++++++++++++++++++++++++++++++++++
5 files changed, 222 insertions(+), 15 deletions(-)
diff --git a/fsck/main.c b/fsck/main.c
index f56a812..d375835 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -16,6 +16,7 @@
#include "erofs/dir.h"
#include "erofs/xattr.h"
#include "../lib/compressor.h"
+#include "erofs/fragments.h"
static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid);
@@ -1079,10 +1080,17 @@ int main(int argc, char *argv[])
erofsfsck_hardlink_init();
if (erofs_sb_has_fragments(&g_sbi) && g_sbi.packed_nid > 0) {
+ err = erofs_packedfile_init(&g_sbi, false);
+ if (err) {
+ erofs_err("failed to initialize packedfile: %s",
+ erofs_strerror(err));
+ goto exit_hardlink;
+ }
+
err = erofsfsck_check_inode(g_sbi.packed_nid, g_sbi.packed_nid);
if (err) {
erofs_err("failed to verify packed file");
- goto exit_hardlink;
+ goto exit_packedinode;
}
}
@@ -1108,6 +1116,8 @@ int main(int argc, char *argv[])
}
}
+exit_packedinode:
+ erofs_packedfile_exit(&g_sbi);
exit_hardlink:
if (fsckcfg.extract_path)
erofsfsck_hardlink_exit();
diff --git a/fuse/main.c b/fuse/main.c
index f6c04e8..cb2759e 100644
--- a/fuse/main.c
+++ b/fuse/main.c
@@ -12,6 +12,7 @@
#include "erofs/print.h"
#include "erofs/dir.h"
#include "erofs/inode.h"
+#include "erofs/fragments.h"
#include <float.h>
#include <fuse.h>
@@ -688,11 +689,20 @@ int main(int argc, char *argv[])
goto err_dev_close;
}
+ if (erofs_sb_has_fragments(&g_sbi) && g_sbi.packed_nid > 0) {
+ ret = erofs_packedfile_init(&g_sbi, false);
+ if (ret) {
+ erofs_err("failed to initialize packedfile: %s",
+ erofs_strerror(ret));
+ goto err_super_put;
+ }
+ }
+
#if FUSE_MAJOR_VERSION >= 3
se = fuse_session_new(&args, &erofsfuse_lops, sizeof(erofsfuse_lops),
NULL);
if (!se)
- goto err_super_put;
+ goto err_packedinode;
if (fuse_session_mount(se, opts.mountpoint) >= 0) {
EROFSFUSE_MOUNT_MSG
@@ -722,7 +732,7 @@ int main(int argc, char *argv[])
#else
ch = fuse_mount(opts.mountpoint, &args);
if (!ch)
- goto err_super_put;
+ goto err_packedinode;
EROFSFUSE_MOUNT_MSG
se = fuse_lowlevel_new(&args, &erofsfuse_lops, sizeof(erofsfuse_lops),
NULL);
@@ -743,6 +753,8 @@ int main(int argc, char *argv[])
fuse_unmount(opts.mountpoint, ch);
#endif
+err_packedinode:
+ erofs_packedfile_exit(&g_sbi);
err_super_put:
erofs_put_super(&g_sbi);
err_dev_close:
diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
index e92b7c7..14a1b4a 100644
--- a/include/erofs/fragments.h
+++ b/include/erofs/fragments.h
@@ -27,6 +27,9 @@ FILE *erofs_packedfile(struct erofs_sb_info *sbi);
int erofs_packedfile_init(struct erofs_sb_info *sbi, bool fragments_mkfs);
void erofs_packedfile_exit(struct erofs_sb_info *sbi);
+int erofs_packedfile_read(struct erofs_sb_info *sbi,
+ void *buf, erofs_off_t len, erofs_off_t pos);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/data.c b/lib/data.c
index 8033208..fd9c21a 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -8,6 +8,7 @@
#include "erofs/internal.h"
#include "erofs/trace.h"
#include "erofs/decompress.h"
+#include "erofs/fragments.h"
static int erofs_map_blocks_flatmode(struct erofs_inode *inode,
struct erofs_map_blocks *map,
@@ -248,18 +249,7 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
int ret = 0;
if (map->m_flags & EROFS_MAP_FRAGMENT) {
- struct erofs_inode packed_inode = {
- .sbi = sbi,
- .nid = sbi->packed_nid,
- };
-
- ret = erofs_read_inode_from_disk(&packed_inode);
- if (ret) {
- erofs_err("failed to read packed inode from disk");
- return ret;
- }
-
- return erofs_pread(&packed_inode, buffer, length - skip,
+ return erofs_packedfile_read(sbi, buffer, length - skip,
inode->fragmentoff + skip);
}
diff --git a/lib/fragments.c b/lib/fragments.c
index 43cebe0..0aea25f 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -24,6 +24,7 @@
#include "erofs/print.h"
#include "erofs/internal.h"
#include "erofs/fragments.h"
+#include "erofs/bitops.h"
struct erofs_fragment_dedupe_item {
struct list_head list;
@@ -40,6 +41,11 @@ struct erofs_fragment_dedupe_item {
+/*
+ * In-memory state for the packed file (packed inode).
+ */
struct erofs_packed_inode {
struct list_head *hash;
FILE *file;
+ /* bitmap of packed-file blocks already present in ->file (one bit per block) */
+ unsigned long *uptodate;
+#if EROFS_MT_ENABLED
+ /* taken around preloading a block that is not yet marked uptodate */
+ pthread_mutex_t mutex;
+#endif
+ /* size of the ->uptodate bitmap in bytes */
+ unsigned int uptodate_size;
};
const char *erofs_frags_packedname = "packed_file";
@@ -340,6 +346,9 @@ void erofs_packedfile_exit(struct erofs_sb_info *sbi)
if (!epi)
return;
+ if (epi->uptodate)
+ free(epi->uptodate);
+
if (epi->hash) {
for (i = 0; i < FRAGMENT_HASHSIZE; ++i)
list_for_each_entry_safe(di, n, &epi->hash[i], list)
@@ -386,9 +395,192 @@ int erofs_packedfile_init(struct erofs_sb_info *sbi, bool fragments_mkfs)
err = -errno;
goto err_out;
}
+
+ if (erofs_sb_has_fragments(sbi) && sbi->packed_nid > 0) {
+ struct erofs_inode ei = {
+ .sbi = sbi,
+ .nid = sbi->packed_nid,
+ };
+
+ err = erofs_read_inode_from_disk(&ei);
+ if (err) {
+ erofs_err("failed to read packed inode from disk: %s",
+ erofs_strerror(-errno));
+ goto err_out;
+ }
+
+ err = fseek(epi->file, ei.i_size, SEEK_SET);
+ if (err) {
+ err = -errno;
+ goto err_out;
+ }
+ epi->uptodate_size = BLK_ROUND_UP(sbi, ei.i_size) / 8;
+ epi->uptodate = calloc(1, epi->uptodate_size);
+ if (!epi->uptodate) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ }
return 0;
err_out:
erofs_packedfile_exit(sbi);
return err;
}
+
+/*
+ * Read the packed inode from disk into @pi unless that was already done.
+ *
+ * @pi->nid doubles as the "loaded" marker: callers start with a
+ * zero-initialized inode (only ->sbi set), and a non-zero nid makes this
+ * a no-op.  On failure nid is reset so that a later call retries the read
+ * instead of treating a half-initialized inode as loaded.
+ *
+ * Return: 0 on success, otherwise the error from
+ * erofs_read_inode_from_disk().
+ */
+static int erofs_load_packedinode_from_disk(struct erofs_inode *pi)
+{
+	struct erofs_sb_info *sbi = pi->sbi;
+	int err;
+
+	if (pi->nid)
+		return 0;
+
+	pi->nid = sbi->packed_nid;
+	err = erofs_read_inode_from_disk(pi);
+	if (err) {
+		erofs_err("failed to read packed inode from disk: %s",
+			  erofs_strerror(err));
+		/* keep the "loaded" marker clear so callers can retry */
+		pi->nid = 0;
+		return err;
+	}
+	return 0;
+}
+
+/*
+ * Read one mapped range of the packed inode and store it in the cache file.
+ *
+ * @pi: packed inode; loaded from disk here if it has not been yet
+ * @map: on entry ->m_la is the requested logical offset; on success
+ * ->m_la / ->m_llen describe the range held in the returned buffer.
+ * On failure after the buffer was allocated, ->m_llen is set to 0.
+ *
+ * Returns a malloc()ed buffer of map->m_llen bytes which the caller frees,
+ * or an ERR_PTR() on failure.
+ */
+static void *erofs_packedfile_preload(struct erofs_inode *pi,
+ struct erofs_map_blocks *map)
+{
+ struct erofs_sb_info *sbi = pi->sbi;
+ struct erofs_packed_inode *epi = sbi->packedinode;
+ unsigned int bsz = erofs_blksiz(sbi);
+ char *buffer;
+ erofs_off_t pos, end;
+ ssize_t err;
+
+ err = erofs_load_packedinode_from_disk(pi);
+ if (err)
+ return ERR_PTR(err);
+
+ /* remember the requested offset; erofs_map_blocks() may move ->m_la */
+ pos = map->m_la;
+ err = erofs_map_blocks(pi, map, EROFS_GET_BLOCKS_FIEMAP);
+ if (err)
+ return ERR_PTR(err);
+
+ end = round_up(map->m_la + map->m_llen, bsz);
+ /*
+ * If the mapped range starts before the requested offset, begin at the
+ * next block boundary instead (presumably so that only whole blocks
+ * are cached and marked uptodate -- confirm).
+ */
+ if (map->m_la < pos)
+ map->m_la = round_up(map->m_la, bsz);
+ else
+ DBG_BUGON(map->m_la > pos);
+
+ map->m_llen = end - map->m_la;
+ DBG_BUGON(!map->m_llen);
+ buffer = malloc(map->m_llen);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+
+ err = erofs_pread(pi, buffer, map->m_llen, map->m_la);
+ if (err)
+ goto err_out;
+
+ /* flush the stdio stream before writing through the raw descriptor */
+ fflush(epi->file);
+ /* the cache file uses the same offsets as the packed inode */
+ err = pwrite(fileno(epi->file), buffer, map->m_llen, map->m_la);
+ if (err < 0) {
+ err = -errno;
+ /* out of space: empty the cache file and clear the whole bitmap */
+ if (err == -ENOSPC) {
+ ftruncate(fileno(epi->file), 0);
+ memset(epi->uptodate, 0, epi->uptodate_size);
+ }
+ goto err_out;
+ }
+ /* a short write is reported as -EIO */
+ if (err != map->m_llen) {
+ err = -EIO;
+ goto err_out;
+ }
+ /* mark every block of [m_la, end) as present in the cache file */
+ for (pos = map->m_la; pos < end; pos += bsz)
+ __erofs_set_bit(erofs_blknr(sbi, pos), epi->uptodate);
+ return buffer;
+
+err_out:
+ free(buffer);
+ map->m_llen = 0;
+ return ERR_PTR(err);
+}
+
+/*
+ * erofs_packedfile_read() - read fragment data out of the packed inode
+ * @sbi: superblock info which the packed inode belongs to
+ * @buf: destination buffer, at least @len bytes
+ * @len: number of bytes to read
+ * @pos: logical byte offset in the packed inode to start from
+ *
+ * If no packed-file cache has been set up (sbi->packedinode is NULL), the
+ * data is read with erofs_pread() directly.  Otherwise the request is
+ * served piece by piece: blocks already marked in the uptodate bitmap are
+ * read back from the cache file, other ranges are first brought in by
+ * erofs_packedfile_preload() and copied from its buffer.  If preloading
+ * fails or the cache file returns a short read, that piece falls back to
+ * erofs_pread().
+ *
+ * Return: 0 on success; otherwise the error of the failing helper, or
+ * -errno if reading the cache file fails.
+ */
+int erofs_packedfile_read(struct erofs_sb_info *sbi,
+			  void *buf, erofs_off_t len, erofs_off_t pos)
+{
+	struct erofs_packed_inode *epi = sbi->packedinode;
+	struct erofs_inode pi = {
+		.sbi = sbi,
+	};
+	struct erofs_map_blocks map = {
+		.index = UINT_MAX,
+	};
+	unsigned int bsz = erofs_blksiz(sbi);
+	erofs_off_t end = pos + len;
+	char *buffer = NULL;
+	int err;
+
+	if (!epi) {
+		err = erofs_load_packedinode_from_disk(&pi);
+		if (!err)
+			err = erofs_pread(&pi, buf, len, pos);
+		return err;
+	}
+
+	err = 0;
+	while (pos < end) {
+		if (pos >= map.m_la && pos < map.m_la + map.m_llen) {
+			/* covered by the range preloaded just before */
+			len = min_t(erofs_off_t, end - pos,
+				    map.m_la + map.m_llen - pos);
+			memcpy(buf, buffer + pos - map.m_la, len);
+		} else {
+			erofs_blk_t bnr = erofs_blknr(sbi, pos);
+			bool uptodate;
+
+			map.m_la = round_down(pos, bsz);
+			/* handle at most the rest of the current block */
+			len = min_t(erofs_off_t, bsz - (pos & (bsz - 1)),
+				    end - pos);
+			uptodate = __erofs_test_bit(bnr, epi->uptodate);
+			if (!uptodate) {
+#if EROFS_MT_ENABLED
+				pthread_mutex_lock(&epi->mutex);
+				/* recheck under the lock before preloading */
+				uptodate = __erofs_test_bit(bnr, epi->uptodate);
+				if (!uptodate) {
+#endif
+					free(buffer);
+					buffer = erofs_packedfile_preload(&pi, &map);
+					if (IS_ERR(buffer)) {
+#if EROFS_MT_ENABLED
+						/* don't jump away with the mutex held */
+						pthread_mutex_unlock(&epi->mutex);
+#endif
+						buffer = NULL;
+						goto fallback;
+					}
+
+#if EROFS_MT_ENABLED
+				}
+				pthread_mutex_unlock(&epi->mutex);
+#endif
+			}
+
+			/* just preloaded: retry so the copy branch serves it */
+			if (!uptodate)
+				continue;
+
+			err = pread(fileno(epi->file), buf, len, pos);
+			if (err < 0) {
+				/* report the real cause instead of a raw -1 */
+				err = -errno;
+				break;
+			}
+			if (err == len) {
+				err = 0;
+			} else {
+fallback:
+				err = erofs_load_packedinode_from_disk(&pi);
+				if (!err)
+					err = erofs_pread(&pi, buf, len, pos);
+				if (err)
+					break;
+			}
+			/* invalidate the map so the copy branch is not taken */
+			map.m_llen = 0;
+		}
+		/* arithmetic on void * is a GNU extension; go through char * */
+		buf = (char *)buf + len;
+		pos += len;
+	}
+	free(buffer);
+	return err;
+}
--
2.43.5
More information about the Linux-erofs
mailing list