[PATCH 3/3] erofs-utils: introduce fragment cache

Gao Xiang hsiangkao at linux.alibaba.com
Fri Jan 17 18:46:02 AEDT 2025


Unlike Yiyan's previous version [1], this patch just uses
a tmpfile to keep all decompressed data for fragments.

Dataset: linux 5.4.140
mkfs.erofs command line:
	mkfs.erofs -zlzma -C131072 -T0 -Eall-fragments,fragdedupe=inode foo.erofs <dir>
Test command line:
	hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "fsck/fsck.erofs --extract foo.erofs"

Vanilla:
  Time (mean ± σ):     362.309 s ±  0.406 s   [User: 360.298 s, System: 0.956 s]

After:
  Time (mean ± σ):     20.880 s ±  0.026 s    [User: 19.751 s, System: 1.058 s]

[1] https://lore.kernel.org/r/20231023071528.1912105-1-lyy0627@sjtu.edu.cn
Cc: Li Yiyan <lyy0627 at sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 fsck/main.c               |  12 ++-
 fuse/main.c               |  16 +++-
 include/erofs/fragments.h |   3 +
 lib/data.c                |  14 +--
 lib/fragments.c           | 192 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 222 insertions(+), 15 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index f56a812..d375835 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -16,6 +16,7 @@
 #include "erofs/dir.h"
 #include "erofs/xattr.h"
 #include "../lib/compressor.h"
+#include "erofs/fragments.h"
 
 static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid);
 
@@ -1079,10 +1080,17 @@ int main(int argc, char *argv[])
 		erofsfsck_hardlink_init();
 
 	if (erofs_sb_has_fragments(&g_sbi) && g_sbi.packed_nid > 0) {
+		err = erofs_packedfile_init(&g_sbi, false);
+		if (err) {
+			erofs_err("failed to initialize packedfile: %s",
+				  erofs_strerror(err));
+			goto exit_hardlink;
+		}
+
 		err = erofsfsck_check_inode(g_sbi.packed_nid, g_sbi.packed_nid);
 		if (err) {
 			erofs_err("failed to verify packed file");
-			goto exit_hardlink;
+			goto exit_packedinode;
 		}
 	}
 
@@ -1108,6 +1116,8 @@ int main(int argc, char *argv[])
 		}
 	}
 
+exit_packedinode:
+	erofs_packedfile_exit(&g_sbi);
 exit_hardlink:
 	if (fsckcfg.extract_path)
 		erofsfsck_hardlink_exit();
diff --git a/fuse/main.c b/fuse/main.c
index f6c04e8..cb2759e 100644
--- a/fuse/main.c
+++ b/fuse/main.c
@@ -12,6 +12,7 @@
 #include "erofs/print.h"
 #include "erofs/dir.h"
 #include "erofs/inode.h"
+#include "erofs/fragments.h"
 
 #include <float.h>
 #include <fuse.h>
@@ -688,11 +689,20 @@ int main(int argc, char *argv[])
 		goto err_dev_close;
 	}
 
+	if (erofs_sb_has_fragments(&g_sbi) && g_sbi.packed_nid > 0) {
+		ret = erofs_packedfile_init(&g_sbi, false);
+		if (ret) {
+			erofs_err("failed to initialize packedfile: %s",
+				  erofs_strerror(ret));
+			goto err_super_put;
+		}
+	}
+
 #if FUSE_MAJOR_VERSION >= 3
 	se = fuse_session_new(&args, &erofsfuse_lops, sizeof(erofsfuse_lops),
 			      NULL);
 	if (!se)
-		goto err_super_put;
+		goto err_packedinode;
 
 	if (fuse_session_mount(se, opts.mountpoint) >= 0) {
 		EROFSFUSE_MOUNT_MSG
@@ -722,7 +732,7 @@ int main(int argc, char *argv[])
 #else
 	ch = fuse_mount(opts.mountpoint, &args);
 	if (!ch)
-		goto err_super_put;
+		goto err_packedinode;
 	EROFSFUSE_MOUNT_MSG
 	se = fuse_lowlevel_new(&args, &erofsfuse_lops, sizeof(erofsfuse_lops),
 			       NULL);
@@ -743,6 +753,8 @@ int main(int argc, char *argv[])
 	fuse_unmount(opts.mountpoint, ch);
 #endif
 
+err_packedinode:
+	erofs_packedfile_exit(&g_sbi);
 err_super_put:
 	erofs_put_super(&g_sbi);
 err_dev_close:
diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
index e92b7c7..14a1b4a 100644
--- a/include/erofs/fragments.h
+++ b/include/erofs/fragments.h
@@ -27,6 +27,9 @@ FILE *erofs_packedfile(struct erofs_sb_info *sbi);
 int erofs_packedfile_init(struct erofs_sb_info *sbi, bool fragments_mkfs);
 void erofs_packedfile_exit(struct erofs_sb_info *sbi);
 
+int erofs_packedfile_read(struct erofs_sb_info *sbi,
+			  void *buf, erofs_off_t len, erofs_off_t pos);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/data.c b/lib/data.c
index 8033208..fd9c21a 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -8,6 +8,7 @@
 #include "erofs/internal.h"
 #include "erofs/trace.h"
 #include "erofs/decompress.h"
+#include "erofs/fragments.h"
 
 static int erofs_map_blocks_flatmode(struct erofs_inode *inode,
 				     struct erofs_map_blocks *map,
@@ -248,18 +249,7 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
 	int ret = 0;
 
 	if (map->m_flags & EROFS_MAP_FRAGMENT) {
-		struct erofs_inode packed_inode = {
-			.sbi = sbi,
-			.nid = sbi->packed_nid,
-		};
-
-		ret = erofs_read_inode_from_disk(&packed_inode);
-		if (ret) {
-			erofs_err("failed to read packed inode from disk");
-			return ret;
-		}
-
-		return erofs_pread(&packed_inode, buffer, length - skip,
+		return erofs_packedfile_read(sbi, buffer, length - skip,
 				   inode->fragmentoff + skip);
 	}
 
diff --git a/lib/fragments.c b/lib/fragments.c
index 43cebe0..0aea25f 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -24,6 +24,7 @@
 #include "erofs/print.h"
 #include "erofs/internal.h"
 #include "erofs/fragments.h"
+#include "erofs/bitops.h"
 
 struct erofs_fragment_dedupe_item {
 	struct list_head	list;
@@ -40,6 +41,11 @@ struct erofs_fragment_dedupe_item {
 struct erofs_packed_inode {
 	struct list_head *hash;
 	FILE *file;
+	unsigned long *uptodate;	/* bitmap: one bit per cached block */
+#if EROFS_MT_ENABLED
+	pthread_mutex_t mutex;		/* held while preloading the cache */
+#endif
+	unsigned int uptodate_size;	/* byte size of @uptodate */
 };
 
 const char *erofs_frags_packedname = "packed_file";
@@ -340,6 +346,9 @@ void erofs_packedfile_exit(struct erofs_sb_info *sbi)
 	if (!epi)
 		return;
 
+	if (epi->uptodate)
+		free(epi->uptodate);
+
 	if (epi->hash) {
 		for (i = 0; i < FRAGMENT_HASHSIZE; ++i)
 			list_for_each_entry_safe(di, n, &epi->hash[i], list)
@@ -386,9 +395,192 @@ int erofs_packedfile_init(struct erofs_sb_info *sbi, bool fragments_mkfs)
 		err = -errno;
 		goto err_out;
 	}
+
+	if (erofs_sb_has_fragments(sbi) && sbi->packed_nid > 0) {
+		struct erofs_inode ei = { .sbi = sbi, .nid = sbi->packed_nid };
+
+		err = erofs_read_inode_from_disk(&ei);
+		if (err) {
+			/* report the returned code: errno may be stale here */
+			erofs_err("failed to read packed inode from disk: %s",
+				  erofs_strerror(err));
+			goto err_out;
+		}
+
+		err = fseek(epi->file, ei.i_size, SEEK_SET);
+		if (err) {
+			err = -errno;
+			goto err_out;
+		}
+		/* one bit per block, rounded up to whole longs (never 0) */
+		epi->uptodate_size = sizeof(long) *
+			(BLK_ROUND_UP(sbi, ei.i_size) / (8 * sizeof(long)) + 1);
+		epi->uptodate = calloc(1, epi->uptodate_size);
+		if (!epi->uptodate) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+	}
 	return 0;
 
 err_out:
 	erofs_packedfile_exit(sbi);
 	return err;
 }
+
+/* Read the packed inode into @pi once; @pi->nid != 0 marks it as loaded. */
+static int erofs_load_packedinode_from_disk(struct erofs_inode *pi)
+{
+	int err;
+
+	if (pi->nid)
+		return 0;
+
+	pi->nid = pi->sbi->packed_nid;
+	err = erofs_read_inode_from_disk(pi);
+	if (err) {
+		erofs_err("failed to read packed inode from disk: %s",
+			  erofs_strerror(err));
+		pi->nid = 0;	/* let a later call retry, not reuse junk */
+	}
+	return err;
+}
+
+/*
+ * Load the extent of packed inode @pi at @map->m_la, block-aligned, into the
+ * cache file and mark it uptodate; returns a malloc()ed copy or an ERR_PTR().
+ */
+static void *erofs_packedfile_preload(struct erofs_inode *pi,
+				      struct erofs_map_blocks *map)
+{
+	struct erofs_sb_info *sbi = pi->sbi;
+	struct erofs_packed_inode *epi = sbi->packedinode;
+	const unsigned int blksz = erofs_blksiz(sbi);
+	erofs_off_t la, end;
+	ssize_t ret;
+	char *data;
+
+	ret = erofs_load_packedinode_from_disk(pi);
+	if (ret)
+		return ERR_PTR(ret);
+	la = map->m_la;
+	ret = erofs_map_blocks(pi, map, EROFS_GET_BLOCKS_FIEMAP);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* cache whole blocks only: skip a partial head, pad the tail */
+	end = round_up(map->m_la + map->m_llen, blksz);
+	if (map->m_la < la)
+		map->m_la = round_up(map->m_la, blksz);
+	else
+		DBG_BUGON(map->m_la > la);
+	map->m_llen = end - map->m_la;
+	DBG_BUGON(!map->m_llen);
+
+	data = malloc(map->m_llen);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+	ret = erofs_pread(pi, data, map->m_llen, map->m_la);
+	if (ret)
+		goto err_out;
+
+	fflush(epi->file);
+	ret = pwrite(fileno(epi->file), data, map->m_llen, map->m_la);
+	if (ret < 0) {
+		ret = -errno;
+		if (ret == -ENOSPC) {	/* tmpfile is full: drop the cache */
+			ftruncate(fileno(epi->file), 0);
+			memset(epi->uptodate, 0, epi->uptodate_size);
+		}
+	} else if (ret != map->m_llen) {
+		ret = -EIO;
+	} else {
+		for (la = map->m_la; la < end; la += blksz)
+			__erofs_set_bit(erofs_blknr(sbi, la), epi->uptodate);
+		return data;
+	}
+err_out:
+	free(data);
+	map->m_llen = 0;
+	return ERR_PTR(ret);
+}
+
+/*
+ * Read @len bytes at @pos of the packed file into @buf, through the tmpfile
+ * cache if sbi->packedinode is set up.  Returns 0 or the failing step's error.
+ */
+int erofs_packedfile_read(struct erofs_sb_info *sbi,
+			  void *buf, erofs_off_t len, erofs_off_t pos)
+{
+	struct erofs_packed_inode *epi = sbi->packedinode;
+	struct erofs_inode pi = { .sbi = sbi };
+	struct erofs_map_blocks map = { .index = UINT_MAX };
+	unsigned int bsz = erofs_blksiz(sbi);
+	erofs_off_t end = pos + len;
+	char *buffer = NULL;
+	int err = 0;
+
+	if (!epi) {
+		err = erofs_load_packedinode_from_disk(&pi);
+		if (!err)
+			err = erofs_pread(&pi, buf, len, pos);
+		return err;
+	}
+
+	while (pos < end) {
+		if (pos >= map.m_la && pos < map.m_la + map.m_llen) {
+			len = min_t(erofs_off_t, end - pos,
+				    map.m_la + map.m_llen - pos);
+			memcpy(buf, buffer + pos - map.m_la, len);
+		} else {
+			erofs_blk_t bnr = erofs_blknr(sbi, pos);
+			bool uptodate;
+
+			map.m_la = round_down(pos, bsz);
+			len = min_t(erofs_off_t, bsz - (pos & (bsz - 1)),
+				    end - pos);
+			uptodate = __erofs_test_bit(bnr, epi->uptodate);
+			if (!uptodate) {
+#if EROFS_MT_ENABLED
+				pthread_mutex_lock(&epi->mutex);
+				uptodate = __erofs_test_bit(bnr, epi->uptodate);
+				if (!uptodate) {
+#endif
+					free(buffer);
+					buffer = erofs_packedfile_preload(&pi, &map);
+					if (IS_ERR(buffer))
+						buffer = NULL;
+#if EROFS_MT_ENABLED
+				}
+				pthread_mutex_unlock(&epi->mutex);
+#endif
+				/* preload failed: bail out only once unlocked */
+				if (!uptodate && !buffer)
+					goto fallback;
+			}
+			if (!uptodate)
+				continue;
+
+			err = pread(fileno(epi->file), buf, len, pos);
+			if (err < 0) {
+				err = -errno;
+				break;
+			}
+			if (err == len) {
+				err = 0;
+			} else {
+fallback:
+				err = erofs_load_packedinode_from_disk(&pi);
+				if (!err)
+					err = erofs_pread(&pi, buf, len, pos);
+				if (err)
+					break;
+			}
+			map.m_llen = 0;
+		}
+		buf += len;
+		pos += len;
+	}
+	free(buffer);
+	return err;
+}
-- 
2.43.5



More information about the Linux-erofs mailing list