[PATCH v1] erofs-utils: enhance fragment cache for fuse

Li Yiyan lyy0627 at sjtu.edu.cn
Mon Oct 23 18:15:28 AEDT 2023


This is the initial draft of the fragment cache that
I proposed for Fuse. I have added an LRU cache that allows us
to check if there is a cached fragment block based on the offset
when we need to read a fragment inode. If a cached block exists,
we can then read it from memory, saving one disk read and one
decompression step. This has resulted in a performance improvement
of over ten times on Linux images with enabled fragments.
Please note that my code has not been polished and thoroughly
tested yet. It is only a preliminary implementation and
performance validation of the idea.

I appreciate your valuable feedback in advance.

Draft for the fragment cache. Now just improve performance
of first evaluation run, thanks to prefetching disk and avoiding
decompression. When running next 9 runs, versions w/ and w/o fragment
cache remain the same performance because no request read fragment
actually due to page cache.

Dataset: linux 5.15
Compression algorithm: -lzma
Additional options: -C131072 -T0 -Efragments
Test options: --warmup 3 -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1"

Evaluation result (w/o fragcache->lw/ fragcache avg time):
	- Sequence data: 252.617 s -> 19.171 s
	- Random data: 214.561 s -> 12.356 s

Signed-off-by: Li Yiyan <lyy0627 at sjtu.edu.cn>
---
 include/erofs/lru.h |  39 ++++++++++++++
 lib/Makefile.am     |   3 +-
 lib/data.c          |  76 ++++++++++++++++++++++++++++
 lib/lru.c           | 121 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 include/erofs/lru.h
 create mode 100644 lib/lru.c

diff --git a/include/erofs/lru.h b/include/erofs/lru.h
new file mode 100644
index 0000000..cc8bf20
--- /dev/null
+++ b/include/erofs/lru.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * JUST FOR EXPERIMENTAL USE
+ */
+#ifndef __EROFS_CACHE_H
+#define __EROFS_CACHE_H
+
+#include "erofs/list.h"
+#include "erofs/hashmap.h"
+
+#define FRAGMENT_CACHE_SIZE (4096 * 1024)
+
+struct fragment_entry {
+	struct hashmap_entry hentry;
+
+	u64 key;
+	char *value;
+	struct list_head node;
+};
+
+struct lru_cache {
+	struct list_head lru;
+	struct hashmap map;
+	int capacity;
+	int size;
+
+	/* statistics */
+	unsigned int hit;
+	unsigned int miss;
+	unsigned int evicted;
+};
+
+struct lru_cache *lru_cache_new(int capacity);
+void lru_cache_free(struct lru_cache *lru);
+void lru_cache_remove(struct lru_cache *lru, const u64 key);
+void *lru_cache_get(struct lru_cache *lru, const u64 key);
+void lru_cache_put(struct lru_cache *lru, const u64 key, void *value);
+
+#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 54b9c9c..308cf49 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -19,6 +19,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/internal.h \
       $(top_srcdir)/include/erofs/io.h \
       $(top_srcdir)/include/erofs/list.h \
+      $(top_srcdir)/include/erofs/lru.h \
       $(top_srcdir)/include/erofs/print.h \
       $(top_srcdir)/include/erofs/tar.h \
       $(top_srcdir)/include/erofs/trace.h \
@@ -34,7 +35,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
 		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
 		      fragments.c dedupe.c uuid_unparse.c uuid.c tar.c \
-		      block_list.c xxhash.c rebuild.c diskbuf.c
+		      block_list.c xxhash.c rebuild.c diskbuf.c  lru.c
 
 liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
 if ENABLE_LZ4
diff --git a/lib/data.c b/lib/data.c
index a87053f..bb46bab 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -232,6 +232,45 @@ static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
 	return 0;
 }
 
+#define FRAGEMENT_CACHE 1
+
+#if FRAGEMENT_CACHE
+#include "erofs/lru.h"
+
+struct lru_cache *fragment_cache;
+
+static void merge_cache(u64 off, u64 len, char *cache_data1, char *cache_data2,
+			bool read_next, char *buffer)
+{
+	u64 cache_sz =
+		(off / FRAGMENT_CACHE_SIZE + 1) * FRAGMENT_CACHE_SIZE - off;
+
+	if (read_next) {
+		memcpy(buffer, cache_data1, cache_sz);
+		memcpy(buffer + cache_sz, cache_data2,
+		       len - cache_sz);
+	} else {
+		memcpy(buffer, cache_data1, len);
+	}
+}
+
+static int fetch_cache_fragment(struct erofs_inode *inode, u64 idx, char **data)
+{
+	int ret;
+
+	if (!data || !inode)
+		return -1;
+
+	*data = malloc(FRAGMENT_CACHE_SIZE);
+	ret = erofs_pread(inode, *data, FRAGMENT_CACHE_SIZE,
+			  idx * FRAGMENT_CACHE_SIZE);
+	if (!ret)
+		lru_cache_put(fragment_cache, idx, *data);
+
+	return ret;
+}
+#endif
+
 int z_erofs_read_one_data(struct erofs_inode *inode,
 			struct erofs_map_blocks *map, char *raw, char *buffer,
 			erofs_off_t skip, erofs_off_t length, bool trimmed)
@@ -241,6 +280,29 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
 	int ret = 0;
 
 	if (map->m_flags & EROFS_MAP_FRAGMENT) {
+#if FRAGEMENT_CACHE
+		char *cache_data1, *cache_data2;
+		u64 offset = inode->fragmentoff + skip;
+		u64 idx = offset / FRAGMENT_CACHE_SIZE;
+		bool read_next = (((idx + 1) * FRAGMENT_CACHE_SIZE - offset) <=
+				  (length - skip));
+
+		erofs_dbg(
+			"fragment offset %llu, idx %llu, len %llu, read_next %d",
+			offset, idx, length - skip, read_next);
+
+		if (!fragment_cache)
+			fragment_cache = lru_cache_new(1024);
+
+		cache_data1 = lru_cache_get(fragment_cache, idx);
+		if (cache_data1 && read_next)
+			cache_data2 = lru_cache_get(fragment_cache, idx + 1);
+		if (cache_data1 && (!read_next || cache_data2)) {
+			merge_cache(offset, length - skip, cache_data1,
+				    cache_data2, read_next, buffer);
+			return 0;
+		}
+#endif
 		struct erofs_inode packed_inode = {
 			.sbi = sbi,
 			.nid = sbi->packed_nid,
@@ -252,8 +314,22 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
 			return ret;
 		}
 
+#if FRAGEMENT_CACHE
+		ret = fetch_cache_fragment(&packed_inode, idx, &cache_data1);
+		if (ret)
+			return ret;
+		if (read_next) {
+			ret = fetch_cache_fragment(&packed_inode, idx + 1, &cache_data2);
+			if (ret)
+				return ret;
+		}
+		merge_cache(offset, length - skip, cache_data1,
+			    cache_data2, read_next, buffer);
+		return ret;
+#else
 		return erofs_pread(&packed_inode, buffer, length - skip,
 				   inode->fragmentoff + skip);
+#endif
 	}
 
 	/* no device id here, thus it will always succeed */
diff --git a/lib/lru.c b/lib/lru.c
new file mode 100644
index 0000000..3fb60b3
--- /dev/null
+++ b/lib/lru.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * JUST FOR EXPERIMENTAL USE
+ */
+
+#include "erofs/lru.h"
+#include "erofs/defs.h"
+
+static int u64_cmp(const void *a, const void *b, const void *key)
+{
+	struct fragment_entry *ea = (struct fragment_entry *)a;
+	struct fragment_entry *eb = (struct fragment_entry *)b;
+
+	return ea->key == eb->key;
+}
+
+struct lru_cache *lru_cache_new(int capacity)
+{
+	struct lru_cache *cache = malloc(sizeof(struct lru_cache *));
+
+	if (!cache)
+		return NULL;
+	hashmap_init(&cache->map, u64_cmp, 0);
+	init_list_head(&cache->lru);
+	cache->capacity = capacity;
+	cache->size = 0;
+	cache->hit = cache->miss = cache->evicted = 0;
+
+	return cache;
+}
+
+void lru_cache_free(struct lru_cache *lru)
+{
+	hashmap_free(&lru->map);
+	free(lru);
+}
+
+void *lru_cache_get(struct lru_cache *lru, const u64 key)
+{
+	unsigned int hash;
+	struct fragment_entry *entry;
+
+	hash = memhash(&key, sizeof(key));
+	entry = hashmap_get_from_hash(&lru->map, hash, &key);
+
+	if (entry) {
+		list_del(&entry->node);
+		list_add_tail(&entry->node, &lru->lru);
+
+		++lru->hit;
+		erofs_dbg("statistic: hit %u, miss %u, evicted %u", lru->hit,
+			  lru->miss, lru->evicted);
+		return entry->value;
+	}
+
+	++lru->miss;
+	erofs_dbg("statistic: hit %u, miss %u, evicted %u", lru->hit, lru->miss,
+		  lru->evicted);
+	return NULL;
+}
+
+void lru_cache_put(struct lru_cache *lru, const u64 key, void *value)
+{
+	unsigned int hash;
+	struct fragment_entry *entry;
+
+	hash = memhash(&key, sizeof(key));
+	entry = hashmap_get_from_hash(&lru->map, hash, &key);
+	if (entry)
+		return;
+
+	if (lru->size == lru->capacity) {
+		struct fragment_entry *victim;
+
+		victim = list_first_entry(&lru->lru, struct fragment_entry,
+					  node);
+		list_del(&victim->node);
+
+		hashmap_remove(&lru->map, victim);
+
+		free(victim->value);
+		free(victim);
+		lru->size--;
+
+		++lru->evicted;
+		erofs_dbg("statistic: hit %u, miss %u, evicted %u", lru->hit,
+			  lru->miss, lru->evicted);
+	}
+
+	entry = malloc(sizeof(struct fragment_entry));
+	if (!entry)
+		return;
+
+	entry->key = key;
+	entry->value = value;
+	if (!entry->value)
+		return;
+
+	hashmap_entry_init(entry, hash);
+	hashmap_add(&lru->map, entry);
+	lru->size++;
+
+	list_add_tail(&entry->node, &lru->lru);
+}
+
+void lru_cache_remove(struct lru_cache *lru, const u64 key)
+{
+	unsigned int hash;
+	struct fragment_entry *entry;
+
+	hash = memhash(&key, sizeof(key));
+	entry = hashmap_get_from_hash(&lru->map, hash, &key);
+	if (entry) {
+		list_del(&entry->node);
+		hashmap_remove(&lru->map, entry);
+
+		free(entry->value);
+		free(entry);
+		lru->size--;
+	}
+}
-- 
2.34.1



More information about the Linux-erofs mailing list