[PATCH v1] erofs-utils: enhance fragment cache for fuse
Yue Hu
zbestahu at gmail.com
Wed Nov 15 16:31:25 AEDT 2023
Hi Yiyan,
The LRU cache implementation roughly looks good to me, I think you can just
send a refined patch so that we can thoroughly review it.
Now, I have several comments listed below, please check.
On Mon, 23 Oct 2023 15:15:28 +0800
Li Yiyan <lyy0627 at sjtu.edu.cn> wrote:
> This is the initial draft of the fragment cache that
> I proposed for Fuse. I have added an LRU cache that allows us
> to check if there is a cached fragment block based on the offset
> when we need to read a fragment inode. If a cached block exists,
> we can then read it from memory, saving one disk read and one
> decompression step. This has resulted in a performance improvement
> of over ten times on Linux images with enabled fragments.
> Please note that my code has not been polished and thoroughly
> tested yet. It is only a preliminary implementation and
> performance validation of the idea.
>
> I appreciate your valuable feedback in advance.
>
> Draft for the fragment cache. Now just improve performance
> of first evaluation run, thanks to prefetching disk and avoiding
> decompression. When running next 9 runs, versions w/ and w/o fragment
> cache remain the same performance because no request read fragment
> actually due to page cache.
>
> Dataset: linux 5.15
> Compression algorithm: -lzma
> Additional options: -C131072 -T0 -Efragments
> Test options: --warmup 3 -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1"
The hyperfine benchmarking tool should not require the use of `--warmup` if
the `-p` option is used in conjunction with the command `echo 3 > /proc/sys/vm/drop_caches`.
see https://github.com/sharkdp/hyperfine/blob/master/README.md#warmup-runs-and-preparation-commands
>
> Evaluation result (w/o fragcache->lw/ fragcache avg time):
> - Sequence data: 252.617 s -> 19.171 s
> - Random data: 214.561 s -> 12.356 s
>
> Signed-off-by: Li Yiyan <lyy0627 at sjtu.edu.cn>
> ---
> include/erofs/lru.h | 39 ++++++++++++++
> lib/Makefile.am | 3 +-
> lib/data.c | 76 ++++++++++++++++++++++++++++
> lib/lru.c | 121 ++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 238 insertions(+), 1 deletion(-)
> create mode 100644 include/erofs/lru.h
> create mode 100644 lib/lru.c
>
> diff --git a/include/erofs/lru.h b/include/erofs/lru.h
> new file mode 100644
> index 0000000..cc8bf20
> --- /dev/null
> +++ b/include/erofs/lru.h
> @@ -0,0 +1,39 @@
> +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
> +/*
> + * JUST FOR EXPERIMENTAL USE
> + */
> +#ifndef __EROFS_CACHE_H
> +#define __EROFS_CACHE_H
> +
> +#include "erofs/list.h"
> +#include "erofs/hashmap.h"
> +
> +#define FRAGMENT_CACHE_SIZE (4096 * 1024)
> +
> +struct fragment_entry {
> + struct hashmap_entry hentry;
> +
> + u64 key;
> + char *value;
> + struct list_head node;
> +};
> +
> +struct lru_cache {
> + struct list_head lru;
> + struct hashmap map;
> + int capacity;
> + int size;
> +
> + /* statistics */
> + unsigned int hit;
> + unsigned int miss;
> + unsigned int evicted;
> +};
> +
> +struct lru_cache *lru_cache_new(int capacity);
> +void lru_cache_free(struct lru_cache *lru);
> +void lru_cache_remove(struct lru_cache *lru, const u64 key);
> +void *lru_cache_get(struct lru_cache *lru, const u64 key);
> +void lru_cache_put(struct lru_cache *lru, const u64 key, void *value);
> +
> +#endif
> diff --git a/lib/Makefile.am b/lib/Makefile.am
> index 54b9c9c..308cf49 100644
> --- a/lib/Makefile.am
> +++ b/lib/Makefile.am
> @@ -19,6 +19,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
> $(top_srcdir)/include/erofs/internal.h \
> $(top_srcdir)/include/erofs/io.h \
> $(top_srcdir)/include/erofs/list.h \
> + $(top_srcdir)/include/erofs/lru.h \
> $(top_srcdir)/include/erofs/print.h \
> $(top_srcdir)/include/erofs/tar.h \
> $(top_srcdir)/include/erofs/trace.h \
> @@ -34,7 +35,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
> namei.c data.c compress.c compressor.c zmap.c decompress.c \
> compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
> fragments.c dedupe.c uuid_unparse.c uuid.c tar.c \
> - block_list.c xxhash.c rebuild.c diskbuf.c
> + block_list.c xxhash.c rebuild.c diskbuf.c lru.c
>
> liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
> if ENABLE_LZ4
> diff --git a/lib/data.c b/lib/data.c
> index a87053f..bb46bab 100644
> --- a/lib/data.c
> +++ b/lib/data.c
> @@ -232,6 +232,45 @@ static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
> return 0;
> }
>
> +#define FRAGEMENT_CACHE 1
> +
> +#if FRAGEMENT_CACHE
> +#include "erofs/lru.h"
> +
> +struct lru_cache *fragment_cache;
> +
> +static void merge_cache(u64 off, u64 len, char *cache_data1, char *cache_data2,
> + bool read_next, char *buffer)
> +{
> + u64 cache_sz =
> + (off / FRAGMENT_CACHE_SIZE + 1) * FRAGMENT_CACHE_SIZE - off;
> +
> + if (read_next) {
> + memcpy(buffer, cache_data1, cache_sz);
> + memcpy(buffer + cache_sz, cache_data2,
> + len - cache_sz);
> + } else {
> + memcpy(buffer, cache_data1, len);
> + }
> +}
> +
> +static int fetch_cache_fragment(struct erofs_inode *inode, u64 idx, char **data)
> +{
> + int ret;
> +
> + if (!data || !inode)
> + return -1;
> +
> + *data = malloc(FRAGMENT_CACHE_SIZE);
> + ret = erofs_pread(inode, *data, FRAGMENT_CACHE_SIZE,
> + idx * FRAGMENT_CACHE_SIZE);
> + if (!ret)
> + lru_cache_put(fragment_cache, idx, *data);
> +
> + return ret;
> +}
> +#endif
> +
> int z_erofs_read_one_data(struct erofs_inode *inode,
> struct erofs_map_blocks *map, char *raw, char *buffer,
> erofs_off_t skip, erofs_off_t length, bool trimmed)
> @@ -241,6 +280,29 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
> int ret = 0;
>
> if (map->m_flags & EROFS_MAP_FRAGMENT) {
> +#if FRAGEMENT_CACHE
> + char *cache_data1, *cache_data2;
> + u64 offset = inode->fragmentoff + skip;
> + u64 idx = offset / FRAGMENT_CACHE_SIZE;
> + bool read_next = (((idx + 1) * FRAGMENT_CACHE_SIZE - offset) <=
> + (length - skip));
Why not calculate the unit number based on the read length? This way, we can read
multiple units instead of just the next one.
> +
> + erofs_dbg(
> + "fragment offset %llu, idx %llu, len %llu, read_next %d",
> + offset, idx, length - skip, read_next);
> +
> + if (!fragment_cache)
> + fragment_cache = lru_cache_new(1024);
> +
> + cache_data1 = lru_cache_get(fragment_cache, idx);
> + if (cache_data1 && read_next)
> + cache_data2 = lru_cache_get(fragment_cache, idx + 1);
> + if (cache_data1 && (!read_next || cache_data2)) {
> + merge_cache(offset, length - skip, cache_data1,
> + cache_data2, read_next, buffer);
> + return 0;
> + }
> +#endif
> struct erofs_inode packed_inode = {
> .sbi = sbi,
> .nid = sbi->packed_nid,
> @@ -252,8 +314,22 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
> return ret;
> }
>
> +#if FRAGEMENT_CACHE
> + ret = fetch_cache_fragment(&packed_inode, idx, &cache_data1);
> + if (ret)
> + return ret;
> + if (read_next) {
> + ret = fetch_cache_fragment(&packed_inode, idx + 1, &cache_data2);
> + if (ret)
> + return ret;
> + }
> + merge_cache(offset, length - skip, cache_data1,
> + cache_data2, read_next, buffer);
> + return ret;
> +#else
> return erofs_pread(&packed_inode, buffer, length - skip,
> inode->fragmentoff + skip);
> +#endif
> }
>
> /* no device id here, thus it will always succeed */
> diff --git a/lib/lru.c b/lib/lru.c
> new file mode 100644
> index 0000000..3fb60b3
> --- /dev/null
> +++ b/lib/lru.c
> @@ -0,0 +1,121 @@
> +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
> +/*
> + * JUST FOR EXPERIMENTAL USE
> + */
> +
> +#include "erofs/lru.h"
> +#include "erofs/defs.h"
> +
> +static int u64_cmp(const void *a, const void *b, const void *key)
> +{
> + struct fragment_entry *ea = (struct fragment_entry *)a;
> + struct fragment_entry *eb = (struct fragment_entry *)b;
> +
> + return ea->key == eb->key;
> +}
> +
> +struct lru_cache *lru_cache_new(int capacity)
> +{
> + struct lru_cache *cache = malloc(sizeof(struct lru_cache *));
> +
> + if (!cache)
> + return NULL;
> + hashmap_init(&cache->map, u64_cmp, 0);
> + init_list_head(&cache->lru);
> + cache->capacity = capacity;
> + cache->size = 0;
> + cache->hit = cache->miss = cache->evicted = 0;
> +
> + return cache;
> +}
> +
> +void lru_cache_free(struct lru_cache *lru)
> +{
> + hashmap_free(&lru->map);
> + free(lru);
> +}
> +
> +void *lru_cache_get(struct lru_cache *lru, const u64 key)
> +{
> + unsigned int hash;
> + struct fragment_entry *entry;
> +
> + hash = memhash(&key, sizeof(key));
> + entry = hashmap_get_from_hash(&lru->map, hash, &key);
> +
> + if (entry) {
> + list_del(&entry->node);
> + list_add_tail(&entry->node, &lru->lru);
> +
> + ++lru->hit;
> + erofs_dbg("statistic: hit %u, miss %u, evicted %u", lru->hit,
> + lru->miss, lru->evicted);
> + return entry->value;
> + }
> +
> + ++lru->miss;
> + erofs_dbg("statistic: hit %u, miss %u, evicted %u", lru->hit, lru->miss,
> + lru->evicted);
> + return NULL;
> +}
> +
> +void lru_cache_put(struct lru_cache *lru, const u64 key, void *value)
> +{
> + unsigned int hash;
> + struct fragment_entry *entry;
> +
> + hash = memhash(&key, sizeof(key));
> + entry = hashmap_get_from_hash(&lru->map, hash, &key);
> + if (entry)
> + return;
> +
> + if (lru->size == lru->capacity) {
> + struct fragment_entry *victim;
> +
> + victim = list_first_entry(&lru->lru, struct fragment_entry,
> + node);
> + list_del(&victim->node);
> +
> + hashmap_remove(&lru->map, victim);
> +
> + free(victim->value);
> + free(victim);
> + lru->size--;
> +
> + ++lru->evicted;
> + erofs_dbg("statistic: hit %u, miss %u, evicted %u", lru->hit,
> + lru->miss, lru->evicted);
> + }
> +
> + entry = malloc(sizeof(struct fragment_entry));
> + if (!entry)
> + return;
> +
> + entry->key = key;
> + entry->value = value;
> + if (!entry->value)
> + return;
> +
> + hashmap_entry_init(entry, hash);
> + hashmap_add(&lru->map, entry);
> + lru->size++;
> +
> + list_add_tail(&entry->node, &lru->lru);
> +}
> +
> +void lru_cache_remove(struct lru_cache *lru, const u64 key)
> +{
> + unsigned int hash;
> + struct fragment_entry *entry;
> +
> + hash = memhash(&key, sizeof(key));
> + entry = hashmap_get_from_hash(&lru->map, hash, &key);
> + if (entry) {
> + list_del(&entry->node);
> + hashmap_remove(&lru->map, entry);
> +
> + free(entry->value);
> + free(entry);
> + lru->size--;
> + }
> +}
More information about the Linux-erofs
mailing list