[PATCH v3] erofs: rename per-CPU buffers to global buffer pool and make it configurable
Chunhai Guo
guochunhai at vivo.com
Sat Mar 23 00:45:40 AEDT 2024
Allocating compressed buffers on demand costs extra time for
low-latency algorithms (like lz4), so EROFS uses per-CPU buffers to
hold compressed data when in-place decompression is unfulfilled.
However, this is wasteful of memory on devices with hundreds of CPUs,
since only a small number of CPUs typically decompress concurrently.

Rename the per-CPU buffers to a 'global buffer pool' and make its size
configurable, so that two or more CPUs can share a common buffer to
reduce memory occupation.
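
The pool size can be set with the new 'global_buffers' module
parameter added below; when left unset, it still defaults to one
buffer per possible CPU, matching the old per-CPU behavior. For
example, when EROFS is built in, a device with hundreds of CPUs could
boot with:

    erofs.global_buffers=4

to cap the pool at four shared buffers.
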
Suggested-by: Gao Xiang <xiang at kernel.org>
Signed-off-by: Chunhai Guo <guochunhai at vivo.com>
---
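Notes: the parameter is read-only at runtime (0444), so the effective
pool size (after being clamped to the number of possible CPUs during
init) can be read back, e.g.:

    $ cat /sys/module/erofs/parameters/global_buffers
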
fs/erofs/Makefile | 2 +-
fs/erofs/decompressor.c | 8 +-
fs/erofs/internal.h | 14 ++--
fs/erofs/pcpubuf.c | 148 ----------------------------------------
fs/erofs/super.c | 9 ++-
fs/erofs/utils.c | 150 ++++++++++++++++++++++++++++++++++++++++
6 files changed, 169 insertions(+), 162 deletions(-)
delete mode 100644 fs/erofs/pcpubuf.c
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 994d0b9deddf..6cf1504c63e6 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -3,7 +3,7 @@
obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index d4cee95af14c..72a11c5eba8f 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -54,7 +54,7 @@ static int z_erofs_load_lz4_config(struct super_block *sb,
sbi->lz4.max_distance_pages = distance ?
DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
LZ4_MAX_DISTANCE_PAGES;
- return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
+ return z_erofs_gbuf_growsize(sbi->lz4.max_pclusterblks);
}
/*
@@ -159,7 +159,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
docopy:
- /* Or copy compressed data which can be overlapped to per-CPU buffer */
+ /* Or copy compressed data which can be overlapped to global buffer */
in = rq->in;
- src = erofs_get_pcpubuf(ctx->inpages);
+ src = z_erofs_get_gbuf(ctx->inpages);
if (!src) {
DBG_BUGON(1);
kunmap_local(inpage);
@@ -260,7 +260,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
} else if (maptype == 1) {
vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) {
- erofs_put_pcpubuf(src);
+ z_erofs_put_gbuf(src);
} else if (maptype != 3) {
DBG_BUGON(1);
return -EFAULT;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b0409badb017..574a7c5b3929 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -471,11 +471,11 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp);
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags);
-void *erofs_get_pcpubuf(unsigned int requiredpages);
-void erofs_put_pcpubuf(void *ptr);
-int erofs_pcpubuf_growsize(unsigned int nrpages);
-void __init erofs_pcpubuf_init(void);
-void erofs_pcpubuf_exit(void);
+void *z_erofs_get_gbuf(unsigned int requiredpages);
+void z_erofs_put_gbuf(void *ptr);
+int z_erofs_gbuf_growsize(unsigned int nrpages);
+int __init z_erofs_gbuf_init(void);
+void z_erofs_gbuf_exit(void);
int erofs_init_managed_cache(struct super_block *sb);
int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb);
#else
@@ -485,8 +485,8 @@ static inline int erofs_init_shrinker(void) { return 0; }
static inline void erofs_exit_shrinker(void) {}
static inline int z_erofs_init_zip_subsystem(void) { return 0; }
static inline void z_erofs_exit_zip_subsystem(void) {}
-static inline void erofs_pcpubuf_init(void) {}
-static inline void erofs_pcpubuf_exit(void) {}
+static inline int z_erofs_gbuf_init(void) { return 0; }
+static inline void z_erofs_gbuf_exit(void) {}
static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif /* !CONFIG_EROFS_FS_ZIP */
diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c
deleted file mode 100644
index c7a4b1d77069..000000000000
--- a/fs/erofs/pcpubuf.c
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) Gao Xiang <xiang at kernel.org>
- *
- * For low-latency decompression algorithms (e.g. lz4), reserve consecutive
- * per-CPU virtual memory (in pages) in advance to store such inplace I/O
- * data if inplace decompression is failed (due to unmet inplace margin for
- * example).
- */
-#include "internal.h"
-
-struct erofs_pcpubuf {
- raw_spinlock_t lock;
- void *ptr;
- struct page **pages;
- unsigned int nrpages;
-};
-
-static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);
-
-void *erofs_get_pcpubuf(unsigned int requiredpages)
- __acquires(pcb->lock)
-{
- struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);
-
- raw_spin_lock(&pcb->lock);
- /* check if the per-CPU buffer is too small */
- if (requiredpages > pcb->nrpages) {
- raw_spin_unlock(&pcb->lock);
- put_cpu_var(erofs_pcb);
- /* (for sparse checker) pretend pcb->lock is still taken */
- __acquire(pcb->lock);
- return NULL;
- }
- return pcb->ptr;
-}
-
-void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
-{
- struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());
-
- DBG_BUGON(pcb->ptr != ptr);
- raw_spin_unlock(&pcb->lock);
- put_cpu_var(erofs_pcb);
-}
-
-/* the next step: support per-CPU page buffers hotplug */
-int erofs_pcpubuf_growsize(unsigned int nrpages)
-{
- static DEFINE_MUTEX(pcb_resize_mutex);
- static unsigned int pcb_nrpages;
- struct page *pagepool = NULL;
- int delta, cpu, ret, i;
-
- mutex_lock(&pcb_resize_mutex);
- delta = nrpages - pcb_nrpages;
- ret = 0;
- /* avoid shrinking pcpubuf, since no idea how many fses rely on */
- if (delta <= 0)
- goto out;
-
- for_each_possible_cpu(cpu) {
- struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
- struct page **pages, **oldpages;
- void *ptr, *old_ptr;
-
- pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- break;
- }
-
- for (i = 0; i < nrpages; ++i) {
- pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
- if (!pages[i]) {
- ret = -ENOMEM;
- oldpages = pages;
- goto free_pagearray;
- }
- }
- ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
- if (!ptr) {
- ret = -ENOMEM;
- oldpages = pages;
- goto free_pagearray;
- }
- raw_spin_lock(&pcb->lock);
- old_ptr = pcb->ptr;
- pcb->ptr = ptr;
- oldpages = pcb->pages;
- pcb->pages = pages;
- i = pcb->nrpages;
- pcb->nrpages = nrpages;
- raw_spin_unlock(&pcb->lock);
-
- if (!oldpages) {
- DBG_BUGON(old_ptr);
- continue;
- }
-
- if (old_ptr)
- vunmap(old_ptr);
-free_pagearray:
- while (i)
- erofs_pagepool_add(&pagepool, oldpages[--i]);
- kfree(oldpages);
- if (ret)
- break;
- }
- pcb_nrpages = nrpages;
- erofs_release_pages(&pagepool);
-out:
- mutex_unlock(&pcb_resize_mutex);
- return ret;
-}
-
-void __init erofs_pcpubuf_init(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
-
- raw_spin_lock_init(&pcb->lock);
- }
-}
-
-void erofs_pcpubuf_exit(void)
-{
- int cpu, i;
-
- for_each_possible_cpu(cpu) {
- struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
-
- if (pcb->ptr) {
- vunmap(pcb->ptr);
- pcb->ptr = NULL;
- }
- if (!pcb->pages)
- continue;
-
- for (i = 0; i < pcb->nrpages; ++i)
- if (pcb->pages[i])
- put_page(pcb->pages[i]);
- kfree(pcb->pages);
- pcb->pages = NULL;
- }
-}
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 5f60f163bd56..5a045a61801f 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -902,7 +902,10 @@ static int __init erofs_module_init(void)
if (err)
goto deflate_err;
- erofs_pcpubuf_init();
+ err = z_erofs_gbuf_init();
+ if (err)
+ goto gbuf_err;
+
err = z_erofs_init_zip_subsystem();
if (err)
goto zip_err;
@@ -922,6 +925,8 @@ static int __init erofs_module_init(void)
sysfs_err:
z_erofs_exit_zip_subsystem();
zip_err:
+ z_erofs_gbuf_exit();
+gbuf_err:
z_erofs_deflate_exit();
deflate_err:
z_erofs_lzma_exit();
@@ -945,7 +950,7 @@ static void __exit erofs_module_exit(void)
z_erofs_lzma_exit();
erofs_exit_shrinker();
kmem_cache_destroy(erofs_inode_cachep);
- erofs_pcpubuf_exit();
+ z_erofs_gbuf_exit();
}
static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index e146d09151af..3cf2155bd3bf 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -284,4 +284,154 @@ void erofs_exit_shrinker(void)
{
shrinker_free(erofs_shrinker_info);
}
+
+struct z_erofs_gbuf {
+ spinlock_t lock;
+ void *ptr;
+ struct page **pages;
+ unsigned int nrpages;
+};
+
+static struct z_erofs_gbuf *z_erofs_gbufpool;
+static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages;
+
+module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
+
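+/* pick a buffer in the pool for the current CPU; any buffer works */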
+static unsigned int z_erofs_gbuf_id(void)
+{
+ return raw_smp_processor_id() % z_erofs_gbuf_count;
+}
+
+void *z_erofs_get_gbuf(unsigned int requiredpages)
+ __acquires(gbuf->lock)
+{
+ struct z_erofs_gbuf *gbuf;
+
+ gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
+ spin_lock(&gbuf->lock);
+ /* check if the buffer is too small */
+ if (requiredpages > gbuf->nrpages) {
+ spin_unlock(&gbuf->lock);
+ /* (for sparse checker) pretend gbuf->lock is still taken */
+ __acquire(gbuf->lock);
+ return NULL;
+ }
+ return gbuf->ptr;
+}
+
+void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
+{
+ struct z_erofs_gbuf *gbuf;
+
+ gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
+ DBG_BUGON(gbuf->ptr != ptr);
+ spin_unlock(&gbuf->lock);
+}
+
+int z_erofs_gbuf_growsize(unsigned int nrpages)
+{
+ static DEFINE_MUTEX(gbuf_resize_mutex);
+ struct page *pagepool = NULL;
+ int delta, ret, i, j;
+
+ mutex_lock(&gbuf_resize_mutex);
+ delta = nrpages - z_erofs_gbuf_nrpages;
+ ret = 0;
+ /* avoid shrinking gbufs, as we don't know how many fses rely on them */
+ if (delta <= 0)
+ goto out;
+
+ for (i = 0; i < z_erofs_gbuf_count; ++i) {
+ struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
+ struct page **pages, **tmp_pages;
+ void *ptr, *old_ptr = NULL;
+
+ ret = -ENOMEM;
+ tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
+ if (!tmp_pages)
+ break;
+ for (j = 0; j < nrpages; ++j) {
+ tmp_pages[j] = erofs_allocpage(&pagepool, GFP_KERNEL);
+ if (!tmp_pages[j])
+ goto free_pagearray;
+ }
+ ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
+ if (!ptr)
+ goto free_pagearray;
+
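+ /* swap in the new buffer; tmp_pages is reused to hold the old pages */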
+ pages = tmp_pages;
+ spin_lock(&gbuf->lock);
+ old_ptr = gbuf->ptr;
+ gbuf->ptr = ptr;
+ tmp_pages = gbuf->pages;
+ gbuf->pages = pages;
+ j = gbuf->nrpages;
+ gbuf->nrpages = nrpages;
+ spin_unlock(&gbuf->lock);
+ ret = 0;
+ if (!tmp_pages) {
+ DBG_BUGON(old_ptr);
+ continue;
+ }
+
+ if (old_ptr)
+ vunmap(old_ptr);
+free_pagearray:
+ while (j)
+ erofs_pagepool_add(&pagepool, tmp_pages[--j]);
+ kfree(tmp_pages);
+ if (ret)
+ break;
+ }
+ z_erofs_gbuf_nrpages = nrpages;
+ erofs_release_pages(&pagepool);
+out:
+ mutex_unlock(&gbuf_resize_mutex);
+ return ret;
+}
+
+int __init z_erofs_gbuf_init(void)
+{
+ unsigned int i = num_possible_cpus();
+
+ if (!z_erofs_gbuf_count)
+ z_erofs_gbuf_count = i;
+ else
+ z_erofs_gbuf_count = min(z_erofs_gbuf_count, i);
+
+ z_erofs_gbufpool = kcalloc(z_erofs_gbuf_count,
+ sizeof(*z_erofs_gbufpool), GFP_KERNEL);
+ if (!z_erofs_gbufpool)
+ return -ENOMEM;
+
+ for (i = 0; i < z_erofs_gbuf_count; ++i)
+ spin_lock_init(&z_erofs_gbufpool[i].lock);
+ return 0;
+}
+
+void z_erofs_gbuf_exit(void)
+{
+ int i, j;
+
+ for (i = 0; i < z_erofs_gbuf_count; ++i) {
+ struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
+
+ if (gbuf->ptr) {
+ vunmap(gbuf->ptr);
+ gbuf->ptr = NULL;
+ }
+
+ if (!gbuf->pages)
+ continue;
+
+ for (j = 0; j < gbuf->nrpages; ++j)
+ if (gbuf->pages[j])
+ put_page(gbuf->pages[j]);
+ kfree(gbuf->pages);
+ gbuf->pages = NULL;
+ }
+ kfree(z_erofs_gbufpool);
+}
#endif /* !CONFIG_EROFS_FS_ZIP */
--
2.25.1