[PATCH v2 3/3] erofs-utils: lib: enhance erofs_rebuild_get_dentry with bloom filter
Hongzhen Luo
hongzhen at linux.alibaba.com
Mon Jul 22 17:11:40 AEST 2024
Add a bloom filter to exclude entries that are not in `pwd->i_subdirs`,
thereby improving the performance of `erofs_rebuild_get_dentry`. Below
are the results for different # of files in the same directory:
+---------+--------------------+
| # files | time reduction (%) |
+---------+--------------------+
| 1e4 | 55% |
+---------+--------------------+
| 1e5 | 98% |
+---------+--------------------+
| 2e5 | 98% |
+---------+--------------------+
| 3e5 | 99% |
+---------+--------------------+
Signed-off-by: Hongzhen Luo <hongzhen at linux.alibaba.com>
---
v2: Changes since v1:
- move erofs_bloom_init() to erofs_super_init()
- make the # hash function and bit array size of the bloom filter configurable
v1: https://lore.kernel.org/all/20240718054025.427439-3-hongzhen@linux.alibaba.com/
---
include/erofs/config.h | 2 ++
include/erofs/internal.h | 1 +
lib/rebuild.c | 61 ++++++++++++++++++++++++++++++++++------
lib/super.c | 27 ++++++++++++++++++
mkfs/main.c | 20 +++++++++++++
5 files changed, 103 insertions(+), 8 deletions(-)
diff --git a/include/erofs/config.h b/include/erofs/config.h
index f8726b5..c322c9a 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -92,6 +92,8 @@ struct erofs_configure {
char *fs_config_file;
char *block_list_file;
#endif
+ u32 c_bloom_nrfuc;
+ unsigned long c_bloom_bitsize;
};
extern struct erofs_configure cfg;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index d3dd676..ef7dd2d 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -402,6 +402,7 @@ struct erofs_map_dev {
};
/* super.c */
+int erofs_super_init(struct erofs_sb_info *sbi);
int erofs_read_superblock(struct erofs_sb_info *sbi);
void erofs_put_super(struct erofs_sb_info *sbi);
int erofs_writesb(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh,
diff --git a/lib/rebuild.c b/lib/rebuild.c
index 9e8bf8f..3fd3ea0 100644
--- a/lib/rebuild.c
+++ b/lib/rebuild.c
@@ -15,6 +15,7 @@
#include "erofs/xattr.h"
#include "erofs/blobchunk.h"
#include "erofs/internal.h"
+#include "erofs/bloom_filter.h"
#include "liberofs_uuid.h"
#ifdef HAVE_LINUX_AUFS_TYPE_H
@@ -62,14 +63,48 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
char *path, bool aufs, bool *whout, bool *opq, bool to_head)
{
struct erofs_dentry *d = NULL;
- unsigned int len = strlen(path);
- char *s = path;
+ unsigned int len, p = 0;
+ char *s;
+ struct erofs_sb_info *sbi = pwd->sbi;
+ char buf[4096];
*whout = false;
*opq = false;
+ s = pwd->i_srcpath;
+ len = strlen(pwd->i_srcpath);
+ /* normalize the pwd path, e.g., /./../xxx/yyy -> /xxx/yyy */
+ buf[p++] = '/';
+ while (s < pwd->i_srcpath + len) {
+ char *slash = memchr(s, '/', pwd->i_srcpath + len - s);
+ if (slash) {
+ if (s == slash) {
+ while(*++s == '/');
+ continue;;
+ }
+ *slash = '\0';
+ }
+ if (memcmp(s, ".", 2) && memcmp(s, "..", 3)) {
+ memcpy(buf + p, s, strlen(s));
+ p += strlen(s);
+ buf[p++] = '/';
+
+ }
+ if (slash) {
+ *slash = '/';
+ s = slash + 1;
+ } else{
+ break;
+ }
+ }
+ if (buf[p - 1] != '/')
+ buf[p++] = '/';
+
+ s = path;
+ len = strlen(path);
while (s < path + len) {
char *slash = memchr(s, '/', path + len - s);
+ int bloom, slen;
if (slash) {
if (s == slash) {
@@ -97,13 +132,19 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
}
}
- list_for_each_entry(d, &pwd->i_subdirs, d_child) {
- if (!strcmp(d->name, s)) {
- if (d->type != EROFS_FT_DIR && slash)
- return ERR_PTR(-EIO);
- inode = d->inode;
- break;
+ slen = strlen(s);
+ memcpy(buf + p, s, slen);
+ p += slen;
+ if ((bloom = erofs_bloom_test(sbi, buf, p)) > 0) {
+ list_for_each_entry(d, &pwd->i_subdirs, d_child) {
+ if (!strcmp(d->name, s)) {
+ if (d->type != EROFS_FT_DIR && slash)
+ return ERR_PTR(-EIO);
+ inode = d->inode;
+ break;
+ }
}
+
}
if (inode) {
@@ -124,6 +165,10 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
return d;
pwd = d->inode;
}
+ if (!bloom && erofs_bloom_add(sbi, buf, p))
+ return ERR_PTR(-EINVAL);
+ if (slash)
+ buf[p++] = '/';
}
if (slash) {
*slash = '/';
diff --git a/lib/super.c b/lib/super.c
index 45233c4..39c3bfd 100644
--- a/lib/super.c
+++ b/lib/super.c
@@ -4,9 +4,14 @@
*/
#include <string.h>
#include <stdlib.h>
+#include <time.h>
#include "erofs/print.h"
#include "erofs/xattr.h"
#include "erofs/cache.h"
+#include "erofs/bloom_filter.h"
+
+#define DEFAULT_BLOOM_NRFUNC 5
+#define DEFAULT_BLOOM_SIZE 1000000
static bool check_layout_compatibility(struct erofs_sb_info *sbi,
struct erofs_super_block *dsb)
@@ -72,6 +77,27 @@ static int erofs_init_devices(struct erofs_sb_info *sbi,
return 0;
}
+int erofs_super_init(struct erofs_sb_info *sbi)
+{
+ int err;
+
+ srand(time(NULL));
+ if (cfg.c_bloom_nrfuc && cfg.c_bloom_bitsize)
+ err = erofs_bloom_init(sbi, cfg.c_bloom_nrfuc,
+ cfg.c_bloom_bitsize, rand());
+ else
+ err = erofs_bloom_init(sbi, DEFAULT_BLOOM_NRFUNC,
+ DEFAULT_BLOOM_SIZE, rand());
+ if (err) {
+ erofs_err("failed to init bloom filter");
+ goto exit;
+ }
+
+ err = 0;
+exit:
+ return err;
+}
+
int erofs_read_superblock(struct erofs_sb_info *sbi)
{
u8 data[EROFS_MAX_BLOCK_SIZE];
@@ -153,6 +179,7 @@ void erofs_put_super(struct erofs_sb_info *sbi)
erofs_buffer_exit(sbi->bmgr);
sbi->bmgr = NULL;
}
+ erofs_bloom_exit(sbi);
}
int erofs_writesb(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh,
diff --git a/mkfs/main.c b/mkfs/main.c
index 20f12fc..21003bc 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -31,6 +31,7 @@
#include "../lib/liberofs_private.h"
#include "../lib/liberofs_uuid.h"
#include "../lib/compressor.h"
+#include "erofs/bloom_filter.h"
static struct option long_options[] = {
{"version", no_argument, 0, 'V'},
@@ -81,6 +82,7 @@ static struct option long_options[] = {
{"zfeature-bits", required_argument, NULL, 521},
{"clean", optional_argument, NULL, 522},
{"incremental", optional_argument, NULL, 523},
+ {"bloom-filter", optional_argument, NULL, 524},
{0, 0, 0, 0},
};
@@ -179,6 +181,9 @@ static void usage(int argc, char **argv)
" headerball=file data is omited in the source stream)\n"
" --ovlfs-strip=<0,1> strip overlayfs metadata in the target image (e.g. whiteouts)\n"
" --quiet quiet execution (do not write anything to standard output.)\n"
+ " --bloom-filter=X boost up images generation with bloom filter\n"
+ " (X=n,s; n=# of hash functions (default 5),\n"
+ " s=bit array size (default 1e6))\n"
#ifndef NDEBUG
" --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
" --random-algorithms randomize per-file algorithms (debugging only)\n"
@@ -820,6 +825,20 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
}
incremental_mode = (opt == 523);
break;
+ case 524:
+ cfg.c_bloom_nrfuc = strtol(optarg, &endptr, 0);
+ if (*endptr != ',') {
+ erofs_err("invalid --bloom-filter=%s", optarg);
+ cfg.c_bloom_nrfuc = 0;
+ return -EINVAL;
+ }
+ cfg.c_bloom_bitsize = strtol(endptr + 1, &endptr, 0);
+ if (*endptr != '\0') {
+ erofs_err("invalid --bloom-filter=%s", optarg);
+ cfg.c_bloom_bitsize = 0;
+ return -EINVAL;
+ }
+ break;
case 'V':
version();
exit(0);
@@ -1344,6 +1363,7 @@ int main(int argc, char **argv)
}
erofs_inode_manager_init();
+ erofs_super_init(&g_sbi);
if (tar_mode) {
root = erofs_rebuild_make_root(&g_sbi);
--
2.43.5
More information about the Linux-erofs
mailing list