[PATCH 3/3] erofs-utils: lib: enhance erofs_rebuild_get_dentry with bloom filter
Gao Xiang
hsiangkao at linux.alibaba.com
Fri Jul 19 18:19:00 AEST 2024
On 2024/7/18 13:40, Hongzhen Luo wrote:
> Add a bloom filter to exclude entries that are not in `pwd->i_subdirs`,
> thereby improving the performance of `erofs_rebuild_get_dentry`. Below
> are the results for different # of files in the same directory:
>
> +---------+--------------------+
> | # files | time reduction (%) |
> +---------+--------------------+
> | 1e4 | 55% |
> +---------+--------------------+
> | 1e5 | 98% |
> +---------+--------------------+
> | 2e5 | 98% |
> +---------+--------------------+
> | 3e5 | 99% |
> +---------+--------------------+
>
> Signed-off-by: Hongzhen Luo <hongzhen at linux.alibaba.com>
> ---
> lib/rebuild.c | 61 ++++++++++++++++++++++++++++++++++++++++++++-------
> lib/super.c | 2 ++
> mkfs/main.c | 4 ++++
> 3 files changed, 59 insertions(+), 8 deletions(-)
>
> diff --git a/lib/rebuild.c b/lib/rebuild.c
> index 9e8bf8f..3fd3ea0 100644
> --- a/lib/rebuild.c
> +++ b/lib/rebuild.c
> @@ -15,6 +15,7 @@
> #include "erofs/xattr.h"
> #include "erofs/blobchunk.h"
> #include "erofs/internal.h"
> +#include "erofs/bloom_filter.h"
> #include "liberofs_uuid.h"
>
> #ifdef HAVE_LINUX_AUFS_TYPE_H
> @@ -62,14 +63,48 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
> char *path, bool aufs, bool *whout, bool *opq, bool to_head)
> {
> struct erofs_dentry *d = NULL;
> - unsigned int len = strlen(path);
> - char *s = path;
> + unsigned int len, p = 0;
> + char *s;
> + struct erofs_sb_info *sbi = pwd->sbi;
> + char buf[4096];
>
> *whout = false;
> *opq = false;
>
> + s = pwd->i_srcpath;
> + len = strlen(pwd->i_srcpath);
> + /* normalize the pwd path, e.g., /./../xxx/yyy -> /xxx/yyy */
> + buf[p++] = '/';
> + while (s < pwd->i_srcpath + len) {
Why should we need that?
> + char *slash = memchr(s, '/', pwd->i_srcpath + len - s);
> + if (slash) {
> + if (s == slash) {
> + while(*++s == '/');
> + continue;;
> + }
> + *slash = '\0';
> + }
> + if (memcmp(s, ".", 2) && memcmp(s, "..", 3)) {
> + memcpy(buf + p, s, strlen(s));
> + p += strlen(s);
> + buf[p++] = '/';
> +
> + }
> + if (slash) {
> + *slash = '/';
> + s = slash + 1;
> + } else{
> + break;
> + }
> + }
> + if (buf[p - 1] != '/')
> + buf[p++] = '/';
> +
> + s = path;
> + len = strlen(path);
> while (s < path + len) {
> char *slash = memchr(s, '/', path + len - s);
> + int bloom, slen;
>
> if (slash) {
> if (s == slash) {
> @@ -97,13 +132,19 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
> }
> }
>
> - list_for_each_entry(d, &pwd->i_subdirs, d_child) {
> - if (!strcmp(d->name, s)) {
> - if (d->type != EROFS_FT_DIR && slash)
> - return ERR_PTR(-EIO);
> - inode = d->inode;
> - break;
> + slen = strlen(s);
> + memcpy(buf + p, s, slen);
> + p += slen;
> + if ((bloom = erofs_bloom_test(sbi, buf, p)) > 0) {
> + list_for_each_entry(d, &pwd->i_subdirs, d_child) {
> + if (!strcmp(d->name, s)) {
> + if (d->type != EROFS_FT_DIR && slash)
> + return ERR_PTR(-EIO);
> + inode = d->inode;
> + break;
> + }
> }
> +
> }
>
> if (inode) {
> @@ -124,6 +165,10 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
> return d;
> pwd = d->inode;
> }
> + if (!bloom && erofs_bloom_add(sbi, buf, p))
> + return ERR_PTR(-EINVAL);
> + if (slash)
> + buf[p++] = '/';
> }
> if (slash) {
> *slash = '/';
> diff --git a/lib/super.c b/lib/super.c
> index 45233c4..7289236 100644
> --- a/lib/super.c
> +++ b/lib/super.c
> @@ -7,6 +7,7 @@
> #include "erofs/print.h"
> #include "erofs/xattr.h"
> #include "erofs/cache.h"
> +#include "erofs/bloom_filter.h"
>
> static bool check_layout_compatibility(struct erofs_sb_info *sbi,
> struct erofs_super_block *dsb)
> @@ -153,6 +154,7 @@ void erofs_put_super(struct erofs_sb_info *sbi)
> erofs_buffer_exit(sbi->bmgr);
> sbi->bmgr = NULL;
> }
> + erofs_bloom_exit(sbi);
> }
>
> int erofs_writesb(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh,
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 20f12fc..27a2ea0 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -31,6 +31,7 @@
> #include "../lib/liberofs_private.h"
> #include "../lib/liberofs_uuid.h"
> #include "../lib/compressor.h"
> +#include "erofs/bloom_filter.h"
>
> static struct option long_options[] = {
> {"version", no_argument, 0, 'V'},
> @@ -1344,6 +1345,9 @@ int main(int argc, char **argv)
> }
>
> erofs_inode_manager_init();
> + srand(time(NULL));
> + /* 1000000 should be enough */
> + erofs_bloom_init(&g_sbi, 5, 1000000, rand());
I don't want to add any new init function anymore, please wrap
all init functions into a common one.
Also what's the meaning of hard-coded 1000000?
Thanks,
Gao Xiang
More information about the Linux-erofs
mailing list