[PATCH 3/3] erofs-utils: lib: enhance erofs_rebuild_get_dentry with bloom filter

Hongzhen Luo hongzhen at linux.alibaba.com
Thu Jul 18 15:40:25 AEST 2024


Add a bloom filter to exclude entries that are not in `pwd->i_subdirs`,
thereby improving the performance of `erofs_rebuild_get_dentry`. Below
are the results for different # of files in the same directory:

+---------+--------------------+
| # files | time reduction (%) |
+---------+--------------------+
|   1e4   |         55%        |
+---------+--------------------+
|   1e5   |         98%        |
+---------+--------------------+
|   2e5   |         98%        |
+---------+--------------------+
|   3e5   |         99%        |
+---------+--------------------+

Signed-off-by: Hongzhen Luo <hongzhen at linux.alibaba.com>
---
 lib/rebuild.c | 61 ++++++++++++++++++++++++++++++++++++++++++++-------
 lib/super.c   |  2 ++
 mkfs/main.c   |  4 ++++
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/lib/rebuild.c b/lib/rebuild.c
index 9e8bf8f..3fd3ea0 100644
--- a/lib/rebuild.c
+++ b/lib/rebuild.c
@@ -15,6 +15,7 @@
 #include "erofs/xattr.h"
 #include "erofs/blobchunk.h"
 #include "erofs/internal.h"
+#include "erofs/bloom_filter.h"
 #include "liberofs_uuid.h"
 
 #ifdef HAVE_LINUX_AUFS_TYPE_H
@@ -62,14 +63,48 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
 		char *path, bool aufs, bool *whout, bool *opq, bool to_head)
 {
 	struct erofs_dentry *d = NULL;
-	unsigned int len = strlen(path);
-	char *s = path;
+	unsigned int len, p = 0;
+	char *s;
+	struct erofs_sb_info *sbi = pwd->sbi;
+	char buf[4096];
 
 	*whout = false;
 	*opq = false;
 
+	s = pwd->i_srcpath;
+	len = strlen(pwd->i_srcpath);
+	/* normalize the pwd path, e.g., /./../xxx/yyy -> /xxx/yyy */
+	buf[p++] = '/';
+	while (s < pwd->i_srcpath + len) {
+		char *slash = memchr(s, '/', pwd->i_srcpath + len - s);
+		if (slash) {
+			if (s == slash) {
+				while(*++s == '/');
+				continue;;
+			}
+			*slash = '\0';
+		}
+		if (memcmp(s, ".", 2) && memcmp(s, "..", 3)) {
+			memcpy(buf + p, s, strlen(s));
+			p += strlen(s);
+			buf[p++] = '/';
+
+		}
+		if (slash) {
+			*slash = '/';
+			s = slash + 1;
+		} else{
+			break;
+		}
+	}
+	if (buf[p - 1] != '/')
+		buf[p++] = '/';
+
+	s = path;
+	len = strlen(path);
 	while (s < path + len) {
 		char *slash = memchr(s, '/', path + len - s);
+		int bloom, slen;
 
 		if (slash) {
 			if (s == slash) {
@@ -97,13 +132,19 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
 				}
 			}
 
-			list_for_each_entry(d, &pwd->i_subdirs, d_child) {
-				if (!strcmp(d->name, s)) {
-					if (d->type != EROFS_FT_DIR && slash)
-						return ERR_PTR(-EIO);
-					inode = d->inode;
-					break;
+			slen = strlen(s);
+			memcpy(buf + p, s, slen);
+			p += slen;
+			if ((bloom = erofs_bloom_test(sbi, buf, p)) > 0) {
+				list_for_each_entry(d, &pwd->i_subdirs, d_child) {
+					if (!strcmp(d->name, s)) {
+						if (d->type != EROFS_FT_DIR && slash)
+							return ERR_PTR(-EIO);
+						inode = d->inode;
+						break;
+					}
 				}
+
 			}
 
 			if (inode) {
@@ -124,6 +165,10 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
 					return d;
 				pwd = d->inode;
 			}
+			if (!bloom && erofs_bloom_add(sbi, buf, p))
+				return ERR_PTR(-EINVAL);
+			if (slash)
+				buf[p++] = '/';
 		}
 		if (slash) {
 			*slash = '/';
diff --git a/lib/super.c b/lib/super.c
index 45233c4..7289236 100644
--- a/lib/super.c
+++ b/lib/super.c
@@ -7,6 +7,7 @@
 #include "erofs/print.h"
 #include "erofs/xattr.h"
 #include "erofs/cache.h"
+#include "erofs/bloom_filter.h"
 
 static bool check_layout_compatibility(struct erofs_sb_info *sbi,
 				       struct erofs_super_block *dsb)
@@ -153,6 +154,7 @@ void erofs_put_super(struct erofs_sb_info *sbi)
 		erofs_buffer_exit(sbi->bmgr);
 		sbi->bmgr = NULL;
 	}
+	erofs_bloom_exit(sbi);
 }
 
 int erofs_writesb(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh,
diff --git a/mkfs/main.c b/mkfs/main.c
index 20f12fc..27a2ea0 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -31,6 +31,7 @@
 #include "../lib/liberofs_private.h"
 #include "../lib/liberofs_uuid.h"
 #include "../lib/compressor.h"
+#include "erofs/bloom_filter.h"
 
 static struct option long_options[] = {
 	{"version", no_argument, 0, 'V'},
@@ -1344,6 +1345,9 @@ int main(int argc, char **argv)
 	}
 
 	erofs_inode_manager_init();
+	srand(time(NULL));
+	/* 1000000 should be enough */
+	erofs_bloom_init(&g_sbi, 5, 1000000, rand());
 
 	if (tar_mode) {
 		root = erofs_rebuild_make_root(&g_sbi);
-- 
2.43.5



More information about the Linux-erofs mailing list