[PATCH 3/3] erofs-utils: lib: implement 3-way merge sort

Gao Xiang hsiangkao at linux.alibaba.com
Thu Jul 31 13:16:42 AEST 2025


...so that only substrings need to be compared.

As a follow-up, need to add a fallback to a trivial sort and comparison
method when the total number of strings is small.

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 lib/inode.c | 88 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 28 deletions(-)

diff --git a/lib/inode.c b/lib/inode.c
index 59031144..63875212 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -211,28 +211,67 @@ int erofs_allocate_inode_bh_data(struct erofs_inode *inode, erofs_blk_t nblocks)
 	return 0;
 }
 
-static int comp_subdir(const void *a, const void *b)
+static void erofs_dentry_mergesort(struct list_head *entries, int k)
 {
-	const struct erofs_dentry *da, *db;
-	int commonlen, sign;
+	struct list_head *great = entries->next;
 
-	da = *((const struct erofs_dentry **)a);
-	db = *((const struct erofs_dentry **)b);
-	commonlen = min(round_up(da->namelen, EROFS_DENTRY_NAME_ALIGNMENT),
-			round_up(db->namelen, EROFS_DENTRY_NAME_ALIGNMENT));
-	sign = memcmp(da->name, db->name, commonlen);
-	if (sign)
-		return sign;
-	return cmpsgn(da->namelen, db->namelen);
+	entries->prev->next = NULL;
+	init_list_head(entries);
+	do {
+		struct list_head **greatp = &great;
+		struct erofs_dentry *e0, *d;
+		struct list_head le[2] = {
+			[0] = LIST_HEAD_INIT(le[0]),
+			[1] = LIST_HEAD_INIT(le[1]),
+		};
+		u32 count[2] = {};
+		int cmp, k1;
+
+		e0 = list_entry(great, struct erofs_dentry, d_child);
+		great = great->next;
+		list_add_tail(&e0->d_child, &le[1]);
+
+		while (*greatp) {
+			d = list_entry(*greatp, struct erofs_dentry, d_child);
+#if EROFS_DENTRY_NAME_ALIGNMENT == 4
+			cmp = cmpsgn(*((u32 *)d->name + k),
+				     *((u32 *)e0->name + k));
+#else
+			cmp = memcmp(d->name + k, e0->name + k,
+				     EROFS_DENTRY_NAME_ALIGNMENT);
+#endif
+
+			if (cmp > 0) {
+				greatp = &d->d_child.next;
+				continue;
+			}
+			*greatp = d->d_child.next;
+			list_add_tail(&d->d_child, &le[!cmp]);
+			++count[!cmp];
+		}
+
+		if (!list_empty(&le[0])) {
+			if (le[0].prev != le[0].next)
+				erofs_dentry_mergesort(&le[0], k);
+			__list_splice(&le[0], entries->prev, entries);
+		}
+
+		if (!list_empty(&le[1])) {
+			k1 = k + EROFS_DENTRY_NAME_ALIGNMENT;
+			if (le[1].prev != le[1].next &&
+			    e0->name[k1 - 1] != '\0')
+				erofs_dentry_mergesort(&le[1], k1);
+			__list_splice(&le[1], entries->prev, entries);
+		}
+	} while (great);
 }
 
 static int erofs_prepare_dir_file(struct erofs_inode *dir,
 				  unsigned int nr_subdirs)
 {
-	struct erofs_sb_info *sbi = dir->sbi;
-	struct erofs_dentry *d, *n, **sorted_d;
 	bool dot_omitted = cfg.c_dot_omitted;
-	unsigned int i;
+	struct erofs_sb_info *sbi = dir->sbi;
+	struct erofs_dentry *d;
 	unsigned int d_size = 0;
 
 	if (!dot_omitted) {
@@ -252,21 +291,9 @@ static int erofs_prepare_dir_file(struct erofs_inode *dir,
 	d->inode = erofs_igrab(erofs_parent_inode(dir));
 	d->type = EROFS_FT_DIR;
 
+	if (nr_subdirs)
+		erofs_dentry_mergesort(&dir->i_subdirs, 0);
 	nr_subdirs += 1 + !dot_omitted;
-	sorted_d = malloc(nr_subdirs * sizeof(d));
-	if (!sorted_d)
-		return -ENOMEM;
-
-	i = 0;
-	list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
-		list_del(&d->d_child);
-		sorted_d[i++] = d;
-	}
-	DBG_BUGON(i != nr_subdirs);
-	qsort(sorted_d, i, sizeof(d), comp_subdir);
-	while (i)
-		list_add(&sorted_d[--i]->d_child, &dir->i_subdirs);
-	free(sorted_d);
 
 	/* let's calculate dir size */
 	list_for_each_entry(d, &dir->i_subdirs, d_child) {
@@ -275,6 +302,11 @@ static int erofs_prepare_dir_file(struct erofs_inode *dir,
 		if (erofs_blkoff(sbi, d_size) + len > erofs_blksiz(sbi))
 			d_size = round_up(d_size, erofs_blksiz(sbi));
 		d_size += len;
+		--nr_subdirs;
+	}
+	if (nr_subdirs) {
+		DBG_BUGON(1);
+		return -EFAULT;
 	}
 	dir->i_size = d_size;
 
-- 
2.43.5



More information about the Linux-erofs mailing list