[PATCH v4 3/3] erofs-utils: lib: implement 3-way merge sort
Gao Xiang
hsiangkao at linux.alibaba.com
Fri Aug 1 13:50:42 AEST 2025
...so that only substrings need to be compared.
It can still be further improved (e.g., by generating a singly linked
list first); any contributions are welcome!
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
include/erofs/list.h | 5 ++
lib/inode.c | 111 ++++++++++++++++++++++++++++++++-----------
2 files changed, 88 insertions(+), 28 deletions(-)
diff --git a/include/erofs/list.h b/include/erofs/list.h
index d7a9fee..a7e30cc 100644
--- a/include/erofs/list.h
+++ b/include/erofs/list.h
@@ -130,6 +130,11 @@ static inline void list_splice_tail(struct list_head *list,
&pos->member != (head); \
pos = n, n = list_next_entry(n, member))
+#define list_for_each_entry_safe_from(pos, n, head, member) \
+ for (n = list_next_entry(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_next_entry(n, member))
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/inode.c b/lib/inode.c
index 5903114..aebd94c 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -211,28 +211,90 @@ int erofs_allocate_inode_bh_data(struct erofs_inode *inode, erofs_blk_t nblocks)
return 0;
}
-static int comp_subdir(const void *a, const void *b)
+#define EROFS_DENTRY_MERGESORT_STEP 1
+
+static void erofs_dentry_mergesort(struct list_head *entries, int k)
{
- const struct erofs_dentry *da, *db;
- int commonlen, sign;
+ struct list_head *great = entries->next;
+
+ BUILD_BUG_ON(EROFS_DENTRY_MERGESORT_STEP > EROFS_DENTRY_NAME_ALIGNMENT);
+ entries->prev->next = NULL;
+ init_list_head(entries);
+ do {
+ struct list_head **greatp = &great;
+ struct erofs_dentry *e0, *d, *n;
+ struct list_head le[2];
+ int cmp, k1;
+ bool brk;
+
+ e0 = list_entry(great, struct erofs_dentry, d_child);
+ great = great->next;
+ init_list_head(&le[0]);
+ le[1] = (struct list_head)LIST_HEAD_INIT(e0->d_child);
+ e0->d_child.prev = e0->d_child.next = &le[1];
+
+ do {
+ d = list_entry(*greatp, struct erofs_dentry, d_child);
+ cmp = memcmp(d->name + k, e0->name + k,
+ EROFS_DENTRY_MERGESORT_STEP);
+
+ if (cmp > 0) {
+ greatp = &d->d_child.next;
+ continue;
+ }
+ *greatp = d->d_child.next;
+ list_add_tail(&d->d_child, &le[!cmp]);
+ } while (*greatp);
+
+ k1 = k + EROFS_DENTRY_MERGESORT_STEP;
+ brk = great || !list_empty(&le[0]);
+ while (e0->name[k1 - 1] != '\0') {
+ if (__erofs_likely(brk)) {
+ if (le[1].prev != le[1].next)
+ erofs_dentry_mergesort(&le[1], k1);
+ break;
+ }
+ e0 = list_first_entry(&le[1],
+ struct erofs_dentry, d_child);
+ d = list_next_entry(e0, d_child);
+ list_for_each_entry_safe_from(d, n, &le[1], d_child) {
+ cmp = memcmp(d->name + k1, e0->name + k1,
+ EROFS_DENTRY_MERGESORT_STEP);
+ if (!cmp)
+ continue;
+
+ __list_del(d->d_child.prev, d->d_child.next);
+ if (cmp < 0) {
+ list_add_tail(&d->d_child, &le[0]);
+ } else {
+ *greatp = &d->d_child;
+ d->d_child.next = NULL;
+ greatp = &d->d_child.next;
+ }
+ brk = true;
+ }
+ k = k1;
+ k1 += EROFS_DENTRY_MERGESORT_STEP;
+ }
+
+ if (!list_empty(&le[0])) {
+ if (le[0].prev != le[0].next)
+ erofs_dentry_mergesort(&le[0], k);
+ __list_splice(&le[0], entries->prev, entries);
+ }
+ __list_splice(&le[1], entries->prev, entries);
+ } while (great && great->next);
- da = *((const struct erofs_dentry **)a);
- db = *((const struct erofs_dentry **)b);
- commonlen = min(round_up(da->namelen, EROFS_DENTRY_NAME_ALIGNMENT),
- round_up(db->namelen, EROFS_DENTRY_NAME_ALIGNMENT));
- sign = memcmp(da->name, db->name, commonlen);
- if (sign)
- return sign;
- return cmpsgn(da->namelen, db->namelen);
+ if (great)
+ list_add_tail(great, entries);
}
static int erofs_prepare_dir_file(struct erofs_inode *dir,
unsigned int nr_subdirs)
{
- struct erofs_sb_info *sbi = dir->sbi;
- struct erofs_dentry *d, *n, **sorted_d;
bool dot_omitted = cfg.c_dot_omitted;
- unsigned int i;
+ struct erofs_sb_info *sbi = dir->sbi;
+ struct erofs_dentry *d;
unsigned int d_size = 0;
if (!dot_omitted) {
@@ -252,21 +314,9 @@ static int erofs_prepare_dir_file(struct erofs_inode *dir,
d->inode = erofs_igrab(erofs_parent_inode(dir));
d->type = EROFS_FT_DIR;
+ if (nr_subdirs)
+ erofs_dentry_mergesort(&dir->i_subdirs, 0);
nr_subdirs += 1 + !dot_omitted;
- sorted_d = malloc(nr_subdirs * sizeof(d));
- if (!sorted_d)
- return -ENOMEM;
-
- i = 0;
- list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
- list_del(&d->d_child);
- sorted_d[i++] = d;
- }
- DBG_BUGON(i != nr_subdirs);
- qsort(sorted_d, i, sizeof(d), comp_subdir);
- while (i)
- list_add(&sorted_d[--i]->d_child, &dir->i_subdirs);
- free(sorted_d);
/* let's calculate dir size */
list_for_each_entry(d, &dir->i_subdirs, d_child) {
@@ -275,6 +325,11 @@ static int erofs_prepare_dir_file(struct erofs_inode *dir,
if (erofs_blkoff(sbi, d_size) + len > erofs_blksiz(sbi))
d_size = round_up(d_size, erofs_blksiz(sbi));
d_size += len;
+ --nr_subdirs;
+ }
+ if (nr_subdirs) {
+ DBG_BUGON(1);
+ return -EFAULT;
}
dir->i_size = d_size;
--
2.43.5
More information about the Linux-erofs
mailing list