<div dir="ltr">Hi Gao,<div>    I drafted a new patchset on top of the dev branch. Changes since v6:</div><div><br></div><div>    1. block buffer moved to the heap, due to stack size concerns when iterating recursively</div><div>    2. Added a "recursive" option to input parameters</div><div>    3. dname buffers are still on the heap, but allocation is done once per directory, instead of once for each directory entry.</div><div>    4. Added a void* arg which will be forwarded to the callback function.</div><div><br></div><div>I ran scripts/<a href="http://checkpatch.pl">checkpatch.pl</a> . Hopefully this makes your life easier. Thanks for the reply!.</div><div><br></div><div><br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Fri, Dec 17, 2021 at 2:47 PM Kelvin Zhang <<a href="mailto:zhangkelvin@google.com">zhangkelvin@google.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Signed-off-by: Kelvin Zhang <<a href="mailto:zhangkelvin@google.com" target="_blank">zhangkelvin@google.com</a>><br>
---<br>
 fuse/Makefile.am    |   2 +-<br>
 fuse/dir.c          | 100 --------------------<br>
 fuse/main.c         |  65 +++++++++++--<br>
 include/erofs/dir.h |  52 +++++++++++<br>
 lib/Makefile.am     |   2 +-<br>
 lib/dir.c           | 223 ++++++++++++++++++++++++++++++++++++++++++++<br>
 6 files changed, 334 insertions(+), 110 deletions(-)<br>
 delete mode 100644 fuse/dir.c<br>
 create mode 100644 include/erofs/dir.h<br>
 create mode 100644 lib/dir.c<br>
<br>
diff --git a/fuse/Makefile.am b/fuse/Makefile.am<br>
index 7b007f3..0a78c0a 100644<br>
--- a/fuse/Makefile.am<br>
+++ b/fuse/Makefile.am<br>
@@ -2,7 +2,7 @@<br>
<br>
 AUTOMAKE_OPTIONS = foreign<br>
 bin_PROGRAMS     = erofsfuse<br>
-erofsfuse_SOURCES = dir.c main.c<br>
+erofsfuse_SOURCES = main.c<br>
 erofsfuse_CFLAGS = -Wall -Werror -I$(top_srcdir)/include<br>
 erofsfuse_CFLAGS += -DFUSE_USE_VERSION=26 ${libfuse_CFLAGS} ${libselinux_CFLAGS}<br>
 erofsfuse_LDADD = $(top_builddir)/lib/<a href="http://liberofs.la" rel="noreferrer" target="_blank">liberofs.la</a> ${libfuse_LIBS} ${liblz4_LIBS} \<br>
diff --git a/fuse/dir.c b/fuse/dir.c<br>
deleted file mode 100644<br>
index bc8735b..0000000<br>
--- a/fuse/dir.c<br>
+++ /dev/null<br>
@@ -1,100 +0,0 @@<br>
-// SPDX-License-Identifier: GPL-2.0+<br>
-/*<br>
- * Created by Li Guifu <<a href="mailto:blucerlee@gmail.com" target="_blank">blucerlee@gmail.com</a>><br>
- */<br>
-#include <fuse.h><br>
-#include <fuse_opt.h><br>
-#include "macosx.h"<br>
-#include "erofs/internal.h"<br>
-#include "erofs/print.h"<br>
-<br>
-static int erofs_fill_dentries(struct erofs_inode *dir,<br>
-                              fuse_fill_dir_t filler, void *buf,<br>
-                              void *dblk, unsigned int nameoff,<br>
-                              unsigned int maxsize)<br>
-{<br>
-       struct erofs_dirent *de = dblk;<br>
-       const struct erofs_dirent *end = dblk + nameoff;<br>
-       char namebuf[EROFS_NAME_LEN + 1];<br>
-<br>
-       while (de < end) {<br>
-               const char *de_name;<br>
-               unsigned int de_namelen;<br>
-<br>
-               nameoff = le16_to_cpu(de->nameoff);<br>
-               de_name = (char *)dblk + nameoff;<br>
-<br>
-               /* the last dirent in the block? */<br>
-               if (de + 1 >= end)<br>
-                       de_namelen = strnlen(de_name, maxsize - nameoff);<br>
-               else<br>
-                       de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;<br>
-<br>
-               /* a corrupted entry is found */<br>
-               if (nameoff + de_namelen > maxsize ||<br>
-                   de_namelen > EROFS_NAME_LEN) {<br>
-                       erofs_err("bogus dirent @ nid %llu", dir->nid | 0ULL);<br>
-                       DBG_BUGON(1);<br>
-                       return -EFSCORRUPTED;<br>
-               }<br>
-<br>
-               memcpy(namebuf, de_name, de_namelen);<br>
-               namebuf[de_namelen] = '\0';<br>
-<br>
-               filler(buf, namebuf, NULL, 0);<br>
-               ++de;<br>
-       }<br>
-       return 0;<br>
-}<br>
-<br>
-int erofsfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler,<br>
-                     off_t offset, struct fuse_file_info *fi)<br>
-{<br>
-       int ret;<br>
-       struct erofs_inode dir;<br>
-       char dblk[EROFS_BLKSIZ];<br>
-       erofs_off_t pos;<br>
-<br>
-       erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);<br>
-<br>
-       ret = erofs_ilookup(path, &dir);<br>
-       if (ret)<br>
-               return ret;<br>
-<br>
-       erofs_dbg("path=%s nid = %llu", path, dir.nid | 0ULL);<br>
-<br>
-       if (!S_ISDIR(dir.i_mode))<br>
-               return -ENOTDIR;<br>
-<br>
-       if (!dir.i_size)<br>
-               return 0;<br>
-<br>
-       pos = 0;<br>
-       while (pos < dir.i_size) {<br>
-               unsigned int nameoff, maxsize;<br>
-               struct erofs_dirent *de;<br>
-<br>
-               maxsize = min_t(unsigned int, EROFS_BLKSIZ,<br>
-                               dir.i_size - pos);<br>
-               ret = erofs_pread(&dir, dblk, maxsize, pos);<br>
-               if (ret)<br>
-                       return ret;<br>
-<br>
-               de = (struct erofs_dirent *)dblk;<br>
-               nameoff = le16_to_cpu(de->nameoff);<br>
-               if (nameoff < sizeof(struct erofs_dirent) ||<br>
-                   nameoff >= PAGE_SIZE) {<br>
-                       erofs_err("invalid de[0].nameoff %u @ nid %llu",<br>
-                                 nameoff, dir.nid | 0ULL);<br>
-                       ret = -EFSCORRUPTED;<br>
-                       break;<br>
-               }<br>
-<br>
-               ret = erofs_fill_dentries(&dir, filler, buf,<br>
-                                         dblk, nameoff, maxsize);<br>
-               if (ret)<br>
-                       break;<br>
-               pos += maxsize;<br>
-       }<br>
-       return 0;<br>
-}<br>
diff --git a/fuse/main.c b/fuse/main.c<br>
index 255965e..53dbdd4 100644<br>
--- a/fuse/main.c<br>
+++ b/fuse/main.c<br>
@@ -12,9 +12,58 @@<br>
 #include "erofs/config.h"<br>
 #include "erofs/print.h"<br>
 #include "erofs/io.h"<br>
+#include "erofs/dir.h"<br>
<br>
-int erofsfuse_readdir(const char *path, void *buffer, fuse_fill_dir_t filler,<br>
-                     off_t offset, struct fuse_file_info *fi);<br>
+struct erofsfuse_dir_context {<br>
+       fuse_fill_dir_t filler;<br>
+       struct fuse_file_info *fi;<br>
+       void *buf;<br>
+};<br>
+<br>
+static int erofsfuse_fill_dentries(struct erofs_dirent_info *ctx)<br>
+{<br>
+       struct erofsfuse_dir_context *fusectx = ctx->arg;<br>
+<br>
+       fusectx->filler(fusectx->buf, ctx->dname, NULL, 0);<br>
+       return 0;<br>
+}<br>
+<br>
+int erofsfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler,<br>
+                     off_t offset, struct fuse_file_info *fi)<br>
+{<br>
+       int ret;<br>
+       struct erofs_inode dir;<br>
+<br>
+       erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);<br>
+<br>
+       ret = erofs_ilookup(path, &dir);<br>
+       if (ret)<br>
+               return ret;<br>
+<br>
+       erofs_dbg("path=%s nid = %llu", path, dir.nid | 0ULL);<br>
+       if (!S_ISDIR(dir.i_mode))<br>
+               return -ENOTDIR;<br>
+<br>
+       if (!dir.i_size)<br>
+               return 0;<br>
+       struct erofsfuse_dir_context ctx = {<br>
+               .filler = filler,<br>
+               .fi = fi,<br>
+               .buf = buf,<br>
+       };<br>
+       struct erofs_iterate_dir_param param = {<br>
+               .cb = erofsfuse_fill_dentries,<br>
+               .fsck = false,<br>
+               .nid = dir.nid,<br>
+               .recursive = false,<br>
+               .arg = &ctx,<br>
+       };<br>
+#ifndef NDEBUG<br>
+       param.fsck = true;<br>
+#endif<br>
+       return erofs_iterate_dir(&param);<br>
+<br>
+}<br>
<br>
 static void *erofsfuse_init(struct fuse_conn_info *info)<br>
 {<br>
@@ -122,13 +171,13 @@ static void usage(void)<br>
        struct fuse_args args = FUSE_ARGS_INIT(0, NULL);<br>
<br>
        fputs("usage: [options] IMAGE MOUNTPOINT\n\n"<br>
-             "Options:\n"<br>
-             "    --dbglevel=#           set output message level to # (maximum 9)\n"<br>
-             "    --device=#             specify an extra device to be used together\n"<br>
+                 "Options:\n"<br>
+                 "    --dbglevel=#           set output message level to # (maximum 9)\n"<br>
+                 "    --device=#             specify an extra device to be used together\n"<br>
 #if FUSE_MAJOR_VERSION < 3<br>
-             "    --help                 display this help and exit\n"<br>
+                 "    --help                 display this help and exit\n"<br>
 #endif<br>
-             "\n", stderr);<br>
+                 "\n", stderr);<br>
<br>
 #if FUSE_MAJOR_VERSION >= 3<br>
        fuse_cmdline_help();<br>
@@ -148,7 +197,7 @@ static void erofsfuse_dumpcfg(void)<br>
 }<br>
<br>
 static int optional_opt_func(void *data, const char *arg, int key,<br>
-                            struct fuse_args *outargs)<br>
+                                struct fuse_args *outargs)<br>
 {<br>
        int ret;<br>
<br>
diff --git a/include/erofs/dir.h b/include/erofs/dir.h<br>
new file mode 100644<br>
index 0000000..affb607<br>
--- /dev/null<br>
+++ b/include/erofs/dir.h<br>
@@ -0,0 +1,52 @@<br>
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */<br>
+#ifndef __EROFS_DIR_H<br>
+#define __EROFS_DIR_H<br>
+<br>
+#ifdef __cplusplus<br>
+extern "C"<br>
+{<br>
+#endif<br>
+<br>
+#include "internal.h"<br>
+<br>
+struct erofs_dir_context;<br>
+<br>
+struct erofs_dirent_info {<br>
+       /* inode id of the inode being iterated */<br>
+       erofs_nid_t nid;<br>
+       /* file type, see values before EROFS_FT_MAX */<br>
+       u8 ftype;<br>
+       const char *dname;<br>
+       /* opaque ptr passed in erofs_iterate_dir_param,<br>
+        * can be used to persist state across multiple<br>
+        * callbacks.<br>
+        */<br>
+       void *arg;<br>
+};<br>
+<br>
+/* callback function for iterating over inodes of EROFS */<br>
+typedef int (*erofs_readdir_cb)(struct erofs_dirent_info *);<br>
+<br>
+/* callers could use a wrapper to contain extra information */<br>
+struct erofs_iterate_dir_param {<br>
+       /* inode id of the directory that needs to be iterated. */<br>
+       /* Use sbi.root_nid if you want to iterate over rood dir */<br>
+       erofs_nid_t nid;<br>
+       bool fsck;                      /* Whether to perform validity check */<br>
+       erofs_nid_t pnid;               /* optional, needed if fsck = true */<br>
+       erofs_readdir_cb cb;<br>
+       /* Whether to iterate this directory recursively */<br>
+       bool recursive;<br>
+       /* This will be copied to erofs_dirent_info.arg when invoking callback */<br>
+       void *arg;<br>
+};<br>
+<br>
+/* iterate over inodes that are in directory */<br>
+int erofs_iterate_dir(const struct erofs_iterate_dir_param *ctx);<br>
+<br>
+<br>
+#ifdef __cplusplus<br>
+}<br>
+#endif<br>
+<br>
+#endif<br>
diff --git a/lib/Makefile.am b/lib/Makefile.am<br>
index 67ba798..25a4a2b 100644<br>
--- a/lib/Makefile.am<br>
+++ b/lib/Makefile.am<br>
@@ -27,7 +27,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \<br>
 noinst_HEADERS += compressor.h<br>
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \<br>
                      namei.c data.c compress.c compressor.c zmap.c decompress.c \<br>
-                     compress_hints.c hashmap.c sha256.c blobchunk.c<br>
+                     compress_hints.c hashmap.c sha256.c blobchunk.c dir.c<br>
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include<br>
 if ENABLE_LZ4<br>
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}<br>
diff --git a/lib/dir.c b/lib/dir.c<br>
new file mode 100644<br>
index 0000000..04a4596<br>
--- /dev/null<br>
+++ b/lib/dir.c<br>
@@ -0,0 +1,223 @@<br>
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0<br>
+#include "erofs/dir.h"<br>
+<br>
+#include <linux/limits.h><br>
+#include <stdlib.h><br>
+<br>
+#include "erofs/internal.h"<br>
+#include "erofs/print.h"<br>
+<br>
+struct erofs_dir_context {<br>
+       erofs_nid_t pnid;               /* optional */<br>
+       struct erofs_inode *dir;<br>
+<br>
+       bool dot_found;<br>
+       bool dot_dot_found;<br>
+       char cur_name[PATH_MAX];<br>
+       char prev_name[PATH_MAX];<br>
+};<br>
+<br>
+<br>
+static int traverse_dirents(<br>
+               struct erofs_dir_context *ctx,<br>
+               const struct erofs_iterate_dir_param *params,<br>
+               const void *dentry_blk,<br>
+               unsigned int lblk, unsigned int next_nameoff,<br>
+               unsigned int maxsize, bool fsck)<br>
+{<br>
+       const struct erofs_dirent *de = dentry_blk;<br>
+       const struct erofs_dirent *end = dentry_blk + next_nameoff;<br>
+       char *prev_name = ctx->prev_name;<br>
+       char *cur_name = ctx->cur_name;<br>
+       const char *errmsg;<br>
+       int ret = 0;<br>
+       bool silent = false;<br>
+<br>
+       while (de < end) {<br>
+               unsigned int de_namelen;<br>
+               unsigned int nameoff;<br>
+<br>
+               nameoff = le16_to_cpu(de->nameoff);<br>
+               const char *de_name = (char *)dentry_blk + nameoff;<br>
+<br>
+               /* the last dirent check */<br>
+               if (de + 1 >= end)<br>
+                       de_namelen = strnlen(de_name, maxsize - nameoff);<br>
+               else<br>
+                       de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;<br>
+<br>
+               strncpy(cur_name, de_name, de_namelen);<br>
+               cur_name[de_namelen] = '\0';<br>
+               if (!cur_name) {<br>
+                       errmsg = "failed to allocate dirent name";<br>
+                       ret = -ENOMEM;<br>
+                       break;<br>
+               }<br>
+<br>
+               erofs_dbg("traversed filename(%s)", cur_name);<br>
+<br>
+               ret = -EFSCORRUPTED;<br>
+               /* corrupted entry check */<br>
+               if (nameoff != next_nameoff) {<br>
+                       errmsg = "bogus dirent nameoff";<br>
+                       break;<br>
+               }<br>
+<br>
+               if (nameoff + de_namelen > maxsize || de_namelen > EROFS_NAME_LEN) {<br>
+                       errmsg = "bogus dirent namelen";<br>
+                       break;<br>
+               }<br>
+<br>
+               if (fsck && prev_name && strcmp(prev_name, cur_name) >= 0) {<br>
+                       errmsg = "wrong dirent name order";<br>
+                       break;<br>
+               }<br>
+<br>
+               if (fsck && de->file_type >= EROFS_FT_MAX) {<br>
+                       errmsg = "invalid file type %u";<br>
+                       break;<br>
+               }<br>
+<br>
+               const bool dot_dotdot = is_dot_dotdot(cur_name);<br>
+<br>
+               if (dot_dotdot) {<br>
+                       switch (de_namelen) {<br>
+                       case 2:<br>
+                               if (fsck && ctx->dot_dot_found) {<br>
+                                       errmsg = "duplicated `..' dirent";<br>
+                                       goto out;<br>
+                               }<br>
+                               ctx->dot_dot_found = true;<br>
+                               if (sbi.root_nid == ctx->dir->nid) {<br>
+                                       ctx->pnid = sbi.root_nid;<br>
+                                       if (fsck && de->nid != ctx->pnid) {<br>
+                                               errmsg = "corrupted `..' dirent";<br>
+                                               goto out;<br>
+                                       }<br>
+                               }<br>
+                               break;<br>
+                       case 1:<br>
+                               if (fsck && ctx->dot_found) {<br>
+                                       errmsg = "duplicated `.' dirent";<br>
+                                       goto out;<br>
+                               }<br>
+<br>
+                               ctx->dot_found = true;<br>
+                               if (fsck && de->nid != ctx->dir->nid) {<br>
+                                       errmsg = "corrupted `.' dirent";<br>
+                                       goto out;<br>
+                               }<br>
+                               break;<br>
+                       }<br>
+               }<br>
+               struct erofs_dirent_info output_info = {<br>
+                       .ftype = de->file_type,<br>
+                       .nid = de->nid,<br>
+                       .arg = params->arg,<br>
+                       .dname = cur_name};<br>
+<br>
+               ret = params->cb(&output_info);<br>
+               if (ret) {<br>
+                       silent = true;<br>
+                       goto out;<br>
+               }<br>
+               if (params->recursive && de->file_type == EROFS_FT_DIR && !dot_dotdot) {<br>
+                       const struct erofs_iterate_dir_param recursive_param = {<br>
+                               .nid = de->nid,<br>
+                               .fsck = params->fsck,<br>
+                               .pnid = params->nid,<br>
+                               .cb = params->cb,<br>
+                               .recursive = params->recursive,<br>
+                               .arg = params->arg};<br>
+                       erofs_iterate_dir(&recursive_param);<br>
+               }<br>
+<br>
+               next_nameoff += de_namelen;<br>
+               ++de;<br>
+<br>
+               prev_name = cur_name;<br>
+               cur_name = prev_name == ctx->prev_name ? ctx->cur_name : ctx->prev_name;<br>
+       }<br>
+       if (fsck && (!ctx->dot_found || !ctx->dot_dot_found)) {<br>
+               erofs_err("`.' or `..' dirent is missing @ nid %llu", de->nid | 0ULL);<br>
+               return -EFSCORRUPTED;<br>
+       }<br>
+out:<br>
+       if (ret && !silent)<br>
+               erofs_err("%s @ nid %llu, lblk %u, index %lu",<br>
+                       errmsg, ctx->dir->nid | 0ULL,<br>
+                       lblk, (de - (struct erofs_dirent *)dentry_blk) | 0UL);<br>
+       return ret;<br>
+}<br>
+<br>
+int erofs_iterate_dir(const struct erofs_iterate_dir_param *params)<br>
+{<br>
+       int err;<br>
+       struct erofs_inode dir;<br>
+<br>
+       dir.nid = params->nid;<br>
+       err = erofs_read_inode_from_disk(&dir);<br>
+       if (err) {<br>
+               return err;<br>
+       }<br>
+       if ((dir.i_mode & S_IFMT) != S_IFDIR) {<br>
+               return -ENOTDIR;<br>
+       }<br>
+<br>
+       struct erofs_dirent_info output_info;<br>
+       /* Both |buf| and |ctx| can live on the stack, but that might cause<br>
+        * stack overflow when iterating recursively. So put them on heap.<br>
+        */<br>
+       char *buf = calloc(EROFS_BLKSIZ, 1);<br>
+<br>
+       if (buf == NULL) {<br>
+               goto out;<br>
+       }<br>
+       struct erofs_dir_context *ctx = calloc(sizeof(struct erofs_dir_context), 1);<br>
+<br>
+       if (ctx == NULL) {<br>
+               goto out;<br>
+       }<br>
+       ctx->dir = &dir;<br>
+       ctx->pnid = params->pnid;<br>
+<br>
+       erofs_off_t pos = 0;<br>
+<br>
+       while (pos < dir.i_size) {<br>
+               const erofs_blk_t lblk = erofs_blknr(pos);<br>
+               const erofs_off_t maxsize =<br>
+                       min_t(erofs_off_t, dir.i_size - pos, EROFS_BLKSIZ);<br>
+               const struct erofs_dirent *de = (const void *)buf;<br>
+<br>
+               err = erofs_pread(&dir, buf, maxsize, pos);<br>
+               if (err) {<br>
+                       erofs_err(<br>
+                       "I/O error occurred when reading dirents @ nid %llu, lblk %u: %d",<br>
+                       dir.nid | 0ULL, lblk, err);<br>
+                       break;<br>
+               }<br>
+<br>
+               const unsigned int nameoff = le16_to_cpu(de->nameoff);<br>
+<br>
+               if (nameoff < sizeof(struct erofs_dirent) || nameoff >= PAGE_SIZE) {<br>
+                       erofs_err("invalid de[0].nameoff %u @ nid %llu, lblk %u",<br>
+                               nameoff, dir.nid | 0ULL, lblk);<br>
+                       err = -EFSCORRUPTED;<br>
+                       break;<br>
+               }<br>
+               err = traverse_dirents(<br>
+                       ctx, params, buf, lblk, nameoff, maxsize, params->fsck);<br>
+               if (err) {<br>
+                       break;<br>
+               }<br>
+               pos += maxsize;<br>
+       }<br>
+out:<br>
+       if (ctx) {<br>
+               free(ctx);<br>
+       }<br>
+       if (buf) {<br>
+               free(buf);<br>
+       }<br>
+       return err;<br>
+}<br>
-- <br>
2.34.1.173.g76aa8bc2d0-goog<br>
<br>
</blockquote></div><br clear="all"><div><br></div>-- <br><div dir="ltr" class="gmail_signature"><div dir="ltr">Sincerely,<div><br></div><div>Kelvin Zhang</div></div></div>