[PATCH v2 1/2] erofs-utils: lib: add API to iterate dirs in EROFS
Gao Xiang
hsiangkao at linux.alibaba.com
Thu Dec 16 06:41:59 AEDT 2021
This introduces erofs_iterate_dir() to iterate over all dirents in
a directory inode, and converts erofsfuse to use the new API.
Note that it doesn't recursively walk into sub-directories.
If it's really needed, users should handle this in the callback.
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
v1: https://lore.kernel.org/linux-erofs/20211215070017.83846-1-hsiangkao@linux.alibaba.com/
changes since v1:
- Avoid heap allocation of dirent name.
TODO:
- maybe avoid allocating EROFS_BLKSIZ buf on stack later..
fuse/Makefile.am | 2 +-
fuse/dir.c | 100 -----------------------
fuse/main.c | 52 +++++++++++-
include/erofs/dir.h | 44 +++++++++++
include/erofs/internal.h | 8 ++
lib/Makefile.am | 2 +-
lib/dir.c | 166 +++++++++++++++++++++++++++++++++++++++
7 files changed, 270 insertions(+), 104 deletions(-)
delete mode 100644 fuse/dir.c
create mode 100644 include/erofs/dir.h
create mode 100644 lib/dir.c
diff --git a/fuse/Makefile.am b/fuse/Makefile.am
index 8a2d472..5aa5ac0 100644
--- a/fuse/Makefile.am
+++ b/fuse/Makefile.am
@@ -3,7 +3,7 @@
AUTOMAKE_OPTIONS = foreign
noinst_HEADERS = $(top_srcdir)/fuse/macosx.h
bin_PROGRAMS = erofsfuse
-erofsfuse_SOURCES = dir.c main.c
+erofsfuse_SOURCES = main.c
erofsfuse_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
erofsfuse_CFLAGS += -DFUSE_USE_VERSION=26 ${libfuse_CFLAGS} ${libselinux_CFLAGS}
erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse_LIBS} ${liblz4_LIBS} \
diff --git a/fuse/dir.c b/fuse/dir.c
deleted file mode 100644
index bc8735b..0000000
--- a/fuse/dir.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Created by Li Guifu <blucerlee at gmail.com>
- */
-#include <fuse.h>
-#include <fuse_opt.h>
-#include "macosx.h"
-#include "erofs/internal.h"
-#include "erofs/print.h"
-
-static int erofs_fill_dentries(struct erofs_inode *dir,
- fuse_fill_dir_t filler, void *buf,
- void *dblk, unsigned int nameoff,
- unsigned int maxsize)
-{
- struct erofs_dirent *de = dblk;
- const struct erofs_dirent *end = dblk + nameoff;
- char namebuf[EROFS_NAME_LEN + 1];
-
- while (de < end) {
- const char *de_name;
- unsigned int de_namelen;
-
- nameoff = le16_to_cpu(de->nameoff);
- de_name = (char *)dblk + nameoff;
-
- /* the last dirent in the block? */
- if (de + 1 >= end)
- de_namelen = strnlen(de_name, maxsize - nameoff);
- else
- de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
-
- /* a corrupted entry is found */
- if (nameoff + de_namelen > maxsize ||
- de_namelen > EROFS_NAME_LEN) {
- erofs_err("bogus dirent @ nid %llu", dir->nid | 0ULL);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
-
- memcpy(namebuf, de_name, de_namelen);
- namebuf[de_namelen] = '\0';
-
- filler(buf, namebuf, NULL, 0);
- ++de;
- }
- return 0;
-}
-
-int erofsfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
- off_t offset, struct fuse_file_info *fi)
-{
- int ret;
- struct erofs_inode dir;
- char dblk[EROFS_BLKSIZ];
- erofs_off_t pos;
-
- erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);
-
- ret = erofs_ilookup(path, &dir);
- if (ret)
- return ret;
-
- erofs_dbg("path=%s nid = %llu", path, dir.nid | 0ULL);
-
- if (!S_ISDIR(dir.i_mode))
- return -ENOTDIR;
-
- if (!dir.i_size)
- return 0;
-
- pos = 0;
- while (pos < dir.i_size) {
- unsigned int nameoff, maxsize;
- struct erofs_dirent *de;
-
- maxsize = min_t(unsigned int, EROFS_BLKSIZ,
- dir.i_size - pos);
- ret = erofs_pread(&dir, dblk, maxsize, pos);
- if (ret)
- return ret;
-
- de = (struct erofs_dirent *)dblk;
- nameoff = le16_to_cpu(de->nameoff);
- if (nameoff < sizeof(struct erofs_dirent) ||
- nameoff >= PAGE_SIZE) {
- erofs_err("invalid de[0].nameoff %u @ nid %llu",
- nameoff, dir.nid | 0ULL);
- ret = -EFSCORRUPTED;
- break;
- }
-
- ret = erofs_fill_dentries(&dir, filler, buf,
- dblk, nameoff, maxsize);
- if (ret)
- break;
- pos += maxsize;
- }
- return 0;
-}
diff --git a/fuse/main.c b/fuse/main.c
index 255965e..2549d8a 100644
--- a/fuse/main.c
+++ b/fuse/main.c
@@ -12,9 +12,57 @@
#include "erofs/config.h"
#include "erofs/print.h"
#include "erofs/io.h"
+#include "erofs/dir.h"
-int erofsfuse_readdir(const char *path, void *buffer, fuse_fill_dir_t filler,
- off_t offset, struct fuse_file_info *fi);
+/*
+ * erofsfuse's readdir state: the generic erofs_dir_context plus the FUSE
+ * arguments which the per-dirent callback needs.
+ */
+struct erofsfuse_dir_context {
+ struct erofs_dir_context ctx; /* kept first: the callback casts ctx back to this wrapper */
+ fuse_fill_dir_t filler; /* FUSE callback which receives each entry name */
+ struct fuse_file_info *fi; /* readdir's fi argument, stored as-is */
+ void *buf; /* FUSE buffer handed to filler */
+};
+
+/*
+ * Per-dirent callback for erofs_iterate_dir(): hand one entry name to FUSE.
+ *
+ * ctx->dname points at ctx->de_namelen bytes inside the directory block and
+ * is not NUL-terminated, so the name is copied into a local buffer and
+ * terminated before the filler is called.
+ *
+ * Always returns 0 so that the iteration continues.
+ */
+static int erofsfuse_fill_dentries(struct erofs_dir_context *ctx)
+{
+	/* valid since ctx is the first member of erofsfuse_dir_context */
+	struct erofsfuse_dir_context *fusectx = (void *)ctx;
+	char dname[EROFS_NAME_LEN + 1];
+
+	/*
+	 * The name is length-delimited rather than a C string, so copy
+	 * exactly de_namelen bytes; strncpy() would stop at an embedded
+	 * NUL and zero-pad the remainder instead.
+	 */
+	memcpy(dname, ctx->dname, ctx->de_namelen);
+	dname[ctx->de_namelen] = '\0';
+	fusectx->filler(fusectx->buf, dname, NULL, 0);
+	return 0;
+}
+
+/*
+ * FUSE readdir handler: look up @path and feed every dirent of that
+ * directory to @filler through erofs_iterate_dir().
+ *
+ * Returns the erofs_ilookup() error if the lookup fails, -ENOTDIR if @path
+ * is not a directory, 0 for an empty directory, and otherwise whatever
+ * erofs_iterate_dir() returns.  The extra fsck checks are requested unless
+ * NDEBUG is defined.
+ */
+int erofsfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
+		      off_t offset, struct fuse_file_info *fi)
+{
+#ifdef NDEBUG
+	const bool fsck = false;
+#else
+	const bool fsck = true;
+#endif
+	struct erofs_inode dir;
+	struct erofsfuse_dir_context ctx = {
+		.ctx.dir = &dir,
+		.ctx.cb = erofsfuse_fill_dentries,
+		.filler = filler,
+		.fi = fi,
+		.buf = buf,
+	};
+	int err;
+
+	erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);
+
+	err = erofs_ilookup(path, &dir);
+	if (err)
+		return err;
+
+	erofs_dbg("path=%s nid = %llu", path, dir.nid | 0ULL);
+	if (!S_ISDIR(dir.i_mode))
+		return -ENOTDIR;
+
+	/* nothing to report for an empty directory */
+	if (dir.i_size == 0)
+		return 0;
+
+	return erofs_iterate_dir(&ctx.ctx, fsck);
+}
static void *erofsfuse_init(struct fuse_conn_info *info)
{
diff --git a/include/erofs/dir.h b/include/erofs/dir.h
new file mode 100644
index 0000000..5236ba8
--- /dev/null
+++ b/include/erofs/dir.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_DIR_H
+#define __EROFS_DIR_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "internal.h"
+
+/*
+ * Bits of erofs_dir_context.flags:
+ *  VALID_PNID   - ctx->pnid holds the parent nid, so `..' can be verified
+ *  DOTDOT_FOUND - the `..' dirent has been seen during this iteration
+ *  DOT_FOUND    - the `.' dirent has been seen during this iteration
+ */
+#define EROFS_READDIR_VALID_PNID 0x0001
+#define EROFS_READDIR_DOTDOT_FOUND 0x0002
+#define EROFS_READDIR_DOT_FOUND 0x0004
+
+/* both special dirents; cleared by erofs_iterate_dir() before it starts */
+#define EROFS_READDIR_ALL_SPECIAL_FOUND \
+ (EROFS_READDIR_DOTDOT_FOUND | EROFS_READDIR_DOT_FOUND)
+
+struct erofs_dir_context;
+
+/*
+ * Callback invoked once for each dirent found by erofs_iterate_dir().
+ * The current entry is described by the [out] fields of the context.
+ * A non-zero return value stops the traversal.
+ */
+typedef int (*erofs_readdir_cb)(struct erofs_dir_context *);
+
+/*
+ * State shared between erofs_iterate_dir() and its callback.
+ * Callers could use a wrapper to contain extra information.
+ */
+struct erofs_dir_context {
+ struct erofs_inode *dir; /* the directory inode to iterate */
+ erofs_readdir_cb cb; /* called for every dirent */
+ /* optional: parent nid, compared with `..' in fsck mode if VALID_PNID is set */
+ erofs_nid_t pnid;
+
+ /* [out] the dirent which is currently found */
+ const struct erofs_dirent *de;
+ /* points into the directory block: de_namelen bytes, not NUL-terminated */
+ const char *dname;
+ u8 de_namelen, flags; /* flags holds EROFS_READDIR_* bits */
+ bool dot_dotdot; /* true if the name is `.' or `..' */
+};
+
+/*
+ * Iterate over all dirents of the directory ctx->dir and call ctx->cb for
+ * each of them.  With fsck set, extra consistency checks are done on the
+ * dirents (name order, file type, `.' and `..').
+ */
+int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index e6beb8c..d2adf57 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -235,6 +235,14 @@ struct erofs_dentry {
};
};
+/* Return true iff the first @len bytes of @name are exactly "." or "..". */
+static inline bool is_dot_dotdot_len(const char *name, unsigned int len)
+{
+	switch (len) {
+	case 1:
+		return name[0] == '.';
+	case 2:
+		return name[0] == '.' && name[1] == '.';
+	default:
+		/* empty names and anything longer than two bytes */
+		return false;
+	}
+}
+
static inline bool is_dot_dotdot(const char *name)
{
if (name[0] != '.')
diff --git a/lib/Makefile.am b/lib/Makefile.am
index c745e49..4a25013 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -27,7 +27,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
- compress_hints.c hashmap.c sha256.c blobchunk.c
+ compress_hints.c hashmap.c sha256.c blobchunk.c dir.c
liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
if ENABLE_LZ4
liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/dir.c b/lib/dir.c
new file mode 100644
index 0000000..6dcb7e8
--- /dev/null
+++ b/lib/dir.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/print.h"
+#include "erofs/dir.h"
+#include <stdlib.h>
+
+/*
+ * Walk all dirents of one directory block and call ctx->cb for each.
+ *
+ * @dentry_blk:   block data: the dirent array followed by the names
+ * @lblk:         logical block number, only used in error messages
+ * @next_nameoff: nameoff of the first dirent, which is also the end of the
+ *                dirent array; advanced locally as names are consumed
+ * @maxsize:      number of valid bytes in @dentry_blk
+ * @fsck:         also verify name order, file types and `.'/`..' dirents
+ *
+ * Returns 0 on success, -EFSCORRUPTED if a bogus dirent is found (logged),
+ * or the non-zero value returned by the callback (not logged here).
+ */
+static int traverse_dirents(struct erofs_dir_context *ctx,
+			    void *dentry_blk, unsigned int lblk,
+			    unsigned int next_nameoff, unsigned int maxsize,
+			    bool fsck)
+{
+	struct erofs_dirent *de = dentry_blk;
+	const struct erofs_dirent *end =
+		(const void *)((const char *)dentry_blk + next_nameoff);
+	const char *prev_name = NULL;
+	const char *errmsg = "corrupted dirent";
+	unsigned int prev_namelen = 0;
+	int ret = 0;
+	bool silent = false;
+
+	while (de < end) {
+		const char *de_name;
+		unsigned int de_namelen;
+		unsigned int nameoff;
+
+		nameoff = le16_to_cpu(de->nameoff);
+		de_name = (char *)dentry_blk + nameoff;
+
+		erofs_dbg("traversed nid (%llu)", de->nid | 0ULL);
+
+		ret = -EFSCORRUPTED;
+		/*
+		 * corrupted entry check: names must be packed back to back,
+		 * and nameoff must stay within the valid data so that
+		 * `maxsize - nameoff' below cannot wrap around and let
+		 * strnlen() run past the block.
+		 */
+		if (nameoff != next_nameoff || nameoff > maxsize) {
+			errmsg = "bogus dirent nameoff";
+			break;
+		}
+
+		/* the last dirent check */
+		if (de + 1 >= end)
+			de_namelen = strnlen(de_name, maxsize - nameoff);
+		else
+			de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
+
+		if (nameoff + de_namelen > maxsize ||
+		    de_namelen > EROFS_NAME_LEN) {
+			errmsg = "bogus dirent namelen";
+			break;
+		}
+
+		/* names have to be strictly increasing within a block */
+		if (fsck && prev_name) {
+			int cmp = strncmp(prev_name, de_name,
+					  min(prev_namelen, de_namelen));
+
+			if (cmp > 0 || (cmp == 0 &&
+					prev_namelen >= de_namelen)) {
+				errmsg = "wrong dirent name order";
+				break;
+			}
+		}
+
+		if (fsck && de->file_type >= EROFS_FT_MAX) {
+			/* errmsg is printed via "%s": no conversions in it */
+			errmsg = "invalid file type";
+			break;
+		}
+
+		ctx->de = de;
+		ctx->dname = de_name;
+		ctx->de_namelen = de_namelen;
+		ctx->dot_dotdot = is_dot_dotdot_len(de_name, de_namelen);
+		if (ctx->dot_dotdot) {
+			switch (de_namelen) {
+			case 2:
+				if (fsck &&
+				    (ctx->flags & EROFS_READDIR_DOTDOT_FOUND)) {
+					errmsg = "duplicated `..' dirent";
+					goto out;
+				}
+
+				ctx->flags |= EROFS_READDIR_DOTDOT_FOUND;
+				/* the parent of the root directory is itself */
+				if (sbi.root_nid == ctx->dir->nid) {
+					ctx->pnid = sbi.root_nid;
+					ctx->flags |= EROFS_READDIR_VALID_PNID;
+				}
+
+				if (fsck &&
+				    (ctx->flags & EROFS_READDIR_VALID_PNID) &&
+				    de->nid != ctx->pnid) {
+					errmsg = "corrupted `..' dirent";
+					goto out;
+				}
+				break;
+			case 1:
+				if (fsck &&
+				    (ctx->flags & EROFS_READDIR_DOT_FOUND)) {
+					errmsg = "duplicated `.' dirent";
+					goto out;
+				}
+
+				ctx->flags |= EROFS_READDIR_DOT_FOUND;
+				if (fsck && de->nid != ctx->dir->nid) {
+					errmsg = "corrupted `.' dirent";
+					goto out;
+				}
+				break;
+			}
+		}
+
+		ret = ctx->cb(ctx);
+		if (ret) {
+			/* the callback reports its own failures */
+			silent = true;
+			goto out;
+		}
+		prev_name = de_name;
+		prev_namelen = de_namelen;
+		next_nameoff += de_namelen;
+		++de;
+	}
+out:
+	if (ret && !silent)
+		erofs_err("%s @ nid %llu, lblk %u, index %lu",
+			  errmsg, ctx->dir->nid | 0ULL, lblk,
+			  (de - (struct erofs_dirent *)dentry_blk) | 0UL);
+	return ret;
+}
+
+/*
+ * Iterate over all dirents of ctx->dir, block by block, calling ctx->cb
+ * for each dirent.
+ *
+ * With @fsck set, extra consistency checks are done on the dirents and a
+ * directory which lacks `.' or `..' is reported as corrupted.
+ *
+ * Returns 0 on success, -ENOTDIR if ctx->dir is not a directory, the
+ * erofs_pread() error if reading fails, -EFSCORRUPTED on corrupted
+ * dirents, or the non-zero value returned by the callback.
+ */
+int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck)
+{
+	struct erofs_inode *dir = ctx->dir;
+	int err = 0;
+	erofs_off_t pos;
+	char buf[EROFS_BLKSIZ];
+
+	if ((dir->i_mode & S_IFMT) != S_IFDIR)
+		return -ENOTDIR;
+
+	/* `.' and `..' have to be found again on every iteration */
+	ctx->flags &= ~EROFS_READDIR_ALL_SPECIAL_FOUND;
+	pos = 0;
+	while (pos < dir->i_size) {
+		erofs_blk_t lblk = erofs_blknr(pos);
+		erofs_off_t maxsize = min_t(erofs_off_t,
+					dir->i_size - pos, EROFS_BLKSIZ);
+		const struct erofs_dirent *de = (const void *)buf;
+		unsigned int nameoff;
+
+		err = erofs_pread(dir, buf, maxsize, pos);
+		if (err) {
+			erofs_err("I/O error occurred when reading dirents @ nid %llu, lblk %u: %d",
+				  dir->nid | 0ULL, lblk, err);
+			return err;
+		}
+
+		nameoff = le16_to_cpu(de->nameoff);
+		/*
+		 * de[0].nameoff is also the size of the dirent array: it
+		 * must cover at least one dirent and has to stay within
+		 * the bytes which were actually read for this block.
+		 */
+		if (nameoff < sizeof(struct erofs_dirent) ||
+		    nameoff >= maxsize) {
+			erofs_err("invalid de[0].nameoff %u @ nid %llu, lblk %u",
+				  nameoff, dir->nid | 0ULL, lblk);
+			return -EFSCORRUPTED;
+		}
+
+		/*
+		 * Propagate corruption and callback errors to the caller
+		 * instead of dropping them once the loop is left.
+		 */
+		err = traverse_dirents(ctx, buf, lblk, nameoff, maxsize, fsck);
+		if (err)
+			return err;
+		pos += maxsize;
+	}
+
+	if (fsck && (ctx->flags & EROFS_READDIR_ALL_SPECIAL_FOUND) !=
+			EROFS_READDIR_ALL_SPECIAL_FOUND) {
+		erofs_err("`.' or `..' dirent is missing @ nid %llu",
+			  dir->nid | 0ULL);
+		return -EFSCORRUPTED;
+	}
+	return 0;
+}
--
2.24.4
More information about the Linux-erofs
mailing list