[PATCH v7 2/2] erofs-utils: lib: add API to iterate dirs in EROFS
Kelvin Zhang
zhangkelvin at google.com
Sat Dec 18 06:47:20 AEDT 2021
Signed-off-by: Kelvin Zhang <zhangkelvin at google.com>
---
fuse/Makefile.am | 2 +-
fuse/dir.c | 100 --------------------
fuse/main.c | 65 +++++++++++--
include/erofs/dir.h | 52 +++++++++++
lib/Makefile.am | 2 +-
lib/dir.c | 223 ++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 334 insertions(+), 110 deletions(-)
delete mode 100644 fuse/dir.c
create mode 100644 include/erofs/dir.h
create mode 100644 lib/dir.c
diff --git a/fuse/Makefile.am b/fuse/Makefile.am
index 7b007f3..0a78c0a 100644
--- a/fuse/Makefile.am
+++ b/fuse/Makefile.am
@@ -2,7 +2,7 @@
AUTOMAKE_OPTIONS = foreign
bin_PROGRAMS = erofsfuse
-erofsfuse_SOURCES = dir.c main.c
+erofsfuse_SOURCES = main.c
erofsfuse_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
erofsfuse_CFLAGS += -DFUSE_USE_VERSION=26 ${libfuse_CFLAGS} ${libselinux_CFLAGS}
erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse_LIBS} ${liblz4_LIBS} \
diff --git a/fuse/dir.c b/fuse/dir.c
deleted file mode 100644
index bc8735b..0000000
--- a/fuse/dir.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Created by Li Guifu <blucerlee at gmail.com>
- */
-#include <fuse.h>
-#include <fuse_opt.h>
-#include "macosx.h"
-#include "erofs/internal.h"
-#include "erofs/print.h"
-
-static int erofs_fill_dentries(struct erofs_inode *dir,
- fuse_fill_dir_t filler, void *buf,
- void *dblk, unsigned int nameoff,
- unsigned int maxsize)
-{
- struct erofs_dirent *de = dblk;
- const struct erofs_dirent *end = dblk + nameoff;
- char namebuf[EROFS_NAME_LEN + 1];
-
- while (de < end) {
- const char *de_name;
- unsigned int de_namelen;
-
- nameoff = le16_to_cpu(de->nameoff);
- de_name = (char *)dblk + nameoff;
-
- /* the last dirent in the block? */
- if (de + 1 >= end)
- de_namelen = strnlen(de_name, maxsize - nameoff);
- else
- de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
-
- /* a corrupted entry is found */
- if (nameoff + de_namelen > maxsize ||
- de_namelen > EROFS_NAME_LEN) {
- erofs_err("bogus dirent @ nid %llu", dir->nid | 0ULL);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
-
- memcpy(namebuf, de_name, de_namelen);
- namebuf[de_namelen] = '\0';
-
- filler(buf, namebuf, NULL, 0);
- ++de;
- }
- return 0;
-}
-
-int erofsfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
- off_t offset, struct fuse_file_info *fi)
-{
- int ret;
- struct erofs_inode dir;
- char dblk[EROFS_BLKSIZ];
- erofs_off_t pos;
-
- erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);
-
- ret = erofs_ilookup(path, &dir);
- if (ret)
- return ret;
-
- erofs_dbg("path=%s nid = %llu", path, dir.nid | 0ULL);
-
- if (!S_ISDIR(dir.i_mode))
- return -ENOTDIR;
-
- if (!dir.i_size)
- return 0;
-
- pos = 0;
- while (pos < dir.i_size) {
- unsigned int nameoff, maxsize;
- struct erofs_dirent *de;
-
- maxsize = min_t(unsigned int, EROFS_BLKSIZ,
- dir.i_size - pos);
- ret = erofs_pread(&dir, dblk, maxsize, pos);
- if (ret)
- return ret;
-
- de = (struct erofs_dirent *)dblk;
- nameoff = le16_to_cpu(de->nameoff);
- if (nameoff < sizeof(struct erofs_dirent) ||
- nameoff >= PAGE_SIZE) {
- erofs_err("invalid de[0].nameoff %u @ nid %llu",
- nameoff, dir.nid | 0ULL);
- ret = -EFSCORRUPTED;
- break;
- }
-
- ret = erofs_fill_dentries(&dir, filler, buf,
- dblk, nameoff, maxsize);
- if (ret)
- break;
- pos += maxsize;
- }
- return 0;
-}
diff --git a/fuse/main.c b/fuse/main.c
index 255965e..53dbdd4 100644
--- a/fuse/main.c
+++ b/fuse/main.c
@@ -12,9 +12,58 @@
#include "erofs/config.h"
#include "erofs/print.h"
#include "erofs/io.h"
+#include "erofs/dir.h"
-int erofsfuse_readdir(const char *path, void *buffer, fuse_fill_dir_t filler,
- off_t offset, struct fuse_file_info *fi);
+struct erofsfuse_dir_context {
+ fuse_fill_dir_t filler;
+ struct fuse_file_info *fi;
+ void *buf;
+};
+
+static int erofsfuse_fill_dentries(struct erofs_dirent_info *ctx)
+{
+ struct erofsfuse_dir_context *fusectx = ctx->arg;
+
+ fusectx->filler(fusectx->buf, ctx->dname, NULL, 0);
+ return 0;
+}
+
+int erofsfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
+ off_t offset, struct fuse_file_info *fi)
+{
+ int ret;
+ struct erofs_inode dir;
+
+ erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);
+
+ ret = erofs_ilookup(path, &dir);
+ if (ret)
+ return ret;
+
+ erofs_dbg("path=%s nid = %llu", path, dir.nid | 0ULL);
+ if (!S_ISDIR(dir.i_mode))
+ return -ENOTDIR;
+
+ if (!dir.i_size)
+ return 0;
+ struct erofsfuse_dir_context ctx = {
+ .filler = filler,
+ .fi = fi,
+ .buf = buf,
+ };
+ struct erofs_iterate_dir_param param = {
+ .cb = erofsfuse_fill_dentries,
+ .fsck = false,
+ .nid = dir.nid,
+ .recursive = false,
+ .arg = &ctx,
+ };
+#ifndef NDEBUG
+ param.fsck = true;
+#endif
+ return erofs_iterate_dir(¶m);
+
+}
static void *erofsfuse_init(struct fuse_conn_info *info)
{
@@ -122,13 +171,13 @@ static void usage(void)
struct fuse_args args = FUSE_ARGS_INIT(0, NULL);
fputs("usage: [options] IMAGE MOUNTPOINT\n\n"
- "Options:\n"
- " --dbglevel=# set output message level to # (maximum 9)\n"
- " --device=# specify an extra device to be used together\n"
+ "Options:\n"
+ " --dbglevel=# set output message level to # (maximum 9)\n"
+ " --device=# specify an extra device to be used together\n"
#if FUSE_MAJOR_VERSION < 3
- " --help display this help and exit\n"
+ " --help display this help and exit\n"
#endif
- "\n", stderr);
+ "\n", stderr);
#if FUSE_MAJOR_VERSION >= 3
fuse_cmdline_help();
@@ -148,7 +197,7 @@ static void erofsfuse_dumpcfg(void)
}
static int optional_opt_func(void *data, const char *arg, int key,
- struct fuse_args *outargs)
+ struct fuse_args *outargs)
{
int ret;
diff --git a/include/erofs/dir.h b/include/erofs/dir.h
new file mode 100644
index 0000000..affb607
--- /dev/null
+++ b/include/erofs/dir.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_DIR_H
+#define __EROFS_DIR_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "internal.h"
+
+struct erofs_dir_context;
+
+struct erofs_dirent_info {
+ /* inode id of the inode being iterated */
+ erofs_nid_t nid;
+ /* file type, see values before EROFS_FT_MAX */
+ u8 ftype;
+ const char *dname;
+ /* opaque ptr passed in erofs_iterate_dir_param,
+ * can be used to persist state across multiple
+ * callbacks.
+ */
+ void *arg;
+};
+
+/* callback function for iterating over inodes of EROFS */
+typedef int (*erofs_readdir_cb)(struct erofs_dirent_info *);
+
+/* callers could use a wrapper to contain extra information */
+struct erofs_iterate_dir_param {
+ /* inode id of the directory that needs to be iterated. */
+ /* Use sbi.root_nid if you want to iterate over rood dir */
+ erofs_nid_t nid;
+ bool fsck; /* Whether to perform validity check */
+ erofs_nid_t pnid; /* optional, needed if fsck = true */
+ erofs_readdir_cb cb;
+ /* Whether to iterate this directory recursively */
+ bool recursive;
+ /* This will be copied to erofs_dirent_info.arg when invoking callback */
+ void *arg;
+};
+
+/* iterate over inodes that are in directory */
+int erofs_iterate_dir(const struct erofs_iterate_dir_param *ctx);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 67ba798..25a4a2b 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -27,7 +27,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
- compress_hints.c hashmap.c sha256.c blobchunk.c
+ compress_hints.c hashmap.c sha256.c blobchunk.c dir.c
liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
if ENABLE_LZ4
liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/dir.c b/lib/dir.c
new file mode 100644
index 0000000..04a4596
--- /dev/null
+++ b/lib/dir.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/dir.h"
+
+#include <linux/limits.h>
+#include <stdlib.h>
+
+#include "erofs/internal.h"
+#include "erofs/print.h"
+
+struct erofs_dir_context {
+ erofs_nid_t pnid; /* optional */
+ struct erofs_inode *dir;
+
+ bool dot_found;
+ bool dot_dot_found;
+ char cur_name[PATH_MAX];
+ char prev_name[PATH_MAX];
+};
+
+
+static int traverse_dirents(
+ struct erofs_dir_context *ctx,
+ const struct erofs_iterate_dir_param *params,
+ const void *dentry_blk,
+ unsigned int lblk, unsigned int next_nameoff,
+ unsigned int maxsize, bool fsck)
+{
+ const struct erofs_dirent *de = dentry_blk;
+ const struct erofs_dirent *end = dentry_blk + next_nameoff;
+ char *prev_name = ctx->prev_name;
+ char *cur_name = ctx->cur_name;
+ const char *errmsg;
+ int ret = 0;
+ bool silent = false;
+
+ while (de < end) {
+ unsigned int de_namelen;
+ unsigned int nameoff;
+
+ nameoff = le16_to_cpu(de->nameoff);
+ const char *de_name = (char *)dentry_blk + nameoff;
+
+ /* the last dirent check */
+ if (de + 1 >= end)
+ de_namelen = strnlen(de_name, maxsize - nameoff);
+ else
+ de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
+
+ strncpy(cur_name, de_name, de_namelen);
+ cur_name[de_namelen] = '\0';
+ if (!cur_name) {
+ errmsg = "failed to allocate dirent name";
+ ret = -ENOMEM;
+ break;
+ }
+
+ erofs_dbg("traversed filename(%s)", cur_name);
+
+ ret = -EFSCORRUPTED;
+ /* corrupted entry check */
+ if (nameoff != next_nameoff) {
+ errmsg = "bogus dirent nameoff";
+ break;
+ }
+
+ if (nameoff + de_namelen > maxsize || de_namelen > EROFS_NAME_LEN) {
+ errmsg = "bogus dirent namelen";
+ break;
+ }
+
+ if (fsck && prev_name && strcmp(prev_name, cur_name) >= 0) {
+ errmsg = "wrong dirent name order";
+ break;
+ }
+
+ if (fsck && de->file_type >= EROFS_FT_MAX) {
+ errmsg = "invalid file type %u";
+ break;
+ }
+
+ const bool dot_dotdot = is_dot_dotdot(cur_name);
+
+ if (dot_dotdot) {
+ switch (de_namelen) {
+ case 2:
+ if (fsck && ctx->dot_dot_found) {
+ errmsg = "duplicated `..' dirent";
+ goto out;
+ }
+ ctx->dot_dot_found = true;
+ if (sbi.root_nid == ctx->dir->nid) {
+ ctx->pnid = sbi.root_nid;
+ if (fsck && de->nid != ctx->pnid) {
+ errmsg = "corrupted `..' dirent";
+ goto out;
+ }
+ }
+ break;
+ case 1:
+ if (fsck && ctx->dot_found) {
+ errmsg = "duplicated `.' dirent";
+ goto out;
+ }
+
+ ctx->dot_found = true;
+ if (fsck && de->nid != ctx->dir->nid) {
+ errmsg = "corrupted `.' dirent";
+ goto out;
+ }
+ break;
+ }
+ }
+ struct erofs_dirent_info output_info = {
+ .ftype = de->file_type,
+ .nid = de->nid,
+ .arg = params->arg,
+ .dname = cur_name};
+
+ ret = params->cb(&output_info);
+ if (ret) {
+ silent = true;
+ goto out;
+ }
+ if (params->recursive && de->file_type == EROFS_FT_DIR && !dot_dotdot) {
+ const struct erofs_iterate_dir_param recursive_param = {
+ .nid = de->nid,
+ .fsck = params->fsck,
+ .pnid = params->nid,
+ .cb = params->cb,
+ .recursive = params->recursive,
+ .arg = params->arg};
+ erofs_iterate_dir(&recursive_param);
+ }
+
+ next_nameoff += de_namelen;
+ ++de;
+
+ prev_name = cur_name;
+ cur_name = prev_name == ctx->prev_name ? ctx->cur_name : ctx->prev_name;
+ }
+ if (fsck && (!ctx->dot_found || !ctx->dot_dot_found)) {
+ erofs_err("`.' or `..' dirent is missing @ nid %llu", de->nid | 0ULL);
+ return -EFSCORRUPTED;
+ }
+out:
+ if (ret && !silent)
+ erofs_err("%s @ nid %llu, lblk %u, index %lu",
+ errmsg, ctx->dir->nid | 0ULL,
+ lblk, (de - (struct erofs_dirent *)dentry_blk) | 0UL);
+ return ret;
+}
+
+int erofs_iterate_dir(const struct erofs_iterate_dir_param *params)
+{
+ int err;
+ struct erofs_inode dir;
+
+ dir.nid = params->nid;
+ err = erofs_read_inode_from_disk(&dir);
+ if (err) {
+ return err;
+ }
+ if ((dir.i_mode & S_IFMT) != S_IFDIR) {
+ return -ENOTDIR;
+ }
+
+ struct erofs_dirent_info output_info;
+ /* Both |buf| and |ctx| can live on the stack, but that might cause
+ * stack overflow when iterating recursively. So put them on heap.
+ */
+ char *buf = calloc(EROFS_BLKSIZ, 1);
+
+ if (buf == NULL) {
+ goto out;
+ }
+ struct erofs_dir_context *ctx = calloc(sizeof(struct erofs_dir_context), 1);
+
+ if (ctx == NULL) {
+ goto out;
+ }
+ ctx->dir = &dir;
+ ctx->pnid = params->pnid;
+
+ erofs_off_t pos = 0;
+
+ while (pos < dir.i_size) {
+ const erofs_blk_t lblk = erofs_blknr(pos);
+ const erofs_off_t maxsize =
+ min_t(erofs_off_t, dir.i_size - pos, EROFS_BLKSIZ);
+ const struct erofs_dirent *de = (const void *)buf;
+
+ err = erofs_pread(&dir, buf, maxsize, pos);
+ if (err) {
+ erofs_err(
+ "I/O error occurred when reading dirents @ nid %llu, lblk %u: %d",
+ dir.nid | 0ULL, lblk, err);
+ break;
+ }
+
+ const unsigned int nameoff = le16_to_cpu(de->nameoff);
+
+ if (nameoff < sizeof(struct erofs_dirent) || nameoff >= PAGE_SIZE) {
+ erofs_err("invalid de[0].nameoff %u @ nid %llu, lblk %u",
+ nameoff, dir.nid | 0ULL, lblk);
+ err = -EFSCORRUPTED;
+ break;
+ }
+ err = traverse_dirents(
+ ctx, params, buf, lblk, nameoff, maxsize, params->fsck);
+ if (err) {
+ break;
+ }
+ pos += maxsize;
+ }
+out:
+ if (ctx) {
+ free(ctx);
+ }
+ if (buf) {
+ free(buf);
+ }
+ return err;
+}
--
2.34.1.173.g76aa8bc2d0-goog
More information about the Linux-erofs
mailing list