[PATCH v4 2/2] Add API to iterate over inodes in EROFS

Kelvin Zhang zhangkelvin at google.com
Tue Dec 14 14:32:39 AEDT 2021


Change-Id: Ia35708080a72ee204eaaddfc670d3cb8023a078c
Signed-off-by: Kelvin Zhang <zhangkelvin at google.com>
---
 include/erofs/iterate.h |  46 ++++++++++++
 include/erofs_fs.h      |   4 +-
 lib/Makefile.am         |   2 +-
 lib/iterate.c           | 154 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 203 insertions(+), 3 deletions(-)
 create mode 100644 include/erofs/iterate.h
 create mode 100644 lib/iterate.c

diff --git a/include/erofs/iterate.h b/include/erofs/iterate.h
new file mode 100644
index 0000000..4e2c783
--- /dev/null
+++ b/include/erofs/iterate.h
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef ITERATE_ITERATE
+#define ITERATE_ITERATE
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+#include "erofs/io.h"
+#include "erofs/print.h"
+
+
+// Per-inode record handed to an erofs_readdir_cb callback.
+struct erofs_inode_info {
+	// Name buffer filled in by the iterator via erofs_get_inode_name().
+	const char* name;
+	// File type: taken from the dirent for directory entries, or
+	// EROFS_FT_DIR when the iterated directory itself is reported.
+	enum erofs_ftype ftype;
+	// Points at an inode local to the iterator, so it is only valid for
+	// the duration of the callback.
+	struct erofs_inode* inode;
+	// The |arg| value the caller gave to erofs_iterate_dir(), unchanged.
+	void* arg;
+};
+// Callback function for iterating over inodes of EROFS; it receives one
+// erofs_inode_info per visited inode.
+// NOTE(review): the iterator in lib/iterate.c calls the callback without
+// looking at the returned bool, so its meaning is currently undefined.
+typedef bool (*erofs_readdir_cb)(struct erofs_inode_info*);
+
+// Recursively iterate over the inodes under the directory specified by |nid|.
+// |parent_nid| is optional: pass 0 to omit it. When it is non-zero, the ".."
+// entry of |nid| is additionally checked to point at |parent_nid|.
+// |cb| will be called for every inode, regardless of type of inode.
+// |arg| will be passed to the callback in the |arg| field of
+// struct erofs_inode_info.
+// Returns 0 on success, or a non-zero error code on failure.
+int erofs_iterate_dir(const struct erofs_sb_info* sbi,
+		      erofs_nid_t nid,
+		      erofs_nid_t parent_nid,
+		      erofs_readdir_cb cb,
+		      void* arg);
+
+// Same as erofs_iterate_dir(), starting at |sbi->root_nid|; the root is
+// passed as its own parent.
+int erofs_iterate_root_dir(const struct erofs_sb_info* sbi,
+			   erofs_readdir_cb cb,
+			   void* arg);
+
+// Stores in |*size| the number of bytes the data of |inode| occupies:
+// i_size for flat and chunk-based layouts, i_blocks * EROFS_BLKSIZ for
+// compressed layouts. Returns 0 on success; for an unknown data layout
+// |*size| is set to 0 and -1 is returned.
+int erofs_get_occupied_size(const struct erofs_inode* inode, erofs_off_t* size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // ITERATE_ITERATE
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 9a91877..7ee8251 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -389,8 +389,8 @@ struct erofs_dirent {
 } __packed;
 
 /* file types used in inode_info->flags */
-enum {
-	EROFS_FT_UNKNOWN,
+enum erofs_ftype {
+	EROFS_FT_UNKNOWN = 0,
 	EROFS_FT_REG_FILE,
 	EROFS_FT_DIR,
 	EROFS_FT_CHRDEV,
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 67ba798..20c0e4f 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -27,7 +27,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
-		      compress_hints.c hashmap.c sha256.c blobchunk.c
+		      compress_hints.c hashmap.c sha256.c blobchunk.c iterate.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/iterate.c b/lib/iterate.c
new file mode 100644
index 0000000..e01eadf
--- /dev/null
+++ b/lib/iterate.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include "erofs/internal.h"
+#include "erofs_fs.h"
+#include "erofs/print.h"
+#include "erofs/iterate.h"
+
+/*
+ * Report the inode referenced by dirent |de| to |cb| and, when it is a
+ * directory, descend into it.
+ *
+ * |nid| is the directory that contains |de| and |parent_nid| is that
+ * directory's parent; an entry pointing at either of them is reported but
+ * not descended into. |dname| is not used: the reported name is obtained
+ * from erofs_get_inode_name(). |arg| is passed through to |cb|.
+ *
+ * Returns 0 on success, otherwise the error of the helper that failed.
+ */
+static int erofs_read_dirent(const struct erofs_sb_info* sbi,
+			     const struct erofs_dirent* de,
+			     erofs_nid_t nid,
+			     erofs_nid_t parent_nid,
+			     const char* dname,
+			     erofs_readdir_cb cb,
+			     void* arg) {
+	int err;
+	/* on-disk dirent fields are little-endian; convert the nid once */
+	const erofs_nid_t child = le64_to_cpu(de->nid);
+	struct erofs_inode inode = {.nid = child};
+	char buf[PATH_MAX + 1];
+
+	err = erofs_read_inode_from_disk(&inode);
+	if (err) {
+		erofs_err("read file inode from disk failed!");
+		return err;
+	}
+	/* never hand an unfilled name buffer to the callback */
+	err = erofs_get_inode_name(sbi, child, buf, sizeof(buf));
+	if (err)
+		return err;
+	struct erofs_inode_info info = {
+			.name = buf,
+			.ftype = de->file_type,
+			.inode = &inode,
+			.arg = arg};
+	cb(&info);
+	/* only real sub-directories are walked, never the directory itself or its parent */
+	if (de->file_type != EROFS_FT_DIR || child == nid || child == parent_nid)
+		return 0;
+	err = erofs_iterate_dir(sbi, child, nid, cb, arg);
+	if (err)
+		erofs_err("parse dir nid %llu error occurred\n", child | 0ULL);
+	return err;
+}
+
+/*
+ * Sanity-check one directory entry.
+ *
+ * |de| is the entry, |last_de| is one past the last entry of the block,
+ * |maxsize| is the number of valid bytes in the block and |dname| points at
+ * the name of |de| inside the block.
+ *
+ * Returns the length of the name (>= 0), or -EFSCORRUPTED when the name
+ * offset, the name length or the file type is invalid.
+ */
+static inline int erofs_checkdirent(const struct erofs_dirent* de,
+				    const struct erofs_dirent* last_de,
+				    u32 maxsize,
+				    const char* dname) {
+	int dname_len;
+	unsigned int nameoff = le16_to_cpu(de->nameoff);
+	/* nameoff > maxsize would also make the strnlen() bound below wrap */
+	if (nameoff < sizeof(struct erofs_dirent) || nameoff >= PAGE_SIZE ||
+	    nameoff > maxsize) {
+		erofs_err("invalid de[0].nameoff %u @ nid %llu", nameoff,
+			  le64_to_cpu(de->nid) | 0ULL);
+		return -EFSCORRUPTED;
+	}
+	/*
+	 * The last name runs up to a NUL byte or the end of the block; every
+	 * other name runs up to the start of the next name.
+	 */
+	dname_len = (de + 1 >= last_de) ? strnlen(dname, maxsize - nameoff)
+					: le16_to_cpu(de[1].nameoff) - nameoff;
+	/*
+	 * a corrupted entry is found; a negative length means the next name
+	 * offset lies before this one and must not leak out as an error code
+	 */
+	if (dname_len < 0 || nameoff + dname_len > maxsize ||
+	    dname_len > EROFS_NAME_LEN) {
+		erofs_err("bogus dirent @ nid %llu", le64_to_cpu(de->nid) | 0ULL);
+		DBG_BUGON(1);
+		return -EFSCORRUPTED;
+	}
+	if (de->file_type >= EROFS_FT_MAX) {
+		erofs_err("invalid file type %u", (unsigned int)(de->file_type));
+		return -EFSCORRUPTED;
+	}
+	return dname_len;
+}
+
+/*
+ * Walk directory |nid| recursively, reporting every inode to |cb|.
+ *
+ * The directory itself is reported first (as EROFS_FT_DIR). Then every entry
+ * except "." and ".." is handed to erofs_read_dirent(), which reports it and
+ * descends into sub-directories.
+ * NOTE(review): a sub-directory is therefore reported twice, once by
+ * erofs_read_dirent() and once at the top of the nested call here; confirm
+ * that this is intended.
+ *
+ * |parent_nid| is optional: when it is non-zero, the ".." entry must point
+ * at it. |arg| is passed through to |cb| in the |arg| field of the info
+ * struct. The bool returned by |cb| is not looked at.
+ *
+ * Returns 0 on success, -EFSCORRUPTED for malformed directory data, or the
+ * error of the helper that failed.
+ */
+int erofs_iterate_dir(const struct erofs_sb_info* sbi,
+		      erofs_nid_t nid,
+		      erofs_nid_t parent_nid,
+		      erofs_readdir_cb cb,
+		      void* arg) {
+	int err;
+	erofs_off_t offset;
+	char buf[EROFS_BLKSIZ];
+	struct erofs_inode vi = {.nid = nid};
+	err = erofs_read_inode_from_disk(&vi);
+	if (err)
+		return err;
+	struct erofs_inode_info inode_info = {
+			.name = buf,
+			.ftype = EROFS_FT_DIR,
+			.inode = &vi,
+			.arg = arg,
+	};
+	/* report the directory only once its name has really been filled in */
+	err = erofs_get_inode_name(sbi, nid, buf, EROFS_BLKSIZ);
+	if (err)
+		return err;
+	cb(&inode_info);
+	/* from here on |buf| is reused as the buffer for one block of dirents */
+	offset = 0;
+	while (offset < vi.i_size) {
+		erofs_off_t maxsize = min_t(erofs_off_t, vi.i_size - offset, EROFS_BLKSIZ);
+		const struct erofs_dirent* de = (const struct erofs_dirent*)(buf);
+		const struct erofs_dirent* end;
+		unsigned int nameoff;
+		err = erofs_pread(&vi, buf, maxsize, offset);
+		if (err)
+			return err;
+		/*
+		 * The name offset of the first entry marks the end of the
+		 * dirent array, so it has to be validated before it is used
+		 * as the loop bound.
+		 */
+		nameoff = le16_to_cpu(de->nameoff);
+		if (nameoff < sizeof(struct erofs_dirent) || nameoff >= maxsize) {
+			erofs_err("invalid de[0].nameoff %u @ nid %llu", nameoff, nid | 0ULL);
+			return -EFSCORRUPTED;
+		}
+		end = (const struct erofs_dirent*)(buf + nameoff);
+		while (de < end) {
+			const char* dname;
+			int ret;
+			/* every entry has its own name offset; keep it inside the block */
+			nameoff = le16_to_cpu(de->nameoff);
+			if (nameoff >= maxsize) {
+				erofs_err("bogus dirent @ nid %llu", le64_to_cpu(de->nid) | 0ULL);
+				return -EFSCORRUPTED;
+			}
+			dname = buf + nameoff;
+			/* on success |ret| is the length of the name */
+			ret = erofs_checkdirent(de, end, maxsize, dname);
+			if (ret < 0)
+				return ret;
+			/*
+			 * skip "." and ".." dentry; names are delimited by the
+			 * name offsets rather than by NUL bytes, so compare
+			 * using the length
+			 */
+			if (dname[0] == '.' && (ret == 1 || (ret == 2 && dname[1] == '.'))) {
+				// Directory ".." should have nid == parent_nid.
+				// But parent_nid parameter is optional, so only perform the check
+				// if parent_nid is specified.
+				if (ret == 2 && parent_nid > 0 &&
+				    parent_nid != le64_to_cpu(de->nid))
+					return -EFSCORRUPTED;
+				++de;
+				continue;
+			}
+			ret = erofs_read_dirent(sbi, de, nid, parent_nid, dname, cb, arg);
+			if (ret)
+				return ret;
+			++de;
+		}
+		offset += maxsize;
+	}
+	return 0;
+}
+
+/*
+ * Compute how many bytes the data of |inode| occupies and store the result
+ * in |*size|.
+ *
+ * Flat (inline or plain) and chunk-based inodes occupy i_size bytes;
+ * compressed inodes occupy i_blocks blocks of EROFS_BLKSIZ bytes each.
+ *
+ * Returns 0 on success. For an unknown data layout |*size| is left at 0 and
+ * -1 is returned.
+ */
+int erofs_get_occupied_size(const struct erofs_inode* inode, erofs_off_t* size) {
+	*size = 0;
+	switch (inode->datalayout) {
+		case EROFS_INODE_FLAT_INLINE:
+		case EROFS_INODE_FLAT_PLAIN:
+		case EROFS_INODE_CHUNK_BASED:
+			*size = inode->i_size;
+			break;
+		case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
+		case EROFS_INODE_FLAT_COMPRESSION:
+			/*
+			 * Widen before multiplying: if i_blocks and EROFS_BLKSIZ
+			 * are both 32-bit, the product would otherwise wrap for
+			 * 4GiB and more. The cast is harmless if they are wider.
+			 */
+			*size = (erofs_off_t)inode->u.i_blocks * EROFS_BLKSIZ;
+			break;
+		default:
+			erofs_err("unknown datalayout");
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Iterate over the whole filesystem starting at the root directory recorded
+ * in |sbi|. |cb| and |arg| are forwarded to erofs_iterate_dir(), whose
+ * result is returned unchanged.
+ */
+int erofs_iterate_root_dir(const struct erofs_sb_info* sbi,
+			   erofs_readdir_cb cb,
+			   void* arg) {
+	/* the root directory is passed as its own parent */
+	const erofs_nid_t root = sbi->root_nid;
+
+	return erofs_iterate_dir(sbi, root, root, cb, arg);
+}
+
-- 
2.34.1.173.g76aa8bc2d0-goog



More information about the Linux-erofs mailing list