[RFC PATCH] [WIP] erofs-utils: fsck support for --extract=X to extract to path X

Gao Xiang xiang at kernel.org
Wed Jan 19 13:13:48 AEDT 2022


From: Igor Ostapenko <igoreisberg at gmail.com>

Add support to extract directories, regular files and symlinks.
Allocation for extract_path is done only once, then the buffer is
reused.

Raw and compressed data chunks are handled with a unified function
to avoid code duplication. Compressed data is verified linearly (with
EROFS_GET_BLOCKS_FIEMAP) instead of with lookback, as it's problematic
to extract data when looking backwards.

Signed-off-by: Igor Ostapenko <igoreisberg at gmail.com>
Signed-off-by: Gao Xiang <xiang at kernel.org>
---
 fsck/main.c | 483 ++++++++++++++++++++++++++++++++++++----------------
 mkfs/main.c |   2 +-
 2 files changed, 337 insertions(+), 148 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index 30d0a1b..94cacfd 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -6,6 +6,8 @@
 #include <stdlib.h>
 #include <getopt.h>
 #include <time.h>
+#include <utime.h>
+#include <unistd.h>
 #include <sys/stat.h>
 #include "erofs/print.h"
 #include "erofs/io.h"
@@ -18,6 +20,9 @@ struct erofsfsck_cfg {
 	bool corrupted;
 	bool print_comp_ratio;
 	bool check_decomp;
+	char *extract_path;
+	size_t extract_pos;
+	bool force;
 	u64 physical_blocks;
 	u64 logical_blocks;
 };
@@ -25,8 +30,9 @@ static struct erofsfsck_cfg fsckcfg;
 
 static struct option long_options[] = {
 	{"help", no_argument, 0, 1},
-	{"extract", no_argument, 0, 2},
+	{"extract", optional_argument, 0, 2},
 	{"device", required_argument, 0, 3},
+	{"force", no_argument, 0, 4},
 	{0, 0, 0, 0},
 };
 
@@ -34,12 +40,13 @@ static void usage(void)
 {
 	fputs("usage: [options] IMAGE\n\n"
 	      "Check erofs filesystem integrity of IMAGE, and [options] are:\n"
-	      " -V              print the version number of fsck.erofs and exit.\n"
+	      " -V              print the version number of fsck.erofs and exit\n"
 	      " -d#             set output message level to # (maximum 9)\n"
 	      " -p              print total compression ratio of all files\n"
 	      " --device=X      specify an extra device to be used together\n"
-	      " --extract       check if all files are well encoded\n"
-	      " --help          display this help and exit.\n",
+	      " --extract[=X]   check if all files are well encoded, optionally extract to X\n"
+	      " --force         if file already exists then overwrite (--extract=X is required)\n"
+	      " --help          display this help and exit\n",
 	      stderr);
 }
 
@@ -74,6 +81,22 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
 			exit(0);
 		case 2:
 			fsckcfg.check_decomp = true;
+			if (optarg) {
+				size_t len = strlen(optarg);
+				if (len == 0)
+					return -EINVAL;
+
+				/* remove trailing slashes except root */
+				while (len > 1 && optarg[len - 1] == '/')
+					len--;
+
+				fsckcfg.extract_path = malloc(PATH_MAX);
+				if (!fsckcfg.extract_path)
+					return -ENOMEM;
+				strncpy(fsckcfg.extract_path, optarg, len);
+				fsckcfg.extract_path[len] = '\0';
+				fsckcfg.extract_pos = len;
+			}
 			break;
 		case 3:
 			ret = blob_open_ro(optarg);
@@ -81,6 +104,9 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
 				return ret;
 			++sbi.extra_devices;
 			break;
+		case 4:
+			fsckcfg.force = true;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -89,6 +115,9 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
 	if (optind >= argc)
 		return -EINVAL;
 
+	if (fsckcfg.force && !fsckcfg.extract_path)
+		return -EINVAL;
+
 	cfg.c_img_path = strdup(argv[optind++]);
 	if (!cfg.c_img_path)
 		return -ENOMEM;
@@ -100,6 +129,25 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
 	return 0;
 }
 
+/* Best-effort restore of times, mode and ownership onto an extracted path */
+static void erofsfsck_set_attributes(struct erofs_inode *inode, char *path)
+{
+	const bool is_link = S_ISLNK(inode->i_mode);
+	struct utimbuf ut;
+
+	if (!path)	/* plain fsck without --extract=X: nothing to restore */
+		return;
+	ut.actime = ut.modtime = inode->i_ctime;
+	/* utime() and chmod() follow symlinks and would alter the target */
+	if (!is_link && utime(path, &ut) < 0)
+		erofs_warn("failed to set times: %s", path);
+	if (!is_link && chmod(path, inode->i_mode) < 0)
+		erofs_warn("failed to set permissions: %s", path);
+	/* lchown() changes the link itself, and equals chown() otherwise */
+	if (lchown(path, inode->i_uid, inode->i_gid) < 0)
+		erofs_warn("failed to change ownership: %s", path);
+}
+
 static int erofs_check_sb_chksum(void)
 {
 	int ret;
@@ -127,137 +175,6 @@ static int erofs_check_sb_chksum(void)
 	return 0;
 }
 
-static int verify_uncompressed_inode(struct erofs_inode *inode)
-{
-	struct erofs_map_blocks map = {
-		.index = UINT_MAX,
-	};
-	int ret;
-	erofs_off_t ptr = 0;
-	u64 i_blocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
-
-	while (ptr < inode->i_size) {
-		map.m_la = ptr;
-		ret = erofs_map_blocks(inode, &map, 0);
-		if (ret)
-			return ret;
-
-		if (map.m_plen != map.m_llen || ptr != map.m_la) {
-			erofs_err("broken data chunk layout m_la %" PRIu64 " ptr %" PRIu64 " m_llen %" PRIu64 " m_plen %" PRIu64,
-				  map.m_la, ptr, map.m_llen, map.m_plen);
-			return -EFSCORRUPTED;
-		}
-
-		if (!(map.m_flags & EROFS_MAP_MAPPED) && !map.m_llen) {
-			/* reached EOF */
-			ptr = inode->i_size;
-			continue;
-		}
-
-		ptr += map.m_llen;
-	}
-
-	if (fsckcfg.print_comp_ratio) {
-		fsckcfg.logical_blocks += i_blocks;
-		fsckcfg.physical_blocks += i_blocks;
-	}
-
-	return 0;
-}
-
-static int verify_compressed_inode(struct erofs_inode *inode)
-{
-	struct erofs_map_blocks map = {
-		.index = UINT_MAX,
-	};
-	struct erofs_map_dev mdev;
-	int ret = 0;
-	u64 pchunk_len = 0;
-	erofs_off_t end = inode->i_size;
-	unsigned int raw_size = 0, buffer_size = 0;
-	char *raw = NULL, *buffer = NULL;
-
-	while (end > 0) {
-		map.m_la = end - 1;
-
-		ret = z_erofs_map_blocks_iter(inode, &map, 0);
-		if (ret)
-			goto out;
-
-		if (end > map.m_la + map.m_llen) {
-			erofs_err("broken compressed chunk layout m_la %" PRIu64 " m_llen %" PRIu64 " end %" PRIu64,
-				  map.m_la, map.m_llen, end);
-			ret = -EFSCORRUPTED;
-			goto out;
-		}
-
-		pchunk_len += map.m_plen;
-		end = map.m_la;
-
-		if (!fsckcfg.check_decomp || !(map.m_flags & EROFS_MAP_MAPPED))
-			continue;
-
-		if (map.m_plen > raw_size) {
-			raw_size = map.m_plen;
-			raw = realloc(raw, raw_size);
-			BUG_ON(!raw);
-		}
-
-		if (map.m_llen > buffer_size) {
-			buffer_size = map.m_llen;
-			buffer = realloc(buffer, buffer_size);
-			BUG_ON(!buffer);
-		}
-
-		mdev = (struct erofs_map_dev) {
-			.m_deviceid = map.m_deviceid,
-			.m_pa = map.m_pa,
-		};
-		ret = erofs_map_dev(&sbi, &mdev);
-		if (ret) {
-			erofs_err("failed to map device of m_pa %" PRIu64 ", m_deviceid %u @ nid %llu: %d",
-				  map.m_pa, map.m_deviceid, inode->nid | 0ULL, ret);
-			goto out;
-		}
-
-		ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
-		if (ret < 0) {
-			erofs_err("failed to read compressed data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
-				  mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
-			goto out;
-		}
-
-		ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
-					.in = raw,
-					.out = buffer,
-					.decodedskip = 0,
-					.inputsize = map.m_plen,
-					.decodedlength = map.m_llen,
-					.alg = map.m_algorithmformat,
-					.partial_decoding = 0
-					 });
-
-		if (ret < 0) {
-			erofs_err("failed to decompress data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
-				  mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
-			goto out;
-		}
-	}
-
-	if (fsckcfg.print_comp_ratio) {
-		fsckcfg.logical_blocks +=
-			DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
-		fsckcfg.physical_blocks +=
-			DIV_ROUND_UP(pchunk_len, EROFS_BLKSIZ);
-	}
-out:
-	if (raw)
-		free(raw);
-	if (buffer)
-		free(buffer);
-	return ret < 0 ? ret : 0;
-}
-
 static int erofs_verify_xattr(struct erofs_inode *inode)
 {
 	unsigned int xattr_hdr_size = sizeof(struct erofs_xattr_ibody_header);
@@ -336,9 +253,18 @@ out:
 	return ret;
 }
 
-static int erofs_verify_inode_data(struct erofs_inode *inode)
+static int erofs_verify_inode_data(struct erofs_inode *inode, int extract_fd)
 {
-	int ret;
+	struct erofs_map_blocks map = {
+		.index = UINT_MAX,
+	};
+	struct erofs_map_dev mdev;
+	int ret = 0;
+	bool compressed;
+	erofs_off_t pos = 0;
+	u64 pchunk_len = 0;
+	unsigned int raw_size = 0, buffer_size = 0;
+	char *raw = NULL, *buffer = NULL;
 
 	erofs_dbg("verify data chunk of nid(%llu): type(%d)",
 		  inode->nid | 0ULL, inode->datalayout);
@@ -347,30 +273,270 @@ static int erofs_verify_inode_data(struct erofs_inode *inode)
 	case EROFS_INODE_FLAT_PLAIN:
 	case EROFS_INODE_FLAT_INLINE:
 	case EROFS_INODE_CHUNK_BASED:
-		ret = verify_uncompressed_inode(inode);
+		compressed = false;
 		break;
 	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
 	case EROFS_INODE_FLAT_COMPRESSION:
-		ret = verify_compressed_inode(inode);
+		compressed = true;
 		break;
 	default:
-		ret = -EINVAL;
-		break;
+		erofs_err("unknown datalayout");
+		return -EINVAL;
+	}
+
+	while (pos < inode->i_size) {
+		map.m_la = pos;
+		if (compressed)
+			ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
+		else
+			ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
+		if (ret)
+			goto out;
+
+		if (!compressed && map.m_llen != map.m_plen) {
+			erofs_err("broken chunk length m_la %" PRIu64 " m_llen %" PRIu64 " m_plen %" PRIu64,
+				  map.m_la, map.m_llen, map.m_plen);
+			ret = -EFSCORRUPTED;
+			goto out;
+		}
+
+		/* the last lcluster can be divided into 3 parts */
+		if (map.m_la + map.m_llen > inode->i_size)
+			map.m_llen = inode->i_size - map.m_la;
+
+		pchunk_len += map.m_plen;
+		pos += map.m_llen;
+
+		/* should skip decomp? */
+		if (!(map.m_flags & EROFS_MAP_MAPPED) || !fsckcfg.check_decomp)
+			continue;
+
+		if (map.m_plen > raw_size) {
+			raw_size = map.m_plen;
+			raw = realloc(raw, raw_size);
+			BUG_ON(!raw);
+		}
+
+		mdev = (struct erofs_map_dev) {
+			.m_deviceid = map.m_deviceid,
+			.m_pa = map.m_pa,
+		};
+		ret = erofs_map_dev(&sbi, &mdev);
+		if (ret) {
+			erofs_err("failed to map device of m_pa %" PRIu64 ", m_deviceid %u @ nid %llu: %d",
+				  map.m_pa, map.m_deviceid, inode->nid | 0ULL, ret);
+			goto out;
+		}
+
+		if (compressed && map.m_llen > buffer_size) {
+			buffer_size = map.m_llen;
+			buffer = realloc(buffer, buffer_size);
+			BUG_ON(!buffer);
+		}
+
+		ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
+		if (ret < 0) {
+			erofs_err("failed to read data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
+				  mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
+			goto out;
+		}
+
+		if (compressed) {
+			struct z_erofs_decompress_req rq = {
+				.in = raw,
+				.out = buffer,
+				.decodedskip = 0,
+				.inputsize = map.m_plen,
+				.decodedlength = map.m_llen,
+				.alg = map.m_algorithmformat,
+				.partial_decoding = 0
+			};
+
+			ret = z_erofs_decompress(&rq);
+			if (ret < 0) {
+				erofs_err("failed to decompress data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
+					  mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
+				goto out;
+			}
+		}
+
+		if (extract_fd >= 0 && write(extract_fd,
+					     compressed ? buffer : raw,
+					     map.m_llen) < 0) {
+			erofs_err("I/O error occurred when verifying data chunk of nid(%llu)",
+				  inode->nid | 0ULL);
+			ret = -EIO;
+			goto out;
+		}
+	}
+
+	if (fsckcfg.print_comp_ratio) {
+		fsckcfg.logical_blocks +=
+			DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+		fsckcfg.physical_blocks +=
+			DIV_ROUND_UP(pchunk_len, EROFS_BLKSIZ);
+	}
+out:
+	if (raw)
+		free(raw);
+	if (buffer)
+		free(buffer);
+	return ret < 0 ? ret : 0;
+}
+
+/* Verify a directory inode, then create it at fsckcfg.extract_path */
+static inline int erofs_extract_dir(struct erofs_inode *inode)
+{
+	int ret;
+
+	/* verify data chunk layout */
+	ret = erofs_verify_inode_data(inode, -1);
+	if (ret)
+		return ret;
+
+	erofs_dbg("create directory %s", fsckcfg.extract_path);
+
+	/*
+	 * Use default user RWX permissions rather than those from the
+	 * filesystem, as these may not have write/execute permission.
+	 * They are fixed up later in erofsfsck_set_attributes().
+	 */
+	if (mkdir(fsckcfg.extract_path, S_IRWXU) < 0) {
+		struct stat st;
+
+		/* latch errno first: erofs_err() below may overwrite it */
+		ret = -errno;
+
+		/* give up unless we're forcing and the path already exists */
+		if (!fsckcfg.force || ret != -EEXIST) {
+			erofs_err("failed to create directory: %s",
+				  fsckcfg.extract_path);
+			return ret;
+		}
+
+		if (lstat(fsckcfg.extract_path, &st) ||
+		    !S_ISDIR(st.st_mode)) {
+			erofs_err("path is not a directory: %s",
+				  fsckcfg.extract_path);
+			return -ENOTDIR;
+		}
+	}
+	return 0;
+}
+
+static inline int erofs_extract_file(struct erofs_inode *inode)
+{
+	bool tryagain = true;
+	int ret, fd;
+
+	erofs_dbg("extract file to path: %s", fsckcfg.extract_path);
+
+again:
+	/* O_EXCL: never scribble over an existing file unless --force is set */
+	fd = open(fsckcfg.extract_path,
+		  O_WRONLY | O_CREAT | (fsckcfg.force ? O_TRUNC : O_EXCL),
+		  (mode_t) inode->i_mode & 0777);
+	if (fd < 0) {
+		ret = -errno;	/* latch: the logging below may clobber errno */
+		if (ret == -EISDIR && fsckcfg.force && tryagain) {
+			erofs_warn("try to forcely remove directory %s", fsckcfg.extract_path);
+			if (rmdir(fsckcfg.extract_path) < 0) {
+				erofs_err("failed to remove: %s", fsckcfg.extract_path);
+				return -EISDIR;
+			}
+			tryagain = false;
+			goto again;
+		}
+		erofs_err("failed to open file: %s", fsckcfg.extract_path);
+		return ret;
 	}
 
-	if (ret == -EIO)
-		erofs_err("I/O error occurred when verifying data chunk of nid(%llu)",
-			  inode->nid | 0ULL);
+	/*
+	 * Verify data chunk layout into fd; fd is closed on every path below
+	 */
+	ret = erofs_verify_inode_data(inode, fd);
 
+	if (close(fd) < 0 && !ret)
+		ret = -errno;
+	return ret;
+}
+
+/* Verify a symlink inode, then create the link at fsckcfg.extract_path */
+static inline int erofs_extract_symlink(struct erofs_inode *inode)
+{
+	bool tryagain = true;
+	int ret;
+	char *buf = NULL;
+
+	erofs_dbg("extract symlink to path: %s", fsckcfg.extract_path);
+
+	/* verify data chunk layout */
+	ret = erofs_verify_inode_data(inode, -1);
+	if (ret)
+		return ret;
+
+	/* one extra byte for the NUL terminator that symlink() needs */
+	buf = malloc(inode->i_size + 1);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = erofs_pread(inode, buf, inode->i_size, 0);
+	if (ret) {
+		erofs_err("I/O error occurred when reading symlink @ nid %llu: %d",
+			  inode->nid | 0ULL, ret);
+		goto out;
+	}
+
+	buf[inode->i_size] = '\0';
+again:
+	if (symlink(buf, fsckcfg.extract_path) < 0) {
+		if (errno == EEXIST && fsckcfg.force && tryagain) {
+			erofs_warn("try to forcely remove file %s",
+				   fsckcfg.extract_path);
+			if (unlink(fsckcfg.extract_path) < 0) {
+				ret = -errno;
+				erofs_err("failed to remove: %s",
+					  fsckcfg.extract_path);
+				goto out;
+			}
+			tryagain = false;
+			goto again;
+		}
+		/* record errno before logging, which may clobber it */
+		ret = -errno;
+		erofs_err("failed to create symlink: %s", fsckcfg.extract_path);
+	}
+out:
+	free(buf);
 	return ret;
 }
 
 static int erofsfsck_dirent_iter(struct erofs_dir_context *ctx)
 {
+	int ret;
+	size_t prev_pos = fsckcfg.extract_pos;
+
 	if (ctx->dot_dotdot)
 		return 0;
 
-	return erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid);
+	if (fsckcfg.extract_path) {
+		/* '/' + name + NUL must still fit in the PATH_MAX-sized buffer */
+		if (prev_pos + 1 + ctx->de_namelen >= PATH_MAX) {
+			erofs_err("too long path under %s", fsckcfg.extract_path);
+			return -ENAMETOOLONG;
+		}
+		fsckcfg.extract_path[prev_pos] = '/';
+		memcpy(fsckcfg.extract_path + prev_pos + 1, ctx->dname, ctx->de_namelen);
+		fsckcfg.extract_pos = prev_pos + 1 + ctx->de_namelen;
+		fsckcfg.extract_path[fsckcfg.extract_pos] = '\0';
+	}
+
+	ret = erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid);
+	if (fsckcfg.extract_path) {
+		fsckcfg.extract_path[prev_pos] = '\0';
+		fsckcfg.extract_pos = prev_pos;
+	}
+	return ret;
 }
 
 static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid)
@@ -394,8 +560,25 @@ static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid)
 	if (ret)
 		goto out;
 
-	/* verify data chunk layout */
-	ret = erofs_verify_inode_data(&inode);
+	if (fsckcfg.extract_path) {
+		switch (inode.i_mode & S_IFMT) {
+		case S_IFDIR:
+			ret = erofs_extract_dir(&inode);
+			break;
+		case S_IFREG:
+			ret = erofs_extract_file(&inode);
+			break;
+		case S_IFLNK:
+			ret = erofs_extract_symlink(&inode);
+			break;
+		default:
+			goto verify;
+		}
+	} else {
+verify:
+		/* verify data chunk layout */
+		ret = erofs_verify_inode_data(&inode, -1);
+	}
 	if (ret)
 		goto out;
 
@@ -410,6 +593,10 @@ static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid)
 
 		ret = erofs_iterate_dir(&ctx, true);
 	}
+
+	if (!ret)
+		erofsfsck_set_attributes(&inode, fsckcfg.extract_path);
+
 out:
 	if (ret && ret != -EIO)
 		fsckcfg.corrupted = true;
@@ -425,6 +612,8 @@ int main(int argc, char **argv)
 	fsckcfg.corrupted = false;
 	fsckcfg.print_comp_ratio = false;
 	fsckcfg.check_decomp = false;
+	fsckcfg.extract_path = NULL;
+	fsckcfg.extract_pos = 0;
 	fsckcfg.logical_blocks = 0;
 	fsckcfg.physical_blocks = 0;
 
diff --git a/mkfs/main.c b/mkfs/main.c
index 90cedde..1787b2c 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -589,7 +589,7 @@ int main(int argc, char **argv)
 	err = lstat64(cfg.c_src_path, &st);
 	if (err)
 		return 1;
-	if ((st.st_mode & S_IFMT) != S_IFDIR) {
+	if (!S_ISDIR(st.st_mode)) {
 		erofs_err("root of the filesystem is not a directory - %s",
 			  cfg.c_src_path);
 		usage();
-- 
2.30.2



More information about the Linux-erofs mailing list