erofs-utils: fsck support for --extract=X to extract to path X

Igor Eisberg igoreisberg at gmail.com
Sun Dec 19 03:06:54 AEDT 2021


>From 9b3c3256d3a630a7bfe825201e9ba06d67a81618 Mon Sep 17 00:00:00 2001
From: Igor Ostapenko <igoreisberg at gmail.com>
Date: Sat, 18 Dec 2021 18:01:46 +0200
Subject: erofs-utils: fsck support for --extract=X to extract to path X

Extracts dirs, regular files and symlinks (overwrite enabled with warnings,
mainly for use with WSL for debugging, in case certain files overlap,
e.g. "path/to/file/alarm" and "path/to/file/Alarm").
Allocation for extract_path is done only once, then the buffer is reused.
Raw and compressed data chunks are handled with a unified function to avoid
duplication; compressed data is verified linearly (with
EROFS_GET_BLOCKS_FIEMAP) instead of by lookback, as it's problematic to
extract data when looping backwards.

Signed-off-by: Igor Ostapenko <igoreisberg at gmail.com>
---
 fsck/main.c | 482 ++++++++++++++++++++++++++++++++++++----------------
 mkfs/main.c |   2 +-
 2 files changed, 339 insertions(+), 145 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index 30d0a1b..36d5d76 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -6,6 +6,8 @@
 #include <stdlib.h>
 #include <getopt.h>
 #include <time.h>
+#include <utime.h>
+#include <unistd.h>
 #include <sys/stat.h>
 #include "erofs/print.h"
 #include "erofs/io.h"
@@ -18,6 +20,10 @@ struct erofsfsck_cfg {
  bool corrupted;
  bool print_comp_ratio;
  bool check_decomp;
+ char *extract_path;
+ size_t extract_pos;
+ int extract_fd;
+ bool preserve;
  u64 physical_blocks;
  u64 logical_blocks;
 };
@@ -25,8 +31,9 @@ static struct erofsfsck_cfg fsckcfg;

 static struct option long_options[] = {
  {"help", no_argument, 0, 1},
- {"extract", no_argument, 0, 2},
- {"device", required_argument, 0, 3},
+ {"extract", optional_argument, 0, 2},
+ {"preserve", no_argument, 0, 3},
+ {"device", required_argument, 0, 4},
  {0, 0, 0, 0},
 };

@@ -34,12 +41,13 @@ static void usage(void)
 {
  fputs("usage: [options] IMAGE\n\n"
        "Check erofs filesystem integrity of IMAGE, and [options] are:\n"
-       " -V              print the version number of fsck.erofs and exit.\n"
+       " -V              print the version number of fsck.erofs and exit\n"
        " -d#             set output message level to # (maximum 9)\n"
        " -p              print total compression ratio of all files\n"
        " --device=X      specify an extra device to be used together\n"
-       " --extract       check if all files are well encoded\n"
-       " --help          display this help and exit.\n",
+       " --extract[=X]   check if all files are well encoded, optionally extract to X\n"
+       " --preserve      preserve mode, owner and group (--extract=X is required)\n"
+       " --help          display this help and exit\n",
        stderr);
 }

@@ -74,8 +82,29 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
  exit(0);
  case 2:
  fsckcfg.check_decomp = true;
+ if (optarg) {
+ size_t len = strlen(optarg);
+ if (len == 0)
+ return -EINVAL;
+ /* remove trailing slashes except root */
+ while (len > 1 && optarg[len - 1] == '/')
+ len--;
+
+ fsckcfg.extract_path = malloc(PATH_MAX);
+ if (!fsckcfg.extract_path)
+ return -ENOMEM;
+
+ strncpy(fsckcfg.extract_path, optarg, len);
+ fsckcfg.extract_path[len] = '\0';
+ if (optarg[0] == '/')
+ len = 0;
+ fsckcfg.extract_pos = len;
+ }
  break;
  case 3:
+ fsckcfg.preserve = true;
+ break;
+ case 4:
  ret = blob_open_ro(optarg);
  if (ret)
  return ret;
@@ -89,6 +118,9 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
  if (optind >= argc)
  return -EINVAL;

+ if (fsckcfg.preserve && !fsckcfg.extract_path)
+ return -EINVAL;
+
  cfg.c_img_path = strdup(argv[optind++]);
  if (!cfg.c_img_path)
  return -ENOMEM;
@@ -100,6 +132,25 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
  return 0;
 }

+static void erofsfsck_restore_stat(struct erofs_inode *inode, char *path)
+{
+ int ret;
+ struct utimbuf ut;
+
+ ret = chmod(path, inode->i_mode);
+ if (ret < 0)
+ erofs_warn("failed to set permissions: %s", path);
+
+ ret = chown(path, inode->i_uid, inode->i_gid);
+ if (ret < 0)
+ erofs_warn("failed to change ownership: %s", path);
+
+ ut.actime = inode->i_ctime;
+ ut.modtime = inode->i_ctime;
+ if (utime(path, &ut) < 0)
+ erofs_warn("failed to set times: %s", path);
+}
+
 static int erofs_check_sb_chksum(void)
 {
  int ret;
@@ -127,137 +178,6 @@ static int erofs_check_sb_chksum(void)
  return 0;
 }

-static int verify_uncompressed_inode(struct erofs_inode *inode)
-{
- struct erofs_map_blocks map = {
- .index = UINT_MAX,
- };
- int ret;
- erofs_off_t ptr = 0;
- u64 i_blocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
-
- while (ptr < inode->i_size) {
- map.m_la = ptr;
- ret = erofs_map_blocks(inode, &map, 0);
- if (ret)
- return ret;
-
- if (map.m_plen != map.m_llen || ptr != map.m_la) {
- erofs_err("broken data chunk layout m_la %" PRIu64 " ptr %" PRIu64 " m_llen %" PRIu64 " m_plen %" PRIu64,
-   map.m_la, ptr, map.m_llen, map.m_plen);
- return -EFSCORRUPTED;
- }
-
- if (!(map.m_flags & EROFS_MAP_MAPPED) && !map.m_llen) {
- /* reached EOF */
- ptr = inode->i_size;
- continue;
- }
-
- ptr += map.m_llen;
- }
-
- if (fsckcfg.print_comp_ratio) {
- fsckcfg.logical_blocks += i_blocks;
- fsckcfg.physical_blocks += i_blocks;
- }
-
- return 0;
-}
-
-static int verify_compressed_inode(struct erofs_inode *inode)
-{
- struct erofs_map_blocks map = {
- .index = UINT_MAX,
- };
- struct erofs_map_dev mdev;
- int ret = 0;
- u64 pchunk_len = 0;
- erofs_off_t end = inode->i_size;
- unsigned int raw_size = 0, buffer_size = 0;
- char *raw = NULL, *buffer = NULL;
-
- while (end > 0) {
- map.m_la = end - 1;
-
- ret = z_erofs_map_blocks_iter(inode, &map, 0);
- if (ret)
- goto out;
-
- if (end > map.m_la + map.m_llen) {
- erofs_err("broken compressed chunk layout m_la %" PRIu64 " m_llen %" PRIu64 " end %" PRIu64,
-   map.m_la, map.m_llen, end);
- ret = -EFSCORRUPTED;
- goto out;
- }
-
- pchunk_len += map.m_plen;
- end = map.m_la;
-
- if (!fsckcfg.check_decomp || !(map.m_flags & EROFS_MAP_MAPPED))
- continue;
-
- if (map.m_plen > raw_size) {
- raw_size = map.m_plen;
- raw = realloc(raw, raw_size);
- BUG_ON(!raw);
- }
-
- if (map.m_llen > buffer_size) {
- buffer_size = map.m_llen;
- buffer = realloc(buffer, buffer_size);
- BUG_ON(!buffer);
- }
-
- mdev = (struct erofs_map_dev) {
- .m_deviceid = map.m_deviceid,
- .m_pa = map.m_pa,
- };
- ret = erofs_map_dev(&sbi, &mdev);
- if (ret) {
- erofs_err("failed to map device of m_pa %" PRIu64 ", m_deviceid %u @ nid %llu: %d",
-   map.m_pa, map.m_deviceid, inode->nid | 0ULL, ret);
- goto out;
- }
-
- ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
- if (ret < 0) {
- erofs_err("failed to read compressed data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
-   mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
- goto out;
- }
-
- ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
- .in = raw,
- .out = buffer,
- .decodedskip = 0,
- .inputsize = map.m_plen,
- .decodedlength = map.m_llen,
- .alg = map.m_algorithmformat,
- .partial_decoding = 0
- });
-
- if (ret < 0) {
- erofs_err("failed to decompress data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
-   mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
- goto out;
- }
- }
-
- if (fsckcfg.print_comp_ratio) {
- fsckcfg.logical_blocks +=
- DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
- fsckcfg.physical_blocks +=
- DIV_ROUND_UP(pchunk_len, EROFS_BLKSIZ);
- }
-out:
- if (raw)
- free(raw);
- if (buffer)
- free(buffer);
- return ret < 0 ? ret : 0;
-}
-
 static int erofs_verify_xattr(struct erofs_inode *inode)
 {
  unsigned int xattr_hdr_size = sizeof(struct erofs_xattr_ibody_header);
@@ -338,7 +258,16 @@ out:

 static int erofs_verify_inode_data(struct erofs_inode *inode)
 {
- int ret;
+ struct erofs_map_blocks map = {
+ .index = UINT_MAX,
+ };
+ struct erofs_map_dev mdev;
+ int ret = 0;
+ bool compressed;
+ erofs_off_t ptr = 0;
+ u64 pchunk_len = 0;
+ unsigned int raw_size = 0, buffer_size = 0;
+ char *raw = NULL, *buffer = NULL;

  erofs_dbg("verify data chunk of nid(%llu): type(%d)",
    inode->nid | 0ULL, inode->datalayout);
@@ -347,30 +276,275 @@ static int erofs_verify_inode_data(struct erofs_inode *inode)
  case EROFS_INODE_FLAT_PLAIN:
  case EROFS_INODE_FLAT_INLINE:
  case EROFS_INODE_CHUNK_BASED:
- ret = verify_uncompressed_inode(inode);
+ compressed = false;
  break;
  case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
  case EROFS_INODE_FLAT_COMPRESSION:
- ret = verify_compressed_inode(inode);
+ compressed = true;
  break;
  default:
- ret = -EINVAL;
- break;
+ erofs_err("unknown datalayout");
+ return -EINVAL;
  }

+ while (ptr < inode->i_size) {
+ map.m_la = ptr;
+ if (compressed)
+ ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
+ else
+ ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
+ if (ret)
+ goto out;
+
+ if (compressed) {
+ if (ptr != map.m_la || map.m_la + map.m_llen > inode->i_size) {
+ erofs_err("broken compressed chunk layout ptr %" PRIu64 " m_la %" PRIu64 " m_llen %" PRIu64 " i_size %" PRIu64,
+   ptr, map.m_la, map.m_llen, inode->i_size);
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+ } else {
+ if (ptr != map.m_la || map.m_llen != map.m_plen) {
+ erofs_err("broken data chunk layout ptr %" PRIu64 " m_la %" PRIu64 " m_llen %" PRIu64 " m_plen %" PRIu64,
+   ptr, map.m_la, map.m_llen, map.m_plen);
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+
+ if (map.m_la + map.m_llen > inode->i_size)
+ map.m_llen = inode->i_size - map.m_la;
+ }
+
+ pchunk_len += map.m_plen;
+ ptr += map.m_llen;
+
+ /* reached EOF? */
+ if (!(map.m_flags & EROFS_MAP_MAPPED) && !map.m_llen)
+ break;
+
+ /* should skip decomp? */
+ if (!fsckcfg.check_decomp)
+ continue;
+
+ if (map.m_plen > raw_size) {
+ raw_size = map.m_plen;
+ raw = realloc(raw, raw_size);
+ BUG_ON(!raw);
+ }
+
+ if (compressed && map.m_llen > buffer_size) {
+ buffer_size = map.m_llen;
+ buffer = realloc(buffer, buffer_size);
+ BUG_ON(!buffer);
+ }
+
+ mdev = (struct erofs_map_dev) {
+ .m_deviceid = map.m_deviceid,
+ .m_pa = map.m_pa,
+ };
+ ret = erofs_map_dev(&sbi, &mdev);
+ if (ret) {
+ erofs_err("failed to map device of m_pa %" PRIu64 ", m_deviceid %u @ nid %llu: %d",
+   map.m_pa, map.m_deviceid, inode->nid | 0ULL, ret);
+ goto out;
+ }
+
+ ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
+ if (ret < 0) {
+ erofs_err("failed to read data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
+   mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
+ goto out;
+ }
+
+ if (compressed) {
+ ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
+ .in = raw,
+ .out = buffer,
+ .decodedskip = 0,
+ .inputsize = map.m_plen,
+ .decodedlength = map.m_llen,
+ .alg = map.m_algorithmformat,
+ .partial_decoding = 0
+ });
+
+ if (ret < 0) {
+ erofs_err("failed to decompress data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
+   mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
+ goto out;
+ }
+ }
+
+ if (fsckcfg.extract_fd != -1 &&
+   write(fsckcfg.extract_fd, compressed ? buffer : raw, map.m_llen) < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ if (fsckcfg.print_comp_ratio) {
+ fsckcfg.logical_blocks +=
+ DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+ fsckcfg.physical_blocks +=
+ DIV_ROUND_UP(pchunk_len, EROFS_BLKSIZ);
+ }
+out:
+ if (raw)
+ free(raw);
+ if (buffer)
+ free(buffer);
  if (ret == -EIO)
  erofs_err("I/O error occurred when verifying data chunk of nid(%llu)",
    inode->nid | 0ULL);
+ return ret < 0 ? ret : 0;
+}
+
+static inline int erofs_extract_dir(struct erofs_inode *inode)
+{
+ int ret;
+ struct stat sb;
+
+ /* verify data chunk layout */
+ ret = erofs_verify_inode_data(inode);
+ if (ret)
+ return ret;
+
+ erofs_dbg("create directory on path: %s", fsckcfg.extract_path);
+
+ if (!lstat(fsckcfg.extract_path, &sb)) {
+ if (!S_ISDIR(sb.st_mode)) {
+ erofs_err("path is not a directory: %s", fsckcfg.extract_path);
+ return -EIO;
+ }
+ } else if (errno != ENOENT || mkdir(fsckcfg.extract_path, S_IRWXU) < 0) {
+ erofs_err("failed to create directory: %s", fsckcfg.extract_path);
+ return -EIO;
+ }
+
+ if (fsckcfg.preserve)
+ erofsfsck_restore_stat(inode, fsckcfg.extract_path);
+ return 0;
+}
+
+static inline int erofs_extract_file(struct erofs_inode *inode)
+{
+ int ret;
+ struct stat sb;
+ int fsync_fail, close_fail;
+
+ erofs_dbg("extract file to path: %s", fsckcfg.extract_path);
+
+ if (!lstat(fsckcfg.extract_path, &sb)) {
+ if (S_ISDIR(sb.st_mode)) {
+ erofs_err("path is a directory: %s", fsckcfg.extract_path);
+ return -EIO;
+ }
+ erofs_warn("overwriting: %s", fsckcfg.extract_path);
+ if (unlink(fsckcfg.extract_path) < 0) {
+ erofs_err("failed to remove file: %s", fsckcfg.extract_path);
+ return -EIO;
+ }
+ }
+
+ fsckcfg.extract_fd = open(fsckcfg.extract_path, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);
+ if (fsckcfg.extract_fd < 0) {
+ erofs_err("failed to open file: %s", fsckcfg.extract_path);
+ return -EIO;
+ }
+
+ /* verify data chunk layout */
+ ret = erofs_verify_inode_data(inode);
+
+ fsync_fail = fsync(fsckcfg.extract_fd) != 0;
+ close_fail = close(fsckcfg.extract_fd) != 0;
+ fsckcfg.extract_fd = -1;
+
+ if (ret)
+ return ret;
+ if (fsync_fail || close_fail)
+ return -EIO;
+ if (fsckcfg.preserve)
+ erofsfsck_restore_stat(inode, fsckcfg.extract_path);
+ return ret;
+}
+
+static inline int erofs_extract_symlink(struct erofs_inode *inode)
+{
+ int ret;
+ struct stat sb;
+ char *buf = NULL;
+
+ /* verify data chunk layout */
+ ret = erofs_verify_inode_data(inode);
+ if (ret)
+ return ret;

+ erofs_dbg("extract symlink to path: %s", fsckcfg.extract_path);
+
+ if (!lstat(fsckcfg.extract_path, &sb)) {
+ if (S_ISDIR(sb.st_mode)) {
+ erofs_err("path is a directory: %s", fsckcfg.extract_path);
+ return -EIO;
+ }
+ erofs_warn("overwriting: %s", fsckcfg.extract_path);
+ if (unlink(fsckcfg.extract_path) < 0) {
+ erofs_err("failed to remove file: %s", fsckcfg.extract_path);
+ return -EIO;
+ }
+ }
+
+ buf = malloc(inode->i_size + 1);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = erofs_pread(inode, buf, inode->i_size, 0);
+ if (ret) {
+ erofs_err("I/O error occurred when reading symlink @ nid %llu: %d",
+   inode->nid | 0ULL, ret);
+ goto out;
+ }
+
+ buf[inode->i_size] = '\0';
+ if (symlink(buf, fsckcfg.extract_path) < 0) {
+ erofs_err("failed to create symlink: %s", fsckcfg.extract_path);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (fsckcfg.preserve)
+ erofsfsck_restore_stat(inode, fsckcfg.extract_path);
+out:
+ if (buf)
+ free(buf);
  return ret;
 }

 static int erofsfsck_dirent_iter(struct erofs_dir_context *ctx)
 {
+ int ret;
+ size_t prev_pos = fsckcfg.extract_pos;
+
  if (ctx->dot_dotdot)
  return 0;

- return erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid);
+ if (fsckcfg.extract_path) {
+ size_t curr_pos = prev_pos;
+
+ fsckcfg.extract_path[curr_pos++] = '/';
+ strncpy(fsckcfg.extract_path + curr_pos, ctx->dname, ctx->de_namelen);
+ curr_pos += ctx->de_namelen;
+ fsckcfg.extract_path[curr_pos] = '\0';
+ fsckcfg.extract_pos = curr_pos;
+ }
+
+ ret = erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid);
+
+ if (fsckcfg.extract_path) {
+ fsckcfg.extract_path[prev_pos] = '\0';
+ fsckcfg.extract_pos = prev_pos;
+ }
+ return ret;
 }

 static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid)
@@ -394,8 +568,25 @@ static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid)
  if (ret)
  goto out;

- /* verify data chunk layout */
- ret = erofs_verify_inode_data(&inode);
+ if (fsckcfg.extract_path) {
+ switch (inode.i_mode & S_IFMT) {
+ case S_IFDIR:
+ ret = erofs_extract_dir(&inode);
+ break;
+ case S_IFREG:
+ ret = erofs_extract_file(&inode);
+ break;
+ case S_IFLNK:
+ ret = erofs_extract_symlink(&inode);
+ break;
+ default:
+ goto verify;
+ }
+ } else {
+verify:
+ /* verify data chunk layout */
+ ret = erofs_verify_inode_data(&inode);
+ }
  if (ret)
  goto out;

@@ -425,6 +616,9 @@ int main(int argc, char **argv)
  fsckcfg.corrupted = false;
  fsckcfg.print_comp_ratio = false;
  fsckcfg.check_decomp = false;
+ fsckcfg.extract_path = NULL;
+ fsckcfg.extract_pos = 0;
+ fsckcfg.extract_fd = -1;
  fsckcfg.logical_blocks = 0;
  fsckcfg.physical_blocks = 0;

diff --git a/mkfs/main.c b/mkfs/main.c
index 90cedde..1787b2c 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -589,7 +589,7 @@ int main(int argc, char **argv)
  err = lstat64(cfg.c_src_path, &st);
  if (err)
  return 1;
- if ((st.st_mode & S_IFMT) != S_IFDIR) {
+ if (!S_ISDIR(st.st_mode)) {
  erofs_err("root of the filesystem is not a directory - %s",
    cfg.c_src_path);
  usage();
-- 
2.30.2
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ozlabs.org/pipermail/linux-erofs/attachments/20211218/fb22b415/attachment.htm>


More information about the Linux-erofs mailing list