erofs-utils: fsck support for --extract=X to extract to path X

Gao Xiang hsiangkao at linux.alibaba.com
Sun Dec 19 03:51:44 AEDT 2021


Hi Igor,

On Sat, Dec 18, 2021 at 06:06:54PM +0200, Igor Eisberg wrote:
> From 9b3c3256d3a630a7bfe825201e9ba06d67a81618 Mon Sep 17 00:00:00 2001
> From: Igor Ostapenko <igoreisberg at gmail.com>
> Date: Sat, 18 Dec 2021 18:01:46 +0200
> Subject: erofs-utils: fsck support for --extract=X to extract to path X
> 
> Extracts dirs, regular files and symlinks (overwrite enabled with warnings,
> mainly for use with WSL for debugging, in case certain files overlap,
> i.e. "path/to/file/alarm" and "path/to/file/Alarm").
> Allocation for extract_path is done only once, then the buffer is reused.
> Raw and compressed data chunks are handled with a unified function to avoid
> repeats, compressed data is verified linearly (with EROFS_GET_BLOCKS_FIEMAP)
> instead of lookback, as it's problematic to extract data when looping
> backwards.
> 

Thanks for the extraction patch update!

Regardless of the patch format, the patch itself seems to be broken...
I think that is due to the email client; maybe this document helps:
https://www.kernel.org/doc/html/latest/process/email-clients.html

(Actually, sending patches by email is common practice for most
 communities that use a mailing list.)

I might need to handle the ztailpacking feature this weekend, so I will
look into this later. ;)

Thanks,
Gao Xiang

> Signed-off-by: Igor Ostapenko <igoreisberg at gmail.com>
> ---
>  fsck/main.c | 482 ++++++++++++++++++++++++++++++++++++----------------
>  mkfs/main.c |   2 +-
>  2 files changed, 339 insertions(+), 145 deletions(-)
> 
> diff --git a/fsck/main.c b/fsck/main.c
> index 30d0a1b..36d5d76 100644
> --- a/fsck/main.c
> +++ b/fsck/main.c
> @@ -6,6 +6,8 @@
>  #include <stdlib.h>
>  #include <getopt.h>
>  #include <time.h>
> +#include <utime.h>
> +#include <unistd.h>
>  #include <sys/stat.h>
>  #include "erofs/print.h"
>  #include "erofs/io.h"
> @@ -18,6 +20,10 @@ struct erofsfsck_cfg {
>   bool corrupted;
>   bool print_comp_ratio;
>   bool check_decomp;
> + char *extract_path;
> + size_t extract_pos;
> + int extract_fd;
> + bool preserve;
>   u64 physical_blocks;
>   u64 logical_blocks;
>  };
> @@ -25,8 +31,9 @@ static struct erofsfsck_cfg fsckcfg;
> 
>  static struct option long_options[] = {
>   {"help", no_argument, 0, 1},
> - {"extract", no_argument, 0, 2},
> - {"device", required_argument, 0, 3},
> + {"extract", optional_argument, 0, 2},
> + {"preserve", no_argument, 0, 3},
> + {"device", required_argument, 0, 4},
>   {0, 0, 0, 0},
>  };
> 
> @@ -34,12 +41,13 @@ static void usage(void)
>  {
>   fputs("usage: [options] IMAGE\n\n"
>         "Check erofs filesystem integrity of IMAGE, and [options] are:\n"
> -       " -V              print the version number of fsck.erofs and
> exit.\n"
> +       " -V              print the version number of fsck.erofs and exit\n"
>         " -d#             set output message level to # (maximum 9)\n"
>         " -p              print total compression ratio of all files\n"
>         " --device=X      specify an extra device to be used together\n"
> -       " --extract       check if all files are well encoded\n"
> -       " --help          display this help and exit.\n",
> +       " --extract[=X]   check if all files are well encoded, optionally
> extract to X\n"
> +       " --preserve      preserve mode, owner and group (--extract=X is
> required)\n"
> +       " --help          display this help and exit\n",
>         stderr);
>  }
> 
> @@ -74,8 +82,29 @@ static int erofsfsck_parse_options_cfg(int argc, char
> **argv)
>   exit(0);
>   case 2:
>   fsckcfg.check_decomp = true;
> + if (optarg) {
> + size_t len = strlen(optarg);
> + if (len == 0)
> + return -EINVAL;
> + /* remove trailing slashes except root */
> + while (len > 1 && optarg[len - 1] == '/')
> + len--;
> +
> + fsckcfg.extract_path = malloc(PATH_MAX);
> + if (!fsckcfg.extract_path)
> + return -ENOMEM;
> +
> + strncpy(fsckcfg.extract_path, optarg, len);
> + fsckcfg.extract_path[len] = '\0';
> + if (optarg[0] == '/')
> + len = 0;
> + fsckcfg.extract_pos = len;
> + }
>   break;
>   case 3:
> + fsckcfg.preserve = true;
> + break;
> + case 4:
>   ret = blob_open_ro(optarg);
>   if (ret)
>   return ret;
> @@ -89,6 +118,9 @@ static int erofsfsck_parse_options_cfg(int argc, char
> **argv)
>   if (optind >= argc)
>   return -EINVAL;
> 
> + if (fsckcfg.preserve && !fsckcfg.extract_path)
> + return -EINVAL;
> +
>   cfg.c_img_path = strdup(argv[optind++]);
>   if (!cfg.c_img_path)
>   return -ENOMEM;
> @@ -100,6 +132,25 @@ static int erofsfsck_parse_options_cfg(int argc, char
> **argv)
>   return 0;
>  }
> 
> +static void erofsfsck_restore_stat(struct erofs_inode *inode, char *path)
> +{
> + int ret;
> + struct utimbuf ut;
> +
> + ret = chmod(path, inode->i_mode);
> + if (ret < 0)
> + erofs_warn("failed to set permissions: %s", path);
> +
> + ret = chown(path, inode->i_uid, inode->i_gid);
> + if (ret < 0)
> + erofs_warn("failed to change ownership: %s", path);
> +
> + ut.actime = inode->i_ctime;
> + ut.modtime = inode->i_ctime;
> + if (utime(path, &ut) < 0)
> + erofs_warn("failed to set times: %s", path);
> +}
> +
>  static int erofs_check_sb_chksum(void)
>  {
>   int ret;
> @@ -127,137 +178,6 @@ static int erofs_check_sb_chksum(void)
>   return 0;
>  }
> 
> -static int verify_uncompressed_inode(struct erofs_inode *inode)
> -{
> - struct erofs_map_blocks map = {
> - .index = UINT_MAX,
> - };
> - int ret;
> - erofs_off_t ptr = 0;
> - u64 i_blocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
> -
> - while (ptr < inode->i_size) {
> - map.m_la = ptr;
> - ret = erofs_map_blocks(inode, &map, 0);
> - if (ret)
> - return ret;
> -
> - if (map.m_plen != map.m_llen || ptr != map.m_la) {
> - erofs_err("broken data chunk layout m_la %" PRIu64 " ptr %" PRIu64 "
> m_llen %" PRIu64 " m_plen %" PRIu64,
> -   map.m_la, ptr, map.m_llen, map.m_plen);
> - return -EFSCORRUPTED;
> - }
> -
> - if (!(map.m_flags & EROFS_MAP_MAPPED) && !map.m_llen) {
> - /* reached EOF */
> - ptr = inode->i_size;
> - continue;
> - }
> -
> - ptr += map.m_llen;
> - }
> -
> - if (fsckcfg.print_comp_ratio) {
> - fsckcfg.logical_blocks += i_blocks;
> - fsckcfg.physical_blocks += i_blocks;
> - }
> -
> - return 0;
> -}
> -
> -static int verify_compressed_inode(struct erofs_inode *inode)
> -{
> - struct erofs_map_blocks map = {
> - .index = UINT_MAX,
> - };
> - struct erofs_map_dev mdev;
> - int ret = 0;
> - u64 pchunk_len = 0;
> - erofs_off_t end = inode->i_size;
> - unsigned int raw_size = 0, buffer_size = 0;
> - char *raw = NULL, *buffer = NULL;
> -
> - while (end > 0) {
> - map.m_la = end - 1;
> -
> - ret = z_erofs_map_blocks_iter(inode, &map, 0);
> - if (ret)
> - goto out;
> -
> - if (end > map.m_la + map.m_llen) {
> - erofs_err("broken compressed chunk layout m_la %" PRIu64 " m_llen %"
> PRIu64 " end %" PRIu64,
> -   map.m_la, map.m_llen, end);
> - ret = -EFSCORRUPTED;
> - goto out;
> - }
> -
> - pchunk_len += map.m_plen;
> - end = map.m_la;
> -
> - if (!fsckcfg.check_decomp || !(map.m_flags & EROFS_MAP_MAPPED))
> - continue;
> -
> - if (map.m_plen > raw_size) {
> - raw_size = map.m_plen;
> - raw = realloc(raw, raw_size);
> - BUG_ON(!raw);
> - }
> -
> - if (map.m_llen > buffer_size) {
> - buffer_size = map.m_llen;
> - buffer = realloc(buffer, buffer_size);
> - BUG_ON(!buffer);
> - }
> -
> - mdev = (struct erofs_map_dev) {
> - .m_deviceid = map.m_deviceid,
> - .m_pa = map.m_pa,
> - };
> - ret = erofs_map_dev(&sbi, &mdev);
> - if (ret) {
> - erofs_err("failed to map device of m_pa %" PRIu64 ", m_deviceid %u @ nid
> %llu: %d",
> -   map.m_pa, map.m_deviceid, inode->nid | 0ULL, ret);
> - goto out;
> - }
> -
> - ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
> - if (ret < 0) {
> - erofs_err("failed to read compressed data of m_pa %" PRIu64 ", m_plen %"
> PRIu64 " @ nid %llu: %d",
> -   mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
> - goto out;
> - }
> -
> - ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
> - .in = raw,
> - .out = buffer,
> - .decodedskip = 0,
> - .inputsize = map.m_plen,
> - .decodedlength = map.m_llen,
> - .alg = map.m_algorithmformat,
> - .partial_decoding = 0
> - });
> -
> - if (ret < 0) {
> - erofs_err("failed to decompress data of m_pa %" PRIu64 ", m_plen %"
> PRIu64 " @ nid %llu: %d",
> -   mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
> - goto out;
> - }
> - }
> -
> - if (fsckcfg.print_comp_ratio) {
> - fsckcfg.logical_blocks +=
> - DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
> - fsckcfg.physical_blocks +=
> - DIV_ROUND_UP(pchunk_len, EROFS_BLKSIZ);
> - }
> -out:
> - if (raw)
> - free(raw);
> - if (buffer)
> - free(buffer);
> - return ret < 0 ? ret : 0;
> -}
> -
>  static int erofs_verify_xattr(struct erofs_inode *inode)
>  {
>   unsigned int xattr_hdr_size = sizeof(struct erofs_xattr_ibody_header);
> @@ -338,7 +258,16 @@ out:
> 
>  static int erofs_verify_inode_data(struct erofs_inode *inode)
>  {
> - int ret;
> + struct erofs_map_blocks map = {
> + .index = UINT_MAX,
> + };
> + struct erofs_map_dev mdev;
> + int ret = 0;
> + bool compressed;
> + erofs_off_t ptr = 0;
> + u64 pchunk_len = 0;
> + unsigned int raw_size = 0, buffer_size = 0;
> + char *raw = NULL, *buffer = NULL;
> 
>   erofs_dbg("verify data chunk of nid(%llu): type(%d)",
>     inode->nid | 0ULL, inode->datalayout);
> @@ -347,30 +276,275 @@ static int erofs_verify_inode_data(struct
> erofs_inode *inode)
>   case EROFS_INODE_FLAT_PLAIN:
>   case EROFS_INODE_FLAT_INLINE:
>   case EROFS_INODE_CHUNK_BASED:
> - ret = verify_uncompressed_inode(inode);
> + compressed = false;
>   break;
>   case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
>   case EROFS_INODE_FLAT_COMPRESSION:
> - ret = verify_compressed_inode(inode);
> + compressed = true;
>   break;
>   default:
> - ret = -EINVAL;
> - break;
> + erofs_err("unknown datalayout");
> + return -EINVAL;
>   }
> 
> + while (ptr < inode->i_size) {
> + map.m_la = ptr;
> + if (compressed)
> + ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
> + else
> + ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
> + if (ret)
> + goto out;
> +
> + if (compressed) {
> + if (ptr != map.m_la || map.m_la + map.m_llen > inode->i_size) {
> + erofs_err("broken compressed chunk layout ptr %" PRIu64 " m_la %" PRIu64
> " m_llen %" PRIu64 " i_size %" PRIu64,
> +   ptr, map.m_la, map.m_llen, inode->i_size);
> + ret = -EFSCORRUPTED;
> + goto out;
> + }
> + } else {
> + if (ptr != map.m_la || map.m_llen != map.m_plen) {
> + erofs_err("broken data chunk layout ptr %" PRIu64 " m_la %" PRIu64 "
> m_llen %" PRIu64 " m_plen %" PRIu64,
> +   ptr, map.m_la, map.m_llen, map.m_plen);
> + ret = -EFSCORRUPTED;
> + goto out;
> + }
> +
> + if (map.m_la + map.m_llen > inode->i_size)
> + map.m_llen = inode->i_size - map.m_la;
> + }
> +
> + pchunk_len += map.m_plen;
> + ptr += map.m_llen;
> +
> + /* reached EOF? */
> + if (!(map.m_flags & EROFS_MAP_MAPPED) && !map.m_llen)
> + break;
> +
> + /* should skip decomp? */
> + if (!fsckcfg.check_decomp)
> + continue;
> +
> + if (map.m_plen > raw_size) {
> + raw_size = map.m_plen;
> + raw = realloc(raw, raw_size);
> + BUG_ON(!raw);
> + }
> +
> + if (compressed && map.m_llen > buffer_size) {
> + buffer_size = map.m_llen;
> + buffer = realloc(buffer, buffer_size);
> + BUG_ON(!buffer);
> + }
> +
> + mdev = (struct erofs_map_dev) {
> + .m_deviceid = map.m_deviceid,
> + .m_pa = map.m_pa,
> + };
> + ret = erofs_map_dev(&sbi, &mdev);
> + if (ret) {
> + erofs_err("failed to map device of m_pa %" PRIu64 ", m_deviceid %u @ nid
> %llu: %d",
> +   map.m_pa, map.m_deviceid, inode->nid | 0ULL, ret);
> + goto out;
> + }
> +
> + ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
> + if (ret < 0) {
> + erofs_err("failed to read data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @
> nid %llu: %d",
> +   mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
> + goto out;
> + }
> +
> + if (compressed) {
> + ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
> + .in = raw,
> + .out = buffer,
> + .decodedskip = 0,
> + .inputsize = map.m_plen,
> + .decodedlength = map.m_llen,
> + .alg = map.m_algorithmformat,
> + .partial_decoding = 0
> + });
> +
> + if (ret < 0) {
> + erofs_err("failed to decompress data of m_pa %" PRIu64 ", m_plen %"
> PRIu64 " @ nid %llu: %d",
> +   mdev.m_pa, map.m_plen, inode->nid | 0ULL, ret);
> + goto out;
> + }
> + }
> +
> + if (fsckcfg.extract_fd != -1 &&
> +   write(fsckcfg.extract_fd, compressed ? buffer : raw, map.m_llen) < 0) {
> + ret = -EIO;
> + goto out;
> + }
> + }
> +
> + if (fsckcfg.print_comp_ratio) {
> + fsckcfg.logical_blocks +=
> + DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
> + fsckcfg.physical_blocks +=
> + DIV_ROUND_UP(pchunk_len, EROFS_BLKSIZ);
> + }
> +out:
> + if (raw)
> + free(raw);
> + if (buffer)
> + free(buffer);
>   if (ret == -EIO)
>   erofs_err("I/O error occurred when verifying data chunk of nid(%llu)",
>     inode->nid | 0ULL);
> + return ret < 0 ? ret : 0;
> +}
> +
> +static inline int erofs_extract_dir(struct erofs_inode *inode)
> +{
> + int ret;
> + struct stat sb;
> +
> + /* verify data chunk layout */
> + ret = erofs_verify_inode_data(inode);
> + if (ret)
> + return ret;
> +
> + erofs_dbg("create directory on path: %s", fsckcfg.extract_path);
> +
> + if (!lstat(fsckcfg.extract_path, &sb)) {
> + if (!S_ISDIR(sb.st_mode)) {
> + erofs_err("path is not a directory: %s", fsckcfg.extract_path);
> + return -EIO;
> + }
> + } else if (errno != ENOENT || mkdir(fsckcfg.extract_path, S_IRWXU) < 0) {
> + erofs_err("failed to create directory: %s", fsckcfg.extract_path);
> + return -EIO;
> + }
> +
> + if (fsckcfg.preserve)
> + erofsfsck_restore_stat(inode, fsckcfg.extract_path);
> + return 0;
> +}
> +
> +static inline int erofs_extract_file(struct erofs_inode *inode)
> +{
> + int ret;
> + struct stat sb;
> + int fsync_fail, close_fail;
> +
> + erofs_dbg("extract file to path: %s", fsckcfg.extract_path);
> +
> + if (!lstat(fsckcfg.extract_path, &sb)) {
> + if (S_ISDIR(sb.st_mode)) {
> + erofs_err("path is a directory: %s", fsckcfg.extract_path);
> + return -EIO;
> + }
> + erofs_warn("overwriting: %s", fsckcfg.extract_path);
> + if (unlink(fsckcfg.extract_path) < 0) {
> + erofs_err("failed to remove file: %s", fsckcfg.extract_path);
> + return -EIO;
> + }
> + }
> +
> + fsckcfg.extract_fd = open(fsckcfg.extract_path, O_WRONLY | O_CREAT |
> O_TRUNC, S_IRWXU);
> + if (fsckcfg.extract_fd < 0) {
> + erofs_err("failed to open file: %s", fsckcfg.extract_path);
> + return -EIO;
> + }
> +
> + /* verify data chunk layout */
> + ret = erofs_verify_inode_data(inode);
> +
> + fsync_fail = fsync(fsckcfg.extract_fd) != 0;
> + close_fail = close(fsckcfg.extract_fd) != 0;
> + fsckcfg.extract_fd = -1;
> +
> + if (ret)
> + return ret;
> + if (fsync_fail || close_fail)
> + return -EIO;
> + if (fsckcfg.preserve)
> + erofsfsck_restore_stat(inode, fsckcfg.extract_path);
> + return ret;
> +}
> +
> +static inline int erofs_extract_symlink(struct erofs_inode *inode)
> +{
> + int ret;
> + struct stat sb;
> + char *buf = NULL;
> +
> + /* verify data chunk layout */
> + ret = erofs_verify_inode_data(inode);
> + if (ret)
> + return ret;
> 
> + erofs_dbg("extract symlink to path: %s", fsckcfg.extract_path);
> +
> + if (!lstat(fsckcfg.extract_path, &sb)) {
> + if (S_ISDIR(sb.st_mode)) {
> + erofs_err("path is a directory: %s", fsckcfg.extract_path);
> + return -EIO;
> + }
> + erofs_warn("overwriting: %s", fsckcfg.extract_path);
> + if (unlink(fsckcfg.extract_path) < 0) {
> + erofs_err("failed to remove file: %s", fsckcfg.extract_path);
> + return -EIO;
> + }
> + }
> +
> + buf = malloc(inode->i_size + 1);
> + if (!buf) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + ret = erofs_pread(inode, buf, inode->i_size, 0);
> + if (ret) {
> + erofs_err("I/O error occurred when reading symlink @ nid %llu: %d",
> +   inode->nid | 0ULL, ret);
> + goto out;
> + }
> +
> + buf[inode->i_size] = '\0';
> + if (symlink(buf, fsckcfg.extract_path) < 0) {
> + erofs_err("failed to create symlink: %s", fsckcfg.extract_path);
> + ret = -EIO;
> + goto out;
> + }
> +
> + if (fsckcfg.preserve)
> + erofsfsck_restore_stat(inode, fsckcfg.extract_path);
> +out:
> + if (buf)
> + free(buf);
>   return ret;
>  }
> 
>  static int erofsfsck_dirent_iter(struct erofs_dir_context *ctx)
>  {
> + int ret;
> + size_t prev_pos = fsckcfg.extract_pos;
> +
>   if (ctx->dot_dotdot)
>   return 0;
> 
> - return erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid);
> + if (fsckcfg.extract_path) {
> + size_t curr_pos = prev_pos;
> +
> + fsckcfg.extract_path[curr_pos++] = '/';
> + strncpy(fsckcfg.extract_path + curr_pos, ctx->dname, ctx->de_namelen);
> + curr_pos += ctx->de_namelen;
> + fsckcfg.extract_path[curr_pos] = '\0';
> + fsckcfg.extract_pos = curr_pos;
> + }
> +
> + ret = erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid);
> +
> + if (fsckcfg.extract_path) {
> + fsckcfg.extract_path[prev_pos] = '\0';
> + fsckcfg.extract_pos = prev_pos;
> + }
> + return ret;
>  }
> 
>  static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid)
> @@ -394,8 +568,25 @@ static int erofsfsck_check_inode(erofs_nid_t pnid,
> erofs_nid_t nid)
>   if (ret)
>   goto out;
> 
> - /* verify data chunk layout */
> - ret = erofs_verify_inode_data(&inode);
> + if (fsckcfg.extract_path) {
> + switch (inode.i_mode & S_IFMT) {
> + case S_IFDIR:
> + ret = erofs_extract_dir(&inode);
> + break;
> + case S_IFREG:
> + ret = erofs_extract_file(&inode);
> + break;
> + case S_IFLNK:
> + ret = erofs_extract_symlink(&inode);
> + break;
> + default:
> + goto verify;
> + }
> + } else {
> +verify:
> + /* verify data chunk layout */
> + ret = erofs_verify_inode_data(&inode);
> + }
>   if (ret)
>   goto out;
> 
> @@ -425,6 +616,9 @@ int main(int argc, char **argv)
>   fsckcfg.corrupted = false;
>   fsckcfg.print_comp_ratio = false;
>   fsckcfg.check_decomp = false;
> + fsckcfg.extract_path = NULL;
> + fsckcfg.extract_pos = 0;
> + fsckcfg.extract_fd = -1;
>   fsckcfg.logical_blocks = 0;
>   fsckcfg.physical_blocks = 0;
> 
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 90cedde..1787b2c 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -589,7 +589,7 @@ int main(int argc, char **argv)
>   err = lstat64(cfg.c_src_path, &st);
>   if (err)
>   return 1;
> - if ((st.st_mode & S_IFMT) != S_IFDIR) {
> + if (!S_ISDIR(st.st_mode)) {
>   erofs_err("root of the filesystem is not a directory - %s",
>     cfg.c_src_path);
>   usage();
> -- 
> 2.30.2


More information about the Linux-erofs mailing list