[RFC PATCH 2/2] erofs-utils: dump: support fragments
Yue Hu
zbestahu at gmail.com
Wed Jan 4 18:12:28 AEDT 2023
On Wed, 4 Jan 2023 14:44:35 +0800
Xiang Gao <hsiangkao at linux.alibaba.com> wrote:
> On 2023/1/4 14:16, Yue Hu wrote:
> > Hi Xiang,
> >
> > Thanks for the reviewing. I'm planning to send v2.
> >
> > On Wed, 4 Jan 2023 11:13:55 +0800
> > Xiang Gao <hsiangkao at linux.alibaba.com> wrote:
> >
> >> On 2022/12/19 17:50, Yue Hu wrote:
> >>> From: Yue Hu <huyue2 at coolpad.com>
> >>>
> >>> Add compressed fragments support for dump feature.
> >>>
> >>> Signed-off-by: Yue Hu <huyue2 at coolpad.com>
> >>> ---
> >>> dump/main.c | 78 ++++++++++++++++++++++++++++++++--------
> >>> include/erofs/internal.h | 1 +
> >>> lib/zmap.c | 2 +-
> >>> 3 files changed, 65 insertions(+), 16 deletions(-)
> >>>
> >>> diff --git a/dump/main.c b/dump/main.c
> >>> index bc4f047..d387841 100644
> >>> --- a/dump/main.c
> >>> +++ b/dump/main.c
> >>> @@ -14,6 +14,8 @@
> >>> #include "erofs/inode.h"
> >>> #include "erofs/io.h"
> >>> #include "erofs/dir.h"
> >>> +#include "erofs/compress.h"
> >>> +#include "erofs/fragments.h"
> >>> #include "../lib/liberofs_private.h"
> >>>
> >>> #ifdef HAVE_LIBUUID
> >>> @@ -96,6 +98,7 @@ static struct erofsdump_feature feature_lists[] = {
> >>> { false, EROFS_FEATURE_INCOMPAT_CHUNKED_FILE, "chunked_file" },
> >>> { false, EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, "device_table" },
> >>> { false, EROFS_FEATURE_INCOMPAT_ZTAILPACKING, "ztailpacking" },
> >>> + { false, EROFS_FEATURE_INCOMPAT_FRAGMENTS, "fragments" },
> >>> };
> >>>
> >>> static int erofsdump_readdir(struct erofs_dir_context *ctx);
> >>> @@ -285,10 +288,12 @@ static int erofsdump_readdir(struct erofs_dir_context *ctx)
> >>> }
> >>>
> >>> if (S_ISREG(vi.i_mode)) {
> >>> - stats.files_total_origin_size += vi.i_size;
> >>> - inc_file_extension_count(ctx->dname, ctx->de_namelen);
> >>> + if (!erofs_is_packed_inode(&vi)) {
> >>> + stats.files_total_origin_size += vi.i_size;
> >>> + inc_file_extension_count(ctx->dname, ctx->de_namelen);
> >>> + update_file_size_statistics(vi.i_size, true);
> >>> + }
> >>> stats.files_total_size += occupied_size;
> >>> - update_file_size_statistics(vi.i_size, true);
> >>> update_file_size_statistics(occupied_size, false);
> >>> }
> >>>
> >>> @@ -320,6 +325,10 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>> "%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" PRIu64 "..%10" PRIu64 " | %7" PRIu64 "\n",
> >>> "%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" PRIu64 "..%10" PRIu64 " | %7" PRIu64 " # device %u\n"
> >>> };
> >>> + const char *frag_ext_fmt[] = {
> >>> + "%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 "\n",
> >>> + "%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " # device %u\n"
> >>> + };
> >>
> >> Why do we need another fmt rather than just fill fragment extent with
> >
> > I also plan to remove this fmt in v2.
> >
> >> physical addr 0?
> >
> > Okay. Just leave latter physical length as empty?
> >
> >>
> >>> int err, i;
> >>> erofs_off_t size;
> >>> u16 access_mode;
> >>> @@ -348,16 +357,31 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>> }
> >>> }
> >>>
> >>> + if (erofs_inode_is_data_compressed(inode.datalayout)) {
> >>> + err = z_erofs_fill_inode_lazy(&inode);
> >>> + if (err) {
> >>> + erofs_err("read inode map header failed @ nid %llu",
> >>> + inode.nid | 0ULL);
> >>> + return;
> >>> + }
> >>> + }
> >>
> >> Why do we need to call z_erofs_fill_inode_lazy here?
> >
> > Used to check if the file has fragment. If yes, i think 'Compression ratio' can not be showed
> > exactly.
> >
> > So, I'd like to show fragment information related instead as below:
> >
> > NID: 715 Links: 1 Layout: 1 Fragment: entire file
> >
> > NID: 715 Links: 1 Layout: 3 Fragment: tail of file
>
> I think no need to distinguish these two stuffs considering extra
> complexity.
So, no need to check fragment or not? just keep the original look?
>
> >
> >>
> >>> +
> >>> err = erofs_get_occupied_size(&inode, &size);
> >>> if (err) {
> >>> erofs_err("get file size failed @ nid %llu", inode.nid | 0ULL);
> >>> return;
> >>> }
> >>>
> >>> - err = erofs_get_pathname(inode.nid, path, sizeof(path));
> >>> - if (err < 0) {
> >>> - erofs_err("file path not found @ nid %llu", inode.nid | 0ULL);
> >>> - return;
> >>
> >> Can we just ignore pathname if it doesn't exist?
> >
> > Okay.
> >
> >>
> >>> + if (erofs_is_packed_inode(&inode) { > + strncpy(path, EROFS_PACKED_INODE, sizeof(path) - 1);
> >>> + path[sizeof(path) - 1] = '\0';
> >>> + } else {
> >>> + err = erofs_get_pathname(inode.nid, path, sizeof(path));
> >>> + if (err < 0) {
> >>> + erofs_err("file path not found @ nid %llu",
> >>> + inode.nid | 0ULL);
> >>> + return;
> >>> + }
> >>> }
> >>>
> >>> strftime(timebuf, sizeof(timebuf),
> >>> @@ -372,9 +396,13 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>> file_category_types[erofs_mode_to_ftype(inode.i_mode)]);
> >>> fprintf(stdout, "NID: %" PRIu64 " ", inode.nid);
> >>> fprintf(stdout, "Links: %u ", inode.i_nlink);
> >>> - fprintf(stdout, "Layout: %d Compression ratio: %.2f%%\n",
> >>> - inode.datalayout,
> >>> - (double)(100 * size) / (double)(inode.i_size));
> >>> + if (inode.z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
> >>> + fprintf(stdout, "Layout: %d Fragment: %s\n",
> >>> + inode.datalayout, size ? "partial" : "full");
> >>> + else
> >>> + fprintf(stdout, "Layout: %d Compression ratio: %.2f%%\n",
> >>> + inode.datalayout,
> >>> + (double)(100 * size) / (double)(inode.i_size));
> >>> fprintf(stdout, "Inode size: %d ", inode.inode_isize);
> >>> fprintf(stdout, "Extent size: %u ", inode.extent_isize);
> >>> fprintf(stdout, "Xattr size: %u\n", inode.xattr_isize);
> >>> @@ -404,7 +432,8 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>> if (!dumpcfg.show_extent)
> >>> return;
> >>>
> >>> - fprintf(stdout, "\n Ext: logical offset | length : physical offset | length\n");
> >>> + fprintf(stdout, "\n Ext: logical offset | length%s\n",
> >>> + size ? " : physical offset | length" : "");
> >>> while (map.m_la < inode.i_size) {
> >>> struct erofs_map_dev mdev;
> >>>
> >>> @@ -425,10 +454,17 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>> return;
> >>> }
> >>>
> >>> - fprintf(stdout, ext_fmt[!!mdev.m_deviceid], extent_count++,
> >>> - map.m_la, map.m_la + map.m_llen, map.m_llen,
> >>> - mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
> >>> - mdev.m_deviceid);
> >>> + if (map.m_flags & EROFS_MAP_FRAGMENT)
> >>> + fprintf(stdout, frag_ext_fmt[!!mdev.m_deviceid],
> >>> + extent_count++,
> >>> + map.m_la, map.m_la + map.m_llen, map.m_llen,
> >>> + mdev.m_deviceid);
> >>
> >> except for the last fragment extent, the other extents all have physical
> >> addr and length...
> >
> > It's true.
> >
> > What bothers me is how to better show fragment extent based on current format (especially
> > no physical address and offset for fragment extent).
> >
> > Now, i think the fragment extent should be showed like below (just use a '0' to represent it)?
> >
> > a) the whole file is a fragment:
> >
> > Ext: logical offset | length : physical offset | length
> > 1: 0.. 1046 | 1046 : 0
>
>
> Can we use
> Ext: logical offset | length : physical offset | length
> 1: 0.. 1046 | 1046 : 0..0 | 0
>
> >
> > b) the tail of file is a fragment:
> >
> > Ext: logical offset | length : physical offset | length
> > 0: 0.. 19672 | 19672 : 1314816.. 1323008 | 8192
> > ...
> > 13: 165989.. 182474 | 16485 : 1421312.. 1429504 | 8192
> > 14: 182474.. 196435 | 13961 : 0
>
> Here
> 14: 182474.. 196435 | 13961 : 0.. 0 | 0
>
> as well?
No problem now.
>
> >
> >>
> >>> + else
> >>> + fprintf(stdout, ext_fmt[!!mdev.m_deviceid],
> >>> + extent_count++,
> >>> + map.m_la, map.m_la + map.m_llen, map.m_llen,
> >>> + mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
> >>> + mdev.m_deviceid);
> >>> map.m_la += map.m_llen;
> >>> }
> >>> fprintf(stdout, "%s: %d extents found\n", path, extent_count);
> >>> @@ -537,6 +573,15 @@ static void erofsdump_print_statistic(void)
> >>> erofs_err("read dir failed");
> >>> return;
> >>> }
> >>> + if (erofs_sb_has_fragments()) {
> >>> + err = erofsdump_readdir(&(struct erofs_dir_context) {
> >>> + .de_nid = sbi.packed_nid
> >>> + });
> >>
> >> why do we need this?
> >
> > Since some status need to be updated such as stats.files_total_size.
>
> why should packed file be recorded in files_total_size?
Mainly related to compression ratio. files_total_size records on-disk (un)compressed size.
296 stats.files_total_size += occupied_size;
544 stats.compress_rate = (double)(100 * stats.files_total_size) /
545 (double)(stats.files_total_origin_size);
The file system compression ratio will be incorrect (since all compressed fragments are
missing).
>
> Thanks,
> Gao Xiang
>
> >
> >>
> >> Thanks,
> >> Gao Xiang
More information about the Linux-erofs
mailing list