[PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
Gao Xiang
hsiangkao at linux.alibaba.com
Mon Sep 13 22:46:51 AEST 2021
On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
> 在 2021/9/12 0:13, Gao Xiang 写道:
> > (+Cc Jianan.)
> >
> > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
> > > From: mpiglet <mpiglet at outlook.com>
> > >
> > > Signed-off-by: Guo Xuenan <guoxuenan at huawei.com>
> > > Signed-off-by: mpiglet <mpiglet at outlook.com>
> > > ---
> > > dump/main.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > 1 file changed, 474 insertions(+)
> > >
> > > diff --git a/dump/main.c b/dump/main.c
> > > index 25ac89f..b0acc0b 100644
> > > --- a/dump/main.c
> > > +++ b/dump/main.c
> > > @@ -19,10 +19,78 @@
> > > struct dumpcfg {
> > > bool print_superblock;
> > > + bool print_statistic;
> > > bool print_version;
> > > };
> > > static struct dumpcfg dumpcfg;
> > > +static const char chart_format[] = "%-16s %-11d %8.2f%% |%-50s|\n";
> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
> > > +static char *file_types[] = {
> > > + ".so",
> > > + ".png",
> > > + ".jpg",
> > > + ".xml",
> > > + ".html",
> > > + ".odex",
> > > + ".vdex",
> > > + ".apk",
> > > + ".ttf",
> > > + ".jar",
> > > + ".json",
> > > + ".ogg",
> > > + ".oat",
> > > + ".art",
> > > + ".rc",
> > > + ".otf",
> > > + ".txt",
> > > + "others",
> > > +};
> > > +enum {
> > > + SOFILETYPE = 0,
> > > + PNGFILETYPE,
> > > + JPEGFILETYPE,
> > > + XMLFILETYPE,
> > > + HTMLFILETYPE,
> > > + ODEXFILETYPE,
> > > + VDEXFILETYPE,
> > > + APKFILETYPE,
> > > + TTFFILETYPE,
> > > + JARFILETYPE,
> > > + JSONFILETYPE,
> > > + OGGFILETYPE,
> > > + OATFILETYPE,
> > > + ARTFILETYPE,
> > > + RCFILETYPE,
> > > + OTFFILETYPE,
> > > + TXTFILETYPE,
> > > + OTHERFILETYPE,
> > > +};
> > Why do we need enums here? Can't these be resolved with an array index?
> >
> > > +
> > > +#define FILE_SIZE_BITS 30
> > > +struct statistics {
> > > + unsigned long blocks;
> > > + unsigned long files;
> > > + unsigned long files_total_size;
> > > + unsigned long files_total_origin_size;
> > > + double compress_rate;
> > > + unsigned long compressed_files;
> > > + unsigned long uncompressed_files;
> > > +
> > > + unsigned long regular_files;
> > > + unsigned long dir_files;
> > > + unsigned long chardev_files;
> > > + unsigned long blkdev_files;
> > > + unsigned long fifo_files;
> > > + unsigned long sock_files;
> > > + unsigned long symlink_files;
> > > +
> > > + unsigned int file_type_stat[OTHERFILETYPE + 1];
> > > + unsigned int file_org_size[FILE_SIZE_BITS];
> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
> >
> > > + unsigned int file_comp_size[FILE_SIZE_BITS];
> > > +};
> > > +static struct statistics stats;
> > > +
> > > static struct option long_options[] = {
> > > {"help", no_argument, 0, 1},
> > > {0, 0, 0, 0},
> > > @@ -33,6 +101,7 @@ static void usage(void)
> > > fputs("usage: [options] erofs-image \n\n"
> > > "Dump erofs layout from erofs-image, and [options] are:\n"
> > > "-s print information about superblock\n"
> > > + "-S print statistic information of the erofs-image\n"
> > > "-v/-V print dump.erofs version info\n"
> > > "-h/--help display this help and exit\n", stderr);
> > > }
> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > > case 's':
> > > dumpcfg.print_superblock = true;
> > > break;
> > > + case 'S':
> > > + dumpcfg.print_statistic = true;
> > > + break;
> > > case 'v':
> > > case 'V':
> > > dumpfs_print_version();
> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int argc, char **argv)
> > > return 0;
> > > }
> > > +static int z_erofs_get_last_cluster_size_from_disk(struct erofs_map_blocks *map,
> > > + erofs_off_t last_cluster_size,
> > > + erofs_off_t *last_cluster_compressed_size)
> > Hmmm... do we really need the exact compressed bytes,
> > or are just the compressed blocks enough?
> >
> > "compressed blocks" can be obtained from the erofs inode.
> >
> > Btw, I do think it would be useful for fsck (to check whether an erofs image is correct).
> >
> > > +{
> > > + int ret;
> > > + int decomp_len;
> > > + int compressed_len = 0;
> > > + char *decompress;
> > > + char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
> > > +
> > > + ret = dev_read(raw, map->m_pa, map->m_plen);
> > > + if (ret < 0)
> > > + return -EIO;
> > > +
> > > + if (erofs_sb_has_lz4_0padding()) {
> > > + compressed_len = map->m_plen;
> > > + } else {
> > > + // lz4 maximum compression ratio is 255
> > > + decompress = (char *)malloc(map->m_plen * 255);
> > > + if (!decompress) {
> > > + erofs_err("allocate memory for decompress space failed");
> > > + return -1;
> > > + }
> > > + decomp_len = LZ4_decompress_safe_partial(raw, decompress,
> > > + map->m_plen, last_cluster_size,
> > > + map->m_plen * 10);
> > > + if (decomp_len < 0) {
> > > + erofs_err("decompress last cluster to get decompressed size failed");
> > > + free(decompress);
> > > + return -1;
> > > + }
> > > + compressed_len = LZ4_compress_destSize(decompress, raw,
> > > + &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
> > > + if (compressed_len < 0) {
> > > + erofs_err("compress to get last extent size failed\n");
> > > + free(decompress);
> > > + return -1;
> > > + }
> > > + free(decompress);
> > > + // due to the use of lz4hc (can use different compress level),
> > > + // our normal lz4 compress result may be bigger
> > > + compressed_len = compressed_len < map->m_plen ?
> > > + compressed_len : map->m_plen;
> > > + }
> > > +
> > > + *last_cluster_compressed_size = compressed_len;
> > > + return 0;
> > > +}
> > > +
> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
> > > + erofs_off_t *size)
> > > +{
> > > + int err;
> > > + erofs_blk_t compressedlcs;
> > > + erofs_off_t last_cluster_size;
> > > + erofs_off_t last_cluster_compressed_size;
> > > + struct erofs_map_blocks map = {
> > > + .index = UINT_MAX,
> > > + .m_la = inode->i_size - 1,
> > > + };
> > > +
> > > + err = z_erofs_map_blocks_iter(inode, &map);
> > (add Jianan here.)
> >
> > Can we port the latest erofs kernel fiemap code to erofs-utils, and add
> > some functionality to get the file distribution as well when the fs isn't
> > mounted?
> Hi Xiang,
>
> I have sent the patch and verified it with a similar function. It would be
> better to use the new interface here.
Yeah, thanks for the patch:
https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/
Hopefully Xuenan can base his work on this.
Thanks,
Gao Xiang
>
> Thanks,
> Jianan
More information about the Linux-erofs
mailing list