[PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem
Guo Xuenan
guoxuenan at huawei.com
Tue Sep 14 12:31:37 AEST 2021
OK, I will send out patch v2 today, and it will be based on Jianan's
work.
在 2021/9/13 20:46, Gao Xiang 写道:
> On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
>> 在 2021/9/12 0:13, Gao Xiang 写道:
>> > (+Cc Jianan.)
>> > > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
>> > > From: mpiglet <mpiglet at outlook.com>
>> > > > > Signed-off-by: Guo Xuenan <guoxuenan at huawei.com>
>> > > Signed-off-by: mpiglet <mpiglet at outlook.com>
>> > > ---
>> > > dump/main.c | 474
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> > > 1 file changed, 474 insertions(+)
>> > > > > diff --git a/dump/main.c b/dump/main.c
>> > > index 25ac89f..b0acc0b 100644
>> > > --- a/dump/main.c
>> > > +++ b/dump/main.c
>> > > @@ -19,10 +19,78 @@
>> > > struct dumpcfg {
>> > > bool print_superblock;
>> > > + bool print_statistic;
>> > > bool print_version;
>> > > };
>> > > static struct dumpcfg dumpcfg;
>> > > +static const char chart_format[] = "%-16s %-11d %8.2f%%
>> |%-50s|\n";
>> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
>> > > +static char *file_types[] = {
>> > > + ".so",
>> > > + ".png",
>> > > + ".jpg",
>> > > + ".xml",
>> > > + ".html",
>> > > + ".odex",
>> > > + ".vdex",
>> > > + ".apk",
>> > > + ".ttf",
>> > > + ".jar",
>> > > + ".json",
>> > > + ".ogg",
>> > > + ".oat",
>> > > + ".art",
>> > > + ".rc",
>> > > + ".otf",
>> > > + ".txt",
>> > > + "others",
>> > > +};
>> > > +enum {
>> > > + SOFILETYPE = 0,
>> > > + PNGFILETYPE,
>> > > + JPEGFILETYPE,
>> > > + XMLFILETYPE,
>> > > + HTMLFILETYPE,
>> > > + ODEXFILETYPE,
>> > > + VDEXFILETYPE,
>> > > + APKFILETYPE,
>> > > + TTFFILETYPE,
>> > > + JARFILETYPE,
>> > > + JSONFILETYPE,
>> > > + OGGFILETYPE,
>> > > + OATFILETYPE,
>> > > + ARTFILETYPE,
>> > > + RCFILETYPE,
>> > > + OTFFILETYPE,
>> > > + TXTFILETYPE,
>> > > + OTHERFILETYPE,
>> > > +};
>> > Why we need enums here? Can these be resolved with some array index?
>> > > > +
>> > > +#define FILE_SIZE_BITS 30
>> > > +struct statistics {
>> > > + unsigned long blocks;
>> > > + unsigned long files;
>> > > + unsigned long files_total_size;
>> > > + unsigned long files_total_origin_size;
>> > > + double compress_rate;
>> > > + unsigned long compressed_files;
>> > > + unsigned long uncompressed_files;
>> > > +
>> > > + unsigned long regular_files;
>> > > + unsigned long dir_files;
>> > > + unsigned long chardev_files;
>> > > + unsigned long blkdev_files;
>> > > + unsigned long fifo_files;
>> > > + unsigned long sock_files;
>> > > + unsigned long symlink_files;
>> > > +
>> > > + unsigned int file_type_stat[OTHERFILETYPE + 1];
>> > > + unsigned int file_org_size[FILE_SIZE_BITS];
>> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
>> > > > + unsigned int file_comp_size[FILE_SIZE_BITS];
>> > > +};
>> > > +static struct statistics stats;
>> > > +
>> > > static struct option long_options[] = {
>> > > {"help", no_argument, 0, 1},
>> > > {0, 0, 0, 0},
>> > > @@ -33,6 +101,7 @@ static void usage(void)
>> > > fputs("usage: [options] erofs-image \n\n"
>> > > "Dump erofs layout from erofs-image, and [options] are:\n"
>> > > "-s print information about superblock\n"
>> > > + "-S print statistic information of the erofs-image\n"
>> > > "-v/-V print dump.erofs version info\n"
>> > > "-h/--help display this help and exit\n", stderr);
>> > > }
>> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc,
>> char **argv)
>> > > case 's':
>> > > dumpcfg.print_superblock = true;
>> > > break;
>> > > + case 'S':
>> > > + dumpcfg.print_statistic = true;
>> > > + break;
>> > > case 'v':
>> > > case 'V':
>> > > dumpfs_print_version();
>> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int
>> argc, char **argv)
>> > > return 0;
>> > > }
>> > > +static int z_erofs_get_last_cluster_size_from_disk(struct
>> erofs_map_blocks *map,
>> > > + erofs_off_t last_cluster_size,
>> > > + erofs_off_t *last_cluster_compressed_size)
>> > Hmmm... do we really need the exact compressed bytes?
>> > or just compressed blocks is enough?
>> > > "compressed blocks" can be gotten in erofs inode.
>> > > Btw, I think it would be useful for fsck, though (to check whether an
>> erofs image is correct).
>> > > > +{
>> > > + int ret;
>> > > + int decomp_len;
>> > > + int compressed_len = 0;
>> > > + char *decompress;
>> > > + char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
>> > > +
>> > > + ret = dev_read(raw, map->m_pa, map->m_plen);
>> > > + if (ret < 0)
>> > > + return -EIO;
>> > > +
>> > > + if (erofs_sb_has_lz4_0padding()) {
>> > > + compressed_len = map->m_plen;
>> > > + } else {
>> > > + // lz4 maximum compression ratio is 255
>> > > + decompress = (char *)malloc(map->m_plen * 255);
>> > > + if (!decompress) {
>> > > + erofs_err("allocate memory for decompress space
>> failed");
>> > > + return -1;
>> > > + }
>> > > + decomp_len = LZ4_decompress_safe_partial(raw, decompress,
>> > > + map->m_plen, last_cluster_size,
>> > > + map->m_plen * 10);
>> > > + if (decomp_len < 0) {
>> > > + erofs_err("decompress last cluster to get
>> decompressed size failed");
>> > > + free(decompress);
>> > > + return -1;
>> > > + }
>> > > + compressed_len = LZ4_compress_destSize(decompress, raw,
>> > > + &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
>> > > + if (compressed_len < 0) {
>> > > + erofs_err("compress to get last extent size failed\n");
>> > > + free(decompress);
>> > > + return -1;
>> > > + }
>> > > + free(decompress);
>> > > + // due to the use of lz4hc (can use different compress
>> level),
>> > > + // our normal lz4 compress result may be bigger
>> > > + compressed_len = compressed_len < map->m_plen ?
>> > > + compressed_len : map->m_plen;
>> > > + }
>> > > +
>> > > + *last_cluster_compressed_size = compressed_len;
>> > > + return 0;
>> > > +}
>> > > +
>> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
>> > > + erofs_off_t *size)
>> > > +{
>> > > + int err;
>> > > + erofs_blk_t compressedlcs;
>> > > + erofs_off_t last_cluster_size;
>> > > + erofs_off_t last_cluster_compressed_size;
>> > > + struct erofs_map_blocks map = {
>> > > + .index = UINT_MAX,
>> > > + .m_la = inode->i_size - 1,
>> > > + };
>> > > +
>> > > + err = z_erofs_map_blocks_iter(inode, &map);
>> > (add Jianan here.)
>> > > Can we port the latest erofs kernel fiemap code to erofs-utils,
>> and add
>> > some functionality to get the file distribution as well when the fs
>> isn't
>> > mounted?
>> Hi Xiang,
>>
>> I have sent the patch and verified it with a similar function. Better
>> to use
>> the
>> new interface here.
>
> Yeah, thanks for the patch:
> https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/
>
>
> Hopefully Xuenan could build on this work.
>
> Thanks,
> Gao Xiang
>
>>
>> Thanks,
>> Jianan
More information about the Linux-erofs
mailing list