[PATCH v1 3/5] dump.erofs: add -S options for collecting statistics of the whole filesystem

Guo Xuenan guoxuenan at huawei.com
Tue Sep 14 12:31:37 AEST 2021


OK, I will send out patch v2 today, and it will be based on Jianan's
work.

在 2021/9/13 20:46, Gao Xiang 写道:
> On Mon, Sep 13, 2021 at 12:30:04PM +0800, Huang Jianan wrote:
>> 在 2021/9/12 0:13, Gao Xiang 写道:
>> > (+Cc Jianan.)
>> > > On Sat, Sep 11, 2021 at 09:46:33PM +0800, Guo Xuenan wrote:
>> > > From: mpiglet <mpiglet at outlook.com>
>> > > > > Signed-off-by: Guo Xuenan <guoxuenan at huawei.com>
>> > > Signed-off-by: mpiglet <mpiglet at outlook.com>
>> > > ---
>> > >   dump/main.c | 474 
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> > >   1 file changed, 474 insertions(+)
>> > > > > diff --git a/dump/main.c b/dump/main.c
>> > > index 25ac89f..b0acc0b 100644
>> > > --- a/dump/main.c
>> > > +++ b/dump/main.c
>> > > @@ -19,10 +19,78 @@
>> > >   struct dumpcfg {
>> > >       bool print_superblock;
>> > > +    bool print_statistic;
>> > >       bool print_version;
>> > >   };
>> > >   static struct dumpcfg dumpcfg;
>> > > +static const char chart_format[] = "%-16s    %-11d %8.2f%% 
>> |%-50s|\n";
>> > > +static const char header_format[] = "%-16s %11s %16s |%-50s|\n";
>> > > +static char *file_types[] = {
>> > > +    ".so",
>> > > +    ".png",
>> > > +    ".jpg",
>> > > +    ".xml",
>> > > +    ".html",
>> > > +    ".odex",
>> > > +    ".vdex",
>> > > +    ".apk",
>> > > +    ".ttf",
>> > > +    ".jar",
>> > > +    ".json",
>> > > +    ".ogg",
>> > > +    ".oat",
>> > > +    ".art",
>> > > +    ".rc",
>> > > +    ".otf",
>> > > +    ".txt",
>> > > +    "others",
>> > > +};
>> > > +enum {
>> > > +    SOFILETYPE = 0,
>> > > +    PNGFILETYPE,
>> > > +    JPEGFILETYPE,
>> > > +    XMLFILETYPE,
>> > > +    HTMLFILETYPE,
>> > > +    ODEXFILETYPE,
>> > > +    VDEXFILETYPE,
>> > > +    APKFILETYPE,
>> > > +    TTFFILETYPE,
>> > > +    JARFILETYPE,
>> > > +    JSONFILETYPE,
>> > > +    OGGFILETYPE,
>> > > +    OATFILETYPE,
>> > > +    ARTFILETYPE,
>> > > +    RCFILETYPE,
>> > > +    OTFFILETYPE,
>> > > +    TXTFILETYPE,
>> > > +    OTHERFILETYPE,
>> > > +};
>> > Why do we need enums here? Can these be resolved with some array index?
>> > > > +
>> > > +#define    FILE_SIZE_BITS    30
>> > > +struct statistics {
>> > > +    unsigned long blocks;
>> > > +    unsigned long files;
>> > > +    unsigned long files_total_size;
>> > > +    unsigned long files_total_origin_size;
>> > > +    double compress_rate;
>> > > +    unsigned long compressed_files;
>> > > +    unsigned long uncompressed_files;
>> > > +
>> > > +    unsigned long regular_files;
>> > > +    unsigned long dir_files;
>> > > +    unsigned long chardev_files;
>> > > +    unsigned long blkdev_files;
>> > > +    unsigned long fifo_files;
>> > > +    unsigned long sock_files;
>> > > +    unsigned long symlink_files;
>> > > +
>> > > +    unsigned int file_type_stat[OTHERFILETYPE + 1];
>> > > +    unsigned int file_org_size[FILE_SIZE_BITS];
>> > What do "FILE_SIZE_BITS" and "file_org_size" mean?
>> > > > +    unsigned int file_comp_size[FILE_SIZE_BITS];
>> > > +};
>> > > +static struct statistics stats;
>> > > +
>> > >   static struct option long_options[] = {
>> > >       {"help", no_argument, 0, 1},
>> > >       {0, 0, 0, 0},
>> > > @@ -33,6 +101,7 @@ static void usage(void)
>> > >       fputs("usage: [options] erofs-image \n\n"
>> > >           "Dump erofs layout from erofs-image, and [options] are:\n"
>> > >           "-s          print information about superblock\n"
>> > > +        "-S      print statistic information of the erofs-image\n"
>> > >           "-v/-V      print dump.erofs version info\n"
>> > >           "-h/--help  display this help and exit\n", stderr);
>> > >   }
>> > > @@ -51,6 +120,9 @@ static int dumpfs_parse_options_cfg(int argc, 
>> char **argv)
>> > >           case 's':
>> > >               dumpcfg.print_superblock = true;
>> > >               break;
>> > > +        case 'S':
>> > > +            dumpcfg.print_statistic = true;
>> > > +            break;
>> > >           case 'v':
>> > >           case 'V':
>> > >               dumpfs_print_version();
>> > > @@ -78,6 +150,116 @@ static int dumpfs_parse_options_cfg(int 
>> argc, char **argv)
>> > >       return 0;
>> > >   }
>> > > +static int z_erofs_get_last_cluster_size_from_disk(struct 
>> erofs_map_blocks *map,
>> > > +        erofs_off_t last_cluster_size,
>> > > +        erofs_off_t *last_cluster_compressed_size)
>> > Hmmm... do we really need the exact compressed bytes,
>> > or are compressed blocks enough?
>> >
>> > "compressed blocks" can be obtained from the erofs inode.
>> >
>> > Btw, I do think it's useful for fsck, though (to check if an erofs
>> > image is correct).
>> > > > +{
>> > > +    int ret;
>> > > +    int decomp_len;
>> > > +    int compressed_len = 0;
>> > > +    char *decompress;
>> > > +    char raw[Z_EROFS_PCLUSTER_MAX_SIZE] = {0};
>> > > +
>> > > +    ret = dev_read(raw, map->m_pa, map->m_plen);
>> > > +    if (ret < 0)
>> > > +        return -EIO;
>> > > +
>> > > +    if (erofs_sb_has_lz4_0padding()) {
>> > > +        compressed_len = map->m_plen;
>> > > +    } else {
>> > > +        // lz4 maximum compression ratio is 255
>> > > +        decompress = (char *)malloc(map->m_plen * 255);
>> > > +        if (!decompress) {
>> > > +            erofs_err("allocate memory for decompress space 
>> failed");
>> > > +            return -1;
>> > > +        }
>> > > +        decomp_len = LZ4_decompress_safe_partial(raw, decompress,
>> > > +                map->m_plen, last_cluster_size,
>> > > +                map->m_plen * 10);
>> > > +        if (decomp_len < 0) {
>> > > +            erofs_err("decompress last cluster to get 
>> decompressed size failed");
>> > > +            free(decompress);
>> > > +            return -1;
>> > > +        }
>> > > +        compressed_len = LZ4_compress_destSize(decompress, raw,
>> > > +                &decomp_len, Z_EROFS_PCLUSTER_MAX_SIZE);
>> > > +        if (compressed_len < 0) {
>> > > +            erofs_err("compress to get last extent size failed\n");
>> > > +            free(decompress);
>> > > +            return -1;
>> > > +        }
>> > > +        free(decompress);
>> > > +        // dut to the use of lz4hc (can use different compress 
>> level),
>> > > +        // our normal lz4 compress result may be bigger
>> > > +        compressed_len = compressed_len < map->m_plen ?
>> > > +            compressed_len : map->m_plen;
>> > > +    }
>> > > +
>> > > +    *last_cluster_compressed_size = compressed_len;
>> > > +    return 0;
>> > > +}
>> > > +
>> > > +static int z_erofs_get_compressed_size(struct erofs_inode *inode,
>> > > +        erofs_off_t *size)
>> > > +{
>> > > +    int err;
>> > > +    erofs_blk_t compressedlcs;
>> > > +    erofs_off_t last_cluster_size;
>> > > +    erofs_off_t last_cluster_compressed_size;
>> > > +    struct erofs_map_blocks map = {
>> > > +        .index = UINT_MAX,
>> > > +        .m_la = inode->i_size - 1,
>> > > +    };
>> > > +
>> > > +    err = z_erofs_map_blocks_iter(inode, &map);
>> > (add Jianan here.)
>> > Can we port the latest erofs kernel fiemap code to erofs-utils, and add
>> > some functionality to get the file distribution as well when the fs isn't
>> > mounted?
>> Hi Xiang,
>>
>> I have sent the patch and verified it with a similar function. It would be
>> better to use the new interface here.
>
> Yeah, thanks for the patch:
> https://lore.kernel.org/linux-erofs/20210913042716.17529-1-huangjianan@oppo.com/ 
>
>
> Hopefully Xuenan can build on this work.
>
> Thanks,
> Gao Xiang
>
>>
>> Thanks,
>> Jianan


More information about the Linux-erofs mailing list