[RFC PATCH 2/2] erofs-utils: dump: support fragments

Yue Hu zbestahu at gmail.com
Wed Jan 4 18:12:28 AEDT 2023


On Wed, 4 Jan 2023 14:44:35 +0800
Xiang Gao <hsiangkao at linux.alibaba.com> wrote:

> On 2023/1/4 14:16, Yue Hu wrote:
> > Hi Xiang,
> > 
> > Thanks for the reviewing. I'm planning to send v2.
> > 
> > On Wed, 4 Jan 2023 11:13:55 +0800
> > Xiang Gao <hsiangkao at linux.alibaba.com> wrote:
> >   
> >> On 2022/12/19 17:50, Yue Hu wrote:  
> >>> From: Yue Hu <huyue2 at coolpad.com>
> >>>
> >>> Add compressed fragments support for dump feature.
> >>>
> >>> Signed-off-by: Yue Hu <huyue2 at coolpad.com>
> >>> ---
> >>>    dump/main.c              | 78 ++++++++++++++++++++++++++++++++--------
> >>>    include/erofs/internal.h |  1 +
> >>>    lib/zmap.c               |  2 +-
> >>>    3 files changed, 65 insertions(+), 16 deletions(-)
> >>>
> >>> diff --git a/dump/main.c b/dump/main.c
> >>> index bc4f047..d387841 100644
> >>> --- a/dump/main.c
> >>> +++ b/dump/main.c
> >>> @@ -14,6 +14,8 @@
> >>>    #include "erofs/inode.h"
> >>>    #include "erofs/io.h"
> >>>    #include "erofs/dir.h"
> >>> +#include "erofs/compress.h"
> >>> +#include "erofs/fragments.h"
> >>>    #include "../lib/liberofs_private.h"
> >>>    
> >>>    #ifdef HAVE_LIBUUID
> >>> @@ -96,6 +98,7 @@ static struct erofsdump_feature feature_lists[] = {
> >>>    	{ false, EROFS_FEATURE_INCOMPAT_CHUNKED_FILE, "chunked_file" },
> >>>    	{ false, EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, "device_table" },
> >>>    	{ false, EROFS_FEATURE_INCOMPAT_ZTAILPACKING, "ztailpacking" },
> >>> +	{ false, EROFS_FEATURE_INCOMPAT_FRAGMENTS, "fragments" },
> >>>    };
> >>>    
> >>>    static int erofsdump_readdir(struct erofs_dir_context *ctx);
> >>> @@ -285,10 +288,12 @@ static int erofsdump_readdir(struct erofs_dir_context *ctx)
> >>>    	}
> >>>    
> >>>    	if (S_ISREG(vi.i_mode)) {
> >>> -		stats.files_total_origin_size += vi.i_size;
> >>> -		inc_file_extension_count(ctx->dname, ctx->de_namelen);
> >>> +		if (!erofs_is_packed_inode(&vi)) {
> >>> +			stats.files_total_origin_size += vi.i_size;
> >>> +			inc_file_extension_count(ctx->dname, ctx->de_namelen);
> >>> +			update_file_size_statistics(vi.i_size, true);
> >>> +		}
> >>>    		stats.files_total_size += occupied_size;
> >>> -		update_file_size_statistics(vi.i_size, true);
> >>>    		update_file_size_statistics(occupied_size, false);
> >>>    	}
> >>>    
> >>> @@ -320,6 +325,10 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>>    		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" PRIu64 "..%10" PRIu64 " | %7" PRIu64 "\n",
> >>>    		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" PRIu64 "..%10" PRIu64 " | %7" PRIu64 "  # device %u\n"
> >>>    	};
> >>> +	const char *frag_ext_fmt[] = {
> >>> +		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 "\n",
> >>> +		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 "  # device %u\n"
> >>> +	};  
> >>
> >> Why do we need another fmt rather than just fill fragment extent with  
> > 
> > I also plan to remove this fmt in v2.
> >   
> >> physical addr 0?  
> > 
> > Okay. Just leave latter physical length as empty?
> >   
> >>  
> >>>    	int err, i;
> >>>    	erofs_off_t size;
> >>>    	u16 access_mode;
> >>> @@ -348,16 +357,31 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>>    		}
> >>>    	}
> >>>    
> >>> +	if (erofs_inode_is_data_compressed(inode.datalayout)) {
> >>> +		err = z_erofs_fill_inode_lazy(&inode);
> >>> +		if (err) {
> >>> +			erofs_err("read inode map header failed @ nid %llu",
> >>> +				  inode.nid | 0ULL);
> >>> +			return;
> >>> +		}
> >>> +	}  
> >>
> >> Why do we need to call z_erofs_fill_inode_lazy here?  
> > 
> > Used to check if the file has fragment. If yes, i think 'Compression ratio' can not be showed
> > exactly.
> > 
> > So, I'd like to show fragment information related instead as below:
> > 
> > NID: 715   Links: 1   Layout: 1   Fragment: entire file
> > 
> > NID: 715   Links: 1   Layout: 3   Fragment: tail of file  
> 
> I think no need to distinguish these two stuffs considering extra 
> complexity.

So, no need to check fragment or not? just keep the original look?

> 
> >   
> >>  
> >>> +
> >>>    	err = erofs_get_occupied_size(&inode, &size);
> >>>    	if (err) {
> >>>    		erofs_err("get file size failed @ nid %llu", inode.nid | 0ULL);
> >>>    		return;
> >>>    	}
> >>>    
> >>> -	err = erofs_get_pathname(inode.nid, path, sizeof(path));
> >>> -	if (err < 0) {
> >>> -		erofs_err("file path not found @ nid %llu", inode.nid | 0ULL);
> >>> -		return;  
> >>
> >> Can we just ignore pathname if it doesn't exist?  
> > 
> > Okay.
> >   
> >>  
> >>> +	if (erofs_is_packed_inode(&inode) { > +		strncpy(path, EROFS_PACKED_INODE, sizeof(path) - 1);
> >>> +		path[sizeof(path) - 1] = '\0';
> >>> +	} else {
> >>> +		err = erofs_get_pathname(inode.nid, path, sizeof(path));
> >>> +		if (err < 0) {
> >>> +			erofs_err("file path not found @ nid %llu",
> >>> +				  inode.nid | 0ULL);
> >>> +			return;
> >>> +		}
> >>>    	}
> >>>    
> >>>    	strftime(timebuf, sizeof(timebuf),
> >>> @@ -372,9 +396,13 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>>    		file_category_types[erofs_mode_to_ftype(inode.i_mode)]);
> >>>    	fprintf(stdout, "NID: %" PRIu64 "   ", inode.nid);
> >>>    	fprintf(stdout, "Links: %u   ", inode.i_nlink);
> >>> -	fprintf(stdout, "Layout: %d   Compression ratio: %.2f%%\n",
> >>> -		inode.datalayout,
> >>> -		(double)(100 * size) / (double)(inode.i_size));
> >>> +	if (inode.z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
> >>> +		fprintf(stdout, "Layout: %d   Fragment: %s\n",
> >>> +			inode.datalayout, size ? "partial" : "full");
> >>> +	else
> >>> +		fprintf(stdout, "Layout: %d   Compression ratio: %.2f%%\n",
> >>> +			inode.datalayout,
> >>> +			(double)(100 * size) / (double)(inode.i_size));
> >>>    	fprintf(stdout, "Inode size: %d   ", inode.inode_isize);
> >>>    	fprintf(stdout, "Extent size: %u   ", inode.extent_isize);
> >>>    	fprintf(stdout,	"Xattr size: %u\n", inode.xattr_isize);
> >>> @@ -404,7 +432,8 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>>    	if (!dumpcfg.show_extent)
> >>>    		return;
> >>>    
> >>> -	fprintf(stdout, "\n Ext:   logical offset   |  length :     physical offset    |  length\n");
> >>> +	fprintf(stdout, "\n Ext:   logical offset   |  length%s\n",
> >>> +			size ? " :     physical offset    |  length" : "");
> >>>    	while (map.m_la < inode.i_size) {
> >>>    		struct erofs_map_dev mdev;
> >>>    
> >>> @@ -425,10 +454,17 @@ static void erofsdump_show_fileinfo(bool show_extent)
> >>>    			return;
> >>>    		}
> >>>    
> >>> -		fprintf(stdout, ext_fmt[!!mdev.m_deviceid], extent_count++,
> >>> -			map.m_la, map.m_la + map.m_llen, map.m_llen,
> >>> -			mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
> >>> -			mdev.m_deviceid);
> >>> +		if (map.m_flags & EROFS_MAP_FRAGMENT)
> >>> +			fprintf(stdout, frag_ext_fmt[!!mdev.m_deviceid],
> >>> +				extent_count++,
> >>> +				map.m_la, map.m_la + map.m_llen, map.m_llen,
> >>> +				mdev.m_deviceid);  
> >>
> >> except for the last fragment extent, the other extents all have physical
> >> addr and length...  
> > 
> > It's true.
> > 
> > What bothers me is how to better show fragment extent based on current format (especially
> > no physical address and offset for fragment extent).
> > 
> > Now, i think the fragment extent should be showed like below (just use a '0' to represent it)?
> > 
> > a) the whole file is a fragment:
> > 
> >   Ext:   logical offset   |  length :     physical offset    |  length
> >     1:        0..    1046 |    1046 :    0  
> 
> 
> Can we use
>     Ext:   logical offset   |  length :     physical offset    |  length
>       1:        0..    1046 |    1046 :     0..0               |  0
> 
> > 
> > b) the tail of file is a fragment:
> > 
> >   Ext:   logical offset   |  length :     physical offset    |  length
> >     0:        0..   19672 |   19672 :    1314816..   1323008 |    8192
> >      ...
> >    13:   165989..  182474 |   16485 :    1421312..   1429504 |    8192
> >    14:   182474..  196435 |   13961 :    0  
> 
> Here
>       14:   182474..  196435 |   13961 :    0..         0       |    0
> 
> as well?

No problem now.

> 
> >   
> >>  
> >>> +		else
> >>> +			fprintf(stdout, ext_fmt[!!mdev.m_deviceid],
> >>> +				extent_count++,
> >>> +				map.m_la, map.m_la + map.m_llen, map.m_llen,
> >>> +				mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
> >>> +				mdev.m_deviceid);
> >>>    		map.m_la += map.m_llen;
> >>>    	}
> >>>    	fprintf(stdout, "%s: %d extents found\n", path, extent_count);
> >>> @@ -537,6 +573,15 @@ static void erofsdump_print_statistic(void)
> >>>    		erofs_err("read dir failed");
> >>>    		return;
> >>>    	}
> >>> +	if (erofs_sb_has_fragments()) {
> >>> +		err = erofsdump_readdir(&(struct erofs_dir_context) {
> >>> +					.de_nid = sbi.packed_nid
> >>> +					});  
> >>
> >> why do we need this?  
> > 
> > Since some status need to be updated such as stats.files_total_size.  
> 
> why should packed file be recorded in files_total_size?

Mainly related to compression ratio. files_total_size records on-disk (un)compressed size.

296                 stats.files_total_size += occupied_size;

544         stats.compress_rate = (double)(100 * stats.files_total_size) /
545                 (double)(stats.files_total_origin_size);

The file system compression ratio will be incorrect (since all compressed fragments are
missing).

> 
> Thanks,
> Gao Xiang
> 
> >   
> >>
> >> Thanks,
> >> Gao Xiang  



More information about the Linux-erofs mailing list