[RFC PATCH 2/2] erofs-utils: dump: support fragments

Xiang Gao hsiangkao at linux.alibaba.com
Wed Jan 4 17:44:35 AEDT 2023



On 2023/1/4 14:16, Yue Hu wrote:
> Hi Xiang,
> 
> Thanks for the reviewing. I'm planning to send v2.
> 
> On Wed, 4 Jan 2023 11:13:55 +0800
> Xiang Gao <hsiangkao at linux.alibaba.com> wrote:
> 
>> On 2022/12/19 17:50, Yue Hu wrote:
>>> From: Yue Hu <huyue2 at coolpad.com>
>>>
>>> Add compressed fragments support for dump feature.
>>>
>>> Signed-off-by: Yue Hu <huyue2 at coolpad.com>
>>> ---
>>>    dump/main.c              | 78 ++++++++++++++++++++++++++++++++--------
>>>    include/erofs/internal.h |  1 +
>>>    lib/zmap.c               |  2 +-
>>>    3 files changed, 65 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/dump/main.c b/dump/main.c
>>> index bc4f047..d387841 100644
>>> --- a/dump/main.c
>>> +++ b/dump/main.c
>>> @@ -14,6 +14,8 @@
>>>    #include "erofs/inode.h"
>>>    #include "erofs/io.h"
>>>    #include "erofs/dir.h"
>>> +#include "erofs/compress.h"
>>> +#include "erofs/fragments.h"
>>>    #include "../lib/liberofs_private.h"
>>>    
>>>    #ifdef HAVE_LIBUUID
>>> @@ -96,6 +98,7 @@ static struct erofsdump_feature feature_lists[] = {
>>>    	{ false, EROFS_FEATURE_INCOMPAT_CHUNKED_FILE, "chunked_file" },
>>>    	{ false, EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, "device_table" },
>>>    	{ false, EROFS_FEATURE_INCOMPAT_ZTAILPACKING, "ztailpacking" },
>>> +	{ false, EROFS_FEATURE_INCOMPAT_FRAGMENTS, "fragments" },
>>>    };
>>>    
>>>    static int erofsdump_readdir(struct erofs_dir_context *ctx);
>>> @@ -285,10 +288,12 @@ static int erofsdump_readdir(struct erofs_dir_context *ctx)
>>>    	}
>>>    
>>>    	if (S_ISREG(vi.i_mode)) {
>>> -		stats.files_total_origin_size += vi.i_size;
>>> -		inc_file_extension_count(ctx->dname, ctx->de_namelen);
>>> +		if (!erofs_is_packed_inode(&vi)) {
>>> +			stats.files_total_origin_size += vi.i_size;
>>> +			inc_file_extension_count(ctx->dname, ctx->de_namelen);
>>> +			update_file_size_statistics(vi.i_size, true);
>>> +		}
>>>    		stats.files_total_size += occupied_size;
>>> -		update_file_size_statistics(vi.i_size, true);
>>>    		update_file_size_statistics(occupied_size, false);
>>>    	}
>>>    
>>> @@ -320,6 +325,10 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>    		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" PRIu64 "..%10" PRIu64 " | %7" PRIu64 "\n",
>>>    		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" PRIu64 "..%10" PRIu64 " | %7" PRIu64 "  # device %u\n"
>>>    	};
>>> +	const char *frag_ext_fmt[] = {
>>> +		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 "\n",
>>> +		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 "  # device %u\n"
>>> +	};
>>
>> Why do we need another fmt rather than just fill fragment extent with
> 
> I also plan to remove this fmt in v2.
> 
>> physical addr 0?
> 
> Okay. Just leave latter physical length as empty?
> 
>>
>>>    	int err, i;
>>>    	erofs_off_t size;
>>>    	u16 access_mode;
>>> @@ -348,16 +357,31 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>    		}
>>>    	}
>>>    
>>> +	if (erofs_inode_is_data_compressed(inode.datalayout)) {
>>> +		err = z_erofs_fill_inode_lazy(&inode);
>>> +		if (err) {
>>> +			erofs_err("read inode map header failed @ nid %llu",
>>> +				  inode.nid | 0ULL);
>>> +			return;
>>> +		}
>>> +	}
>>
>> Why do we need to call z_erofs_fill_inode_lazy here?
> 
> Used to check if the file has fragment. If yes, i think 'Compression ratio' can not be showed
> exactly.
> 
> So, I'd like to show fragment information related instead as below:
> 
> NID: 715   Links: 1   Layout: 1   Fragment: entire file
> 
> NID: 715   Links: 1   Layout: 3   Fragment: tail of file

I think no need to distinguish these two stuffs considering extra 
complexity.

> 
>>
>>> +
>>>    	err = erofs_get_occupied_size(&inode, &size);
>>>    	if (err) {
>>>    		erofs_err("get file size failed @ nid %llu", inode.nid | 0ULL);
>>>    		return;
>>>    	}
>>>    
>>> -	err = erofs_get_pathname(inode.nid, path, sizeof(path));
>>> -	if (err < 0) {
>>> -		erofs_err("file path not found @ nid %llu", inode.nid | 0ULL);
>>> -		return;
>>
>> Can we just ignore pathname if it doesn't exist?
> 
> Okay.
> 
>>
>>> +	if (erofs_is_packed_inode(&inode) { > +		strncpy(path, EROFS_PACKED_INODE, sizeof(path) - 1);
>>> +		path[sizeof(path) - 1] = '\0';
>>> +	} else {
>>> +		err = erofs_get_pathname(inode.nid, path, sizeof(path));
>>> +		if (err < 0) {
>>> +			erofs_err("file path not found @ nid %llu",
>>> +				  inode.nid | 0ULL);
>>> +			return;
>>> +		}
>>>    	}
>>>    
>>>    	strftime(timebuf, sizeof(timebuf),
>>> @@ -372,9 +396,13 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>    		file_category_types[erofs_mode_to_ftype(inode.i_mode)]);
>>>    	fprintf(stdout, "NID: %" PRIu64 "   ", inode.nid);
>>>    	fprintf(stdout, "Links: %u   ", inode.i_nlink);
>>> -	fprintf(stdout, "Layout: %d   Compression ratio: %.2f%%\n",
>>> -		inode.datalayout,
>>> -		(double)(100 * size) / (double)(inode.i_size));
>>> +	if (inode.z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
>>> +		fprintf(stdout, "Layout: %d   Fragment: %s\n",
>>> +			inode.datalayout, size ? "partial" : "full");
>>> +	else
>>> +		fprintf(stdout, "Layout: %d   Compression ratio: %.2f%%\n",
>>> +			inode.datalayout,
>>> +			(double)(100 * size) / (double)(inode.i_size));
>>>    	fprintf(stdout, "Inode size: %d   ", inode.inode_isize);
>>>    	fprintf(stdout, "Extent size: %u   ", inode.extent_isize);
>>>    	fprintf(stdout,	"Xattr size: %u\n", inode.xattr_isize);
>>> @@ -404,7 +432,8 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>    	if (!dumpcfg.show_extent)
>>>    		return;
>>>    
>>> -	fprintf(stdout, "\n Ext:   logical offset   |  length :     physical offset    |  length\n");
>>> +	fprintf(stdout, "\n Ext:   logical offset   |  length%s\n",
>>> +			size ? " :     physical offset    |  length" : "");
>>>    	while (map.m_la < inode.i_size) {
>>>    		struct erofs_map_dev mdev;
>>>    
>>> @@ -425,10 +454,17 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>    			return;
>>>    		}
>>>    
>>> -		fprintf(stdout, ext_fmt[!!mdev.m_deviceid], extent_count++,
>>> -			map.m_la, map.m_la + map.m_llen, map.m_llen,
>>> -			mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
>>> -			mdev.m_deviceid);
>>> +		if (map.m_flags & EROFS_MAP_FRAGMENT)
>>> +			fprintf(stdout, frag_ext_fmt[!!mdev.m_deviceid],
>>> +				extent_count++,
>>> +				map.m_la, map.m_la + map.m_llen, map.m_llen,
>>> +				mdev.m_deviceid);
>>
>> except for the last fragment extent, the other extents all have physical
>> addr and length...
> 
> It's true.
> 
> What bothers me is how to better show fragment extent based on current format (especially
> no physical address and offset for fragment extent).
> 
> Now, i think the fragment extent should be showed like below (just use a '0' to represent it)?
> 
> a) the whole file is a fragment:
> 
>   Ext:   logical offset   |  length :     physical offset    |  length
>     1:        0..    1046 |    1046 :    0


Can we use
    Ext:   logical offset   |  length :     physical offset    |  length
      1:        0..    1046 |    1046 :     0..0               |  0

> 
> b) the tail of file is a fragment:
> 
>   Ext:   logical offset   |  length :     physical offset    |  length
>     0:        0..   19672 |   19672 :    1314816..   1323008 |    8192
>      ...
>    13:   165989..  182474 |   16485 :    1421312..   1429504 |    8192
>    14:   182474..  196435 |   13961 :    0

Here
      14:   182474..  196435 |   13961 :    0..         0       |    0

as well?

> 
>>
>>> +		else
>>> +			fprintf(stdout, ext_fmt[!!mdev.m_deviceid],
>>> +				extent_count++,
>>> +				map.m_la, map.m_la + map.m_llen, map.m_llen,
>>> +				mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
>>> +				mdev.m_deviceid);
>>>    		map.m_la += map.m_llen;
>>>    	}
>>>    	fprintf(stdout, "%s: %d extents found\n", path, extent_count);
>>> @@ -537,6 +573,15 @@ static void erofsdump_print_statistic(void)
>>>    		erofs_err("read dir failed");
>>>    		return;
>>>    	}
>>> +	if (erofs_sb_has_fragments()) {
>>> +		err = erofsdump_readdir(&(struct erofs_dir_context) {
>>> +					.de_nid = sbi.packed_nid
>>> +					});
>>
>> why do we need this?
> 
> Since some status need to be updated such as stats.files_total_size.

why should packed file be recorded in files_total_size?

Thanks,
Gao Xiang

> 
>>
>> Thanks,
>> Gao Xiang


More information about the Linux-erofs mailing list