[RFC PATCH 2/2] erofs-utils: dump: support fragments

Xiang Gao hsiangkao at linux.alibaba.com
Wed Jan 4 18:19:10 AEDT 2023


(+ add Qi Wang)

On 2023/1/4 15:12, Yue Hu wrote:
> On Wed, 4 Jan 2023 14:44:35 +0800
> Xiang Gao <hsiangkao at linux.alibaba.com> wrote:
> 
>> On 2023/1/4 14:16, Yue Hu wrote:
>>> Hi Xiang,
>>>
>>> Thanks for the reviewing. I'm planning to send v2.
>>>
>>> On Wed, 4 Jan 2023 11:13:55 +0800
>>> Xiang Gao <hsiangkao at linux.alibaba.com> wrote:
>>>    
>>>> On 2022/12/19 17:50, Yue Hu wrote:
>>>>> From: Yue Hu <huyue2 at coolpad.com>
>>>>>
>>>>> Add compressed fragments support for dump feature.
>>>>>
>>>>> Signed-off-by: Yue Hu <huyue2 at coolpad.com>
>>>>> ---
>>>>>     dump/main.c              | 78 ++++++++++++++++++++++++++++++++--------
>>>>>     include/erofs/internal.h |  1 +
>>>>>     lib/zmap.c               |  2 +-
>>>>>     3 files changed, 65 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/dump/main.c b/dump/main.c
>>>>> index bc4f047..d387841 100644
>>>>> --- a/dump/main.c
>>>>> +++ b/dump/main.c
>>>>> @@ -14,6 +14,8 @@
>>>>>     #include "erofs/inode.h"
>>>>>     #include "erofs/io.h"
>>>>>     #include "erofs/dir.h"
>>>>> +#include "erofs/compress.h"
>>>>> +#include "erofs/fragments.h"
>>>>>     #include "../lib/liberofs_private.h"
>>>>>     
>>>>>     #ifdef HAVE_LIBUUID
>>>>> @@ -96,6 +98,7 @@ static struct erofsdump_feature feature_lists[] = {
>>>>>     	{ false, EROFS_FEATURE_INCOMPAT_CHUNKED_FILE, "chunked_file" },
>>>>>     	{ false, EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, "device_table" },
>>>>>     	{ false, EROFS_FEATURE_INCOMPAT_ZTAILPACKING, "ztailpacking" },
>>>>> +	{ false, EROFS_FEATURE_INCOMPAT_FRAGMENTS, "fragments" },
>>>>>     };
>>>>>     
>>>>>     static int erofsdump_readdir(struct erofs_dir_context *ctx);
>>>>> @@ -285,10 +288,12 @@ static int erofsdump_readdir(struct erofs_dir_context *ctx)
>>>>>     	}
>>>>>     
>>>>>     	if (S_ISREG(vi.i_mode)) {
>>>>> -		stats.files_total_origin_size += vi.i_size;
>>>>> -		inc_file_extension_count(ctx->dname, ctx->de_namelen);
>>>>> +		if (!erofs_is_packed_inode(&vi)) {
>>>>> +			stats.files_total_origin_size += vi.i_size;
>>>>> +			inc_file_extension_count(ctx->dname, ctx->de_namelen);
>>>>> +			update_file_size_statistics(vi.i_size, true);
>>>>> +		}
>>>>>     		stats.files_total_size += occupied_size;
>>>>> -		update_file_size_statistics(vi.i_size, true);
>>>>>     		update_file_size_statistics(occupied_size, false);
>>>>>     	}
>>>>>     
>>>>> @@ -320,6 +325,10 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>>>     		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" PRIu64 "..%10" PRIu64 " | %7" PRIu64 "\n",
>>>>>     		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" PRIu64 "..%10" PRIu64 " | %7" PRIu64 "  # device %u\n"
>>>>>     	};
>>>>> +	const char *frag_ext_fmt[] = {
>>>>> +		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 "\n",
>>>>> +		"%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 "  # device %u\n"
>>>>> +	};
>>>>
>>>> Why do we need another fmt rather than just fill fragment extent with
>>>
>>> I also plan to remove this fmt in v2.
>>>    
>>>> physical addr 0?
>>>
>>> Okay. Just leave latter physical length as empty?
>>>    
>>>>   
>>>>>     	int err, i;
>>>>>     	erofs_off_t size;
>>>>>     	u16 access_mode;
>>>>> @@ -348,16 +357,31 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>>>     		}
>>>>>     	}
>>>>>     
>>>>> +	if (erofs_inode_is_data_compressed(inode.datalayout)) {
>>>>> +		err = z_erofs_fill_inode_lazy(&inode);
>>>>> +		if (err) {
>>>>> +			erofs_err("read inode map header failed @ nid %llu",
>>>>> +				  inode.nid | 0ULL);
>>>>> +			return;
>>>>> +		}
>>>>> +	}
>>>>
>>>> Why do we need to call z_erofs_fill_inode_lazy here?
>>>
>>> Used to check if the file has fragment. If yes, i think 'Compression ratio' can not be showed
>>> exactly.
>>>
>>> So, I'd like to show fragment information related instead as below:
>>>
>>> NID: 715   Links: 1   Layout: 1   Fragment: entire file
>>>
>>> NID: 715   Links: 1   Layout: 3   Fragment: tail of file
>>
>> I think no need to distinguish these two stuffs considering extra
>> complexity.
> 
> So, no need to check fragment or not? just keep the original look?

because we could get such information by observing inode extents directly.

> 
>>
>>>    
>>>>   
>>>>> +
>>>>>     	err = erofs_get_occupied_size(&inode, &size);
>>>>>     	if (err) {
>>>>>     		erofs_err("get file size failed @ nid %llu", inode.nid | 0ULL);
>>>>>     		return;
>>>>>     	}
>>>>>     
>>>>> -	err = erofs_get_pathname(inode.nid, path, sizeof(path));
>>>>> -	if (err < 0) {
>>>>> -		erofs_err("file path not found @ nid %llu", inode.nid | 0ULL);
>>>>> -		return;
>>>>
>>>> Can we just ignore pathname if it doesn't exist?
>>>
>>> Okay.
>>>    
>>>>   
>>>>> +	if (erofs_is_packed_inode(&inode) { > +		strncpy(path, EROFS_PACKED_INODE, sizeof(path) - 1);
>>>>> +		path[sizeof(path) - 1] = '\0';
>>>>> +	} else {
>>>>> +		err = erofs_get_pathname(inode.nid, path, sizeof(path));
>>>>> +		if (err < 0) {
>>>>> +			erofs_err("file path not found @ nid %llu",
>>>>> +				  inode.nid | 0ULL);
>>>>> +			return;
>>>>> +		}
>>>>>     	}
>>>>>     
>>>>>     	strftime(timebuf, sizeof(timebuf),
>>>>> @@ -372,9 +396,13 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>>>     		file_category_types[erofs_mode_to_ftype(inode.i_mode)]);
>>>>>     	fprintf(stdout, "NID: %" PRIu64 "   ", inode.nid);
>>>>>     	fprintf(stdout, "Links: %u   ", inode.i_nlink);
>>>>> -	fprintf(stdout, "Layout: %d   Compression ratio: %.2f%%\n",
>>>>> -		inode.datalayout,
>>>>> -		(double)(100 * size) / (double)(inode.i_size));
>>>>> +	if (inode.z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
>>>>> +		fprintf(stdout, "Layout: %d   Fragment: %s\n",
>>>>> +			inode.datalayout, size ? "partial" : "full");
>>>>> +	else
>>>>> +		fprintf(stdout, "Layout: %d   Compression ratio: %.2f%%\n",
>>>>> +			inode.datalayout,
>>>>> +			(double)(100 * size) / (double)(inode.i_size));
>>>>>     	fprintf(stdout, "Inode size: %d   ", inode.inode_isize);
>>>>>     	fprintf(stdout, "Extent size: %u   ", inode.extent_isize);
>>>>>     	fprintf(stdout,	"Xattr size: %u\n", inode.xattr_isize);
>>>>> @@ -404,7 +432,8 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>>>     	if (!dumpcfg.show_extent)
>>>>>     		return;
>>>>>     
>>>>> -	fprintf(stdout, "\n Ext:   logical offset   |  length :     physical offset    |  length\n");
>>>>> +	fprintf(stdout, "\n Ext:   logical offset   |  length%s\n",
>>>>> +			size ? " :     physical offset    |  length" : "");
>>>>>     	while (map.m_la < inode.i_size) {
>>>>>     		struct erofs_map_dev mdev;
>>>>>     
>>>>> @@ -425,10 +454,17 @@ static void erofsdump_show_fileinfo(bool show_extent)
>>>>>     			return;
>>>>>     		}
>>>>>     
>>>>> -		fprintf(stdout, ext_fmt[!!mdev.m_deviceid], extent_count++,
>>>>> -			map.m_la, map.m_la + map.m_llen, map.m_llen,
>>>>> -			mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
>>>>> -			mdev.m_deviceid);
>>>>> +		if (map.m_flags & EROFS_MAP_FRAGMENT)
>>>>> +			fprintf(stdout, frag_ext_fmt[!!mdev.m_deviceid],
>>>>> +				extent_count++,
>>>>> +				map.m_la, map.m_la + map.m_llen, map.m_llen,
>>>>> +				mdev.m_deviceid);
>>>>
>>>> except for the last fragment extent, the other extents all have physical
>>>> addr and length...
>>>
>>> It's true.
>>>
>>> What bothers me is how to better show fragment extent based on current format (especially
>>> no physical address and offset for fragment extent).
>>>
>>> Now, i think the fragment extent should be showed like below (just use a '0' to represent it)?
>>>
>>> a) the whole file is a fragment:
>>>
>>>    Ext:   logical offset   |  length :     physical offset    |  length
>>>      1:        0..    1046 |    1046 :    0
>>
>>
>> Can we use
>>      Ext:   logical offset   |  length :     physical offset    |  length
>>        1:        0..    1046 |    1046 :     0..0               |  0
>>
>>>
>>> b) the tail of file is a fragment:
>>>
>>>    Ext:   logical offset   |  length :     physical offset    |  length
>>>      0:        0..   19672 |   19672 :    1314816..   1323008 |    8192
>>>       ...
>>>     13:   165989..  182474 |   16485 :    1421312..   1429504 |    8192
>>>     14:   182474..  196435 |   13961 :    0
>>
>> Here
>>        14:   182474..  196435 |   13961 :    0..         0       |    0
>>
>> as well?
> 
> No problem now.
> 
>>
>>>    
>>>>   
>>>>> +		else
>>>>> +			fprintf(stdout, ext_fmt[!!mdev.m_deviceid],
>>>>> +				extent_count++,
>>>>> +				map.m_la, map.m_la + map.m_llen, map.m_llen,
>>>>> +				mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
>>>>> +				mdev.m_deviceid);
>>>>>     		map.m_la += map.m_llen;
>>>>>     	}
>>>>>     	fprintf(stdout, "%s: %d extents found\n", path, extent_count);
>>>>> @@ -537,6 +573,15 @@ static void erofsdump_print_statistic(void)
>>>>>     		erofs_err("read dir failed");
>>>>>     		return;
>>>>>     	}
>>>>> +	if (erofs_sb_has_fragments()) {
>>>>> +		err = erofsdump_readdir(&(struct erofs_dir_context) {
>>>>> +					.de_nid = sbi.packed_nid
>>>>> +					});
>>>>
>>>> why do we need this?
>>>
>>> Since some status need to be updated such as stats.files_total_size.
>>
>> why should packed file be recorded in files_total_size?
> 
> Mainly related to compression ratio. files_total_size records on-disk (un)compressed size.
> 
> 296                 stats.files_total_size += occupied_size;
> 
> 544         stats.compress_rate = (double)(100 * stats.files_total_size) /
> 545                 (double)(stats.files_total_origin_size);
> 
> The file system compression ratio will be incorrect (since all compressed fragments are
> missing).

I really think erofsdump should deserve a deep cleanup first. we should 
count the correct compression ratio for the packed file. But I don't 
think we need:


static int erofsdump_readdir(struct erofs_dir_context *ctx)
{
...
             stats.files++;
             stats.file_category_stat[erofs_mode_to_ftype(vi.i_mode)]++;

...
}

Thanks,
Gao Xiang

> 
>>
>> Thanks,
>> Gao Xiang
>>
>>>    
>>>>
>>>> Thanks,
>>>> Gao Xiang


More information about the Linux-erofs mailing list