[PATCH 1/2] erofs-utils: optimize mkfs to O(N), N = #files

Gao Xiang hsiangkao at redhat.com
Sat Jan 9 05:15:45 AEDT 2021


Hi Weiwen,

On Fri, Jan 01, 2021 at 05:11:57PM +0800, 胡玮文 wrote:
> When using EROFS to pack our dataset which consists of millions of
> files, mkfs.erofs is very slow compared with mksquashfs.
> 
> The bottleneck is `erofs_balloc` and `erofs_mapbh` function, which
> iterate over all previously allocated buffer blocks, making the
> complexity of the algrithm O(N^2) where N is the number of files.
> 
> With this patch:
> 
> * global `last_mapped_block` is mantained to avoid full scan in
>   `erofs_mapbh` function.
> 
> * global `non_full_buffer_blocks` mantains a list of buffer block for
>   each type and each possible remaining bytes in the block. Then it is
>   used to identify the most suitable blocks in future `erofs_balloc`,
>   avoiding full scan.
> 
> Some new data structure is allocated in this patch, more RAM usage is
> expected, but not much. When I test it with ImageNet dataset (1.33M
> files), 7GiB RAM is consumed, and it takes about 4 hours. Most time is
> spent on IO.

Thanks for your patch again. I've read your patch, sorry about the delay
due to my pending work.

The idea generally looks to me.
Some suggestion about this as follows... :)

> 
> Signed-off-by: Hu Weiwen <huww98 at outlook.com>
> ---
>  lib/cache.c | 102 +++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 89 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/cache.c b/lib/cache.c
> index 0d5c4a5..3412a0b 100644
> --- a/lib/cache.c
> +++ b/lib/cache.c
> @@ -18,6 +18,18 @@ static struct erofs_buffer_block blkh = {
>  };
>  static erofs_blk_t tail_blkaddr;
>  
> +/**
> + * Some buffers are not full, we can reuse it to store more data
> + * 2 for DATA, META
> + * EROFS_BLKSIZ for each possible remaining bytes in the last block
> + **/
> +static struct erofs_buffer_block_record {
> +	struct list_head list;
> +	struct erofs_buffer_block *bb;
> +} non_full_buffer_blocks[2][EROFS_BLKSIZ];
> +

How about integrating the list_head to struct erofs_buffer_block and therefore
no need to malloc(struct erofs_buffer_block_record)?

and then we have
static struct list_head nonfull_bb_buckets[2][EROFS_BLKSIZ];

> +static struct erofs_buffer_block *last_mapped_block = &blkh;
> +
>  static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
>  {
>  	return erofs_bh_flush_generic_end(bh);
> @@ -62,6 +74,12 @@ struct erofs_bhops erofs_buf_write_bhops = {
>  /* return buffer_head of erofs super block (with size 0) */
>  struct erofs_buffer_head *erofs_buffer_init(void)
>  {
> +	for (int i = 0; i < 2; i++) {
> +		for (int j = 0; j < EROFS_BLKSIZ; j++) {
> +			init_list_head(&non_full_buffer_blocks[i][j].list);
> +		}
> +	}
> +

erofs-utils follows kernel coding style. so for the single statement,
no need to introduce braces...

https://www.kernel.org/doc/html/latest/process/coding-style.html

>  	struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
>  
>  	if (IS_ERR(bh))
> @@ -131,7 +149,8 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>  {
>  	struct erofs_buffer_block *cur, *bb;
>  	struct erofs_buffer_head *bh;
> -	unsigned int alignsize, used0, usedmax;
> +	unsigned int alignsize, used0, usedmax, total_size;
> +	struct erofs_buffer_block_record * reusing = NULL;
>  
>  	int ret = get_alignsize(type, &type);
>  
> @@ -143,7 +162,38 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>  	usedmax = 0;
>  	bb = NULL;
>  
> -	list_for_each_entry(cur, &blkh.list, list) {
> +	erofs_dbg("balloc size=%lu alignsize=%u used0=%u", size, alignsize, used0);
> +	if (used0 == 0 || alignsize == EROFS_BLKSIZ) {
> +		goto alloc;
> +	}

same here.

> +	total_size = size + required_ext + inline_ext;
> +	if (total_size < EROFS_BLKSIZ) {
> +		// Try find a most fit block.
> +		DBG_BUGON(type < 0 || type > 1);
> +		struct erofs_buffer_block_record *non_fulls = non_full_buffer_blocks[type];
> +		for (struct erofs_buffer_block_record *r = non_fulls + round_up(total_size, alignsize);
> +				r < non_fulls + EROFS_BLKSIZ; r++) {
> +			if (!list_empty(&r->list)) {
> +				struct erofs_buffer_block_record *reuse_candidate =
> +						list_first_entry(&r->list, struct erofs_buffer_block_record, list);
> +				ret = __erofs_battach(reuse_candidate->bb, NULL, size, alignsize,
> +						required_ext + inline_ext, true);
> +				if (ret >= 0) {
> +					reusing = reuse_candidate;
> +					bb = reuse_candidate->bb;
> +					usedmax = (ret + required_ext) % EROFS_BLKSIZ + inline_ext;
> +				}
> +				break;
> +			}
> +		}
> +	}
> +
> +	/* Try reuse last ones, which are not mapped and can be extended */
> +	cur = last_mapped_block;
> +	if (cur == &blkh) {
> +		cur = list_next_entry(cur, list);
> +	}

same here. 

> +	for (; cur != &blkh; cur = list_next_entry(cur, list)) {
>  		unsigned int used_before, used;
>  
>  		used_before = cur->buffers.off % EROFS_BLKSIZ;
> @@ -175,22 +225,32 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>  			continue;
>  
>  		if (usedmax < used) {
> +			reusing = NULL;
>  			bb = cur;
>  			usedmax = used;
>  		}
>  	}
>  
>  	if (bb) {
> +		erofs_dbg("reusing buffer. usedmax=%u", usedmax);
>  		bh = malloc(sizeof(struct erofs_buffer_head));
>  		if (!bh)
>  			return ERR_PTR(-ENOMEM);
> +		if (reusing) {
> +			list_del(&reusing->list);
> +			free(reusing);
> +		}
>  		goto found;
>  	}
>  
> +alloc:
>  	/* allocate a new buffer block */
> -	if (used0 > EROFS_BLKSIZ)
> +	if (used0 > EROFS_BLKSIZ) {
> +		erofs_dbg("used0 > EROFS_BLKSIZ");
>  		return ERR_PTR(-ENOSPC);
> +	}
>  
> +	erofs_dbg("new buffer block");
>  	bb = malloc(sizeof(struct erofs_buffer_block));
>  	if (!bb)
>  		return ERR_PTR(-ENOMEM);
> @@ -211,6 +271,16 @@ found:
>  			      required_ext + inline_ext, false);
>  	if (ret < 0)
>  		return ERR_PTR(ret);
> +	if (ret != 0) {
> +		/* This buffer is not full yet, we may reuse it */
> +		reusing = malloc(sizeof(struct erofs_buffer_block_record));
> +		if (!reusing) {
> +			return ERR_PTR(-ENOMEM);
> +		}
> +		reusing->bb = bb;
> +		list_add_tail(&reusing->list,
> +				&non_full_buffer_blocks[type][EROFS_BLKSIZ - ret - inline_ext].list);
> +	}
>  	return bh;
>  }
>  
> @@ -247,8 +317,10 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
>  {
>  	erofs_blk_t blkaddr;
>  
> -	if (bb->blkaddr == NULL_ADDR)
> +	if (bb->blkaddr == NULL_ADDR) {
>  		bb->blkaddr = tail_blkaddr;
> +		last_mapped_block = bb;
> +	}
>  
>  	blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
>  	if (blkaddr > tail_blkaddr)
> @@ -259,15 +331,16 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
>  
>  erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb, bool end)
>  {
> -	struct erofs_buffer_block *t, *nt;
> -
> -	if (!bb || bb->blkaddr == NULL_ADDR) {
> -		list_for_each_entry_safe(t, nt, &blkh.list, list) {
> -			if (!end && (t == bb || nt == &blkh))
> -				break;
> -			(void)__erofs_mapbh(t);
> -			if (end && t == bb)
> -				break;
> +	DBG_BUGON(!end);
> +	struct erofs_buffer_block *t = last_mapped_block;
> +	while (1) {
> +		t = list_next_entry(t, list);
> +		if (t == &blkh) {
> +			break;
> +		}
> +		(void)__erofs_mapbh(t);
> +		if (t == bb) {
> +			break;
>  		}
>  	}
>  	return tail_blkaddr;
> @@ -334,6 +407,9 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke)
>  	if (!list_empty(&bb->buffers.list))
>  		return;
>  
> +	if (bb == last_mapped_block) {
> +		last_mapped_block = list_prev_entry(bb, list);
> +	}

same here.

Also, you could sent the patchset (I mean [PATCH 1/2],[PATCH 2/2]) entirely as a thread,
for example by using git send-email * --to="linux-erofs at lists.ozlabs" --cc="...."


Thanks,
Gao Xiang

>  	list_del(&bb->list);
>  	free(bb);
>  
> -- 
> 2.30.0
> 



More information about the Linux-erofs mailing list