[PATCH 1/2] erofs-utils: optimize mkfs to O(N), N = #files
Gao Xiang
hsiangkao at redhat.com
Sat Jan 2 16:09:57 AEDT 2021
Hi Weiwen,
On Fri, Jan 01, 2021 at 09:50:24AM +0000, 胡 玮文 wrote:
> Hi all,
>
> This is my first time sending patches by email. If something is not properly done, please point it out.
>
> I have a question for now: How am I supposed to submit patches to erofs-utils now? Some E-mail address listed in AUTHORS and README seems no longer valid (email to gaoxiang25 at huawei.com<mailto:gaoxiang25 at huawei.com> was rejected). Is it enough to post the patches to this mailing list? I added all addresses in README, but I got a message said my email is rejected. I’m not sure who actually received my email.
My email has been changed, I might need to update my email address to hsiangkao at redhat.com...
Thanks for point out!
Yeah, that could cause bottleneck when files increased (Android only have < 10000 files),
so it needs to be improve. I will look into your patch later (working on another things now...)
Hope Guifu or other folks could also help reviewing that...
Thanks,
Gao Xiang
>
> 发件人: 胡 玮文<mailto:huww98 at outlook.com>
> 发送时间: 2021年1月1日 17:16
> 收件人: Li Guifu<mailto:bluce.liguifu at huawei.com>; Miao Xie<mailto:miaoxie at huawei.com>; Fang Wei<mailto:fangwei1 at huawei.com>
> 抄送: Gao Xiang<mailto:gaoxiang25 at huawei.com>; Chao Yu<mailto:yuchao0 at huawei.com>; linux-erofs at lists.ozlabs.org<mailto:linux-erofs at lists.ozlabs.org>; 胡玮文<mailto:huww98 at outlook.com>
> 主题: [PATCH 1/2] erofs-utils: optimize mkfs to O(N), N = #files
>
> When using EROFS to pack our dataset which consists of millions of
> files, mkfs.erofs is very slow compared with mksquashfs.
>
> The bottleneck is `erofs_balloc` and `erofs_mapbh` function, which
> iterate over all previously allocated buffer blocks, making the
> complexity of the algrithm O(N^2) where N is the number of files.
>
> With this patch:
>
> * global `last_mapped_block` is mantained to avoid full scan in
> `erofs_mapbh` function.
>
> * global `non_full_buffer_blocks` mantains a list of buffer block for
> each type and each possible remaining bytes in the block. Then it is
> used to identify the most suitable blocks in future `erofs_balloc`,
> avoiding full scan.
>
> Some new data structure is allocated in this patch, more RAM usage is
> expected, but not much. When I test it with ImageNet dataset (1.33M
> files), 7GiB RAM is consumed, and it takes about 4 hours. Most time is
> spent on IO.
>
> Signed-off-by: Hu Weiwen <huww98 at outlook.com>
> ---
> lib/cache.c | 102 +++++++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 89 insertions(+), 13 deletions(-)
>
> diff --git a/lib/cache.c b/lib/cache.c
> index 0d5c4a5..3412a0b 100644
> --- a/lib/cache.c
> +++ b/lib/cache.c
> @@ -18,6 +18,18 @@ static struct erofs_buffer_block blkh = {
> };
> static erofs_blk_t tail_blkaddr;
>
> +/**
> + * Some buffers are not full, we can reuse it to store more data
> + * 2 for DATA, META
> + * EROFS_BLKSIZ for each possible remaining bytes in the last block
> + **/
> +static struct erofs_buffer_block_record {
> + struct list_head list;
> + struct erofs_buffer_block *bb;
> +} non_full_buffer_blocks[2][EROFS_BLKSIZ];
> +
> +static struct erofs_buffer_block *last_mapped_block = &blkh;
> +
> static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
> {
> return erofs_bh_flush_generic_end(bh);
> @@ -62,6 +74,12 @@ struct erofs_bhops erofs_buf_write_bhops = {
> /* return buffer_head of erofs super block (with size 0) */
> struct erofs_buffer_head *erofs_buffer_init(void)
> {
> + for (int i = 0; i < 2; i++) {
> + for (int j = 0; j < EROFS_BLKSIZ; j++) {
> + init_list_head(&non_full_buffer_blocks[i][j].list);
> + }
> + }
> +
> struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
>
> if (IS_ERR(bh))
> @@ -131,7 +149,8 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
> {
> struct erofs_buffer_block *cur, *bb;
> struct erofs_buffer_head *bh;
> - unsigned int alignsize, used0, usedmax;
> + unsigned int alignsize, used0, usedmax, total_size;
> + struct erofs_buffer_block_record * reusing = NULL;
>
> int ret = get_alignsize(type, &type);
>
> @@ -143,7 +162,38 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
> usedmax = 0;
> bb = NULL;
>
> - list_for_each_entry(cur, &blkh.list, list) {
> + erofs_dbg("balloc size=%lu alignsize=%u used0=%u", size, alignsize, used0);
> + if (used0 == 0 || alignsize == EROFS_BLKSIZ) {
> + goto alloc;
> + }
> + total_size = size + required_ext + inline_ext;
> + if (total_size < EROFS_BLKSIZ) {
> + // Try find a most fit block.
> + DBG_BUGON(type < 0 || type > 1);
> + struct erofs_buffer_block_record *non_fulls = non_full_buffer_blocks[type];
> + for (struct erofs_buffer_block_record *r = non_fulls + round_up(total_size, alignsize);
> + r < non_fulls + EROFS_BLKSIZ; r++) {
> + if (!list_empty(&r->list)) {
> + struct erofs_buffer_block_record *reuse_candidate =
> + list_first_entry(&r->list, struct erofs_buffer_block_record, list);
> + ret = __erofs_battach(reuse_candidate->bb, NULL, size, alignsize,
> + required_ext + inline_ext, true);
> + if (ret >= 0) {
> + reusing = reuse_candidate;
> + bb = reuse_candidate->bb;
> + usedmax = (ret + required_ext) % EROFS_BLKSIZ + inline_ext;
> + }
> + break;
> + }
> + }
> + }
> +
> + /* Try reuse last ones, which are not mapped and can be extended */
> + cur = last_mapped_block;
> + if (cur == &blkh) {
> + cur = list_next_entry(cur, list);
> + }
> + for (; cur != &blkh; cur = list_next_entry(cur, list)) {
> unsigned int used_before, used;
>
> used_before = cur->buffers.off % EROFS_BLKSIZ;
> @@ -175,22 +225,32 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
> continue;
>
> if (usedmax < used) {
> + reusing = NULL;
> bb = cur;
> usedmax = used;
> }
> }
>
> if (bb) {
> + erofs_dbg("reusing buffer. usedmax=%u", usedmax);
> bh = malloc(sizeof(struct erofs_buffer_head));
> if (!bh)
> return ERR_PTR(-ENOMEM);
> + if (reusing) {
> + list_del(&reusing->list);
> + free(reusing);
> + }
> goto found;
> }
>
> +alloc:
> /* allocate a new buffer block */
> - if (used0 > EROFS_BLKSIZ)
> + if (used0 > EROFS_BLKSIZ) {
> + erofs_dbg("used0 > EROFS_BLKSIZ");
> return ERR_PTR(-ENOSPC);
> + }
>
> + erofs_dbg("new buffer block");
> bb = malloc(sizeof(struct erofs_buffer_block));
> if (!bb)
> return ERR_PTR(-ENOMEM);
> @@ -211,6 +271,16 @@ found:
> required_ext + inline_ext, false);
> if (ret < 0)
> return ERR_PTR(ret);
> + if (ret != 0) {
> + /* This buffer is not full yet, we may reuse it */
> + reusing = malloc(sizeof(struct erofs_buffer_block_record));
> + if (!reusing) {
> + return ERR_PTR(-ENOMEM);
> + }
> + reusing->bb = bb;
> + list_add_tail(&reusing->list,
> + &non_full_buffer_blocks[type][EROFS_BLKSIZ - ret - inline_ext].list);
> + }
> return bh;
> }
>
> @@ -247,8 +317,10 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
> {
> erofs_blk_t blkaddr;
>
> - if (bb->blkaddr == NULL_ADDR)
> + if (bb->blkaddr == NULL_ADDR) {
> bb->blkaddr = tail_blkaddr;
> + last_mapped_block = bb;
> + }
>
> blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
> if (blkaddr > tail_blkaddr)
> @@ -259,15 +331,16 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
>
> erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb, bool end)
> {
> - struct erofs_buffer_block *t, *nt;
> -
> - if (!bb || bb->blkaddr == NULL_ADDR) {
> - list_for_each_entry_safe(t, nt, &blkh.list, list) {
> - if (!end && (t == bb || nt == &blkh))
> - break;
> - (void)__erofs_mapbh(t);
> - if (end && t == bb)
> - break;
> + DBG_BUGON(!end);
> + struct erofs_buffer_block *t = last_mapped_block;
> + while (1) {
> + t = list_next_entry(t, list);
> + if (t == &blkh) {
> + break;
> + }
> + (void)__erofs_mapbh(t);
> + if (t == bb) {
> + break;
> }
> }
> return tail_blkaddr;
> @@ -334,6 +407,9 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke)
> if (!list_empty(&bb->buffers.list))
> return;
>
> + if (bb == last_mapped_block) {
> + last_mapped_block = list_prev_entry(bb, list);
> + }
> list_del(&bb->list);
> free(bb);
>
> --
> 2.30.0
>
More information about the Linux-erofs
mailing list