[PATCH v2] erofs: relaxed temporary buffers allocation on readahead
Chunhai Guo
guochunhai at vivo.com
Mon Jan 22 18:42:01 AEDT 2024
On 2024/1/22 12:37, Gao Xiang wrote:
> [你通常不会收到来自 hsiangkao at linux.alibaba.com 的电子邮件。请访问 https://aka.ms/LearnAboutSenderIdentification,以了解这一点为什么很重要]
>
> On 2024/1/22 11:49, Chunhai Guo wrote:
>> On 2024/1/22 10:07, Gao Xiang wrote:
>>> [你通常不会收到来自 hsiangkao at linux.alibaba.com 的电子邮件。请访问 https://aka.ms/LearnAboutSenderIdentification,以了解这一点为什么很重要]
>>>
>>> On 2024/1/20 22:55, Chunhai Guo wrote:
>>>> Even with inplace decompression, sometimes extra temporary buffers are
>>>> still needed for decompression. In low-memory scenarios, it would be
>>>> better to try to allocate with GFP_NOWAIT on readahead first. That can
>>>> help reduce the time spent on page allocation under memory pressure.
>>>>
>>>> There is an average reduction of 21% in page allocation time under
>>> It would be better to add a table to show the absolute numbers too
>>> (like what you did in the global pool commit.) If it's possible, there
>>> is no need to send an updated version for this, just reply with the updated
>>> commit message and I will update the commit manually.
>>
>> The table below shows detailed numbers. The reduction I mentioned before
>> was not accurate enough. Please help correct the improvement from 21% to
>> 20.21%.
>>
>>
>> +--------------+----------------+---------------+---------+
>> |              | w/o GFP_NOWAIT | w/ GFP_NOWAIT | diff    |
>> +--------------+----------------+---------------+---------+
>> | Average (ms) | 3364           | 2684          | -20.21% |
>> +--------------+----------------+---------------+---------+
> Was it tested without the 16k sliding window change?
> https://lore.kernel.org/linux-erofs/69711d55-f7a2-420b-9ba8-fa2921f66a4c@vivo.com
The result was obtained with the 64k sliding window change.
> Could you benchmark these two optimizations together to
> show the most optimized case without a global pool?
> With a new table if possible? I will add this to
> the commit message too.
OK. I will reply to this email when the benchmark is finished.
> Thanks,
> Gao Xiang
>
>> Thanks,
>>
>>
>>> Otherwise it looks good to me,
>>>
>>> Thanks,
>>> Gao Xiang
>>>
>>>> multi-app launch benchmark workload [1] on ARM64 Android devices running
>>>> the 5.15 kernel with an 8-core CPU and 8GB of memory.
>>>>
>>>> [1] https://lore.kernel.org/r/20240109074143.4138783-1-guochunhai@vivo.com
>>>>
>>>> Suggested-by: Gao Xiang <xiang at kernel.org>
>>>> Signed-off-by: Chunhai Guo <guochunhai at vivo.com>
>>>> ---
>>>> fs/erofs/compress.h | 5 ++---
>>>> fs/erofs/decompressor.c | 5 +++--
>>>> fs/erofs/decompressor_deflate.c | 19 +++++++++++++------
>>>> fs/erofs/decompressor_lzma.c | 17 ++++++++++++-----
>>>> fs/erofs/zdata.c | 16 ++++++++++++----
>>>> 5 files changed, 42 insertions(+), 20 deletions(-)
>>>>
>>>> diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
>>>> index 279933e007d2..7cc5841577b2 100644
>>>> --- a/fs/erofs/compress.h
>>>> +++ b/fs/erofs/compress.h
>>>> @@ -11,13 +11,12 @@
>>>> struct z_erofs_decompress_req {
>>>> struct super_block *sb;
>>>> struct page **in, **out;
>>>> -
>>>> unsigned short pageofs_in, pageofs_out;
>>>> unsigned int inputsize, outputsize;
>>>>
>>>> - /* indicate the algorithm will be used for decompression */
>>>> - unsigned int alg;
>>>> + unsigned int alg; /* the algorithm for decompression */
>>>> bool inplace_io, partial_decoding, fillgaps;
>>>> + gfp_t gfp; /* allocation flags for extra temporary buffers */
>>>> };
>>>>
>>>> struct z_erofs_decompressor {
>>>> diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
>>>> index 1d65b9f60a39..ef2b08ec9830 100644
>>>> --- a/fs/erofs/decompressor.c
>>>> +++ b/fs/erofs/decompressor.c
>>>> @@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
>>>> victim = availables[--top];
>>>> get_page(victim);
>>>> } else {
>>>> - victim = erofs_allocpage(pagepool,
>>>> - GFP_KERNEL | __GFP_NOFAIL);
>>>> + victim = erofs_allocpage(pagepool, rq->gfp);
>>>> + if (!victim)
>>>> + return -ENOMEM;
>>>> set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
>>>> }
>>>> rq->out[i] = victim;
>>>> diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
>>>> index 4a64a9c91dd3..b98872058abe 100644
>>>> --- a/fs/erofs/decompressor_deflate.c
>>>> +++ b/fs/erofs/decompressor_deflate.c
>>>> @@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb,
>>>> }
>>>>
>>>> int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
>>>> - struct page **pagepool)
>>>> + struct page **pgpl)
>>>> {
>>>> const unsigned int nrpages_out =
>>>> PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
>>>> @@ -158,8 +158,12 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
>>>> strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
>>>> outsz -= strm->z.avail_out;
>>>> if (!rq->out[no]) {
>>>> - rq->out[no] = erofs_allocpage(pagepool,
>>>> - GFP_KERNEL | __GFP_NOFAIL);
>>>> + rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
>>>> + if (!rq->out[no]) {
>>>> + kout = NULL;
>>>> + err = -ENOMEM;
>>>> + break;
>>>> + }
>>>> set_page_private(rq->out[no],
>>>> Z_EROFS_SHORTLIVED_PAGE);
>>>> }
>>>> @@ -211,8 +215,11 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
>>>>
>>>> DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
>>>> rq->in[j]));
>>>> - tmppage = erofs_allocpage(pagepool,
>>>> - GFP_KERNEL | __GFP_NOFAIL);
>>>> + tmppage = erofs_allocpage(pgpl, rq->gfp);
>>>> + if (!tmppage) {
>>>> + err = -ENOMEM;
>>>> + goto failed;
>>>> + }
>>>> set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
>>>> copy_highpage(tmppage, rq->in[j]);
>>>> rq->in[j] = tmppage;
>>>> @@ -230,7 +237,7 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
>>>> break;
>>>> }
>>>> }
>>>> -
>>>> +failed:
>>>> if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
>>>> err = -EIO;
>>>> if (kout)
>>>> diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
>>>> index 2dd14f99c1dc..6ca357d83cfa 100644
>>>> --- a/fs/erofs/decompressor_lzma.c
>>>> +++ b/fs/erofs/decompressor_lzma.c
>>>> @@ -148,7 +148,7 @@ int z_erofs_load_lzma_config(struct super_block *sb,
>>>> }
>>>>
>>>> int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
>>>> - struct page **pagepool)
>>>> + struct page **pgpl)
>>>> {
>>>> const unsigned int nrpages_out =
>>>> PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
>>>> @@ -215,8 +215,11 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
>>>> PAGE_SIZE - pageofs);
>>>> outlen -= strm->buf.out_size;
>>>> if (!rq->out[no] && rq->fillgaps) { /* deduped */
>>>> - rq->out[no] = erofs_allocpage(pagepool,
>>>> - GFP_KERNEL | __GFP_NOFAIL);
>>>> + rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
>>>> + if (!rq->out[no]) {
>>>> + err = -ENOMEM;
>>>> + break;
>>>> + }
>>>> set_page_private(rq->out[no],
>>>> Z_EROFS_SHORTLIVED_PAGE);
>>>> }
>>>> @@ -258,8 +261,11 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
>>>>
>>>> DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
>>>> rq->in[j]));
>>>> - tmppage = erofs_allocpage(pagepool,
>>>> - GFP_KERNEL | __GFP_NOFAIL);
>>>> + tmppage = erofs_allocpage(pgpl, rq->gfp);
>>>> + if (!tmppage) {
>>>> + err = -ENOMEM;
>>>> + goto failed;
>>>> + }
>>>> set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
>>>> copy_highpage(tmppage, rq->in[j]);
>>>> rq->in[j] = tmppage;
>>>> @@ -277,6 +283,7 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
>>>> break;
>>>> }
>>>> }
>>>> +failed:
>>>> if (no < nrpages_out && strm->buf.out)
>>>> kunmap(rq->out[no]);
>>>> if (ni < nrpages_in)
>>>> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
>>>> index 692c0c39be63..a293de2a60ed 100644
>>>> --- a/fs/erofs/zdata.c
>>>> +++ b/fs/erofs/zdata.c
>>>> @@ -82,6 +82,9 @@ struct z_erofs_pcluster {
>>>> /* L: indicate several pageofs_outs or not */
>>>> bool multibases;
>>>>
>>>> + /* L: whether extra buffer allocations are best-effort */
>>>> + bool besteffort;
>>>> +
>>>> /* A: compressed bvecs (can be cached or inplaced pages) */
>>>> struct z_erofs_bvec compressed_bvecs[];
>>>> };
>>>> @@ -964,7 +967,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
>>>> }
>>>>
>>>> static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
>>>> - struct page *page)
>>>> + struct page *page, bool ra)
>>>> {
>>>> struct inode *const inode = fe->inode;
>>>> struct erofs_map_blocks *const map = &fe->map;
>>>> @@ -1014,6 +1017,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
>>>> err = z_erofs_pcluster_begin(fe);
>>>> if (err)
>>>> goto out;
>>>> + fe->pcl->besteffort |= !ra;
>>>> }
>>>>
>>>> /*
>>>> @@ -1280,7 +1284,11 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
>>>> .inplace_io = overlapped,
>>>> .partial_decoding = pcl->partial,
>>>> .fillgaps = pcl->multibases,
>>>> + .gfp = pcl->besteffort ?
>>>> + GFP_KERNEL | __GFP_NOFAIL :
>>>> + GFP_NOWAIT | __GFP_NORETRY
>>>> }, be->pagepool);
>>>> + pcl->besteffort = false;
>>>>
>>>> /* must handle all compressed pages before actual file pages */
>>>> if (z_erofs_is_inline_pcluster(pcl)) {
>>>> @@ -1785,7 +1793,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
>>>> if (PageUptodate(page))
>>>> unlock_page(page);
>>>> else
>>>> - (void)z_erofs_do_read_page(f, page);
>>>> + (void)z_erofs_do_read_page(f, page, !!rac);
>>>> put_page(page);
>>>> }
>>>>
>>>> @@ -1806,7 +1814,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
>>>> f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
>>>>
>>>> z_erofs_pcluster_readmore(&f, NULL, true);
>>>> - err = z_erofs_do_read_page(&f, &folio->page);
>>>> + err = z_erofs_do_read_page(&f, &folio->page, false);
>>>> z_erofs_pcluster_readmore(&f, NULL, false);
>>>> z_erofs_pcluster_end(&f);
>>>>
>>>> @@ -1847,7 +1855,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
>>>> folio = head;
>>>> head = folio_get_private(folio);
>>>>
>>>> - err = z_erofs_do_read_page(&f, &folio->page);
>>>> + err = z_erofs_do_read_page(&f, &folio->page, true);
>>>> if (err && err != -EINTR)
>>>> erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
>>>> folio->index, EROFS_I(inode)->nid);
>>
More information about the Linux-erofs
mailing list