[PATCH] erofs: alloc readahead page with GFP_NOWAIT flag during decompression

Sat Jan 20 03:35:37 AEDT 2024

During decompression, it is better to allocate readahead pages with the
GFP_NOWAIT flag, which can help reduce the time spent on page allocation in
low memory scenarios.

From the result of multi-app launch benchmarks on ARM64 Android devices
running the 5.15 kernel with an 8-core CPU and 8GB of memory, there was an
average reduction of 21% in page allocation time.

Also, I need to revert commit ef4b4b46c6aa ("erofs: remove the member
readahead from struct z_erofs_decompress_frontend") to use the readahead
member in struct z_erofs_decompress_frontend.

Signed-off-by: Chunhai Guo <guochunhai at vivo.com>
---
 fs/erofs/compress.h             |  1 +
 fs/erofs/decompressor.c         |  5 +++--
 fs/erofs/decompressor_deflate.c | 15 ++++++++++++---
 fs/erofs/decompressor_lzma.c    | 16 +++++++++++++---
 fs/erofs/zdata.c                | 32 +++++++++++++++++++++++++-------
 5 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 279933e007d2..95157354ad71 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -18,6 +18,7 @@ struct z_erofs_decompress_req {
 	/* indicate the algorithm will be used for decompression */
 	unsigned int alg;
 	bool inplace_io, partial_decoding, fillgaps;
+	gfp_t gfp;
 };
 
 struct z_erofs_decompressor {
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1d65b9f60a39..ef2b08ec9830 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
 			victim = availables[--top];
 			get_page(victim);
 		} else {
-			victim = erofs_allocpage(pagepool,
-						 GFP_KERNEL | __GFP_NOFAIL);
+			victim = erofs_allocpage(pagepool, rq->gfp);
+			if (!victim)
+				return -ENOMEM;
 			set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
 		}
 		rq->out[i] = victim;
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index 4a64a9c91dd3..93138ae17250 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -159,7 +159,11 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
 			outsz -= strm->z.avail_out;
 			if (!rq->out[no]) {
 				rq->out[no] = erofs_allocpage(pagepool,
-						GFP_KERNEL | __GFP_NOFAIL);
+						rq->gfp);
+				if (!rq->out[no]) {
+					err = -ENOMEM;
+					break;
+				}
 				set_page_private(rq->out[no],
 						 Z_EROFS_SHORTLIVED_PAGE);
 			}
@@ -211,12 +215,17 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
 
 			DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
 							rq->in[j]));
-			tmppage = erofs_allocpage(pagepool,
-						  GFP_KERNEL | __GFP_NOFAIL);
+			tmppage = erofs_allocpage(pagepool, rq->gfp);
+			if (!tmppage) {
+				err = -ENOMEM;
+				break;
+			}
 			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
 			copy_highpage(tmppage, rq->in[j]);
 			rq->in[j] = tmppage;
 		}
+		if (err)
+			break;
 
 		zerr = zlib_inflate(&strm->z, Z_SYNC_FLUSH);
 		if (zerr != Z_OK || !(outsz + strm->z.avail_out)) {
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 2dd14f99c1dc..a854f60033df 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -216,7 +216,11 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
 			outlen -= strm->buf.out_size;
 			if (!rq->out[no] && rq->fillgaps) {	/* deduped */
 				rq->out[no] = erofs_allocpage(pagepool,
-						GFP_KERNEL | __GFP_NOFAIL);
+						rq->gfp);
+				if (!rq->out[no]) {
+					err = -ENOMEM;
+					break;
+				}
 				set_page_private(rq->out[no],
 						 Z_EROFS_SHORTLIVED_PAGE);
 			}
@@ -258,12 +262,18 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
 
 			DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
 							rq->in[j]));
-			tmppage = erofs_allocpage(pagepool,
-						  GFP_KERNEL | __GFP_NOFAIL);
+			tmppage = erofs_allocpage(pagepool, rq->gfp);
+			if (!tmppage) {
+				err = -ENOMEM;
+				break;
+			}
 			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
 			copy_highpage(tmppage, rq->in[j]);
 			rq->in[j] = tmppage;
 		}
+		if (err)
+			break;
+
 		xz_err = xz_dec_microlzma_run(strm->state, &strm->buf);
 		DBG_BUGON(strm->buf.out_pos > strm->buf.out_size);
 		DBG_BUGON(strm->buf.in_pos > strm->buf.in_size);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 692c0c39be63..4ab4b16b435a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -82,6 +82,9 @@ struct z_erofs_pcluster {
 	/* L: indicate several pageofs_outs or not */
 	bool multibases;
 
+	/* L: whether read with readahead flag or not  */
+	bool readahead;
+
 	/* A: compressed bvecs (can be cached or inplaced pages) */
 	struct z_erofs_bvec compressed_bvecs[];
 };
@@ -525,6 +528,8 @@ struct z_erofs_decompress_frontend {
 
 	/* a pointer used to pick up inplace I/O pages */
 	unsigned int icur;
+
+	bool readahead;
 };
 
 #define DECOMPRESS_FRONTEND_INIT(__i) { \
@@ -797,6 +802,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
 	pcl->algorithmformat = map->m_algorithmformat;
 	pcl->length = 0;
 	pcl->partial = true;
+	pcl->readahead = true;          /* readahead is true by default */
 
 	/* new pclusters should be claimed as type 1, primary and followed */
 	pcl->next = fe->owned_head;
@@ -872,6 +878,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
 		return ret;
 	}
 
+	if (!fe->readahead)
+		fe->pcl->readahead = false;
+
 	z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
 				Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
 	if (!z_erofs_is_inline_pcluster(fe->pcl)) {
@@ -1267,7 +1276,13 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 	err2 = z_erofs_parse_in_bvecs(be, &overlapped);
 	if (err2)
 		err = err2;
-	if (!err)
+	if (!err) {
+		gfp_t gfp;
+
+		if (pcl->readahead)
+			gfp = GFP_NOWAIT | __GFP_NOWARN;
+		else
+			gfp = GFP_KERNEL | __GFP_NOFAIL;
 		err = decomp->decompress(&(struct z_erofs_decompress_req) {
 					.sb = be->sb,
 					.in = be->compressed_pages,
@@ -1280,7 +1295,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 					.inplace_io = overlapped,
 					.partial_decoding = pcl->partial,
 					.fillgaps = pcl->multibases,
+					.gfp = gfp,
 				 }, be->pagepool);
+	}
 
 	/* must handle all compressed pages before actual file pages */
 	if (z_erofs_is_inline_pcluster(pcl)) {
@@ -1599,7 +1616,7 @@ static void z_erofs_submissionqueue_endio(struct bio *bio)
 
 static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 				 struct z_erofs_decompressqueue *fgq,
-				 bool *force_fg, bool readahead)
+				 bool *force_fg)
 {
 	struct super_block *sb = f->inode->i_sb;
 	struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
@@ -1677,7 +1694,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 				bio->bi_end_io = z_erofs_submissionqueue_endio;
 				bio->bi_iter.bi_sector = cur >> 9;
 				bio->bi_private = q[JQ_SUBMIT];
-				if (readahead)
+				if (f->readahead)
 					bio->bi_opf |= REQ_RAHEAD;
 				++nr_bios;
 				last_bdev = mdev.m_bdev;
@@ -1717,13 +1734,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 }
 
 static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
-			     bool force_fg, bool ra)
+			     bool force_fg)
 {
 	struct z_erofs_decompressqueue io[NR_JOBQUEUES];
 
 	if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
 		return;
-	z_erofs_submit_queue(f, io, &force_fg, ra);
+	z_erofs_submit_queue(f, io, &force_fg);
 
 	/* handle bypass queue (no i/o pclusters) immediately */
 	z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool);
@@ -1811,7 +1828,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
 	z_erofs_pcluster_end(&f);
 
 	/* if some compressed cluster ready, need submit them anyway */
-	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false);
+	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0));
 
 	if (err && err != -EINTR)
 		erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu",
@@ -1831,6 +1848,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 	unsigned int nr_folios;
 	int err;
 
+	f.readahead = true;
 	f.headoffset = readahead_pos(rac);
 
 	z_erofs_pcluster_readmore(&f, rac, true);
@@ -1855,7 +1873,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 	z_erofs_pcluster_readmore(&f, rac, false);
 	z_erofs_pcluster_end(&f);
 
-	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios), true);
+	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios));
 	erofs_put_metabuf(&f.map.buf);
 	erofs_release_pages(&f.pagepool);
 }
-- 
2.25.1