[PATCH 4/4] erofs: tidy up `struct z_erofs_bvec`
Gao Xiang
hsiangkao at linux.alibaba.com
Wed Jul 3 22:00:51 AEST 2024
After revisiting the design, I believe `struct z_erofs_bvec` should
be page-based instead of folio-based due to the reasons below:
- The minimized memory mapping block is a page;
- Under the certain circumstances, only temporary pages needs to be
used instead of folios since refcount, mapcount for such pages are
unnecessary;
- Decompressors handle all types of pages including temporary pages,
not only folios.
When handling `struct z_erofs_bvec`, all folio-related information
is now accessed using the page_folio() helper.
The final goal of this round adaptation is to eliminate direct
accesses to `struct page` in the EROFS codebase, except for some
exceptions like `z_erofs_is_shortlived_page()` and
`z_erofs_page_is_invalidated()`, which require a new helper to
determine the memdesc type of an arbitrary page.
Actually large folios of compressed files seems to work now, yet I tend
to conduct more tests before officially enabling this for all scenarios.
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
fs/erofs/zdata.c | 101 +++++++++++++++++++++++------------------------
1 file changed, 49 insertions(+), 52 deletions(-)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index cb017ca2c7e3..988f67cc0145 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -19,10 +19,7 @@
typedef void *z_erofs_next_pcluster_t;
struct z_erofs_bvec {
- union {
- struct page *page;
- struct folio *folio;
- };
+ struct page *page;
int offset;
unsigned int end;
};
@@ -617,32 +614,31 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
-/* called by erofs_shrinker to get rid of all cached compressed bvecs */
+/* (erofs_shrinker) disconnect cached encoded data with pclusters */
int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp)
{
struct z_erofs_pcluster *const pcl =
container_of(grp, struct z_erofs_pcluster, obj);
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+ struct folio *folio;
int i;
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- /* There is no actice user since the pcluster is now freezed */
+ /* Each cached folio contains one page unless bs > ps is supported */
for (i = 0; i < pclusterpages; ++i) {
- struct folio *folio = pcl->compressed_bvecs[i].folio;
+ if (pcl->compressed_bvecs[i].page) {
+ folio = page_folio(pcl->compressed_bvecs[i].page);
+ /* Avoid reclaiming or migrating this folio */
+ if (!folio_trylock(folio))
+ return -EBUSY;
- if (!folio)
- continue;
-
- /* Avoid reclaiming or migrating this folio */
- if (!folio_trylock(folio))
- return -EBUSY;
-
- if (!erofs_folio_is_managed(sbi, folio))
- continue;
- pcl->compressed_bvecs[i].folio = NULL;
- folio_detach_private(folio);
- folio_unlock(folio);
+ if (!erofs_folio_is_managed(sbi, folio))
+ continue;
+ pcl->compressed_bvecs[i].page = NULL;
+ folio_detach_private(folio);
+ folio_unlock(folio);
+ }
}
return 0;
}
@@ -650,9 +646,9 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
{
struct z_erofs_pcluster *pcl = folio_get_private(folio);
- unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+ struct z_erofs_bvec *bvec = pcl->compressed_bvecs;
+ struct z_erofs_bvec *end = bvec + z_erofs_pclusterpages(pcl);
bool ret;
- int i;
if (!folio_test_private(folio))
return true;
@@ -661,9 +657,9 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
spin_lock(&pcl->obj.lockref.lock);
if (pcl->obj.lockref.count <= 0) {
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- for (i = 0; i < pclusterpages; ++i) {
- if (pcl->compressed_bvecs[i].folio == folio) {
- pcl->compressed_bvecs[i].folio = NULL;
+ for (; bvec < end; ++bvec) {
+ if (bvec->page && page_folio(bvec->page) == folio) {
+ bvec->page = NULL;
folio_detach_private(folio);
ret = true;
break;
@@ -1062,7 +1058,7 @@ static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi,
static bool z_erofs_page_is_invalidated(struct page *page)
{
- return !page->mapping && !z_erofs_is_shortlived_page(page);
+ return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
}
struct z_erofs_decompress_backend {
@@ -1415,7 +1411,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
bool tocache = false;
struct z_erofs_bvec zbv;
struct address_space *mapping;
- struct page *page;
+ struct folio *folio;
int bs = i_blocksize(f->inode);
/* Except for inplace folios, the entire folio can be used for I/Os */
@@ -1425,23 +1421,25 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
spin_lock(&pcl->obj.lockref.lock);
zbv = pcl->compressed_bvecs[nr];
spin_unlock(&pcl->obj.lockref.lock);
- if (!zbv.folio)
+ if (!zbv.page)
goto out_allocfolio;
- bvec->bv_page = &zbv.folio->page;
+ bvec->bv_page = zbv.page;
DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
+
+ folio = page_folio(zbv.page);
/*
* Handle preallocated cached folios. We tried to allocate such folios
* without triggering direct reclaim. If allocation failed, inplace
* file-backed folios will be used instead.
*/
- if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
- zbv.folio->private = 0;
+ if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
+ folio->private = 0;
tocache = true;
goto out_tocache;
}
- mapping = READ_ONCE(zbv.folio->mapping);
+ mapping = READ_ONCE(folio->mapping);
/*
* File-backed folios for inplace I/Os are all locked steady,
* therefore it is impossible for `mapping` to be NULL.
@@ -1453,21 +1451,21 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
return;
}
- folio_lock(zbv.folio);
- if (zbv.folio->mapping == mc) {
+ folio_lock(folio);
+ if (folio->mapping == mc) {
/*
* The cached folio is still in managed cache but without
* a valid `->private` pcluster hint. Let's reconnect them.
*/
- if (!folio_test_private(zbv.folio)) {
- folio_attach_private(zbv.folio, pcl);
+ if (!folio_test_private(folio)) {
+ folio_attach_private(folio, pcl);
/* compressed_bvecs[] already takes a ref before */
- folio_put(zbv.folio);
+ folio_put(folio);
}
/* no need to submit if it is already up-to-date */
- if (folio_test_uptodate(zbv.folio)) {
- folio_unlock(zbv.folio);
+ if (folio_test_uptodate(folio)) {
+ folio_unlock(folio);
bvec->bv_page = NULL;
}
return;
@@ -1477,32 +1475,31 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
* It has been truncated, so it's unsafe to reuse this one. Let's
* allocate a new page for compressed data.
*/
- DBG_BUGON(zbv.folio->mapping);
+ DBG_BUGON(folio->mapping);
tocache = true;
- folio_unlock(zbv.folio);
- folio_put(zbv.folio);
+ folio_unlock(folio);
+ folio_put(folio);
out_allocfolio:
- page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+ zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
spin_lock(&pcl->obj.lockref.lock);
- if (pcl->compressed_bvecs[nr].folio) {
- erofs_pagepool_add(&f->pagepool, page);
+ if (pcl->compressed_bvecs[nr].page) {
+ erofs_pagepool_add(&f->pagepool, zbv.page);
spin_unlock(&pcl->obj.lockref.lock);
cond_resched();
goto repeat;
}
- pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page);
+ bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
+ folio = page_folio(zbv.page);
+ /* first mark it as a temporary shortlived folio (now 1 ref) */
+ folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
spin_unlock(&pcl->obj.lockref.lock);
- bvec->bv_page = page;
out_tocache:
if (!tocache || bs != PAGE_SIZE ||
- filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) {
- /* turn into a temporary shortlived folio (1 ref) */
- zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+ filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
return;
- }
- folio_attach_private(zbv.folio, pcl);
+ folio_attach_private(folio, pcl);
/* drop a refcount added by allocpage (then 2 refs in total here) */
- folio_put(zbv.folio);
+ folio_put(folio);
}
static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
--
2.43.5
More information about the Linux-erofs
mailing list