[RFC PATCH v2 chao/erofs-dev 3/5] staging: erofs: fix compressed pages submission flow
Gao Xiang
gaoxiang25 at huawei.com
Tue Nov 20 00:31:04 AEDT 2018
This patch fully closes the race between page reclaim and compressed
page submission, which could cause a reference leak and double free
with very low probability.
Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
drivers/staging/erofs/unzip_vle.c | 344 +++++++++++++++++++++++++-------------
drivers/staging/erofs/unzip_vle.h | 15 ++
2 files changed, 247 insertions(+), 112 deletions(-)
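Note for reviewers (not part of the patch): the race is closed by letting each
compressed_pages[] slot change hands through a single cmpxchg() on a tagged
page pointer, so either the submission path or the reclaim path wins a given
slot, never both. The standalone userspace model below uses C11 atomics and
made-up names (slot_claim, JUSTFOUND) in place of the kernel's cmpxchg() and
tagptr helpers; it is only a sketch of the hand-off, not erofs code:

/*
 * Minimal userspace model of the slot hand-off used by this patch:
 * a slot is claimed by a single compare-and-swap on a tagged pointer,
 * so a "submitter" and a "reclaimer" can never both own the page.
 * All names here are illustrative only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define JUSTFOUND	0x1UL		/* tag kept in the low pointer bit */
#define TAG_MASK	0x1UL

static inline void *tag_fold(void *p, unsigned long tag)
{
	return (void *)((uintptr_t)p | (tag & TAG_MASK));
}

static inline void *tag_ptr(void *t)
{
	return (void *)((uintptr_t)t & ~(uintptr_t)TAG_MASK);
}

/* try to publish @page into @slot; returns true if we won the race */
static bool slot_claim(_Atomic(void *) *slot, void *page, unsigned long tag)
{
	void *expected = NULL;

	return atomic_compare_exchange_strong(slot, &expected,
					      tag_fold(page, tag));
}

int main(void)
{
	static int page;		/* stands in for a struct page */
	_Atomic(void *) slot = NULL;	/* one compressed_pages[] entry */

	if (slot_claim(&slot, &page, JUSTFOUND))
		printf("first claim won, page=%p\n",
		       tag_ptr(atomic_load(&slot)));

	/* the loser must drop its own reference instead of touching the slot */
	if (!slot_claim(&slot, &page, 0))
		printf("second claim lost as expected\n");
	return 0;
}

The loser of the cmpxchg() only drops its own reference (put_page() or
returning the freshly allocated page to the pagepool) and never touches the
slot again, which matches what preload_compressed_pages() and
pickup_page_for_submission() below do.
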
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index 09a88fbba11c..a0f492b3e9e1 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -15,6 +15,15 @@
#include <trace/events/erofs.h>
+/* how to allocate cached pages for a workgroup */
+enum z_erofs_cache_alloctype {
+ DONTALLOC, /* don't allocate any cached pages */
+ TRYALLOC, /* minimal effort (w/o page reclaiming) */
+ DELAYEDALLOC, /* delayed allocation (at the time of submitting io) */
+};
+
+#define PAGE_UNALLOCATED ((void *)0x5F0EF00D)
+
static struct workqueue_struct *z_erofs_workqueue __read_mostly;
static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;
@@ -125,38 +134,68 @@ struct z_erofs_vle_work_builder {
{ .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED }
#ifdef EROFS_FS_HAS_MANAGED_CACHE
-
-static bool grab_managed_cache_pages(struct address_space *mapping,
- erofs_blk_t start,
- struct page **compressed_pages,
- int clusterblks,
- bool reserve_allocation)
+static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl,
+ struct address_space *mc,
+ pgoff_t index,
+ unsigned int clusterpages,
+ enum z_erofs_cache_alloctype type,
+ struct list_head *pagepool,
+ gfp_t gfp)
{
- bool noio = true;
- unsigned int i;
+ struct page **const pages = bl->compressed_pages;
+ const unsigned int remaining = bl->compressed_deficit;
+ bool standalone = true;
+ unsigned int i, j = 0;
+
+ if (bl->role < Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
+ return;
+
+ gfp = mapping_gfp_constraint(mc, gfp) & ~__GFP_RECLAIM;
- /* TODO: optimize by introducing find_get_pages_range */
- for (i = 0; i < clusterblks; ++i) {
- struct page *page, *found;
+ index += clusterpages - remaining;
- if (READ_ONCE(compressed_pages[i]))
+ for (i = 0; i < remaining; ++i) {
+ struct page *page, *newpage = NULL;
+ z_erofs_ctptr_t t;
+
+ /* the compressed page was loaded before */
+ if (READ_ONCE(pages[i]))
continue;
- page = found = find_get_page(mapping, start + i);
- if (!found) {
- noio = false;
- if (!reserve_allocation)
+ page = find_get_page(mc, index + i);
+
+ if (page) {
+ t = z_erofs_ctptr_tag_justfound(page);
+ } else if (type == DELAYEDALLOC) {
+ t = tagptr_init(z_erofs_ctptr_t, PAGE_UNALLOCATED);
+ } else if (type == TRYALLOC) {
+ newpage = erofs_allocpage(pagepool, gfp);
+
+ if (!newpage)
continue;
- page = EROFS_UNALLOCATED_CACHED_PAGE;
+ newpage->mapping = Z_EROFS_MAPPING_PREALLOCATED;
+ t = z_erofs_ctptr_tag_justfound(newpage);
+ } else { /* DONTALLOC */
+ if (standalone)
+ j = i;
+ standalone = false;
+ continue;
}
- if (!cmpxchg(compressed_pages + i, NULL, page))
+ if (!cmpxchg(&pages[i], NULL, tagptr_cast_ptr(t)))
continue;
- if (found)
- put_page(found);
+ if (page)
+ put_page(page);
+ else if (newpage)
+ /* someone just allocated this page, drop our attempt */
+ list_add(&newpage->lru, pagepool);
}
- return noio;
+ bl->compressed_pages += j;
+ bl->compressed_deficit = remaining - j;
+
+ if (standalone)
+ bl->role = Z_EROFS_VLE_WORK_PRIMARY;
}
/* called by erofs_shrinker to get rid of all compressed_pages */
@@ -228,6 +267,17 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
}
return ret;
}
+#else
+static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl,
+ struct address_space *mc,
+ pgoff_t index,
+ unsigned int clusterpages,
+ enum z_erofs_cache_alloctype type,
+ struct list_head *pagepool,
+ gfp_t gfp)
+{
+ /* nowhere to load compressed pages from */
+}
#endif
/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
@@ -600,6 +650,26 @@ struct z_erofs_vle_frontend {
.owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \
.backmost = true, }
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+static inline bool
+should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la)
+{
+ if (fe->backmost)
+ return true;
+
+ if (EROFS_FS_ZIP_CACHE_LVL >= 2)
+ return la < fe->headoffset;
+
+ return false;
+}
+#else
+static inline bool
+should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la)
+{
+ return false;
+}
+#endif
+
static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
struct page *page,
struct list_head *page_pool)
@@ -614,12 +684,7 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
bool tight = builder_is_followed(builder);
struct z_erofs_vle_work *work = builder->work;
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
- struct address_space *const mc = MNGD_MAPPING(sbi);
- struct z_erofs_vle_workgroup *grp;
- bool noio_outoforder;
-#endif
-
+ enum z_erofs_cache_alloctype cache_strategy;
enum z_erofs_page_type page_type;
unsigned int cur, end, spiltted, index;
int err = 0;
@@ -659,20 +724,16 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
if (unlikely(err))
goto err_out;
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
- grp = fe->builder.grp;
-
- /* let's do out-of-order decompression for noio */
- noio_outoforder = grab_managed_cache_pages(mc,
- erofs_blknr(map->m_pa),
- grp->compressed_pages, erofs_blknr(map->m_plen),
- /* compressed page caching selection strategy */
- fe->backmost | (EROFS_FS_ZIP_CACHE_LVL >= 2 ?
- map->m_la < fe->headoffset : 0));
-
- if (noio_outoforder && builder_is_followed(builder))
- builder->role = Z_EROFS_VLE_WORK_PRIMARY;
-#endif
+ /* preload all compressed pages and downgrade role if necessary */
+ if (should_alloc_managed_pages(fe, map->m_la))
+ cache_strategy = DELAYEDALLOC;
+ else
+ cache_strategy = DONTALLOC;
+
+ preload_compressed_pages(builder, MNGD_MAPPING(sbi),
+ map->m_pa / PAGE_SIZE,
+ map->m_plen / PAGE_SIZE,
+ cache_strategy, page_pool, GFP_KERNEL);
tight &= builder_is_followed(builder);
work = builder->work;
@@ -1034,6 +1095,124 @@ static void z_erofs_vle_unzip_wq(struct work_struct *work)
kvfree(iosb);
}
+static struct page *
+pickup_page_for_submission(struct z_erofs_vle_workgroup *grp,
+ unsigned int nr,
+ struct list_head *pagepool,
+ struct address_space *mc,
+ gfp_t gfp)
+{
+ /* determined at compile time to avoid #ifdefs on managed cache */
+ const bool nocache = __builtin_constant_p(mc) ? !mc : false;
+ const pgoff_t index = grp->obj.index;
+ bool tocache = false;
+
+ struct address_space *mapping;
+ struct page *oldpage, *page;
+
+ z_erofs_ctptr_t t;
+ int justfound;
+
+repeat:
+ page = READ_ONCE(grp->compressed_pages[nr]);
+ oldpage = page;
+
+ if (!page)
+ goto out_allocpage;
+
+ if (!nocache) {
+ if (page == PAGE_UNALLOCATED) {
+ tocache = true;
+ goto out_allocpage;
+ }
+
+ if (z_erofs_is_preallocatedpage(page))
+ goto out_add_to_managed_cache;
+ }
+
+ /* process the target tagged pointer */
+ t = tagptr_init(z_erofs_ctptr_t, page);
+ justfound = tagptr_unfold_tags(t);
+ page = tagptr_unfold_ptr(t);
+
+ mapping = READ_ONCE(page->mapping);
+
+ if (nocache) {
+ /* if managed cache is disabled, the page cannot be `justfound' */
+ DBG_BUGON(justfound);
+
+ /* and it should be locked, not uptodate, and not truncated */
+ DBG_BUGON(!PageLocked(page));
+ DBG_BUGON(PageUptodate(page));
+ DBG_BUGON(!mapping);
+ goto out;
+ }
+
+ /*
+ * unmanaged pages are all locked,
+ * therefore it is impossible for `mapping' to be NULL.
+ */
+ if (mapping && mapping != mc)
+ /* ought to be unmanaged pages */
+ goto out;
+
+ lock_page(page);
+ /* only true if page reclaim goes wrong, should never happen */
+ DBG_BUGON(justfound && PagePrivate(page));
+
+ if (page->mapping == mc) {
+ WRITE_ONCE(grp->compressed_pages[nr], page);
+
+ if (!PagePrivate(page)) {
+ /*
+ * a page already present in compressed_pages[] would be
+ * PagePrivate already, so it must be `justfound' at this
+ * point.
+ */
+ DBG_BUGON(!justfound);
+
+ justfound = 0;
+ set_page_private(page, (unsigned long)grp);
+ SetPagePrivate(page);
+ }
+
+ /* no need to submit bio if the page is already up-to-date */
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ page = NULL;
+ }
+ goto out;
+ }
+
+ /* and for the truncation case (page is still locked) */
+ DBG_BUGON(page->mapping);
+ /* currently, truncation only happens after the workgroup is disconnected */
+ DBG_BUGON(!justfound);
+
+ tocache = true;
+ unlock_page(page);
+ put_page(page);
+out_allocpage:
+ page = __stagingpage_alloc(pagepool, gfp);
+ if (oldpage != cmpxchg(&grp->compressed_pages[nr], oldpage, page)) {
+ list_add(&page->lru, pagepool);
+ cpu_relax();
+ goto repeat;
+ }
+ if (nocache || !tocache)
+ goto out;
+out_add_to_managed_cache:
+ if (add_to_page_cache_lru(page, mc, index + nr, gfp)) {
+ page->mapping = Z_EROFS_MAPPING_STAGING;
+ goto out;
+ }
+
+ set_page_private(page, (unsigned long)grp);
+ SetPagePrivate(page);
+out: /* the only exit (for tracing and debugging) */
+ return page;
+}
+
static inline struct z_erofs_vle_unzip_io *
prepare_io_handler(struct super_block *sb,
struct z_erofs_vle_unzip_io *io,
@@ -1069,26 +1248,6 @@ prepare_io_handler(struct super_block *sb,
}
#ifdef EROFS_FS_HAS_MANAGED_CACHE
-/* true - unlocked (noio), false - locked (need submit io) */
-static inline bool recover_managed_page(struct z_erofs_vle_workgroup *grp,
- struct page *page)
-{
- wait_on_page_locked(page);
- if (PagePrivate(page) && PageUptodate(page))
- return true;
-
- lock_page(page);
- if (unlikely(!PagePrivate(page))) {
- set_page_private(page, (unsigned long)grp);
- SetPagePrivate(page);
- }
- if (unlikely(PageUptodate(page))) {
- unlock_page(page);
- return true;
- }
- return false;
-}
-
#define __FSIO_1 1
#else
#define __FSIO_1 0
@@ -1104,7 +1263,6 @@ static bool z_erofs_vle_submit_all(struct super_block *sb,
const unsigned int clusterpages = erofs_clusterpages(sbi);
const gfp_t gfp = GFP_NOFS;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
- struct address_space *const mc = MNGD_MAPPING(sbi);
struct z_erofs_vle_workgroup *lstgrp_noio = NULL, *lstgrp_io = NULL;
#endif
struct z_erofs_vle_unzip_io *ios[1 + __FSIO_1];
@@ -1143,13 +1301,9 @@ static bool z_erofs_vle_submit_all(struct super_block *sb,
do {
struct z_erofs_vle_workgroup *grp;
- struct page **compressed_pages, *oldpage, *page;
pgoff_t first_index;
- unsigned int i = 0;
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
- unsigned int noio = 0;
- bool cachemngd;
-#endif
+ struct page *page;
+ unsigned int i = 0, nr_uptodate = 0;
int err;
/* no possible 'owned_head' equals the following */
@@ -1160,51 +1314,19 @@ static bool z_erofs_vle_submit_all(struct super_block *sb,
/* close the main owned chain at first */
owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
- Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+ Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
first_index = grp->obj.index;
- compressed_pages = grp->compressed_pages;
-
force_submit |= (first_index != last_index + 1);
-repeat:
- /* fulfill all compressed pages */
- oldpage = page = READ_ONCE(compressed_pages[i]);
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
- cachemngd = false;
-
- if (page == EROFS_UNALLOCATED_CACHED_PAGE) {
- cachemngd = true;
- goto do_allocpage;
- } else if (page) {
- if (page->mapping != mc)
- BUG_ON(PageUptodate(page));
- else if (recover_managed_page(grp, page)) {
- /* page is uptodate, skip io submission */
- force_submit = true;
- ++noio;
- goto skippage;
- }
- } else {
-do_allocpage:
-#else
- if (page)
- BUG_ON(PageUptodate(page));
- else {
-#endif
- page = __stagingpage_alloc(pagepool, gfp);
- if (oldpage != cmpxchg(compressed_pages + i,
- oldpage, page)) {
- list_add(&page->lru, pagepool);
- goto repeat;
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
- } else if (cachemngd && !add_to_page_cache_lru(page,
- mc, first_index + i, gfp)) {
- set_page_private(page, (unsigned long)grp);
- SetPagePrivate(page);
-#endif
- }
+ /* fulfill all compressed pages */
+repeat:
+ page = pickup_page_for_submission(grp, i, pagepool,
+ MNGD_MAPPING(sbi), gfp);
+ if (!page) {
+ force_submit = true;
+ ++nr_uptodate;
+ goto skippage;
}
if (bio && force_submit) {
@@ -1227,14 +1349,12 @@ static bool z_erofs_vle_submit_all(struct super_block *sb,
force_submit = false;
last_index = first_index + i;
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
skippage:
-#endif
if (++i < clusterpages)
goto repeat;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
- if (noio < clusterpages) {
+ if (nr_uptodate < clusterpages) {
lstgrp_io = grp;
} else {
z_erofs_vle_owned_workgrp_t iogrp_next =
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
index 3316bc36965d..6f4c7440aeb1 100644
--- a/drivers/staging/erofs/unzip_vle.h
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -36,6 +36,15 @@ static inline bool z_erofs_gather_if_stagingpage(struct list_head *page_pool,
return false;
}
+/*
+ * - 0x6A110C8D ('pallocated', Z_EROFS_MAPPING_PREALLOCATED) -
+ * preallocated cached pages, will be added into managed cache later
+ */
+#define Z_EROFS_MAPPING_PREALLOCATED ((void *)0x6A110C8D)
+
+#define z_erofs_is_preallocatedpage(page) \
+ ((page)->mapping == Z_EROFS_MAPPING_PREALLOCATED)
+
/*
* Structure fields follow one of the following exclusion rules.
*
@@ -69,6 +78,12 @@ struct z_erofs_vle_work {
typedef struct z_erofs_vle_workgroup *z_erofs_vle_owned_workgrp_t;
+/* compressed page tagptr (bit 0 - justfound, with an extra reference) */
+typedef tagptr1_t z_erofs_ctptr_t;
+
+#define z_erofs_ctptr_tag_justfound(page) \
+ tagptr_fold(z_erofs_ctptr_t, page, 1)
+
struct z_erofs_vle_workgroup {
struct erofs_workgroup obj;
struct z_erofs_vle_work work;
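
A usage sketch (not part of the patch) of the new z_erofs_ctptr_t, showing how
the `justfound' tag in bit 0 round-trips through the existing tagptr helpers
from include/linux/tagptr.h; the demo function name is made up:

/* not part of the patch: round-trip demo for the new z_erofs_ctptr_t */
static void z_erofs_ctptr_demo(struct page *page, struct page **slot)
{
	/* found in the managed cache: record that an extra ref is held */
	z_erofs_ctptr_t t = z_erofs_ctptr_tag_justfound(page);

	/* the tagged value is what gets published (e.g. via cmpxchg()) */
	WRITE_ONCE(*slot, tagptr_cast_ptr(t));

	/* the submission side unfolds it again */
	t = tagptr_init(z_erofs_ctptr_t, READ_ONCE(*slot));
	if (tagptr_unfold_tags(t))
		put_page(tagptr_unfold_ptr(t));	/* drop the `justfound' ref */
}

The extra reference recorded by the tag keeps a just-found cache page alive
until the submission path either reuses it or hands it back.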
--
2.14.4