[PATCH v2] erofs: change to use asyncronous io for fscache readpage/readahead
Xin Yin
yinxin.x at bytedance.com
Sat May 7 18:31:54 AEST 2022
Use asyncronous io to read data from fscache may greatly improve IO
bandwidth for sequential buffer read scenario.
Change erofs_fscache_read_folios to erofs_fscache_read_folios_async,
and read data from fscache asyncronously. Make .readpage()/.readahead()
to use this new helper.
Signed-off-by: Xin Yin <yinxin.x at bytedance.com>
---
changes from RFC:
1.rebase to fscache,erofs: fscache-based on-demand read semantics v10.
2.fix issues pointed out by Jeffle.
3.simplify parameters, add debug messages for erofs_fscache_read_folios_async.
4.also change .readpage() to use new helper to avoid code duplication.
changes from v1:
fix styling issues.
---
fs/erofs/fscache.c | 240 ++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 200 insertions(+), 40 deletions(-)
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index a402d8f0a063..1008f47d7252 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -5,57 +5,204 @@
#include <linux/fscache.h>
#include "internal.h"
+static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
+ loff_t start, size_t len)
+{
+ struct netfs_io_request *rreq;
+
+ rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
+ if (!rreq)
+ return ERR_PTR(-ENOMEM);
+
+ rreq->start = start;
+ rreq->len = len;
+ rreq->mapping = mapping;
+ INIT_LIST_HEAD(&rreq->subrequests);
+ refcount_set(&rreq->ref, 1);
+
+ return rreq;
+}
+
+static void erofs_fscache_put_request(struct netfs_io_request *rreq)
+{
+ if (refcount_dec_and_test(&rreq->ref)) {
+ if (rreq->cache_resources.ops)
+ rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
+ kfree(rreq);
+ }
+}
+
+static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
+{
+ if (refcount_dec_and_test(&subreq->ref)) {
+ erofs_fscache_put_request(subreq->rreq);
+ kfree(subreq);
+ }
+}
+
+static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *subreq;
+
+ while (!list_empty(&rreq->subrequests)) {
+ subreq = list_first_entry(&rreq->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ list_del(&subreq->rreq_link);
+ erofs_fscache_put_subrequest(subreq);
+ }
+}
+
+static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *subreq;
+ struct folio *folio;
+ unsigned int iopos;
+ pgoff_t start_page = rreq->start / PAGE_SIZE;
+ pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+ bool subreq_failed = false;
+
+ XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+
+ subreq = list_first_entry(&rreq->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ iopos = 0;
+ subreq_failed = (subreq->error < 0);
+
+ rcu_read_lock();
+ xas_for_each(&xas, folio, last_page) {
+ unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
+ unsigned int pgend = pgpos + folio_size(folio);
+ bool pg_failed = false;
+
+ for (;;) {
+ if (!subreq) {
+ pg_failed = true;
+ break;
+ }
+
+ pg_failed |= subreq_failed;
+ if (pgend < iopos + subreq->len)
+ break;
+
+ iopos += subreq->len;
+ if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+ subreq = list_next_entry(subreq, rreq_link);
+ subreq_failed = (subreq->error < 0);
+ } else {
+ subreq = NULL;
+ subreq_failed = false;
+ }
+ if (pgend == iopos)
+ break;
+ }
+
+ if (!pg_failed)
+ folio_mark_uptodate(folio);
+
+ folio_unlock(folio);
+ }
+ rcu_read_unlock();
+}
+
+static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
+{
+ erofs_fscache_rreq_unlock_folios(rreq);
+ erofs_fscache_clear_subrequests(rreq);
+ erofs_fscache_put_request(rreq);
+}
+
+static void erofc_fscache_subreq_complete(void *priv, ssize_t transferred_or_error,
+ bool was_async)
+{
+ struct netfs_io_subrequest *subreq = priv;
+ struct netfs_io_request *rreq = subreq->rreq;
+
+ if (IS_ERR_VALUE(transferred_or_error))
+ subreq->error = transferred_or_error;
+
+ if (atomic_dec_and_test(&rreq->nr_outstanding))
+ erofs_fscache_rreq_complete(rreq);
+
+ erofs_fscache_put_subrequest(subreq);
+}
+
/*
* Read data from fscache and fill the read data into page cache described by
- * @start/len, which shall be both aligned with PAGE_SIZE. @pstart describes
+ * @rreq, which shall be both aligned with PAGE_SIZE. @pstart describes
* the start physical address in the cache file.
*/
-static int erofs_fscache_read_folios(struct fscache_cookie *cookie,
- struct address_space *mapping,
- loff_t start, size_t len,
+static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
+ struct netfs_io_request *rreq,
loff_t pstart)
{
enum netfs_io_source source;
- struct netfs_io_request rreq = {};
- struct netfs_io_subrequest subreq = { .rreq = &rreq, };
- struct netfs_cache_resources *cres = &rreq.cache_resources;
- struct super_block *sb = mapping->host->i_sb;
+ struct super_block *sb = rreq->mapping->host->i_sb;
+ struct netfs_io_subrequest *subreq;
+ struct netfs_cache_resources *cres = &rreq->cache_resources;
struct iov_iter iter;
+ loff_t start = rreq->start;
+ size_t len = rreq->len;
size_t done = 0;
int ret;
+ atomic_set(&rreq->nr_outstanding, 1);
+
ret = fscache_begin_read_operation(cres, cookie);
if (ret)
- return ret;
+ goto out;
while (done < len) {
- subreq.start = pstart + done;
- subreq.len = len - done;
- subreq.flags = 1 << NETFS_SREQ_ONDEMAND;
+ subreq = kzalloc(sizeof(struct netfs_io_subrequest), GFP_KERNEL);
+ if (subreq) {
+ INIT_LIST_HEAD(&subreq->rreq_link);
+ refcount_set(&subreq->ref, 2);
+ subreq->rreq = rreq;
+ refcount_inc(&rreq->ref);
+ } else {
+ ret = -ENOMEM;
+ goto out;
+ }
- source = cres->ops->prepare_read(&subreq, LLONG_MAX);
- if (WARN_ON(subreq.len == 0))
+ subreq->start = pstart + done;
+ subreq->len = len - done;
+ subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
+
+ list_add_tail(&subreq->rreq_link, &rreq->subrequests);
+
+ source = cres->ops->prepare_read(subreq, LLONG_MAX);
+ if (WARN_ON(subreq->len == 0))
source = NETFS_INVALID_READ;
if (source != NETFS_READ_FROM_CACHE) {
erofs_err(sb, "failed to fscache prepare_read (source %d)",
source);
ret = -EIO;
+ subreq->error = ret;
+ erofs_fscache_put_subrequest(subreq);
goto out;
}
- iov_iter_xarray(&iter, READ, &mapping->i_pages,
- start + done, subreq.len);
- ret = fscache_read(cres, subreq.start, &iter,
- NETFS_READ_HOLE_FAIL, NULL, NULL);
+ atomic_inc(&rreq->nr_outstanding);
+
+ iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
+ start + done, subreq->len);
+
+ ret = fscache_read(cres, subreq->start, &iter,
+ NETFS_READ_HOLE_FAIL, erofc_fscache_subreq_complete, subreq);
+
+ if (ret == -EIOCBQUEUED)
+ ret = 0;
+
if (ret) {
erofs_err(sb, "failed to fscache_read (ret %d)", ret);
goto out;
}
- done += subreq.len;
+ done += subreq->len;
}
out:
- fscache_end_operation(cres);
+ if (atomic_dec_and_test(&rreq->nr_outstanding))
+ erofs_fscache_rreq_complete(rreq);
+
return ret;
}
@@ -64,6 +211,7 @@ static int erofs_fscache_meta_readpage(struct file *data, struct page *page)
int ret;
struct folio *folio = page_folio(page);
struct super_block *sb = folio_mapping(folio)->host->i_sb;
+ struct netfs_io_request *rreq;
struct erofs_map_dev mdev = {
.m_deviceid = 0,
.m_pa = folio_pos(folio),
@@ -73,11 +221,13 @@ static int erofs_fscache_meta_readpage(struct file *data, struct page *page)
if (ret)
goto out;
- ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
- folio_mapping(folio), folio_pos(folio),
- folio_size(folio), mdev.m_pa);
- if (!ret)
- folio_mark_uptodate(folio);
+ rreq = erofs_fscache_alloc_request(folio_mapping(folio),
+ folio_pos(folio), folio_size(folio));
+ if (IS_ERR(rreq))
+ goto out;
+
+ return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
+ rreq, mdev.m_pa);
out:
folio_unlock(folio);
return ret;
@@ -117,6 +267,7 @@ static int erofs_fscache_readpage(struct file *file, struct page *page)
struct super_block *sb = inode->i_sb;
struct erofs_map_blocks map;
struct erofs_map_dev mdev;
+ struct netfs_io_request *rreq;
erofs_off_t pos;
loff_t pstart;
int ret;
@@ -149,10 +300,15 @@ static int erofs_fscache_readpage(struct file *file, struct page *page)
if (ret)
goto out_unlock;
+
+ rreq = erofs_fscache_alloc_request(folio_mapping(folio),
+ folio_pos(folio), folio_size(folio));
+ if (IS_ERR(rreq))
+ goto out_unlock;
+
pstart = mdev.m_pa + (pos - map.m_la);
- ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
- folio_mapping(folio), folio_pos(folio),
- folio_size(folio), pstart);
+ return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
+ rreq, pstart);
out_uptodate:
if (!ret)
@@ -162,15 +318,16 @@ static int erofs_fscache_readpage(struct file *file, struct page *page)
return ret;
}
-static void erofs_fscache_unlock_folios(struct readahead_control *rac,
- size_t len)
+static void erofs_fscache_advance_folios(struct readahead_control *rac,
+ size_t len, bool unlock)
{
while (len) {
struct folio *folio = readahead_folio(rac);
-
len -= folio_size(folio);
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ if (unlock) {
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
+ }
}
}
@@ -192,6 +349,7 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
do {
struct erofs_map_blocks map;
struct erofs_map_dev mdev;
+ struct netfs_io_request *rreq;
pos = start + done;
map.m_la = pos;
@@ -211,7 +369,7 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
offset, count);
iov_iter_zero(count, &iter);
- erofs_fscache_unlock_folios(rac, count);
+ erofs_fscache_advance_folios(rac, count, true);
ret = count;
continue;
}
@@ -237,15 +395,17 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
if (ret)
return;
- ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
- rac->mapping, offset, count,
- mdev.m_pa + (pos - map.m_la));
+ rreq = erofs_fscache_alloc_request(rac->mapping, offset, count);
+ if (IS_ERR(rreq))
+ return;
/*
- * For the error cases, the folios will be unlocked when
- * .readahead() returns.
+ * Drop the ref of folios here. Unlock them in
+ * rreq_unlock_folios() when rreq complete.
*/
+ erofs_fscache_advance_folios(rac, count, false);
+ ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
+ rreq, mdev.m_pa + (pos - map.m_la));
if (!ret) {
- erofs_fscache_unlock_folios(rac, count);
ret = count;
}
} while (ret > 0 && ((done += ret) < len));
--
2.11.0
More information about the Linux-erofs
mailing list