[PATCH] erofs: support direct IO for ondemand mode
Gao Xiang
hsiangkao at linux.alibaba.com
Thu Jul 18 12:40:28 AEST 2024
Hi Hongbo,
I'd like to request Jingbo's review too.
On 2024/7/18 09:05, Hongbo Li wrote:
> erofs over fscache cannot handle direct read I/O. When a file is
> opened with the O_DIRECT flag, -EINVAL is returned. We support
> DIO in erofs over fscache by bypassing the erofs page cache and
> reading the target data into ubuf from fscache's file directly.
Could you give more hints in the commit message on the target user
of fscache DIO?
For Android use cases, direct I/O support is mainly used for loop
device direct mode.
>
> The alignment of the buffer memory, offset and size is now restricted
> by erofs, since `i_blocksize` is enough for the underlying filesystems.
>
> Signed-off-by: Hongbo Li <lihongbo22 at huawei.com>
> ---
> fs/erofs/data.c | 3 ++
> fs/erofs/fscache.c | 95 +++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 93 insertions(+), 5 deletions(-)
>
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index 8be60797ea2f..dbfafe358de4 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -391,6 +391,9 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> iov_iter_alignment(to)) & blksize_mask)
> return -EINVAL;
>
> + if (erofs_is_fscache_mode(inode->i_sb))
> + return generic_file_read_iter(iocb, to);
> +
> return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
> NULL, 0, NULL, 0);
> }
> diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
> index fda16eedafb5..f5a09b168539 100644
> --- a/fs/erofs/fscache.c
> +++ b/fs/erofs/fscache.c
> @@ -35,6 +35,8 @@ struct erofs_fscache_io {
>
> struct erofs_fscache_rq {
> struct address_space *mapping; /* The mapping being accessed */
> + struct iov_iter *iter; /* dst buf for direct io */
> + struct completion done; /* for synced direct io */
> loff_t start; /* Start position */
> size_t len; /* Length of the request */
> size_t submitted; /* Length of submitted */
> @@ -76,7 +78,11 @@ static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
> {
> if (!refcount_dec_and_test(&req->ref))
> return;
> - erofs_fscache_req_complete(req);
> +
> + if (req->iter)
> + complete(&req->done);
> + else
> + erofs_fscache_req_complete(req);
> kfree(req);
> }
>
> @@ -88,6 +94,7 @@ static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *ma
> if (!req)
> return NULL;
> req->mapping = mapping;
> + req->iter = NULL;
> req->start = start;
> req->len = len;
> refcount_set(&req->ref, 1);
> @@ -253,6 +260,55 @@ static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
> return ret;
> }
>
> +static int erofs_fscache_data_dio_read(struct erofs_fscache_rq *req)
> +{
> + struct address_space *mapping = req->mapping;
> + struct inode *inode = mapping->host;
> + struct super_block *sb = inode->i_sb;
> + struct iov_iter *iter = req->iter;
> + struct erofs_fscache_io *io;
> + struct erofs_map_blocks map;
> + struct erofs_map_dev mdev;
> + loff_t pos = req->start + req->submitted;
> + size_t count;
> + int ret;
> +
> + map.m_la = pos;
> + ret = erofs_map_blocks(inode, &map);
> + if (ret)
> + return ret;
> +
> + count = req->len - req->submitted;
> + if (!(map.m_flags & EROFS_MAP_MAPPED)) {
> + iov_iter_zero(count, iter);
> + req->submitted += count;
> + return 0;
> + }
> +
> + count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
> + DBG_BUGON(!count || count % PAGE_SIZE);
> +
> + mdev = (struct erofs_map_dev) {
> + .m_deviceid = map.m_deviceid,
> + .m_pa = map.m_pa,
> + };
> + ret = erofs_map_dev(sb, &mdev);
> + if (ret)
> + return ret;
> +
> + io = erofs_fscache_req_io_alloc(req);
> + if (!io)
> + return -ENOMEM;
> +
> + iov_iter_ubuf(&io->iter, ITER_DEST, iter->ubuf + iter->iov_offset, count);
> + ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie, mdev.m_pa + (pos - map.m_la), io);
> + iov_iter_advance(iter, io->iter.iov_offset);
> + erofs_fscache_req_io_put(io);
> +
> + req->submitted += count;
> + return ret;
> +}
> +
> static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
> {
> struct address_space *mapping = req->mapping;
> @@ -324,12 +380,13 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
> return ret;
> }
>
> -static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
> +static int erofs_fscache_data_read(struct erofs_fscache_rq *req, bool direct)
> {
> int ret;
>
> do {
> - ret = erofs_fscache_data_read_slice(req);
> + ret = (direct) ? erofs_fscache_data_dio_read(req)
> + : erofs_fscache_data_read_slice(req);
> if (ret)
> req->error = ret;
> } while (!ret && req->submitted < req->len);
> @@ -348,7 +405,7 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
> return -ENOMEM;
> }
>
> - ret = erofs_fscache_data_read(req);
> + ret = erofs_fscache_data_read(req, false);
> erofs_fscache_req_put(req);
> return ret;
> }
> @@ -369,8 +426,35 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
> while (readahead_folio(rac))
> ;
>
> - erofs_fscache_data_read(req);
> + erofs_fscache_data_read(req, false);
> + erofs_fscache_req_put(req);
> +}
> +
> +static ssize_t erofs_fscache_directIO(struct kiocb *iocb, struct iov_iter *iter)
How about getting rid of upper-case names, e.g.
using erofs_fscache_direct_io instead?
Thanks,
Gao Xiang
> +{
> + struct file *file = iocb->ki_filp;
> + size_t count = iov_iter_count(iter);
> + struct erofs_fscache_rq *req;
> + struct completion *ctr;
> + ssize_t rsize;
> + int ret;
> +
> + if (unlikely(iov_iter_rw(iter) == WRITE))
> + return -EROFS;
> +
> + req = erofs_fscache_req_alloc(file->f_mapping, iocb->ki_pos, count);
> + if (!req)
> + return -ENOMEM;
> +
> + req->iter = iter;
> + init_completion(&req->done);
> + ctr = &req->done;
> + ret = erofs_fscache_data_read(req, true);
> + rsize = (ret == 0) ? (ssize_t)req->submitted : ret;
> erofs_fscache_req_put(req);
> + wait_for_completion(ctr);
> +
> + return rsize;
> }
>
> static const struct address_space_operations erofs_fscache_meta_aops = {
> @@ -380,6 +464,7 @@ static const struct address_space_operations erofs_fscache_meta_aops = {
> const struct address_space_operations erofs_fscache_access_aops = {
> .read_folio = erofs_fscache_read_folio,
> .readahead = erofs_fscache_readahead,
> + .direct_IO = erofs_fscache_directIO,
> };
>
> static void erofs_fscache_domain_put(struct erofs_domain *domain)
More information about the Linux-erofs
mailing list