[PATCH v2] erofs: support direct IO for ondemand mode

Hongbo Li lihongbo22 at huawei.com
Fri Jul 26 12:06:27 AEST 2024


erofs over fscache cannot handle the direct read io. When the file
is opened with O_DIRECT flag, -EINVAL will reback. We support the
DIO in erofs over fscache by bypassing the erofs page cache and
reading target data into ubuf from fscache's file directly.

The alignment for buffer memory, offset and size now is restricted
by erofs, since `i_blocksize` is enough for the under filesystems.

Signed-off-by: Hongbo Li <lihongbo22 at huawei.com>

---
v2:
  - Change the directIO helper name to erofs_fscache_direct_io
  - Add some io interception when begin direct io
  - Optimize the direct io logical in erofs_fscache_data_read_slice

v1:
  - https://lore.kernel.org/linux-erofs/6f1a5c1c-d44d-4561-9377-b935ff4531f2@huawei.com/T/#t
---
 fs/erofs/data.c    |  3 ++
 fs/erofs/fscache.c | 84 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 8be60797ea2f..dbfafe358de4 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -391,6 +391,9 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		     iov_iter_alignment(to)) & blksize_mask)
 			return -EINVAL;
 
+		if (erofs_is_fscache_mode(inode->i_sb))
+			return generic_file_read_iter(iocb, to);
+
 		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
 				    NULL, 0, NULL, 0);
 	}
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index fda16eedafb5..e3e6c579eb81 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -35,6 +35,8 @@ struct erofs_fscache_io {
 
 struct erofs_fscache_rq {
 	struct address_space	*mapping;	/* The mapping being accessed */
+	struct iov_iter		*iter;		/* Destination for direct io */
+	struct completion	done;		/* Sync for direct io */
 	loff_t			start;		/* Start position */
 	size_t			len;		/* Length of the request */
 	size_t			submitted;	/* Length of submitted */
@@ -76,7 +78,11 @@ static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
 {
 	if (!refcount_dec_and_test(&req->ref))
 		return;
-	erofs_fscache_req_complete(req);
+
+	if (req->iter)
+		complete(&req->done);
+	else
+		erofs_fscache_req_complete(req);
 	kfree(req);
 }
 
@@ -88,6 +94,7 @@ static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *ma
 	if (!req)
 		return NULL;
 	req->mapping = mapping;
+	req->iter = NULL;
 	req->start = start;
 	req->len = len;
 	refcount_set(&req->ref, 1);
@@ -258,6 +265,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
 	struct address_space *mapping = req->mapping;
 	struct inode *inode = mapping->host;
 	struct super_block *sb = inode->i_sb;
+	struct iov_iter *dest_iter = req->iter;
 	struct erofs_fscache_io *io;
 	struct erofs_map_blocks map;
 	struct erofs_map_dev mdev;
@@ -270,33 +278,45 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
 	if (ret)
 		return ret;
 
+	count = req->len - req->submitted;
 	if (map.m_flags & EROFS_MAP_META) {
 		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
 		struct iov_iter iter;
-		size_t size = map.m_llen;
+		struct iov_iter *iterp;
+		size_t size = min_t(size_t, map.m_llen, count);
 		void *src;
+		ssize_t filled = 0;
 
 		src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP);
 		if (IS_ERR(src))
 			return PTR_ERR(src);
 
 		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
-		if (copy_to_iter(src, size, &iter) != size) {
+		iterp = (dest_iter) ?: &iter;
+		if (copy_to_iter(src, size, iterp) != size) {
 			erofs_put_metabuf(&buf);
 			return -EFAULT;
 		}
-		iov_iter_zero(PAGE_SIZE - size, &iter);
+
+		filled += size;
+		if (!dest_iter) {
+			iov_iter_zero(PAGE_SIZE - size, iterp);
+			filled += (PAGE_SIZE - size);
+		}
+
 		erofs_put_metabuf(&buf);
-		req->submitted += PAGE_SIZE;
+		req->submitted += filled;
 		return 0;
 	}
 
-	count = req->len - req->submitted;
 	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
 		struct iov_iter iter;
 
-		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
-		iov_iter_zero(count, &iter);
+		if (!dest_iter) {
+			iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
+			iov_iter_zero(count, &iter);
+		}
+
 		req->submitted += count;
 		return 0;
 	}
@@ -315,9 +335,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
 	io = erofs_fscache_req_io_alloc(req);
 	if (!io)
 		return -ENOMEM;
-	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
+
+	if (dest_iter)
+		iov_iter_ubuf(&io->iter, ITER_DEST,
+			dest_iter->ubuf + dest_iter->iov_offset, count);
+	else
+		iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
+
 	ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
 			mdev.m_pa + (pos - map.m_la), io);
+	if (dest_iter)
+		iov_iter_advance(dest_iter, io->iter.iov_offset);
 	erofs_fscache_req_io_put(io);
 
 	req->submitted += count;
@@ -373,6 +401,43 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
 	erofs_fscache_req_put(req);
 }
 
+static ssize_t erofs_fscache_direct_io(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	size_t count = iov_iter_count(iter);
+	size_t i_size = i_size_read(file->f_inode);
+	struct erofs_fscache_rq *req;
+	struct completion *ctr;
+	ssize_t rsize;
+	int ret;
+
+	if (unlikely(iov_iter_rw(iter) == WRITE))
+		return -EROFS;
+
+	if (unlikely(iocb->ki_pos >= i_size))
+		return 0;
+
+	if (count + iocb->ki_pos > i_size)
+		count = i_size - iocb->ki_pos;
+
+	if (!count)
+		return 0;
+
+	req = erofs_fscache_req_alloc(file->f_mapping, iocb->ki_pos, count);
+	if (!req)
+		return -ENOMEM;
+
+	req->iter = iter;
+	init_completion(&req->done);
+	ctr = &req->done;
+	ret = erofs_fscache_data_read(req);
+	rsize = (ret == 0) ? (ssize_t)req->submitted : ret;
+	erofs_fscache_req_put(req);
+	wait_for_completion(ctr);
+
+	return rsize;
+}
+
 static const struct address_space_operations erofs_fscache_meta_aops = {
 	.read_folio = erofs_fscache_meta_read_folio,
 };
@@ -380,6 +445,7 @@ static const struct address_space_operations erofs_fscache_meta_aops = {
 const struct address_space_operations erofs_fscache_access_aops = {
 	.read_folio = erofs_fscache_read_folio,
 	.readahead = erofs_fscache_readahead,
+	.direct_IO = erofs_fscache_direct_io,
 };
 
 static void erofs_fscache_domain_put(struct erofs_domain *domain)
-- 
2.34.1



More information about the Linux-erofs mailing list