[PATCH RFC] erofs: file-backed mount supports direct io

Hongbo Li lihongbo22 at huawei.com
Wed Jan 15 18:24:05 AEDT 2025



On 2025/1/15 15:09, Hongbo Li wrote:
> erofs has added file-backed mount support. In this scenario, only
> buffered I/O is allowed. So we enhance the I/O mode by implementing
> direct I/O. This also lets the iov_iter (user buffer) interact with the
> backing file's page cache directly.
> 
Based on this, we might reduce the memory overhead with the following
I/O stack:

erofs io (buffer io, direct io) --> fileio --> file-backed's page cache.

That means we can implement direct page cache pass-through in EROFS, while
the underlying backing file uses buffered I/O on the backing file system.

Thanks,
Hongbo

> Signed-off-by: Hongbo Li <lihongbo22 at huawei.com>
> ---
>   fs/erofs/data.c   |  11 +++--
>   fs/erofs/fileio.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 130 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index 0cd6b5c4df98..b5baff61be16 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -395,9 +395,14 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>   	if (IS_DAX(inode))
>   		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
>   #endif
> -	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
> -		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
> -				    NULL, 0, NULL, 0);
> +	if (iocb->ki_flags & IOCB_DIRECT) {
> +		if (inode->i_sb->s_bdev)
> +			return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
> +						NULL, 0, NULL, 0);
> +		if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb)))
> +			return generic_file_read_iter(iocb, to);
> +	}
> +
>   	return filemap_read(iocb, to, 0);
>   }
>   
> diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
> index 33f8539dda4a..76ed16a8ee75 100644
> --- a/fs/erofs/fileio.c
> +++ b/fs/erofs/fileio.c
> @@ -10,12 +10,17 @@ struct erofs_fileio_rq {
>   	struct bio bio;
>   	struct kiocb iocb;
>   	struct super_block *sb;
> +	ssize_t ret;
> +	void *private;
>   };
>   
>   struct erofs_fileio {
> +	struct file *file;
>   	struct erofs_map_blocks map;
>   	struct erofs_map_dev dev;
>   	struct erofs_fileio_rq *rq;
> +	size_t total;
> +	size_t done;
>   };
>   
>   static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
> @@ -24,6 +29,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
>   			container_of(iocb, struct erofs_fileio_rq, iocb);
>   	struct folio_iter fi;
>   
> +	rq->ret = ret;
>   	if (ret > 0) {
>   		if (ret != rq->bio.bi_iter.bi_size) {
>   			bio_advance(&rq->bio, ret);
> @@ -43,6 +49,17 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
>   	kfree(rq);
>   }
>   
> +static void erofs_fileio_end_io(struct bio *bio)
> +{
> +	struct erofs_fileio_rq *rq =
> +			container_of(bio, struct erofs_fileio_rq, bio);
> +	struct erofs_fileio *io = rq->private;
> +
> +	if (rq->ret > 0) {
> +		io->done += rq->ret;
> +	}
> +}
> +
>   static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
>   {
>   	struct iov_iter iter;
> @@ -189,7 +206,112 @@ static void erofs_fileio_readahead(struct readahead_control *rac)
>   	erofs_fileio_rq_submit(io.rq);
>   }
>   
> +static int erofs_fileio_scan_iter(struct erofs_fileio *io, struct kiocb *iocb,
> +				  struct iov_iter *iter)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct inode *inode = file_inode(file);
> +	struct erofs_map_blocks *map = &io->map;
> +	struct iov_iter dest_iter = *iter;
> +	unsigned int cur = 0, end = io->total, len;
> +	loff_t pos = iocb->ki_pos;
> +	int err = 0;
> +
> +	while (cur < end) {
> +		/* submit the last fileio rq */
> +		if (io->rq) {
> +			erofs_fileio_rq_submit(io->rq);
> +			io->rq = NULL;
> +		}
> +
> +		if (!in_range(pos + cur, map->m_la, map->m_llen)) {
> +			map->m_la = pos + cur;
> +			map->m_llen = end - cur;
> +			err = erofs_map_blocks(inode, map);
> +			if (err)
> +				break;
> +		}
> +
> +		len = min_t(loff_t, map->m_llen, end - cur);
> +		/* split the whole iter with (cur, len) */
> +		dest_iter = *iter;
> +		iov_iter_advance(&dest_iter, cur);
> +		iov_iter_truncate(&dest_iter, len);
> +		if (map->m_flags & EROFS_MAP_META) {
> +			struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
> +			void *src;
> +
> +			src = erofs_read_metabuf(&buf, inode->i_sb, map->m_pa, EROFS_KMAP);
> +			if (IS_ERR(src)) {
> +				err = PTR_ERR(src);
> +				break;
> +			}
> +			if (copy_to_iter(src, len, &dest_iter) != len) {
> +				erofs_put_metabuf(&buf);
> +				err = -EIO;
> +				break;
> +			}
> +			erofs_put_metabuf(&buf);
> +			io->done += len;
> +		} else if (!(map->m_flags & EROFS_MAP_MAPPED)) {
> +			iov_iter_zero(len, &dest_iter);
> +			io->done += len;
> +		} else {
> +			io->dev = (struct erofs_map_dev) {
> +				.m_pa = map->m_pa,
> +				.m_deviceid = map->m_deviceid,
> +			};
> +			err = erofs_map_dev(inode->i_sb, &io->dev);
> +			if (err)
> +				break;
> +			io->rq = erofs_fileio_rq_alloc(&io->dev);
> +			io->rq->private = io;
> +			io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> SECTOR_SHIFT;
> +			io->rq->bio.bi_end_io = erofs_fileio_end_io;
> +
> +			if (bio_iov_iter_get_pages(&io->rq->bio, &dest_iter)) {
> +				err = -EIO;
> +				break;
> +			}
> +			io->dev.m_pa += len;
> +		}
> +		cur += len;
> +	}
> +
> +	return err;
> +}
> +
> +static ssize_t erofs_fileio_direct_io(struct kiocb *iocb, struct iov_iter *iter)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct inode *inode = file_inode(file);
> +	size_t i_size = i_size_read(inode);
> +	struct erofs_fileio io = {};
> +	int err = 0;
> +
> +	if (unlikely(iocb->ki_pos >= i_size))
> +		return 0;
> +
> +	iter->count = min_t(size_t, iter->count,
> +			    max_t(size_t, 0, i_size - iocb->ki_pos));
> +
> +	io.total = iter->count;
> +	if (!io.total)
> +		return 0;
> +
> +	err = erofs_fileio_scan_iter(&io, iocb, iter);
> +	if (err)
> +		return err;
> +	erofs_fileio_rq_submit(io.rq);
> +	if (io.total != io.done)
> +		return -EIO;
> +
> +	iov_iter_advance(iter, io.done);
> +	return io.done;
> +}
> +
>   const struct address_space_operations erofs_fileio_aops = {
>   	.read_folio = erofs_fileio_read_folio,
>   	.readahead = erofs_fileio_readahead,
> +	.direct_IO = erofs_fileio_direct_io,
>   };


More information about the Linux-erofs mailing list