[PATCH RFC] erofs: file-backed mount supports direct io

Hongbo Li lihongbo22 at huawei.com
Mon Jan 20 14:02:07 AEDT 2025



On 2025/1/20 9:58, Gao Xiang wrote:
> Hi Hongbo,
> 
> On 2025/1/15 15:09, Hongbo Li wrote:
>> erofs has added file-backed mount support. In this scenario, only
>> buffered I/O is allowed. So we enhance the I/O mode by implementing
>> direct I/O. Also, this lets the iov_iter (user buffer) interact with
>> the backing file's page cache directly.
>>
>> Signed-off-by: Hongbo Li <lihongbo22 at huawei.com>
>> ---
>>   fs/erofs/data.c   |  11 +++--
>>   fs/erofs/fileio.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 130 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
>> index 0cd6b5c4df98..b5baff61be16 100644
>> --- a/fs/erofs/data.c
>> +++ b/fs/erofs/data.c
>> @@ -395,9 +395,14 @@ static ssize_t erofs_file_read_iter(struct kiocb 
>> *iocb, struct iov_iter *to)
>>       if (IS_DAX(inode))
>>           return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
>>   #endif
>> -    if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
>> -        return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
>> -                    NULL, 0, NULL, 0);
>> +    if (iocb->ki_flags & IOCB_DIRECT) {
>> +        if (inode->i_sb->s_bdev)
>> +            return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
>> +                        NULL, 0, NULL, 0);
>> +        if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb)))
>> +            return generic_file_read_iter(iocb, to);
>> +    }
>> +
>>       return filemap_read(iocb, to, 0);
>>   }
>> diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
>> index 33f8539dda4a..76ed16a8ee75 100644
>> --- a/fs/erofs/fileio.c
>> +++ b/fs/erofs/fileio.c
>> @@ -10,12 +10,17 @@ struct erofs_fileio_rq {
>>       struct bio bio;
>>       struct kiocb iocb;
>>       struct super_block *sb;
>> +    ssize_t ret;
>> +    void *private;
>>   };
>>   struct erofs_fileio {
>> +    struct file *file;
>>       struct erofs_map_blocks map;
>>       struct erofs_map_dev dev;
>>       struct erofs_fileio_rq *rq;
>> +    size_t total;
>> +    size_t done;
>>   };
>>   static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
>> @@ -24,6 +29,7 @@ static void erofs_fileio_ki_complete(struct kiocb 
>> *iocb, long ret)
>>               container_of(iocb, struct erofs_fileio_rq, iocb);
>>       struct folio_iter fi;
>> +    rq->ret = ret;
>>       if (ret > 0) {
>>           if (ret != rq->bio.bi_iter.bi_size) {
>>               bio_advance(&rq->bio, ret);
>> @@ -43,6 +49,17 @@ static void erofs_fileio_ki_complete(struct kiocb 
>> *iocb, long ret)
>>       kfree(rq);
>>   }
>> +static void erofs_fileio_end_io(struct bio *bio)
>> +{
>> +    struct erofs_fileio_rq *rq =
>> +            container_of(bio, struct erofs_fileio_rq, bio);
>> +    struct erofs_fileio *io = rq->private;
>> +
>> +    if (rq->ret > 0) {
>> +        io->done += rq->ret;
>> +    }
>> +}
>> +
>>   static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
>>   {
>>       struct iov_iter iter;
>> @@ -189,7 +206,112 @@ static void erofs_fileio_readahead(struct 
>> readahead_control *rac)
>>       erofs_fileio_rq_submit(io.rq);
>>   }
>> +static int erofs_fileio_scan_iter(struct erofs_fileio *io, struct 
>> kiocb *iocb,
>> +                  struct iov_iter *iter)
> 
> I wonder if it's possible to just extract a folio from
> `struct iov_iter` and reuse erofs_fileio_scan_folio() logic.
Thanks for reviewing. OK, I'll think about reusing the 
erofs_fileio_scan_folio() logic in a later version.

Additionally, for the file-backed mount case, could we consider removing 
erofs's own page cache and just using the backing file's page cache? 
That way, buffered I/O would be used by default when reading files from 
the file-backed mount, and it would also reduce the memory overhead.

This is just my initial idea. For the uncompressed layout, this should 
make sense, but for the compressed layout, it still needs to be verified.

Thanks,
Hongbo

> 
> It simplifies the codebase a lot, and I think the performance
> is almost the same.
> 
> Otherwise currently it looks good to me.
> 
> Thanks,
> Gao Xiang


More information about the Linux-erofs mailing list