[PATCH v2 2/4] erofs: support unencoded inodes for fileio

Gao Xiang hsiangkao at linux.alibaba.com
Sat Aug 31 12:54:02 AEST 2024



On 2024/8/31 08:19, Sandeep Dhavale wrote:
> On Thu, Aug 29, 2024 at 8:29 PM Gao Xiang <hsiangkao at linux.alibaba.com> wrote:
>>
>> Since EROFS only needs to handle read requests in simple contexts,
>> Just directly use vfs_iocb_iter_read() for data I/Os.
>>
>> Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
>> ---
>> v2:
>>   - fix redundant refcount which cause hanging on chunked inodes.
>>
>>   fs/erofs/Makefile   |   1 +
>>   fs/erofs/data.c     |  50 +++++++++++-
>>   fs/erofs/fileio.c   | 181 ++++++++++++++++++++++++++++++++++++++++++++
>>   fs/erofs/inode.c    |  17 +++--
>>   fs/erofs/internal.h |   7 +-
>>   fs/erofs/zdata.c    |  46 ++---------
>>   6 files changed, 251 insertions(+), 51 deletions(-)
>>   create mode 100644 fs/erofs/fileio.c
>>
>> diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
>> index 097d672e6b14..4331d53c7109 100644
>> --- a/fs/erofs/Makefile
>> +++ b/fs/erofs/Makefile
>> @@ -7,4 +7,5 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
>>   erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
>>   erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
>>   erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
>> +erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
>>   erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
>> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
>> index 0fb31c588ae0..b4c07ce7a294 100644
>> --- a/fs/erofs/data.c
>> +++ b/fs/erofs/data.c
>> @@ -132,7 +132,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
>>          if (map->m_la >= inode->i_size) {
>>                  /* leave out-of-bound access unmapped */
>>                  map->m_flags = 0;
>> -               map->m_plen = 0;
>> +               map->m_plen = map->m_llen;
>>                  goto out;
>>          }
>>
>> @@ -197,8 +197,13 @@ static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
>>                                      struct erofs_device_info *dif)
>>   {
>>          map->m_bdev = NULL;
>> -       if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
>> -               map->m_bdev = file_bdev(dif->file);
>> +       map->m_fp = NULL;
>> +       if (dif->file) {
>> +               if (S_ISBLK(file_inode(dif->file)->i_mode))
>> +                       map->m_bdev = file_bdev(dif->file);
>> +               else
>> +                       map->m_fp = dif->file;
>> +       }
>>          map->m_daxdev = dif->dax_dev;
>>          map->m_dax_part_off = dif->dax_part_off;
>>          map->m_fscache = dif->fscache;
>> @@ -215,6 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
>>          map->m_daxdev = EROFS_SB(sb)->dax_dev;
>>          map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
>>          map->m_fscache = EROFS_SB(sb)->s_fscache;
>> +       map->m_fp = EROFS_SB(sb)->fdev;
>>
>>          if (map->m_deviceid) {
>>                  down_read(&devs->rwsem);
>> @@ -250,6 +256,42 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
>>          return 0;
>>   }
>>
>> +/*
>> + * bit 30: I/O error occurred on this folio
>> + * bit 0 - 29: remaining parts to complete this folio
>> + */
>> +#define EROFS_ONLINEFOLIO_EIO                  (1 << 30)
>> +
>> +void erofs_onlinefolio_init(struct folio *folio)
>> +{
>> +       union {
>> +               atomic_t o;
>> +               void *v;
>> +       } u = { .o = ATOMIC_INIT(1) };
>> +
>> +       folio->private = u.v;   /* valid only if file-backed folio is locked */
>> +}
>> +
>> +void erofs_onlinefolio_split(struct folio *folio)
>> +{
>> +       atomic_inc((atomic_t *)&folio->private);
>> +}
>> +
>> +void erofs_onlinefolio_end(struct folio *folio, int err)
>> +{
>> +       int orig, v;
>> +
>> +       do {
>> +               orig = atomic_read((atomic_t *)&folio->private);
>> +               v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
>> +       } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
>> +
>> +       if (v & ~EROFS_ONLINEFOLIO_EIO)
>> +               return;
>> +       folio->private = 0;
>> +       folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
>> +}
>> +
>>   static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>>                  unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
>>   {
>> @@ -399,7 +441,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>>   }
>>
>>   /* for uncompressed (aligned) files and raw access for other files */
>> -const struct address_space_operations erofs_raw_access_aops = {
>> +const struct address_space_operations erofs_aops = {
>>          .read_folio = erofs_read_folio,
>>          .readahead = erofs_readahead,
>>          .bmap = erofs_bmap,
>> diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
>> new file mode 100644
>> index 000000000000..eab52b8abd0b
>> --- /dev/null
>> +++ b/fs/erofs/fileio.c
>> @@ -0,0 +1,181 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * Copyright (C) 2024, Alibaba Cloud
>> + */
>> +#include "internal.h"
>> +#include <trace/events/erofs.h>
>> +
>> +struct erofs_fileio_rq {
>> +       struct bio_vec bvecs[BIO_MAX_VECS];
>> +       struct bio bio;
>> +       struct kiocb iocb;
>> +};
>> +
>> +struct erofs_fileio {
>> +       struct erofs_map_blocks map;
>> +       struct erofs_map_dev dev;
>> +       struct erofs_fileio_rq *rq;
>> +};
>> +
>> +static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
>> +{
>> +       struct erofs_fileio_rq *rq =
>> +                       container_of(iocb, struct erofs_fileio_rq, iocb);
>> +       struct folio_iter fi;
>> +
>> +       DBG_BUGON(rq->bio.bi_end_io);
>> +       if (ret > 0) {
>> +               if (ret != rq->bio.bi_iter.bi_size) {
>> +                       bio_advance(&rq->bio, ret);
>> +                       zero_fill_bio(&rq->bio);
>> +               }
>> +               ret = 0;
>> +       }
>> +       bio_for_each_folio_all(fi, &rq->bio) {
>> +               DBG_BUGON(folio_test_uptodate(fi.folio));
>> +               erofs_onlinefolio_end(fi.folio, ret);
>> +       }
>> +       kfree(rq);
>> +}
>> +
>> +static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
>> +{
>> +       struct iov_iter iter;
>> +       int ret;
>> +
>> +       if (!rq)
>> +               return;
>> +       rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << 9;
>> +       rq->iocb.ki_ioprio = get_current_ioprio();
>> +       rq->iocb.ki_complete = erofs_fileio_ki_complete;
>> +       rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
>> +                               IOCB_DIRECT : 0;
> Hi Gao,
> Does this mean, direct IO by default if the backing file supports it
> (technically filesystem where image/backing file reside)?

Yes.

Thanks,
Gao Xiang


More information about the Linux-erofs mailing list