[PATCH v7] erofs-utils: add support for fuse 2/3 lowlevel API
Li Yiyan
lyy0627 at sjtu.edu.cn
Thu Aug 31 20:57:33 AEST 2023
Hi Xiang,
Thanks for your thoughtful comments, here are my replies.
On 2023/8/29 22:17, Gao Xiang wrote:
> Hi Yiyan,
>
> Would you mind addressing the following comments? I think it's almost
> the time to formalize this:
>
> On 2023/8/26 17:25, Huang Jianan wrote:
>> Hi Yiyan,
>> Thanks for your work, I have some comments below, please check.
>>
>> On 2023/8/23 19:59, Li Yiyan wrote:
>>> Add support for the fuse low-level API in erofsfuse, proven correct in
>>> 22 test cases. Lowlevel API offers improved performance compared to the
>>> high-level API, while maintaining compatibility with fuse version
>>> 2(>=2.6) and 3 (>=3.0).
>>>
>>> Dataset: linux 5.15
>>> Compression algorithm: -z4hc,12
>>> Additional options: -T0 -C16384
>>> Test options: --warmup 3 -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1"
>>>
>>> Evaluation result (highlevel->lowlevel avg time):
>>> - Sequence metadata: 777.3 ms->180.9 ms
>>> - Sequence data: 3.282 s->818.1 ms
>>> - Random metadata: 1.571 s->928.3 ms
>>> - Random data: 2.461 s->597.8 ms
>>>
>>> Signed-off-by: Li Yiyan <lyy0627 at sjtu.edu.cn>
>>> ---
>>> Changes since v6:
>>> - remove redundant code
>>> - optimize naming
>>> - add eval data
>>>
>>> Changes since v5:
>>> - name retval to `ret` from `err`
>>>
>>> Changes since v4:
>>> - support fuse option
>>> - default run in daemon
>>> - remove redundant log
>>>
>>> Changes since v3:
>>> - remove ll identifier
>>> - optimize naming
>>> - remove redundant log
>>> - add fixme label for confusing xattr_buf
>>>
>>> Changes since v2:
>>> - merge lowlevel.c into main.c
>>> - fix typo in autoconf
>>> - optimize naming
>>> - remove redundant code
>>> - avoid global sbi dependencies
>>>
>>> Changes since v1:
>>> - remove highlevel fallback path
>>> - remove redundant code
>>> - add static for erofsfuse_ll_func
>>> ---
>>> configure.ac | 23 +-
>>> fuse/Makefile.am | 4 +-
>>> fuse/main.c | 621 ++++++++++++++++++++++++++++++++++++++---------
>>> 3 files changed, 523 insertions(+), 125 deletions(-)
>>>
>>> diff --git a/configure.ac b/configure.ac
>>> index a8cecd0..6d08d96 100644
>>> --- a/configure.ac
>>> +++ b/configure.ac
>>> @@ -336,15 +336,26 @@ AS_IF([test "x$with_selinux" != "xno"], [
>>>
>>> # Configure fuse
>>> AS_IF([test "x$enable_fuse" != "xno"], [
>>> - PKG_CHECK_MODULES([libfuse], [fuse >= 2.6])
>>> # Paranoia: don't trust the result reported by pkgconfig before trying out
>>> saved_LIBS="$LIBS"
>>> saved_CPPFLAGS=${CPPFLAGS}
>>> - CPPFLAGS="${libfuse_CFLAGS} ${CPPFLAGS}"
>>> - LIBS="${libfuse_LIBS} $LIBS"
>>> - AC_CHECK_LIB(fuse, fuse_main, [
>>> - have_fuse="yes" ], [
>>> - AC_MSG_ERROR([libfuse (>= 2.6) doesn't work properly])])
>>> + PKG_CHECK_MODULES([libfuse3], [fuse3 >= 3.0], [
>>> + AC_DEFINE([FUSE_USE_VERSION], [30], [used FUSE API version])
>>> + CPPFLAGS="${libfuse3_CFLAGS} ${CPPFLAGS}"
>>> + LIBS="${libfuse3_LIBS} $LIBS"
>>> + AC_CHECK_LIB(fuse3, fuse_session_new, [], [
>>> + AC_MSG_ERROR([libfuse3 (>= 3.0) doesn't work properly for lowlevel api])])
>>> + have_fuse="yes"
>>> + ], [
>>> + PKG_CHECK_MODULES([libfuse2], [fuse >= 2.6], [
>>> + AC_DEFINE([FUSE_USE_VERSION], [26], [used FUSE API version])
>>> + CPPFLAGS="${libfuse2_CFLAGS} ${CPPFLAGS}"
>>> + LIBS="${libfuse2_LIBS} $LIBS"
>>> + AC_CHECK_LIB(fuse, fuse_lowlevel_new, [], [
>>> + AC_MSG_ERROR([libfuse (>= 2.6) doesn't work properly for lowlevel api])])
>>> + have_fuse="yes"
>>> + ], [have_fuse="no"])
>>> + ])
>>> LIBS="${saved_LIBS}"
>>> CPPFLAGS="${saved_CPPFLAGS}"], [have_fuse="no"])
>>>
>>> diff --git a/fuse/Makefile.am b/fuse/Makefile.am
>>> index 50be783..c63efcd 100644
>>> --- a/fuse/Makefile.am
>>> +++ b/fuse/Makefile.am
>>> @@ -5,6 +5,6 @@ noinst_HEADERS = $(top_srcdir)/fuse/macosx.h
>>> bin_PROGRAMS = erofsfuse
>>> erofsfuse_SOURCES = main.c
>>> erofsfuse_CFLAGS = -Wall -I$(top_srcdir)/include
>>> -erofsfuse_CFLAGS += -DFUSE_USE_VERSION=26 ${libfuse_CFLAGS} ${libselinux_CFLAGS}
>>> -erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse_LIBS} ${liblz4_LIBS} \
>>> +erofsfuse_CFLAGS += ${libfuse2_CFLAGS} ${libfuse3_CFLAGS} ${libselinux_CFLAGS}
>>> +erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse2_LIBS} ${libfuse3_LIBS} ${liblz4_LIBS} \
>>> ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
>>> diff --git a/fuse/main.c b/fuse/main.c
>>> index 821d98c..ecb4e81 100644
>>> --- a/fuse/main.c
>>> +++ b/fuse/main.c
>>> @@ -1,13 +1,12 @@
>>> // SPDX-License-Identifier: GPL-2.0+
>>> /*
>>> * Created by Li Guifu <blucerlee at gmail.com>
>>> + * Lowlevel added by Li Yiyan <lyy0627 at sjtu.edu.cn>
>>> */
>>> #include <stdlib.h>
>>> #include <string.h>
>>> #include <signal.h>
>>> #include <libgen.h>
>>> -#include <fuse.h>
>>> -#include <fuse_opt.h>
>>> #include "macosx.h"
>>> #include "erofs/config.h"
>>> #include "erofs/print.h"
>>> @@ -15,177 +14,507 @@
>>> #include "erofs/dir.h"
>>> #include "erofs/inode.h"
>>>
>>> -struct erofsfuse_dir_context {
>>> +#include <float.h>
>>> +#include <fuse.h>
>>> +#include <fuse_lowlevel.h>
>>> +
>>> +/* used in list/getxattr, given buf size is 0 and
>>> + * expected return val is xattr size
>>> + * FIXME: remove this workaround by new lib interface
>>> + */
>>> +#define EROFSFUSE_XATTR_BUF_SIZE 4096
>>> +#define EROFSFUSE_TIMEOUT DBL_MAX
>>> +
>>> +struct erofsfuse_readdir_context {
>>> struct erofs_dir_context ctx;
>>> - fuse_fill_dir_t filler;
>>> - struct fuse_file_info *fi;
>>> +
>>> + fuse_req_t req;
>>> void *buf;
>>> + int is_plus;
>>> + size_t offset;
>>> + size_t buf_size;
>>> + size_t start_off;
>>> + struct fuse_file_info *fi;
>>> +};
>>> +
>>> +struct erofsfuse_lookupdir_context {
>>> + struct erofs_dir_context ctx;
>>> +
>>> + const char *target_name;
>>> + struct fuse_entry_param *ent;
>>> };
>>>
>>> -static int erofsfuse_fill_dentries(struct erofs_dir_context *ctx)
>>> +static int erofsfuse_add_dentry(struct erofs_dir_context *ctx)
>>> {
>>> - struct erofsfuse_dir_context *fusectx = (void *)ctx;
>>> - struct stat st = {0};
>>> + size_t size = 0;
>>> char dname[EROFS_NAME_LEN + 1];
>>> + struct erofsfuse_readdir_context *readdir_ctx = (void *)ctx;
>>> +
>>> + if (readdir_ctx->offset < readdir_ctx->start_off) {
>>> + readdir_ctx->offset +=
>>> + ctx->de_namelen + sizeof(struct erofs_dirent);
>>> + return 0;
>>> + }
>>>
>>> strncpy(dname, ctx->dname, ctx->de_namelen);
>>> dname[ctx->de_namelen] = '\0';
>>> - st.st_mode = erofs_ftype_to_dtype(ctx->de_ftype) << 12;
>>> - fusectx->filler(fusectx->buf, dname, &st, 0);
>>> + readdir_ctx->offset += ctx->de_namelen + sizeof(struct erofs_dirent);
>>> +
>>> + if (!readdir_ctx->is_plus) { /* fuse 3 still use non-plus readdir */
>>> + struct stat st = { 0 };
>>> +
>>> + st.st_mode = erofs_ftype_to_dtype(ctx->de_ftype);
>>> + st.st_ino = ctx->de_nid;
>>> + size = fuse_add_direntry(readdir_ctx->req, readdir_ctx->buf,
>>> + readdir_ctx->buf_size, dname, &st,
>>> + readdir_ctx->offset);
>>> + } else {
>>> +#if FUSE_MAJOR_VERSION >= 3
>>> + struct fuse_entry_param param;
>>> +
>>> + param.ino = ctx->de_nid;
>>> + param.generation = 0;
>>> + param.attr.st_ino = ctx->de_nid;
>>> + param.attr.st_mode = erofs_ftype_to_dtype(ctx->de_ftype);
>>> + param.attr_timeout = param.entry_timeout = EROFSFUSE_TIMEOUT;
>>> + size = fuse_add_direntry_plus(readdir_ctx->req,
>>> + readdir_ctx->buf,
>>> + readdir_ctx->buf_size, dname,
>>> + ¶m, readdir_ctx->offset);
>>> +#else
>>> + erofs_err("fuse 2 readdirplus is not supported");
>
> Does this really happen? If so, should we fallback instead?
>
>>> +#endif
>>> + }
>>> +
>>> + if (size > readdir_ctx->buf_size) {
>>> + readdir_ctx->offset -=
>>> + ctx->de_namelen + sizeof(struct erofs_dirent);
>>> + return 1;
>>> + }
>>> + readdir_ctx->buf += size;
>>> + readdir_ctx->buf_size -= size;
>>> return 0;
>>> }
>>>
>>> -int erofsfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
>>> - off_t offset, struct fuse_file_info *fi)
>>> +static void erofsfuse_fill_stat(struct erofs_inode *vi, struct stat *stbuf)
>
> Please rename it as erofsfuse_getattr().
It seems to have the same name as an existing function.
>
>>> {
>>> - int ret;
>>> - struct erofs_inode dir;
>>> - struct erofsfuse_dir_context ctx = {
>>> - .ctx.dir = &dir,
>>> - .ctx.cb = erofsfuse_fill_dentries,
>>> - .filler = filler,
>>> - .fi = fi,
>>> - .buf = buf,
>>> - };
>>> - erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);
>>> -
>>> - dir.sbi = &sbi;
>>> - ret = erofs_ilookup(path, &dir);
>>> - if (ret)
>>> - return ret;
>>> + stbuf->st_mode = vi->i_mode;
>>> + stbuf->st_nlink = vi->i_nlink;
>>> +
>>> + if (!S_ISDIR(stbuf->st_mode))
>>> + stbuf->st_size = vi->i_size;
>>
>> Why are directories skipped here? This causes the directory size read
>> from erofsfuse to be 0.
>>
>>> + if (S_ISBLK(vi->i_mode) || S_ISCHR(vi->i_mode))
>>> + stbuf->st_rdev = vi->u.i_rdev;
>>> + stbuf->st_blocks = roundup(vi->i_size, erofs_blksiz(&sbi)) >> 9;
>>> + stbuf->st_uid = vi->i_uid;
>>> + stbuf->st_gid = vi->i_gid;
>>> + stbuf->st_ctime = vi->i_mtime;
>>> + stbuf->st_mtime = stbuf->st_ctime;
>>> + stbuf->st_atime = stbuf->st_ctime;
>>> +}
>>>
>>> - erofs_dbg("path=%s nid = %llu", path, dir.nid | 0ULL);
>>> - if (!S_ISDIR(dir.i_mode))
>>> - return -ENOTDIR;
>>> +static int erofsfuse_lookup_dentry(struct erofs_dir_context *ctx)
>>> +{
>>> + struct erofsfuse_lookupdir_context *lookup_ctx = (void *)ctx;
>>>
>>> - if (!dir.i_size)
>>> + if (lookup_ctx->ent->ino != 0 ||
>>> + strlen(lookup_ctx->target_name) != ctx->de_namelen)
>>> return 0;
>>> +
>>> + if (!strncmp(lookup_ctx->target_name, ctx->dname, ctx->de_namelen)) {
>>> + int ret;
>>> + struct erofs_inode vi = {
>>> + .sbi = &sbi,
>>> + .nid = (erofs_nid_t)ctx->de_nid,
>>> + };
>>> +
>>> + ret = erofs_read_inode_from_disk(&vi);
>>> + if (ret < 0)
>>> + return ret;
>>> +
>>> + lookup_ctx->ent->ino = ctx->de_nid;
>>> + lookup_ctx->ent->attr.st_ino = ctx->de_nid;
>>> +
>>> + erofsfuse_fill_stat(&vi, &(lookup_ctx->ent->attr));
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +static inline erofs_nid_t erofsfuse_getnid(fuse_ino_t ino)
>>> +{
>>> + return ino == FUSE_ROOT_ID ? sbi.root_nid : (erofs_nid_t)ino;
>
> I think you should use
>
> static inline erofs_nid_t erofsfuse_to_nid(fuse_ino_t ino)
>
> if (ino == FUSE_ROOT_ID)
> return sbi.root_nid;
> return (erofs_nid_t)(ino - FUSE_ROOT_ID);
>
> static inline erofs_nid_t erofsfuse_to_ino(erofs_nid_t nid)
>
> if (nid == sbi.root_nid)
> return FUSE_ROOT_NID;
> return (nid + FUSE_ROOT_ID);
>
> instead.
Thanks.
>
>>> +}
>>> +
>
> ...
>
>>> +
>>> +static void erofsfuse_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
>>> + size_t size)
>>> +{
>>> + int ret;
>>> + char *buf;
>>> + size_t bufsize = size;
>>> + struct erofs_inode vi = { .sbi = &sbi, .nid = erofsfuse_getnid(ino) };
>>>
>>> - return erofs_listxattr(&vi, list, size);
>>> + erofs_dbg("ino = %lu, name = %s, size = %lu", ino, name, size);
>>> +
>>> + ret = erofs_read_inode_from_disk(&vi);
>>> + if (ret < 0) {
>>> + fuse_reply_err(req, EIO);
>>
>> Ditto.
>>
>>> + return;
>>> + }
>>> +
>>> + if (bufsize == 0)
>>> + bufsize = EROFSFUSE_XATTR_BUF_SIZE;
>>
>> Why do we need to reconfigure bufsize here? erofs_listxattr should
>> handle bufsize of 0.
>
> Yes, I don't know what happens too.
The new email to Jianan explains the problem, please check it.
>
> Thanks,
> Gao Xiang
Thanks,
Yiyan
More information about the Linux-erofs
mailing list