[PATCH v10 07/10] erofs: introduce the page cache share feature
Gao Xiang
hsiangkao at linux.alibaba.com
Tue Dec 23 19:11:09 AEDT 2025
On 2025/12/23 09:56, Hongbo Li wrote:
> From: Hongzhen Luo <hongzhen at linux.alibaba.com>
>
> Currently, reading files with different paths (or names) but the same
> content will consume multiple copies of the page cache, even if the
> content of these page caches is the same. For example, reading
> identical files (e.g., *.so files) from two different minor versions of
> container images will cost multiple copies of the same page cache,
> since different containers have different mount points. Therefore,
> sharing the page cache for files with the same content can save memory.
>
> This introduces the page cache share feature in erofs. It allocates a
> deduplicated inode and uses its page cache as shared. Reads for files
> with identical content will ultimately be routed to the page cache of
> the deduplicated inode. In this way, a single page cache satisfies
> multiple read requests for different files with the same contents.
>
> Signed-off-by: Hongzhen Luo <hongzhen at linux.alibaba.com>
> Signed-off-by: Hongbo Li <lihongbo22 at huawei.com>
> ---
> fs/erofs/Makefile | 1 +
> fs/erofs/internal.h | 29 ++++++
> fs/erofs/ishare.c | 211 ++++++++++++++++++++++++++++++++++++++++++++
> fs/erofs/super.c | 34 ++++++-
> 4 files changed, 272 insertions(+), 3 deletions(-)
> create mode 100644 fs/erofs/ishare.c
>
> diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
> index 549abc424763..a80e1762b607 100644
> --- a/fs/erofs/Makefile
> +++ b/fs/erofs/Makefile
> @@ -10,3 +10,4 @@ erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
> erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
> erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
> erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
> +erofs-$(CONFIG_EROFS_FS_PAGE_CACHE_SHARE) += ishare.o
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 99e2857173c3..ae9560434324 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -304,6 +304,22 @@ struct erofs_inode {
> };
> #endif /* CONFIG_EROFS_FS_ZIP */
> };
> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> + union {
> + /* internal dedup inode */
> + struct {
> + char *fingerprint;
> + spinlock_t lock;
> + /* all backing inodes */
> + struct list_head backing_head;
> + };
> +
> + struct {
> + struct inode *ishare;
> + struct list_head backing_link;
> + };
I think it would be better to restructure this as below:
struct erofs_inode_fingerprint {
u8 *opaque;
int size;
};
struct list_head ishare_list;
union {
struct {
struct erofs_inode_fingerprint fingerprint;
spinlock_t ishare_lock;
};
struct inode *realinode;
};
> + };
> +#endif
> /* the corresponding vfs inode */
> struct inode vfs_inode;
> };
> @@ -410,6 +426,7 @@ extern const struct inode_operations erofs_dir_iops;
>
> extern const struct file_operations erofs_file_fops;
> extern const struct file_operations erofs_dir_fops;
> +extern const struct file_operations erofs_ishare_fops;
>
> extern const struct iomap_ops z_erofs_iomap_report_ops;
>
> @@ -541,6 +558,18 @@ static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) {
> static inline void erofs_fscache_submit_bio(struct bio *bio) {}
> #endif
>
> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +int __init erofs_init_ishare(void);
> +void erofs_exit_ishare(void);
> +bool erofs_ishare_fill_inode(struct inode *inode);
> +void erofs_ishare_free_inode(struct inode *inode);
> +#else
> +static inline int erofs_init_ishare(void) { return 0; }
> +static inline void erofs_exit_ishare(void) {}
> +static inline bool erofs_ishare_fill_inode(struct inode *inode) { return false; }
> +static inline void erofs_ishare_free_inode(struct inode *inode) {}
> +#endif // CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +
> long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
> long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
> unsigned long arg);
> diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
> new file mode 100644
> index 000000000000..4b46016bcd03
> --- /dev/null
> +++ b/fs/erofs/ishare.c
> @@ -0,0 +1,211 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024, Alibaba Cloud
> + */
> +#include <linux/xxhash.h>
> +#include <linux/refcount.h>
> +#include <linux/mount.h>
> +#include <linux/mutex.h>
> +#include <linux/ramfs.h>
> +#include "internal.h"
> +#include "xattr.h"
> +
> +#include "../internal.h"
> +
> +static struct vfsmount *erofs_ishare_mnt;
> +
> +static int erofs_ishare_iget5_eq(struct inode *inode, void *data)
> +{
> + struct erofs_inode *vi = EROFS_I(inode);
struct erofs_inode_fingerprint *fp1 = &EROFS_I(inode)->fingerprint;
struct erofs_inode_fingerprint *fp2 = data;
return fp1->size == fp2->size &&
!memcmp(fp1->opaque, fp2->opaque, fp2->size);
return vi->fingerprint.opaque && memcmp(vi->
> +
> + return vi->fingerprint && memcmp(vi->fingerprint, data,
> + sizeof(size_t) + *(size_t *)data) == 0;
> +}
> +
> +static int erofs_ishare_iget5_set(struct inode *inode, void *data)
> +{
> + struct erofs_inode *vi = EROFS_I(inode);
> +
> + vi->fingerprint = data;
vi->fingerprint = *(struct erofs_inode_fingerprint *)data;
> + INIT_LIST_HEAD(&vi->backing_head);
> + spin_lock_init(&vi->lock);
> + return 0;
> +}
> +
> +bool erofs_ishare_fill_inode(struct inode *inode)
> +{
> + struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
> + struct erofs_xattr_prefix_item *ishare_prefix;
Just call it:
struct erofs_xattr_prefix_item *prefix;
That is fine, since it's unambiguous.
> + struct erofs_inode *vi = EROFS_I(inode);
> + struct inode *idedup;
> + /*
> + * fingerprint layout:
> + * fingerprint length + fingerprint content (xattr_value + domain_id)
> + */
That is too hard to follow; just convert it to what I mentioned above:
struct erofs_inode_fingerprint fp;
> + char *ishare_key, *fingerprint;
char *infix;
> + ssize_t ishare_vlen;
size_t valuelen;
> + unsigned long hash;
> + int key_idx;
int base_index;
> +
> + if (!sbi->domain_id || !erofs_sb_has_ishare_xattrs(sbi))
> + return false;
> +
> + ishare_prefix = sbi->xattr_prefixes + sbi->ishare_xattr_pfx;
> + ishare_key = ishare_prefix->prefix->infix;
> + key_idx = ishare_prefix->prefix->base_index;
> + ishare_vlen = erofs_getxattr(inode, key_idx, ishare_key, NULL, 0);
> + if (ishare_vlen <= 0 || ishare_vlen > (1 << sbi->blkszbits))
> + return false;
> +
Then:
fp.size = valuelen + strlen(sbi->domain_id);
fp.opaque = kmalloc(fp.size, GFP_KERNEL);
And fix the remaining logic.
Thanks,
Gao Xiang
More information about the Linux-erofs
mailing list