[PATCH v10 07/10] erofs: introduce the page cache share feature

Gao Xiang hsiangkao at linux.alibaba.com
Tue Dec 23 19:11:09 AEDT 2025



On 2025/12/23 09:56, Hongbo Li wrote:
> From: Hongzhen Luo <hongzhen at linux.alibaba.com>
> 
> Currently, reading files with different paths (or names) but the same
> content will consume multiple copies of the page cache, even if the
> content of these page caches is the same. For example, reading
> identical files (e.g., *.so files) from two different minor versions of
> container images will cost multiple copies of the same page cache,
> since different containers have different mount points. Therefore,
> sharing the page cache for files with the same content can save memory.
> 
> This introduces the page cache share feature in erofs. It allocates a
> deduplicated inode and uses its page cache as shared. Reads for files
> with identical content will ultimately be routed to the page cache of
> the deduplicated inode. In this way, a single page cache satisfies
> multiple read requests for different files with the same contents.
> 
> Signed-off-by: Hongzhen Luo <hongzhen at linux.alibaba.com>
> Signed-off-by: Hongbo Li <lihongbo22 at huawei.com>
> ---
>   fs/erofs/Makefile   |   1 +
>   fs/erofs/internal.h |  29 ++++++
>   fs/erofs/ishare.c   | 211 ++++++++++++++++++++++++++++++++++++++++++++
>   fs/erofs/super.c    |  34 ++++++-
>   4 files changed, 272 insertions(+), 3 deletions(-)
>   create mode 100644 fs/erofs/ishare.c
> 
> diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
> index 549abc424763..a80e1762b607 100644
> --- a/fs/erofs/Makefile
> +++ b/fs/erofs/Makefile
> @@ -10,3 +10,4 @@ erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
>   erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
>   erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
>   erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
> +erofs-$(CONFIG_EROFS_FS_PAGE_CACHE_SHARE) += ishare.o
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 99e2857173c3..ae9560434324 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -304,6 +304,22 @@ struct erofs_inode {
>   		};
>   #endif	/* CONFIG_EROFS_FS_ZIP */
>   	};
> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +	union {
> +		/* internal dedup inode */
> +		struct {
> +			char *fingerprint;
> +			spinlock_t lock;
> +			/* all backing inodes */
> +			struct list_head backing_head;
> +		};
> +
> +		struct {
> +			struct inode *ishare;
> +			struct list_head backing_link;
> +		};

I think it would be better to restructure this as below:

struct erofs_inode_fingerprint {
	u8 *opaque;
	int size;
};

	struct list_head ishare_list;
	union {
		struct {
			struct erofs_inode_fingerprint fingerprint;
			spinlock_t ishare_lock;
		};
		struct inode *realinode;
	};


> +	};
> +#endif
>   	/* the corresponding vfs inode */
>   	struct inode vfs_inode;
>   };
> @@ -410,6 +426,7 @@ extern const struct inode_operations erofs_dir_iops;
>   
>   extern const struct file_operations erofs_file_fops;
>   extern const struct file_operations erofs_dir_fops;
> +extern const struct file_operations erofs_ishare_fops;
>   
>   extern const struct iomap_ops z_erofs_iomap_report_ops;
>   
> @@ -541,6 +558,18 @@ static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) {
>   static inline void erofs_fscache_submit_bio(struct bio *bio) {}
>   #endif
>   
> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +int __init erofs_init_ishare(void);
> +void erofs_exit_ishare(void);
> +bool erofs_ishare_fill_inode(struct inode *inode);
> +void erofs_ishare_free_inode(struct inode *inode);
> +#else
> +static inline int erofs_init_ishare(void) { return 0; }
> +static inline void erofs_exit_ishare(void) {}
> +static inline bool erofs_ishare_fill_inode(struct inode *inode) { return false; }
> +static inline void erofs_ishare_free_inode(struct inode *inode) {}
> +#endif // CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +
>   long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
>   long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
>   			unsigned long arg);
> diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
> new file mode 100644
> index 000000000000..4b46016bcd03
> --- /dev/null
> +++ b/fs/erofs/ishare.c
> @@ -0,0 +1,211 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024, Alibaba Cloud
> + */
> +#include <linux/xxhash.h>
> +#include <linux/refcount.h>
> +#include <linux/mount.h>
> +#include <linux/mutex.h>
> +#include <linux/ramfs.h>
> +#include "internal.h"
> +#include "xattr.h"
> +
> +#include "../internal.h"
> +
> +static struct vfsmount *erofs_ishare_mnt;
> +
> +static int erofs_ishare_iget5_eq(struct inode *inode, void *data)
> +{
> +	struct erofs_inode *vi = EROFS_I(inode);

	struct erofs_inode_fingerprint *fp1 = &EROFS_I(inode)->fingerprint;
	struct erofs_inode_fingerprint *fp2 = data;

	return fp1->size == fp2->size &&
		!memcmp(fp1->opaque, fp2->opaque, fp2->size);

	return vi->fingerprint.opaque && memcmp(vi->

> +
> +	return vi->fingerprint && memcmp(vi->fingerprint, data,
> +			sizeof(size_t) + *(size_t *)data) == 0;
> +}
> +
> +static int erofs_ishare_iget5_set(struct inode *inode, void *data)
> +{
> +	struct erofs_inode *vi = EROFS_I(inode);
> +
> +	vi->fingerprint = data;

	vi->fingerprint = *(struct erofs_inode_fingerprint *)data;

> +	INIT_LIST_HEAD(&vi->backing_head);
> +	spin_lock_init(&vi->lock);
> +	return 0;
> +}
> +
> +bool erofs_ishare_fill_inode(struct inode *inode)
> +{
> +	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
> +	struct erofs_xattr_prefix_item *ishare_prefix;

Just naming it
	struct erofs_xattr_prefix_item *prefix;

is fine, since it's unambiguous.

> +	struct erofs_inode *vi = EROFS_I(inode);
> +	struct inode *idedup;
> +	/*
> +	 * fingerprint layout:
> +	 * fingerprint length + fingerprint content (xattr_value + domain_id)
> +	 */

That is too hard to follow; just convert it to what I mentioned above:

	struct erofs_inode_fingerprint fp;

> +	char *ishare_key, *fingerprint;

	char *infix;

> +	ssize_t ishare_vlen;

	size_t valuelen;

> +	unsigned long hash;
> +	int key_idx;

	int base_index;

> +
> +	if (!sbi->domain_id || !erofs_sb_has_ishare_xattrs(sbi))
> +		return false;
> +
> +	ishare_prefix = sbi->xattr_prefixes + sbi->ishare_xattr_pfx;
> +	ishare_key = ishare_prefix->prefix->infix;
> +	key_idx = ishare_prefix->prefix->base_index;
> +	ishare_vlen = erofs_getxattr(inode, key_idx, ishare_key, NULL, 0);
> +	if (ishare_vlen <= 0 || ishare_vlen > (1 << sbi->blkszbits))
> +		return false;
> +

Then:
	fp.size = valuelen + strlen(sbi->domain_id);
	fp.opaque = kmalloc(fp.size, GFP_KERNEL);

And fix the remaining logic.

Thanks,
Gao Xiang


More information about the Linux-erofs mailing list