[PATCH kvm-next V11 4/7] KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes

Garg, Shivank shivankg at amd.com
Thu Sep 25 21:44:15 AEST 2025



On 9/25/2025 8:20 AM, Sean Christopherson wrote:
> My apologies for the super late feedback.  None of this is critical (mechanical
> things that can be cleaned up after the fact), so if there's any urgency to
> getting this series into 6.18, just ignore it.
> 
> On Wed, Aug 27, 2025, Ackerley Tng wrote:
>> Shivank Garg <shivankg at amd.com> writes:
>> @@ -463,11 +502,70 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
>>  	return true;
>>  }
>>
>> +static struct inode *kvm_gmem_inode_create(const char *name, loff_t size,
>> +					   u64 flags)
>> +{
>> +	struct inode *inode;
>> +
>> +	inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
>> +	if (IS_ERR(inode))
>> +		return inode;
>> +
>> +	inode->i_private = (void *)(unsigned long)flags;
>> +	inode->i_op = &kvm_gmem_iops;
>> +	inode->i_mapping->a_ops = &kvm_gmem_aops;
>> +	inode->i_mode |= S_IFREG;
>> +	inode->i_size = size;
>> +	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> +	mapping_set_inaccessible(inode->i_mapping);
>> +	/* Unmovable mappings are supposed to be marked unevictable as well. */
>> +	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> +
>> +	return inode;
>> +}
>> +
>> +static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size,
>> +						  u64 flags)
>> +{
>> +	static const char *name = "[kvm-gmem]";
>> +	struct inode *inode;
>> +	struct file *file;
>> +	int err;
>> +
>> +	err = -ENOENT;
>> +	/* __fput() will take care of fops_put(). */
>> +	if (!fops_get(&kvm_gmem_fops))
>> +		goto err;
>> +
>> +	inode = kvm_gmem_inode_create(name, size, flags);
>> +	if (IS_ERR(inode)) {
>> +		err = PTR_ERR(inode);
>> +		goto err_fops_put;
>> +	}
>> +
>> +	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR,
>> +				 &kvm_gmem_fops);
>> +	if (IS_ERR(file)) {
>> +		err = PTR_ERR(file);
>> +		goto err_put_inode;
>> +	}
>> +
>> +	file->f_flags |= O_LARGEFILE;
>> +	file->private_data = priv;
>> +
>> +	return file;
>> +
>> +err_put_inode:
>> +	iput(inode);
>> +err_fops_put:
>> +	fops_put(&kvm_gmem_fops);
>> +err:
>> +	return ERR_PTR(err);
>> +}
> 
> I don't see any reason to add two helpers.  It requires quite a bit more lines
> of code due to adding more error paths and local variables, and IMO doesn't make
> the code any easier to read.
> 
> Passing in "gmem" as @priv is especially ridiculous, as it adds code and
> obfuscates what file->private_data is set to.
> 
> I get the sense that the code was written to be a "replacement" for common APIs,
> but that is nonsensical (no pun intended).
> 
>>  static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>>  {
>> -	const char *anon_name = "[kvm-gmem]";
>>  	struct kvm_gmem *gmem;
>> -	struct inode *inode;
>>  	struct file *file;
>>  	int fd, err;
>>
>> @@ -481,32 +579,16 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>>  		goto err_fd;
>>  	}
>>
>> -	file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
>> -					 O_RDWR, NULL);
>> +	file = kvm_gmem_inode_create_getfile(gmem, size, flags);
>>  	if (IS_ERR(file)) {
>>  		err = PTR_ERR(file);
>>  		goto err_gmem;
>>  	}
>>
>> -	file->f_flags |= O_LARGEFILE;
>> -
>> -	inode = file->f_inode;
>> -	WARN_ON(file->f_mapping != inode->i_mapping);
>> -
>> -	inode->i_private = (void *)(unsigned long)flags;
>> -	inode->i_op = &kvm_gmem_iops;
>> -	inode->i_mapping->a_ops = &kvm_gmem_aops;
>> -	inode->i_mode |= S_IFREG;
>> -	inode->i_size = size;
>> -	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> -	mapping_set_inaccessible(inode->i_mapping);
>> -	/* Unmovable mappings are supposed to be marked unevictable as well. */
>> -	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> -
>>  	kvm_get_kvm(kvm);
>>  	gmem->kvm = kvm;
>>  	xa_init(&gmem->bindings);
>> -	list_add(&gmem->entry, &inode->i_mapping->i_private_list);
>> +	list_add(&gmem->entry, &file_inode(file)->i_mapping->i_private_list);
> 
> I don't understand this change?  Isn't file_inode(file) == inode?
> 
> Compile tested only, and again not critical, but it's -40 LoC...
> 
> 

Thanks.
I did functional testing and it works fine.


> ---
>  include/uapi/linux/magic.h |  1 +
>  virt/kvm/guest_memfd.c     | 75 ++++++++++++++++++++++++++++++++------
>  virt/kvm/kvm_main.c        |  7 +++-
>  virt/kvm/kvm_mm.h          |  9 +++--
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> index bb575f3ab45e..638ca21b7a90 100644
> --- a/include/uapi/linux/magic.h
> +++ b/include/uapi/linux/magic.h
> @@ -103,5 +103,6 @@
>  #define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
>  #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
>  #define PID_FS_MAGIC		0x50494446	/* "PIDF" */
> +#define GUEST_MEMFD_MAGIC	0x474d454d	/* "GMEM" */
>  
>  #endif /* __LINUX_MAGIC_H__ */
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 08a6bc7d25b6..73c9791879d5 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -1,12 +1,16 @@
>  // SPDX-License-Identifier: GPL-2.0
> +#include <linux/anon_inodes.h>
>  #include <linux/backing-dev.h>
>  #include <linux/falloc.h>
> +#include <linux/fs.h>
>  #include <linux/kvm_host.h>
> +#include <linux/pseudo_fs.h>
>  #include <linux/pagemap.h>
> -#include <linux/anon_inodes.h>
>  
>  #include "kvm_mm.h"
>  
> +static struct vfsmount *kvm_gmem_mnt;
> +
>  struct kvm_gmem {
>  	struct kvm *kvm;
>  	struct xarray bindings;
> @@ -385,9 +389,45 @@ static struct file_operations kvm_gmem_fops = {
>  	.fallocate	= kvm_gmem_fallocate,
>  };
>  
> -void kvm_gmem_init(struct module *module)
> +static int kvm_gmem_init_fs_context(struct fs_context *fc)
> +{
> +	if (!init_pseudo(fc, GUEST_MEMFD_MAGIC))
> +		return -ENOMEM;
> +
> +	fc->s_iflags |= SB_I_NOEXEC;
> +	fc->s_iflags |= SB_I_NODEV;
> +
> +	return 0;
> +}
> +
> +static struct file_system_type kvm_gmem_fs = {
> +	.name		 = "guest_memfd",
> +	.init_fs_context = kvm_gmem_init_fs_context,
> +	.kill_sb	 = kill_anon_super,
> +};
> +
> +static int kvm_gmem_init_mount(void)
> +{
> +	kvm_gmem_mnt = kern_mount(&kvm_gmem_fs);
> +
> +	if (IS_ERR(kvm_gmem_mnt))
> +		return PTR_ERR(kvm_gmem_mnt);
> +
> +	kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC;
> +	return 0;
> +}
> +
> +int kvm_gmem_init(struct module *module)
>  {
>  	kvm_gmem_fops.owner = module;
> +
> +	return kvm_gmem_init_mount();
> +}
> +
> +void kvm_gmem_exit(void)
> +{
> +	kern_unmount(kvm_gmem_mnt);
> +	kvm_gmem_mnt = NULL;
>  }
>  
>  static int kvm_gmem_migrate_folio(struct address_space *mapping,
> @@ -465,7 +505,7 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
>  
>  static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  {
> -	const char *anon_name = "[kvm-gmem]";
> +	static const char *name = "[kvm-gmem]";
>  	struct kvm_gmem *gmem;
>  	struct inode *inode;
>  	struct file *file;
> @@ -481,17 +521,17 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  		goto err_fd;
>  	}
>  
> -	file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
> -					 O_RDWR, NULL);
> -	if (IS_ERR(file)) {
> -		err = PTR_ERR(file);
> +	/* __fput() will take care of fops_put(). */
> +	if (!fops_get(&kvm_gmem_fops)) {
> +		err = -ENOENT;
>  		goto err_gmem;
>  	}
>  
> -	file->f_flags |= O_LARGEFILE;
> -
> -	inode = file->f_inode;
> -	WARN_ON(file->f_mapping != inode->i_mapping);
> +	inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
> +	if (IS_ERR(inode)) {
> +		err = PTR_ERR(inode);
> +		goto err_fops;
> +	}
>  
>  	inode->i_private = (void *)(unsigned long)flags;
>  	inode->i_op = &kvm_gmem_iops;
> @@ -503,6 +543,15 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  	/* Unmovable mappings are supposed to be marked unevictable as well. */
>  	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>  
> +	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops);
> +	if (IS_ERR(file)) {
> +		err = PTR_ERR(file);
> +		goto err_inode;
> +	}
> +
> +	file->f_flags |= O_LARGEFILE;
> +	file->private_data = gmem;
> +
>  	kvm_get_kvm(kvm);
>  	gmem->kvm = kvm;
>  	xa_init(&gmem->bindings);
> @@ -511,6 +560,10 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  	fd_install(fd, file);
>  	return fd;
>  
> +err_inode:
> +	iput(inode);
> +err_fops:
> +	fops_put(&kvm_gmem_fops);
>  err_gmem:
>  	kfree(gmem);
>  err_fd:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 18f29ef93543..301d48d6e00d 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -6489,7 +6489,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
>  	if (WARN_ON_ONCE(r))
>  		goto err_vfio;
>  
> -	kvm_gmem_init(module);
> +	r = kvm_gmem_init(module);
> +	if (r)
> +		goto err_gmem;
>  
>  	r = kvm_init_virtualization();
>  	if (r)
> @@ -6510,6 +6512,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
>  err_register:
>  	kvm_uninit_virtualization();
>  err_virt:
> +	kvm_gmem_exit();
> +err_gmem:
>  	kvm_vfio_ops_exit();
>  err_vfio:
>  	kvm_async_pf_deinit();
> @@ -6541,6 +6545,7 @@ void kvm_exit(void)
>  	for_each_possible_cpu(cpu)
>  		free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
>  	kmem_cache_destroy(kvm_vcpu_cache);
> +	kvm_gmem_exit();
>  	kvm_vfio_ops_exit();
>  	kvm_async_pf_deinit();
>  	kvm_irqfd_exit();
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index 31defb08ccba..9fcc5d5b7f8d 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -68,17 +68,18 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
>  #endif /* HAVE_KVM_PFNCACHE */
>  
>  #ifdef CONFIG_KVM_GUEST_MEMFD
> -void kvm_gmem_init(struct module *module);
> +int kvm_gmem_init(struct module *module);
> +void kvm_gmem_exit(void);
>  int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
>  int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
>  		  unsigned int fd, loff_t offset);
>  void kvm_gmem_unbind(struct kvm_memory_slot *slot);
>  #else
> -static inline void kvm_gmem_init(struct module *module)
> +static inline int kvm_gmem_init(struct module *module)
>  {
> -
> +	return 0;
>  }
> -
> +static inline void kvm_gmem_exit(void) {};
>  static inline int kvm_gmem_bind(struct kvm *kvm,
>  					 struct kvm_memory_slot *slot,
>  					 unsigned int fd, loff_t offset)
> 
> base-commit: d133892dddd6607de651b7e32510359a6af97c4c
> --


More information about the Linux-erofs mailing list