[PATCH kernel] KVM: PPC: Improve KVM reference counting

Michael Ellerman mpe at ellerman.id.au
Thu Feb 21 17:26:33 AEDT 2019


Alexey Kardashevskiy <aik at ozlabs.ru> writes:

> The anon fd's ops releases the KVM reference in the release hook.
> However we reference the KVM object after we create the fd so there is
> small window when the release function can be called and
> dereferenced the KVM object which potentially may free it.
  dereference
>
> It is not a problem at the moment as the file is created and KVM is
> referenced under the KVM lock and the release function obtains the same
> lock before dereferencing the KVM (although the lock is not held when
> calling kvm_put_kvm()) but it is a fragile against future changes.
>
> This references the KVM object before creating a file.
>
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
>
> The original bug is described here:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cfa393811
> But in this case kvm_put_kvm() is called straight away with no locks before/after/around.
>
> ---
>  arch/powerpc/kvm/book3s_64_vio.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> index 532ab797..d68c969 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -338,14 +338,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  		}
>  	}
>  
> +	kvm_get_kvm(kvm);
>  	if (!ret)
>  		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
>  				       stt, O_RDWR | O_CLOEXEC);
>  
> -	if (ret >= 0) {
> +	if (ret >= 0)
>  		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> -		kvm_get_kvm(kvm);
> -	}
> +	else
> +		kvm_put_kvm(kvm);
>  
>  	mutex_unlock(&kvm->lock);

This looks correct to me. But I feel like the logic could be cleaner,
perhaps like this (patch below):

	mutex_lock(&kvm->lock);

	/* Check this LIOBN hasn't been previously allocated */
	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
		if (siter->liobn == args->liobn) {
			ret = -EBUSY;
			goto fail_unlock;
		}
	}

	kvm_get_kvm(kvm);
	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				stt, O_RDWR | O_CLOEXEC);

	if (ret < 0) {
		kvm_put_kvm(kvm);
		goto fail_unlock;
	}

	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);

	mutex_unlock(&kvm->lock);

	return ret;

 fail_unlock:
	mutex_unlock(&kvm->lock);
 fail:


cheers

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 532ab79734c7..5d74602db0d0 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -296,7 +296,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	struct kvmppc_spapr_tce_table *stt = NULL;
 	struct kvmppc_spapr_tce_table *siter;
 	unsigned long npages, size = args->size;
-	int ret = -ENOMEM;
+	int ret;
 	int i;
 
 	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
@@ -330,28 +330,30 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	mutex_lock(&kvm->lock);
 
 	/* Check this LIOBN hasn't been previously allocated */
-	ret = 0;
 	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
 		if (siter->liobn == args->liobn) {
 			ret = -EBUSY;
-			break;
+			goto fail_unlock;
 		}
 	}
 
-	if (!ret)
-		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-				       stt, O_RDWR | O_CLOEXEC);
+	kvm_get_kvm(kvm);
+	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				stt, O_RDWR | O_CLOEXEC);
 
-	if (ret >= 0) {
-		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
-		kvm_get_kvm(kvm);
+	if (ret < 0) {
+		kvm_put_kvm(kvm);
+		goto fail_unlock;
 	}
 
+	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+
 	mutex_unlock(&kvm->lock);
 
-	if (ret >= 0)
-		return ret;
+	return ret;
 
+ fail_unlock:
+	mutex_unlock(&kvm->lock);
  fail:
 	for (i = 0; i < npages; i++)
 		if (stt->pages[i])


More information about the Linuxppc-dev mailing list