[PATCH kernel] KVM: PPC: Improve KVM reference counting

Alexey Kardashevskiy aik at ozlabs.ru
Fri Feb 22 12:33:32 AEDT 2019



On 21/02/2019 17:26, Michael Ellerman wrote:
> Alexey Kardashevskiy <aik at ozlabs.ru> writes:
> 
>> The anon fd's ops releases the KVM reference in the release hook.
>> However we reference the KVM object after we create the fd so there is
>> small window when the release function can be called and
>> dereferenced the KVM object which potentially may free it.
>   dereference
>>
>> It is not a problem at the moment as the file is created and KVM is
>> referenced under the KVM lock and the release function obtains the same
>> lock before dereferencing the KVM (although the lock is not held when
>> calling kvm_put_kvm()) but it is a fragile against future changes.
>>
>> This references the KVM object before creating a file.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
>> ---
>>
>> The original bug is described here:
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cfa393811
>> But in this case kvm_put_kvm() is called straight away with no locks before/after/around.
>>
>> ---
>>  arch/powerpc/kvm/book3s_64_vio.c | 7 ++++---
>>  1 file changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
>> index 532ab797..d68c969 100644
>> --- a/arch/powerpc/kvm/book3s_64_vio.c
>> +++ b/arch/powerpc/kvm/book3s_64_vio.c
>> @@ -338,14 +338,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>>  		}
>>  	}
>>  
>> +	kvm_get_kvm(kvm);
>>  	if (!ret)
>>  		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
>>  				       stt, O_RDWR | O_CLOEXEC);
>>  
>> -	if (ret >= 0) {
>> +	if (ret >= 0)
>>  		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
>> -		kvm_get_kvm(kvm);
>> -	}
>> +	else
>> +		kvm_put_kvm(kvm);
>>  
>>  	mutex_unlock(&kvm->lock);
> 
> This looks correct to me. But I feel like the logic could be cleaner,
> perhaps like this (patch below):


And I feel that 1) your patch tries to hide what it actually does 2)
having 2 unlocks for one lock is an invite for future bugs imho, I'd
think the whole point of adding new gotos is exactly to have 1 unlock.


> 
> 	mutex_lock(&kvm->lock);
> 
> 	/* Check this LIOBN hasn't been previously allocated */
> 	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
> 		if (siter->liobn == args->liobn) {
> 			ret = -EBUSY;
> 			goto fail_unlock;
> 		}
> 	}
> 
> 	kvm_get_kvm(kvm);
> 	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> 				stt, O_RDWR | O_CLOEXEC);
> 
> 	if (ret < 0) {
> 		kvm_put_kvm(kvm);
> 		goto fail_unlock;
> 	}
> 
> 	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> 
> 	mutex_unlock(&kvm->lock);
> 
> 	return ret;
> 
>  fail_unlock:
> 	mutex_unlock(&kvm->lock);
>  fail:
> 
> 
> cheers
> 
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> index 532ab79734c7..5d74602db0d0 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -296,7 +296,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  	struct kvmppc_spapr_tce_table *stt = NULL;
>  	struct kvmppc_spapr_tce_table *siter;
>  	unsigned long npages, size = args->size;
> -	int ret = -ENOMEM;
> +	int ret;
>  	int i;
>  
>  	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
> @@ -330,28 +330,30 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  	mutex_lock(&kvm->lock);
>  
>  	/* Check this LIOBN hasn't been previously allocated */
> -	ret = 0;
>  	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
>  		if (siter->liobn == args->liobn) {
>  			ret = -EBUSY;
> -			break;
> +			goto fail_unlock;
>  		}
>  	}
>  
> -	if (!ret)
> -		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> -				       stt, O_RDWR | O_CLOEXEC);
> +	kvm_get_kvm(kvm);
> +	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> +				stt, O_RDWR | O_CLOEXEC);
>  
> -	if (ret >= 0) {
> -		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> -		kvm_get_kvm(kvm);
> +	if (ret < 0) {
> +		kvm_put_kvm(kvm);
> +		goto fail_unlock;
>  	}
>  
> +	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> +
>  	mutex_unlock(&kvm->lock);
>  
> -	if (ret >= 0)
> -		return ret;
> +	return ret;
>  
> + fail_unlock:
> +	mutex_unlock(&kvm->lock);
>   fail:
>  	for (i = 0; i < npages; i++)
>  		if (stt->pages[i])
> 

-- 
Alexey


More information about the Linuxppc-dev mailing list