[PATCH v3 3/4] KVM: PPC: Book3S HV: migrate remaining normal-GFNs to secure-GFNs in H_SVM_INIT_DONE

Laurent Dufour ldufour at linux.ibm.com
Mon Jun 29 18:48:18 AEST 2020


On 28/06/2020 at 18:20, Bharata B Rao wrote:
> On Fri, Jun 19, 2020 at 03:43:41PM -0700, Ram Pai wrote:
>> H_SVM_INIT_DONE incorrectly assumes that the Ultravisor has explicitly
> 
> As noted in the last iteration, can you reword the above please?
> I don't see it as an incorrect assumption, but see it as an extension of
> scope now :-)
> 
>> called H_SVM_PAGE_IN for all secure pages. These GFNs continue to be
>> normal GFNs associated with normal PFNs; when in fact, these GFNs should
>> have been secure GFNs, associated with device PFNs.
>>
>> Move all the PFNs associated with the SVM's GFNs to secure-PFNs in
>> H_SVM_INIT_DONE. Skip the GFNs that are already Paged-in or Shared
>> through H_SVM_PAGE_IN, or Paged-in followed by a Paged-out through
>> UV_PAGE_OUT.
>>
>> Cc: Paul Mackerras <paulus at ozlabs.org>
>> Cc: Benjamin Herrenschmidt <benh at kernel.crashing.org>
>> Cc: Michael Ellerman <mpe at ellerman.id.au>
>> Cc: Bharata B Rao <bharata at linux.ibm.com>
>> Cc: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
>> Cc: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
>> Cc: Laurent Dufour <ldufour at linux.ibm.com>
>> Cc: Thiago Jung Bauermann <bauerman at linux.ibm.com>
>> Cc: David Gibson <david at gibson.dropbear.id.au>
>> Cc: Claudio Carvalho <cclaudio at linux.ibm.com>
>> Cc: kvm-ppc at vger.kernel.org
>> Cc: linuxppc-dev at lists.ozlabs.org
>> Signed-off-by: Ram Pai <linuxram at us.ibm.com>
>> ---
>>   Documentation/powerpc/ultravisor.rst        |   2 +
>>   arch/powerpc/include/asm/kvm_book3s_uvmem.h |   2 +
>>   arch/powerpc/kvm/book3s_hv_uvmem.c          | 154 +++++++++++++++++++++++-----
>>   3 files changed, 132 insertions(+), 26 deletions(-)
>>
>> diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst
>> index 363736d..3bc8957 100644
>> --- a/Documentation/powerpc/ultravisor.rst
>> +++ b/Documentation/powerpc/ultravisor.rst
>> @@ -933,6 +933,8 @@ Return values
>>   	* H_UNSUPPORTED		if called from the wrong context (e.g.
>>   				from an SVM or before an H_SVM_INIT_START
>>   				hypercall).
>> +	* H_STATE		if the hypervisor could not successfully
>> +                                transition the VM to Secure VM.
>>   
>>   Description
>>   ~~~~~~~~~~~
>> diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
>> index 5a9834e..b9cd7eb 100644
>> --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
>> +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
>> @@ -22,6 +22,8 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
>>   unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
>>   void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
>>   			     struct kvm *kvm, bool skip_page_out);
>> +int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
>> +			const struct kvm_memory_slot *memslot);
>>   #else
>>   static inline int kvmppc_uvmem_init(void)
>>   {
>> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
>> index c8c0290..449e8a7 100644
>> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
>> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
>> @@ -93,6 +93,7 @@
>>   #include <asm/ultravisor.h>
>>   #include <asm/mman.h>
>>   #include <asm/kvm_ppc.h>
>> +#include <asm/kvm_book3s_uvmem.h>
>>   
>>   static struct dev_pagemap kvmppc_uvmem_pgmap;
>>   static unsigned long *kvmppc_uvmem_bitmap;
>> @@ -339,6 +340,21 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
>>   	return false;
>>   }
>>   
>> +/* return true if the GFN is a shared-GFN or a secure-GFN */
>> +bool kvmppc_gfn_has_transitioned(unsigned long gfn, struct kvm *kvm)
>> +{
>> +	struct kvmppc_uvmem_slot *p;
>> +
>> +	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
>> +		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
>> +			unsigned long index = gfn - p->base_pfn;
>> +
>> +			return (p->pfns[index] & KVMPPC_GFN_FLAG_MASK);
>> +		}
>> +	}
>> +	return false;
>> +}
>> +
>>   unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
>>   {
>>   	struct kvm_memslots *slots;
>> @@ -379,12 +395,31 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
>>   
>>   unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
>>   {
>> +	struct kvm_memslots *slots;
>> +	struct kvm_memory_slot *memslot;
>> +	int srcu_idx;
>> +	long ret = H_SUCCESS;
>> +
>>   	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
>>   		return H_UNSUPPORTED;
>>   
>> +	/* migrate any unmoved normal PFNs to device PFNs */
>> +	srcu_idx = srcu_read_lock(&kvm->srcu);
>> +	slots = kvm_memslots(kvm);
>> +	kvm_for_each_memslot(memslot, slots) {
>> +		ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
>> +		if (ret) {
>> +			ret = H_STATE;
>> +			goto out;
>> +		}
>> +	}
>> +
>>   	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
>>   	pr_info("LPID %d went secure\n", kvm->arch.lpid);
>> -	return H_SUCCESS;
>> +
>> +out:
>> +	srcu_read_unlock(&kvm->srcu, srcu_idx);
>> +	return ret;
>>   }
>>   
>>   /*
>> @@ -505,12 +540,14 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
>>   }
>>   
>>   /*
>> - * Alloc a PFN from private device memory pool and copy page from normal
>> - * memory to secure memory using UV_PAGE_IN uvcall.
>> + * Alloc a PFN from private device memory pool. If @pagein is true,
>> + * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
>>    */
>> -static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>> -		   unsigned long end, unsigned long gpa, struct kvm *kvm,
>> -		   unsigned long page_shift, bool *downgrade)
>> +static int kvmppc_svm_migrate_page(struct vm_area_struct *vma,
>> +		unsigned long start,
>> +		unsigned long end, unsigned long gpa, struct kvm *kvm,
>> +		unsigned long page_shift,
>> +		bool pagein)
>>   {
>>   	unsigned long src_pfn, dst_pfn = 0;
>>   	struct migrate_vma mig;
>> @@ -526,18 +563,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>>   	mig.src = &src_pfn;
>>   	mig.dst = &dst_pfn;
>>   
>> -	/*
>> -	 * We come here with mmap_sem write lock held just for
>> -	 * ksm_madvise(), otherwise we only need read mmap_sem.
>> -	 * Hence downgrade to read lock once ksm_madvise() is done.
>> -	 */
> 
> Can you please retain this comment? This explains why we take write lock
> and then downgrade.
> 
>> -	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
>> -			  MADV_UNMERGEABLE, &vma->vm_flags);
>> -	downgrade_write(&kvm->mm->mmap_sem);
>> -	*downgrade = true;
> 
> When I introduced this variable, there was a suggestion to rename it
> to "downgraded", but we were a bit late then. When you are touching
> this now, can you rename it appropriately?
> 
>> -	if (ret)
>> -		return ret;
>> -
>>   	ret = migrate_vma_setup(&mig);
>>   	if (ret)
>>   		return ret;
>> @@ -553,11 +578,16 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>>   		goto out_finalize;
>>   	}
>>   
>> -	pfn = *mig.src >> MIGRATE_PFN_SHIFT;
>> -	spage = migrate_pfn_to_page(*mig.src);
>> -	if (spage)
>> -		uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
>> -			   page_shift);
>> +	if (pagein) {
>> +		pfn = *mig.src >> MIGRATE_PFN_SHIFT;
>> +		spage = migrate_pfn_to_page(*mig.src);
>> +		if (spage) {
>> +			ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
>> +					gpa, 0, page_shift);
>> +			if (ret)
>> +				goto out_finalize;
>> +		}
>> +	}
>>   
>>   	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
>>   	migrate_vma_pages(&mig);
>> @@ -566,6 +596,66 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>>   	return ret;
>>   }
>>   
>> +int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
>> +		const struct kvm_memory_slot *memslot)
>> +{
>> +	unsigned long gfn = memslot->base_gfn;
>> +	unsigned long end;
>> +	bool downgrade = false;
>> +	struct vm_area_struct *vma;
>> +	int i, ret = 0;
>> +	unsigned long start = gfn_to_hva(kvm, gfn);
>> +
>> +	if (kvm_is_error_hva(start))
>> +		return H_STATE;
>> +
>> +	end = start + (memslot->npages << PAGE_SHIFT);
>> +
>> +	down_write(&kvm->mm->mmap_sem);
>> +
>> +	mutex_lock(&kvm->arch.uvmem_lock);
>> +	vma = find_vma_intersection(kvm->mm, start, end);
>> +	if (!vma || vma->vm_start > start || vma->vm_end < end) {
>> +		ret = H_STATE;
>> +		goto out_unlock;
>> +	}
>> +
>> +	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
>> +			  MADV_UNMERGEABLE, &vma->vm_flags);
>> +	downgrade_write(&kvm->mm->mmap_sem);
>> +	downgrade = true;
>> +	if (ret) {
>> +		ret = H_STATE;
>> +		goto out_unlock;
>> +	}
>> +
>> +	for (i = 0; i < memslot->npages; i++, ++gfn) {
>> +		/*
>> +		 * Skip GFNs that have already transitioned:
>> +		 * paged-in GFNs, shared GFNs, and paged-in GFNs
>> +		 * that were later paged-out.
>> +		 */
>> +		if (kvmppc_gfn_has_transitioned(gfn, kvm))
>> +			continue;
>> +
>> +		start = gfn_to_hva(kvm, gfn);
>> +		end = start + (1UL << PAGE_SHIFT);
>> +		ret = kvmppc_svm_migrate_page(vma, start, end,
>> +			(gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
>> +
> 
> As I said last time, you are assuming that the vma that you obtained
> in the beginning actually spans the entire memslot range. This might
> be true as you haven't found any issues during testing, but I feel it
> is better if there is no such implicit assumption in the code here.

I agree that assumptions are sometimes not good for future work, but here the
mmap_sem is held, and the VMA's boundaries have already been checked, so how
could the VMA not span the memslot's range?

Am I missing something?
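
That said, if the implicit assumption really has to go, something along
these lines could be used instead (an untested sketch, reusing the helpers
already introduced by this patch: kvmppc_gfn_has_transitioned() and
kvmppc_svm_migrate_page(); the per-GFN find_vma_intersection() and the
error handling here are only illustrative):

	for (i = 0; i < memslot->npages; i++, ++gfn) {
		/* skip GFNs that have already transitioned */
		if (kvmppc_gfn_has_transitioned(gfn, kvm))
			continue;

		start = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(start)) {
			ret = H_STATE;
			break;
		}
		end = start + (1UL << PAGE_SHIFT);

		/* re-validate the VMA covering this GFN's HVA */
		vma = find_vma_intersection(kvm->mm, start, end);
		if (!vma || vma->vm_start > start || vma->vm_end < end) {
			ret = H_STATE;
			break;
		}

		ret = kvmppc_svm_migrate_page(vma, start, end,
				gfn << PAGE_SHIFT, kvm, PAGE_SHIFT, false);
		if (ret)
			break;
	}

But that is one VMA lookup per page, which looks like a high price to pay
when the boundary check done once before the loop should already guarantee
that the VMA covers the whole memslot range.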

Cheers,
Laurent.

