[PATCH v3 3/4] KVM: PPC: Book3S HV: migrate remaining normal-GFNs to secure-GFNs in H_SVM_INIT_DONE
Laurent Dufour
ldufour at linux.ibm.com
Mon Jun 29 18:48:18 AEST 2020
On 28/06/2020 at 18:20, Bharata B Rao wrote:
> On Fri, Jun 19, 2020 at 03:43:41PM -0700, Ram Pai wrote:
>> H_SVM_INIT_DONE incorrectly assumes that the Ultravisor has explicitly
>
> As noted in the last iteration, can you reword the above please?
> I don't see it as an incorrect assumption, but see it as extension of
> scope now :-)
>
>> called H_SVM_PAGE_IN for all secure pages. These GFNs continue to be
>> normal GFNs associated with normal PFNs; when in fact, these GFNs should
>> have been secure GFNs, associated with device PFNs.
>>
>> Move all the PFNs associated with the SVM's GFNs, to secure-PFNs, in
>> H_SVM_INIT_DONE. Skip the GFNs that are already Paged-in or Shared
>> through H_SVM_PAGE_IN, or Paged-in followed by a Paged-out through
>> UV_PAGE_OUT.
>>
>> Cc: Paul Mackerras <paulus at ozlabs.org>
>> Cc: Benjamin Herrenschmidt <benh at kernel.crashing.org>
>> Cc: Michael Ellerman <mpe at ellerman.id.au>
>> Cc: Bharata B Rao <bharata at linux.ibm.com>
>> Cc: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
>> Cc: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
>> Cc: Laurent Dufour <ldufour at linux.ibm.com>
>> Cc: Thiago Jung Bauermann <bauerman at linux.ibm.com>
>> Cc: David Gibson <david at gibson.dropbear.id.au>
>> Cc: Claudio Carvalho <cclaudio at linux.ibm.com>
>> Cc: kvm-ppc at vger.kernel.org
>> Cc: linuxppc-dev at lists.ozlabs.org
>> Signed-off-by: Ram Pai <linuxram at us.ibm.com>
>> ---
>> Documentation/powerpc/ultravisor.rst | 2 +
>> arch/powerpc/include/asm/kvm_book3s_uvmem.h | 2 +
>> arch/powerpc/kvm/book3s_hv_uvmem.c | 154 +++++++++++++++++++++++-----
>> 3 files changed, 132 insertions(+), 26 deletions(-)
>>
>> diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst
>> index 363736d..3bc8957 100644
>> --- a/Documentation/powerpc/ultravisor.rst
>> +++ b/Documentation/powerpc/ultravisor.rst
>> @@ -933,6 +933,8 @@ Return values
>> * H_UNSUPPORTED if called from the wrong context (e.g.
>> from an SVM or before an H_SVM_INIT_START
>> hypercall).
>> + * H_STATE if the hypervisor could not successfully
>> + transition the VM to a Secure VM.
>>
>> Description
>> ~~~~~~~~~~~
>> diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
>> index 5a9834e..b9cd7eb 100644
>> --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
>> +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
>> @@ -22,6 +22,8 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
>> unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
>> void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
>> struct kvm *kvm, bool skip_page_out);
>> +int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
>> + const struct kvm_memory_slot *memslot);
>> #else
>> static inline int kvmppc_uvmem_init(void)
>> {
>> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
>> index c8c0290..449e8a7 100644
>> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
>> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
>> @@ -93,6 +93,7 @@
>> #include <asm/ultravisor.h>
>> #include <asm/mman.h>
>> #include <asm/kvm_ppc.h>
>> +#include <asm/kvm_book3s_uvmem.h>
>>
>> static struct dev_pagemap kvmppc_uvmem_pgmap;
>> static unsigned long *kvmppc_uvmem_bitmap;
>> @@ -339,6 +340,21 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
>> return false;
>> }
>>
>> +/* return true if the GFN is a shared-GFN or a secure-GFN */
>> +bool kvmppc_gfn_has_transitioned(unsigned long gfn, struct kvm *kvm)
>> +{
>> + struct kvmppc_uvmem_slot *p;
>> +
>> + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
>> + if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
>> + unsigned long index = gfn - p->base_pfn;
>> +
>> + return (p->pfns[index] & KVMPPC_GFN_FLAG_MASK);
>> + }
>> + }
>> + return false;
>> +}
>> +
>> unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
>> {
>> struct kvm_memslots *slots;
>> @@ -379,12 +395,31 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
>>
>> unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
>> {
>> + struct kvm_memslots *slots;
>> + struct kvm_memory_slot *memslot;
>> + int srcu_idx;
>> + long ret = H_SUCCESS;
>> +
>> if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
>> return H_UNSUPPORTED;
>>
>> + /* migrate any unmoved normal pfns to device pfns */
>> + srcu_idx = srcu_read_lock(&kvm->srcu);
>> + slots = kvm_memslots(kvm);
>> + kvm_for_each_memslot(memslot, slots) {
>> + ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
>> + if (ret) {
>> + ret = H_STATE;
>> + goto out;
>> + }
>> + }
>> +
>> kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
>> pr_info("LPID %d went secure\n", kvm->arch.lpid);
>> - return H_SUCCESS;
>> +
>> +out:
>> + srcu_read_unlock(&kvm->srcu, srcu_idx);
>> + return ret;
>> }
>>
>> /*
>> @@ -505,12 +540,14 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
>> }
>>
>> /*
>> - * Alloc a PFN from private device memory pool and copy page from normal
>> - * memory to secure memory using UV_PAGE_IN uvcall.
>> + * Alloc a PFN from private device memory pool. If @pagein is true,
>> + * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
>> */
>> -static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>> - unsigned long end, unsigned long gpa, struct kvm *kvm,
>> - unsigned long page_shift, bool *downgrade)
>> +static int kvmppc_svm_migrate_page(struct vm_area_struct *vma,
>> + unsigned long start,
>> + unsigned long end, unsigned long gpa, struct kvm *kvm,
>> + unsigned long page_shift,
>> + bool pagein)
>> {
>> unsigned long src_pfn, dst_pfn = 0;
>> struct migrate_vma mig;
>> @@ -526,18 +563,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>> mig.src = &src_pfn;
>> mig.dst = &dst_pfn;
>>
>> - /*
>> - * We come here with mmap_sem write lock held just for
>> - * ksm_madvise(), otherwise we only need read mmap_sem.
>> - * Hence downgrade to read lock once ksm_madvise() is done.
>> - */
>
> Can you please retain this comment? This explains why we take write lock
> and then downgrade.
>
>> - ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
>> - MADV_UNMERGEABLE, &vma->vm_flags);
>> - downgrade_write(&kvm->mm->mmap_sem);
>> - *downgrade = true;
>
> When I introduced this variable, there was a suggestion to rename it
> to "downgraded", but we were a bit late then. When you are touching
> this now, can you rename it appropriately?
>
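FWIW, both points could be addressed together now that this block lives in
kvmppc_uv_migrate_mem_slot(); something along these lines (untested sketch,
with the variable renamed as suggested and the original rationale kept next
to the downgrade):

	/*
	 * We come here with mmap_sem write lock held just for
	 * ksm_madvise(), otherwise we only need read mmap_sem.
	 * Hence downgrade to read lock once ksm_madvise() is done.
	 */
	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
			  MADV_UNMERGEABLE, &vma->vm_flags);
	downgrade_write(&kvm->mm->mmap_sem);
	downgraded = true;
	if (ret) {
		ret = H_STATE;
		goto out_unlock;
	}
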
>> - if (ret)
>> - return ret;
>> -
>> ret = migrate_vma_setup(&mig);
>> if (ret)
>> return ret;
>> @@ -553,11 +578,16 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>> goto out_finalize;
>> }
>>
>> - pfn = *mig.src >> MIGRATE_PFN_SHIFT;
>> - spage = migrate_pfn_to_page(*mig.src);
>> - if (spage)
>> - uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
>> - page_shift);
>> + if (pagein) {
>> + pfn = *mig.src >> MIGRATE_PFN_SHIFT;
>> + spage = migrate_pfn_to_page(*mig.src);
>> + if (spage) {
>> + ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
>> + gpa, 0, page_shift);
>> + if (ret)
>> + goto out_finalize;
>> + }
>> + }
>>
>> *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
>> migrate_vma_pages(&mig);
>> @@ -566,6 +596,66 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>> return ret;
>> }
>>
>> +int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
>> + const struct kvm_memory_slot *memslot)
>> +{
>> + unsigned long gfn = memslot->base_gfn;
>> + unsigned long end;
>> + bool downgrade = false;
>> + struct vm_area_struct *vma;
>> + int i, ret = 0;
>> + unsigned long start = gfn_to_hva(kvm, gfn);
>> +
>> + if (kvm_is_error_hva(start))
>> + return H_STATE;
>> +
>> + end = start + (memslot->npages << PAGE_SHIFT);
>> +
>> + down_write(&kvm->mm->mmap_sem);
>> +
>> + mutex_lock(&kvm->arch.uvmem_lock);
>> + vma = find_vma_intersection(kvm->mm, start, end);
>> + if (!vma || vma->vm_start > start || vma->vm_end < end) {
>> + ret = H_STATE;
>> + goto out_unlock;
>> + }
>> +
>> + ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
>> + MADV_UNMERGEABLE, &vma->vm_flags);
>> + downgrade_write(&kvm->mm->mmap_sem);
>> + downgrade = true;
>> + if (ret) {
>> + ret = H_STATE;
>> + goto out_unlock;
>> + }
>> +
>> + for (i = 0; i < memslot->npages; i++, ++gfn) {
>> + /*
>> + * Skip GFNs that have already transitioned:
>> + * paged-in GFNs, shared GFNs, and paged-in GFNs
>> + * that were later paged-out.
>> + */
>> + if (kvmppc_gfn_has_transitioned(gfn, kvm))
>> + continue;
>> +
>> + start = gfn_to_hva(kvm, gfn);
>> + end = start + (1UL << PAGE_SHIFT);
>> + ret = kvmppc_svm_migrate_page(vma, start, end,
>> + (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
>> +
>
> As I said last time, you are assuming that the vma that you obtained
> in the beginning actually spans the entire memslot range. This might
> be true as you haven't found any issues during testing, but I feel it
> is better if there is no such implicit assumption in the code here.
I agree that such assumptions are not always good for future work, but here the
mmap_sem is held and the VMA's boundaries have already been checked, so how
could the VMA not span the memslot's range?
Am I missing something?
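That said, if we really want to avoid relying on it, re-checking the VMA on
each iteration would look roughly like this (untested sketch, and probably not
worth the extra find_vma_intersection() calls if the assumption holds):

	for (i = 0; i < memslot->npages; i++, ++gfn) {
		/* Skip GFNs that have already transitioned */
		if (kvmppc_gfn_has_transitioned(gfn, kvm))
			continue;

		start = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(start)) {
			ret = H_STATE;
			break;
		}
		end = start + (1UL << PAGE_SHIFT);

		/* Re-lookup the VMA if the current one no longer covers the GFN */
		if (!vma || start < vma->vm_start || end > vma->vm_end) {
			vma = find_vma_intersection(kvm->mm, start, end);
			if (!vma || vma->vm_start > start || vma->vm_end < end) {
				ret = H_STATE;
				break;
			}
		}

		ret = kvmppc_svm_migrate_page(vma, start, end,
				(gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
		if (ret)
			break;
	}
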
Cheers,
Laurent.