[PATCH v1 2/4] KVM: PPC: Book3S HV: track shared GFNs of secure VMs
Laurent Dufour
ldufour at linux.ibm.com
Fri Jun 5 19:48:30 AEST 2020
Le 31/05/2020 à 04:27, Ram Pai a écrit :
> During the life of SVM, its GFNs can transition from secure to shared
> state and vice-versa. Since the kernel does not track GFNs that are
> shared, it is not possible to disambiguate a shared GFN from a GFN whose
> PFN has not yet been migrated to a device-PFN.
>
> The ability to identify a shared GFN is needed to skip migrating its PFN
> to device PFN. This functionality is leveraged in a subsequent patch.
>
> Add the ability to identify the state of a GFN.
>
> Cc: Paul Mackerras <paulus at ozlabs.org>
> Cc: Benjamin Herrenschmidt <benh at kernel.crashing.org>
> Cc: Michael Ellerman <mpe at ellerman.id.au>
> Cc: Bharata B Rao <bharata at linux.ibm.com>
> Cc: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
> Cc: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
> Cc: Laurent Dufour <ldufour at linux.ibm.com>
> Cc: Thiago Jung Bauermann <bauerman at linux.ibm.com>
> Cc: David Gibson <david at gibson.dropbear.id.au>
> Cc: Claudio Carvalho <cclaudio at linux.ibm.com>
> Cc: kvm-ppc at vger.kernel.org
> Cc: linuxppc-dev at lists.ozlabs.org
> Reviewed-by: Thiago Jung Bauermann <bauerman at linux.ibm.com>
> Signed-off-by: Ram Pai <linuxram at us.ibm.com>
> ---
> arch/powerpc/include/asm/kvm_book3s_uvmem.h | 6 +-
> arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +-
> arch/powerpc/kvm/book3s_hv.c | 2 +-
> arch/powerpc/kvm/book3s_hv_uvmem.c | 115 ++++++++++++++++++++++++++--
> 4 files changed, 113 insertions(+), 12 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
> index 5a9834e..f0c5708 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
> @@ -21,7 +21,8 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
> int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
> unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
> void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
> - struct kvm *kvm, bool skip_page_out);
> + struct kvm *kvm, bool skip_page_out,
> + bool purge_gfn);
> #else
> static inline int kvmppc_uvmem_init(void)
> {
> @@ -75,6 +76,7 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
>
> static inline void
> kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
> - struct kvm *kvm, bool skip_page_out) { }
> + struct kvm *kvm, bool skip_page_out,
> + bool purge_gfn) { }
> #endif /* CONFIG_PPC_UV */
> #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index 803940d..3448459 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -1100,7 +1100,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
> unsigned int shift;
>
> if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
> - kvmppc_uvmem_drop_pages(memslot, kvm, true);
> + kvmppc_uvmem_drop_pages(memslot, kvm, true, false);
Why is purge_gfn false here?
This function is called when dropping a hot-plugged memslot.
That being said, when called by kvmppc_core_commit_memory_region_hv(), the
memslot is then freed by kvmppc_uvmem_slot_free(), so the shared state will not
remain for long, but there is a window...
>
> if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
> return;
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 103d13e..4c62bfe 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -5467,7 +5467,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
> continue;
>
> kvm_for_each_memslot(memslot, slots) {
> - kvmppc_uvmem_drop_pages(memslot, kvm, true);
> + kvmppc_uvmem_drop_pages(memslot, kvm, true, true);
> uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
> }
> }
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index ea4a1f1..2ef1e03 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -99,14 +99,56 @@
> static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
>
> #define KVMPPC_UVMEM_PFN (1UL << 63)
> +#define KVMPPC_UVMEM_SHARED (1UL << 62)
> +#define KVMPPC_UVMEM_FLAG_MASK (KVMPPC_UVMEM_PFN | KVMPPC_UVMEM_SHARED)
> +#define KVMPPC_UVMEM_PFN_MASK (~KVMPPC_UVMEM_FLAG_MASK)
>
> struct kvmppc_uvmem_slot {
> struct list_head list;
> unsigned long nr_pfns;
> unsigned long base_pfn;
> + /*
> + * pfns array has an entry for each GFN of the memory slot.
> + *
> + * The GFN can be in one of the following states.
> + *
> + * (a) Secure - The GFN is secure. Only Ultravisor can access it.
> + * (b) Shared - The GFN is shared. Both Hypervisor and Ultravisor
> + * can access it.
> + * (c) Normal - The GFN is normal. Only Hypervisor can access it.
> + *
> + * Secure GFN is associated with a devicePFN. Its pfn[] has
> + * KVMPPC_UVMEM_PFN flag set, KVMPPC_UVMEM_SHARED flag unset,
> + * and has the value of the device PFN.
> + *
> + * Shared GFN is associated with a memoryPFN. Its pfn[] has
> + * KVMPPC_UVMEM_SHARED flag set. But its KVMPPC_UVMEM_PFN is not set,
> + * and there is no PFN value stored.
> + *
> + * Normal GFN is not associated with memoryPFN. Its pfn[] has
> + * KVMPPC_UVMEM_SHARED and KVMPPC_UVMEM_PFN flag unset, and no PFN
> + * value is stored.
> + *
> + * Any other combination of values in pfn[] leads to undefined
> + * behavior.
> + *
> + * Life cycle of a GFN --
> + *
> + * ---------------------------------------------------------
> + * | | Share | Unshare | SVM |slot |
> + * | | | | abort/ |flush |
> + * | | | | terminate | |
> + * ---------------------------------------------------------
> + * | | | | | |
> + * | Secure | Shared | Secure |Normal |Secure |
> + * | | | | | |
> + * | Shared | Shared | Secure |Normal |Shared |
> + * | | | | | |
> + * | Normal | Shared | Secure |Normal |Normal |
> + * ---------------------------------------------------------
> + */
> unsigned long *pfns;
> };
> -
> struct kvmppc_uvmem_page_pvt {
> struct kvm *kvm;
> unsigned long gpa;
> @@ -175,7 +217,12 @@ static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
>
> list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
> if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
> - p->pfns[gfn - p->base_pfn] = 0;
> + /*
> + * Reset everything, but keep the KVMPPC_UVMEM_SHARED
> + * flag intact. A gfn continues to be shared or
> + * unshared, with or without an associated device pfn.
> + */
> + p->pfns[gfn - p->base_pfn] &= KVMPPC_UVMEM_SHARED;
> return;
> }
> }
> @@ -193,7 +240,7 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
> if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
> if (uvmem_pfn)
> *uvmem_pfn = p->pfns[index] &
> - ~KVMPPC_UVMEM_PFN;
> + KVMPPC_UVMEM_PFN_MASK;
> return true;
> } else
> return false;
> @@ -202,6 +249,38 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
> return false;
> }
>
> +static void kvmppc_gfn_uvmem_shared(unsigned long gfn, struct kvm *kvm,
> + bool set)
> +{
> + struct kvmppc_uvmem_slot *p;
> +
> + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
> + if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
> + unsigned long index = gfn - p->base_pfn;
> +
> + if (set)
> + p->pfns[index] |= KVMPPC_UVMEM_SHARED;
> + else
> + p->pfns[index] &= ~KVMPPC_UVMEM_SHARED;
> + return;
> + }
> + }
> +}
> +
> +bool kvmppc_gfn_is_uvmem_shared(unsigned long gfn, struct kvm *kvm)
> +{
> + struct kvmppc_uvmem_slot *p;
> +
> + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
> + if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
> + unsigned long index = gfn - p->base_pfn;
> +
> + return (p->pfns[index] & KVMPPC_UVMEM_SHARED);
> + }
> + }
> + return false;
> +}
> +
> unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> {
> struct kvm_memslots *slots;
> @@ -256,9 +335,13 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
> * is HV side fault on these pages. Next we *get* these pages, forcing
> * fault on them, do fault time migration to replace the device PTEs in
> * QEMU page table with normal PTEs from newly allocated pages.
> + *
> + * if @purge_gfn is set, cleanup any information related to each of
> + * the GFNs associated with this memory slot.
> */
> void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
> - struct kvm *kvm, bool skip_page_out)
> + struct kvm *kvm, bool skip_page_out,
> + bool purge_gfn)
> {
> int i;
> struct kvmppc_uvmem_page_pvt *pvt;
> @@ -269,11 +352,22 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
> struct page *uvmem_page;
>
> mutex_lock(&kvm->arch.uvmem_lock);
> +
> + if (purge_gfn) {
> + /*
> + * cleanup the shared status of the GFN here.
> + * Any device PFN associated with the GFN shall
> + * be cleaned up later, in kvmppc_uvmem_page_free()
> + * when the device PFN is actually disassociated
> + * from the GFN.
> + */
> + kvmppc_gfn_uvmem_shared(gfn, kvm, false);
> + }
> +
> if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
> mutex_unlock(&kvm->arch.uvmem_lock);
> continue;
> }
> -
> uvmem_page = pfn_to_page(uvmem_pfn);
> pvt = uvmem_page->zone_device_data;
> pvt->skip_page_out = skip_page_out;
> @@ -304,7 +398,7 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
> srcu_idx = srcu_read_lock(&kvm->srcu);
>
> kvm_for_each_memslot(memslot, kvm_memslots(kvm))
> - kvmppc_uvmem_drop_pages(memslot, kvm, false);
> + kvmppc_uvmem_drop_pages(memslot, kvm, false, true);
>
> srcu_read_unlock(&kvm->srcu, srcu_idx);
>
> @@ -470,8 +564,11 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
> goto retry;
> }
>
> - if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
> + if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
> + page_shift)) {
> + kvmppc_gfn_uvmem_shared(gfn, kvm, true);
> ret = H_SUCCESS;
> + }
> kvm_release_pfn_clean(pfn);
> mutex_unlock(&kvm->arch.uvmem_lock);
> out:
> @@ -527,8 +624,10 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
> goto out_unlock;
>
> if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
> - &downgrade))
> + &downgrade)) {
> + kvmppc_gfn_uvmem_shared(gfn, kvm, false);
> ret = H_SUCCESS;
> + }
> out_unlock:
> mutex_unlock(&kvm->arch.uvmem_lock);
> out:
>
More information about the Linuxppc-dev
mailing list