[PATCH v3 08/14] KVM: PPC: Allow use of small pages to back Book3S HV guests
Paul Mackerras
paulus at samba.org
Tue Dec 13 09:31:41 EST 2011
This relaxes the requirement that the guest memory be provided as
16MB huge pages, allowing it to be provided as normal memory, i.e.
in pages of PAGE_SIZE bytes (4k or 64k). To allow this, we index
the kvm->arch.slot_phys[] arrays by small-page (PAGE_SIZE) index, even
when huge pages are being used, and use the low-order 5 bits of each
entry (KVMPPC_PAGE_ORDER_MASK) to store the order of the enclosing page
relative to normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE).
Signed-off-by: Paul Mackerras <paulus at samba.org>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 10 +++
arch/powerpc/include/asm/kvm_host.h | 3 +-
arch/powerpc/include/asm/kvm_ppc.h | 2 +-
arch/powerpc/include/asm/reg.h | 1 +
arch/powerpc/kvm/book3s_64_mmu_hv.c | 122 ++++++++++++++++++++----------
arch/powerpc/kvm/book3s_hv.c | 57 ++++++++------
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +-
7 files changed, 132 insertions(+), 69 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 7e6f2ed..10920f7 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -113,4 +113,14 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
return 0; /* error */
}
+static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
+ unsigned long pagesize)
+{
+ unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
+
+ if (pagesize <= PAGE_SIZE)
+ return 1;
+ return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
+}
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index beb22ba..9252d5e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -177,14 +177,13 @@ struct revmap_entry {
};
/* Low-order bits in kvm->arch.slot_phys[][] */
+#define KVMPPC_PAGE_ORDER_MASK 0x1f
#define KVMPPC_GOT_PAGE 0x80
struct kvm_arch {
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
- unsigned long ram_psize;
- unsigned long ram_porder;
unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 111e1b4..a61b5b5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
- struct kvm_memory_slot *memslot);
+ struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 559da19..4599d12 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -237,6 +237,7 @@
#define LPCR_ISL (1ul << (63-2))
#define LPCR_VC_SH (63-2)
#define LPCR_DPFD_SH (63-11)
+#define LPCR_VRMASD (0x1ful << (63-16))
#define LPCR_VRMA_L (1ul << (63-12))
#define LPCR_VRMA_LP0 (1ul << (63-15))
#define LPCR_VRMA_LP1 (1ul << (63-16))
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 87016cc..cc18f3d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -34,8 +34,6 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
-/* Pages in the VRMA are 16MB pages */
-#define VRMA_PAGE_ORDER 24
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
@@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm)
free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
}
-void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
+/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
+}
+
+/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize == 0x10000) ? 0x1000 : 0;
+}
+
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+ unsigned long porder)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long i;
unsigned long npages;
unsigned long hp_v, hp_r;
unsigned long addr, hash;
- unsigned long porder = kvm->arch.ram_porder;
+ unsigned long psize;
+ unsigned long hp0, hp1;
long ret;
- npages = kvm->arch.slot_npages[memslot->id];
+ psize = 1ul << porder;
+ npages = memslot->npages >> (porder - PAGE_SHIFT);
/* VRMA can't be > 1TB */
if (npages > 1ul << (40 - porder))
@@ -114,6 +126,11 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
if (npages > HPT_NPTEG)
npages = HPT_NPTEG;
+ hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+ HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
+ hp1 = hpte1_pgsize_encoding(psize) |
+ HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
@@ -125,10 +142,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
* is available and use it.
*/
hash = (hash << 3) + 7;
- hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
- (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
- HPTE_V_LARGE | HPTE_V_VALID;
- hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+ hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
+ hp_r = hp1 | addr;
ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
if (ret != H_SUCCESS) {
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
@@ -176,22 +191,25 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
* one already in the kvm->arch.slot_phys[][] arrays.
*/
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
- struct kvm_memory_slot *memslot)
+ struct kvm_memory_slot *memslot,
+ unsigned long psize)
{
unsigned long start;
- long np;
- struct page *page, *pages[1];
+ long np, err;
+ struct page *page, *hpage, *pages[1];
+ unsigned long s, pgsize;
unsigned long *physp;
- unsigned long pfn, i;
+ unsigned int got, pgorder;
+ unsigned long pfn, i, npages;
physp = kvm->arch.slot_phys[memslot->id];
if (!physp)
return -EINVAL;
- i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
- if (physp[i])
+ if (physp[gfn - memslot->base_gfn])
return 0;
page = NULL;
+ pgsize = psize;
start = gfn_to_hva_memslot(memslot, gfn);
/* Instantiate and get the page we want access to */
@@ -199,25 +217,46 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
if (np != 1)
return -EINVAL;
page = pages[0];
-
- /* Check it's a 16MB page */
- if (!PageHead(page) ||
- compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) {
- pr_err("page at %lx isn't 16MB (o=%d)\n",
- start, compound_order(page));
- put_page(page);
- return -EINVAL;
+ got = KVMPPC_GOT_PAGE;
+
+ /* See if this is a large page */
+ s = PAGE_SIZE;
+ if (PageHuge(page)) {
+ hpage = compound_head(page);
+ s <<= compound_order(hpage);
+ /* Get the whole large page if slot alignment is ok */
+ if (s > psize && slot_is_aligned(memslot, s) &&
+ !(memslot->userspace_addr & (s - 1))) {
+ start &= ~(s - 1);
+ pgsize = s;
+ page = hpage;
+ }
}
+ err = -EINVAL;
+ if (s < psize)
+ goto out;
pfn = page_to_pfn(page);
+ npages = pgsize >> PAGE_SHIFT;
+ pgorder = __ilog2(npages);
+ physp += (gfn - memslot->base_gfn) & ~(npages - 1);
spin_lock(&kvm->arch.slot_phys_lock);
- if (!physp[i])
- physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
- else
- put_page(page);
+ for (i = 0; i < npages; ++i) {
+ if (!physp[i]) {
+ physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder;
+ got = 0;
+ }
+ }
spin_unlock(&kvm->arch.slot_phys_lock);
+ err = 0;
- return 0;
+ out:
+ if (got) {
+ if (PageHuge(page))
+ page = compound_head(page);
+ put_page(page);
+ }
+ return err;
}
/*
@@ -242,7 +281,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return H_PARAMETER;
- if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
+ if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
return H_PARAMETER;
preempt_disable();
@@ -269,8 +310,8 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
struct kvm_memory_slot *memslot;
unsigned long gfn = gpa >> PAGE_SHIFT;
struct page *page;
- unsigned long offset;
- unsigned long pfn, pa;
+ unsigned long psize, offset;
+ unsigned long pa;
unsigned long *physp;
memslot = gfn_to_memslot(kvm, gfn);
@@ -279,20 +320,23 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
physp = kvm->arch.slot_phys[memslot->id];
if (!physp)
return NULL;
- physp += (gfn - memslot->base_gfn) >>
- (kvm->arch.ram_porder - PAGE_SHIFT);
+ physp += gfn - memslot->base_gfn;
pa = *physp;
if (!pa) {
- if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
+ if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
return NULL;
pa = *physp;
}
- pfn = pa >> PAGE_SHIFT;
- page = pfn_to_page(pfn);
+ page = pfn_to_page(pa >> PAGE_SHIFT);
+ psize = PAGE_SIZE;
+ if (PageHuge(page)) {
+ page = compound_head(page);
+ psize <<= compound_order(page);
+ }
get_page(page);
- offset = gpa & (kvm->arch.ram_psize - 1);
+ offset = gpa & (psize - 1);
if (nb_ret)
- *nb_ret = kvm->arch.ram_psize - offset;
+ *nb_ret = psize - offset;
return page_address(page) + offset;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 767272c..b07f545 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -51,8 +51,6 @@
#include <linux/highmem.h>
#include <linux/hugetlb.h>
-#define LARGE_PAGE_ORDER 24 /* 16MB pages */
-
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
@@ -1107,24 +1105,26 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
return fd;
}
+static unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+ unsigned long senc = 0;
+
+ if (psize > 0x1000) {
+ senc = SLB_VSID_L;
+ if (psize == 0x10000)
+ senc |= SLB_VSID_LP_01;
+ }
+ return senc;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
- unsigned long psize;
unsigned long npages;
unsigned long *phys;
- /* For now, only allow 16MB-aligned slots */
- psize = kvm->arch.ram_psize;
- if ((mem->memory_size & (psize - 1)) ||
- (mem->guest_phys_addr & (psize - 1))) {
- pr_err("bad memory_size=%llx @ %llx\n",
- mem->memory_size, mem->guest_phys_addr);
- return -EINVAL;
- }
-
/* Allocate a slot_phys array */
- npages = mem->memory_size >> kvm->arch.ram_porder;
+ npages = mem->memory_size >> PAGE_SHIFT;
phys = kvm->arch.slot_phys[mem->slot];
if (!phys) {
phys = vzalloc(npages * sizeof(unsigned long));
@@ -1152,6 +1152,8 @@ static void unpin_slot(struct kvm *kvm, int slot_id)
continue;
pfn = physp[j] >> PAGE_SHIFT;
page = pfn_to_page(pfn);
+ if (PageHuge(page))
+ page = compound_head(page);
SetPageDirty(page);
put_page(page);
}
@@ -1174,12 +1176,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
- unsigned long lpcr;
+ unsigned long lpcr, senc;
unsigned long psize, porder;
unsigned long rma_size;
unsigned long rmls;
unsigned long *physp;
- unsigned long i, npages, pa;
+ unsigned long i, npages;
mutex_lock(&kvm->lock);
if (kvm->arch.rma_setup_done)
@@ -1201,8 +1203,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
goto up_out;
psize = vma_kernel_pagesize(vma);
- if (psize != kvm->arch.ram_psize)
- goto up_out;
+ porder = __ilog2(psize);
/* Is this one of our preallocated RMAs? */
if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
@@ -1219,13 +1220,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
goto out;
}
+ /* We can handle 4k, 64k or 16M pages in the VRMA */
+ err = -EINVAL;
+ if (!(psize == 0x1000 || psize == 0x10000 ||
+ psize == 0x1000000))
+ goto out;
+
/* Update VRMASD field in the LPCR */
- lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH);
- lpcr |= LPCR_VRMA_L;
+ senc = slb_pgsize_encoding(psize);
+ lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
+ lpcr |= senc << (LPCR_VRMASD_SH - 4);
kvm->arch.lpcr = lpcr;
/* Create HPTEs in the hash page table for the VRMA */
- kvmppc_map_vrma(vcpu, memslot);
+ kvmppc_map_vrma(vcpu, memslot, porder);
} else {
/* Set up to use an RMO region */
@@ -1264,13 +1272,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
/* Initialize phys addrs of pages in RMO */
- porder = kvm->arch.ram_porder;
- npages = rma_size >> porder;
- pa = ri->base_pfn << PAGE_SHIFT;
+ npages = ri->npages;
+ porder = __ilog2(npages);
physp = kvm->arch.slot_phys[memslot->id];
spin_lock(&kvm->arch.slot_phys_lock);
for (i = 0; i < npages; ++i)
- physp[i] = pa + (i << porder);
+ physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
spin_unlock(&kvm->arch.slot_phys_lock);
}
@@ -1299,8 +1306,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
- kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
- kvm->arch.ram_porder = LARGE_PAGE_ORDER;
kvm->arch.rma = NULL;
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 047c5e1..c086eb0 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -77,6 +77,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
memslot = builtin_gfn_to_memslot(kvm, gfn);
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)))
return H_PARAMETER;
+
+ /* Check if the requested page fits entirely in the memslot. */
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
slot_fn = gfn - memslot->base_gfn;
physp = kvm->arch.slot_phys[memslot->id];
@@ -88,9 +92,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pa = *physp;
if (!pa)
return H_TOO_HARD;
+ pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
- pte_size = kvm->arch.ram_psize;
if (pte_size < psize)
return H_PARAMETER;
if (pa && pte_size > psize)
--
1.7.7.3
More information about the Linuxppc-dev
mailing list