[PATCH -V3 25/25] powerpc: Handle hugepages in kvm
From: "Aneesh Kumar K.V" <aneesh.kumar at linux.vnet.ibm.com>
Date: Fri Mar 15 20:40:07 EST 2013
Teach the HV KVM code to look up and update hugepage (HUGE PMD) entries
when walking the Linux page tables for guest mappings. We could possibly
avoid some of these changes, because most of the HUGE PMD bits map to the
corresponding PTE bits.
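As a rough sketch of that observation (not part of this patch: only
PMD_HUGE_BUSY appears below, so PMD_HUGE_DIRTY and PMD_HUGE_ACCESSED are
hypothetical names standing in for the series' remaining flag
definitions), the overlap could be checked at build time:

	/* assumed sketch: HUGE PMD flags share the PTE bit positions */
	static inline void check_huge_pmd_bits(void)
	{
		BUILD_BUG_ON(PMD_HUGE_BUSY != _PAGE_BUSY);
		BUILD_BUG_ON(PMD_HUGE_DIRTY != _PAGE_DIRTY);
		BUILD_BUG_ON(PMD_HUGE_ACCESSED != _PAGE_ACCESSED);
	}

If those assertions held, kvmppc_read_update_linux_pte() could operate on
a hugepage PMD directly and much of the duplication below would go away.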
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 31 ++++++++++++
arch/powerpc/kvm/book3s_64_mmu_hv.c | 12 ++++-
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 75 ++++++++++++++++++++++--------
3 files changed, 97 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1d..1c5c799 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -110,6 +110,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
return rb;
}
+/* FIXME !! should we use hpte_actual_psize or hpte decode ? */
static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
/* only handle 4k, 64k and 16M pages for now */
@@ -189,6 +190,36 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
return pte;
}
+/*
+ * Lock and read a linux hugepage PMD. If it is a large (present) PMD,
+ * atomically set the referenced bit, and the dirty bit when writing to
+ * a writable mapping, and return the updated PMD; storing it back also
+ * clears PMD_HUGE_BUSY.
+ */
+static inline pmd_t kvmppc_read_update_linux_hugepmd(pmd_t *p, int writing)
+{
+ pmd_t pmd, tmp;
+
+ /* wait until PMD_HUGE_BUSY is clear then set it atomically */
+ __asm__ __volatile__ (
+ "1: ldarx %0,0,%3\n"
+ " andi. %1,%0,%4\n"
+ " bne- 1b\n"
+ " ori %1,%0,%4\n"
+ " stdcx. %1,0,%3\n"
+ " bne- 1b"
+ : "=&r" (pmd), "=&r" (tmp), "=m" (*p)
+ : "r" (p), "i" (PMD_HUGE_BUSY)
+ : "cc");
+
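+ /*
+ * Only a large (hugepage) PMD has its referenced/dirty bits
+ * updated here; anything else is stored back unchanged.
+ */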
+ if (pmd_large(pmd)) {
+ pmd = pmd_mkyoung(pmd);
+ if (writing && pmd_write(pmd))
+ pmd = pmd_mkdirty(pmd);
+ }
+
+ *p = pmd; /* clears PMD_HUGE_BUSY */
+ return pmd;
+}
+
/* Return HPTE cache control bits corresponding to Linux pte bits */
static inline unsigned long hpte_cache_bits(unsigned long pte_val)
{
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 4f2a7dc..da006da 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/* if the guest wants write access, see if that is OK */
if (!writing && hpte_is_writable(r)) {
+ int hugepage = 0;
pte_t *ptep, pte;
/*
@@ -683,11 +684,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
rcu_read_lock_sched();
ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL, NULL);
- if (ptep && pte_present(*ptep)) {
+ hva, NULL, &hugepage);
+ if (!hugepage && ptep && pte_present(*ptep)) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (pte_write(pte))
write_ok = 1;
+ } else if (hugepage && ptep) {
+ pmd_t pmd = *(pmd_t *)ptep;
+ if (pmd_large(pmd)) {
+ pmd = kvmppc_read_update_linux_hugepmd((pmd_t *)ptep, 1);
+ if (pmd_write(pmd))
+ write_ok = 1;
+ }
}
rcu_read_unlock_sched();
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7c8e1ed..e9d4e3a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -146,24 +146,37 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
}
static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
- int writing, unsigned long *pte_sizep)
+ int writing, unsigned long *pte_sizep,
+ int *hugepage)
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
unsigned int shift;
- ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, hugepage);
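+ /* *hugepage is set when the returned pointer is really a hugepage PMD */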
if (!ptep)
return __pte(0);
- if (shift)
- *pte_sizep = 1ul << shift;
- else
- *pte_sizep = PAGE_SIZE;
+ if (*hugepage) {
+ /* we only handle 16MB hugepages for now */
+ *pte_sizep = 1ul << 24;
+ } else {
+ if (shift)
+ *pte_sizep = 1ul << shift;
+ else
+ *pte_sizep = PAGE_SIZE;
+ }
if (ps > *pte_sizep)
return __pte(0);
- if (!pte_present(*ptep))
- return __pte(0);
- return kvmppc_read_update_linux_pte(ptep, writing);
+
+ if (*hugepage) {
+ pmd_t pmd, *pmdp = (pmd_t *)ptep;
+ if (!pmd_large(*pmdp))
+ return __pte(0);
+ pmd = kvmppc_read_update_linux_hugepmd(pmdp, writing);
+ /* hand the hugepage PMD contents back in pte_t form */
+ return __pte(pmd_val(pmd));
+ } else {
+ if (!pte_present(*ptep))
+ return __pte(0);
+ return kvmppc_read_update_linux_pte(ptep, writing);
+ }
}
static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
@@ -239,18 +252,34 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
} else {
+ int hugepage = 0;
+
/* Translate to host virtual address */
hva = __gfn_to_hva_memslot(memslot, gfn);
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
- if (pte_present(pte)) {
- if (writing && !pte_write(pte))
- /* make the actual HPTE be read-only */
- ptel = hpte_make_readonly(ptel);
- is_io = hpte_cache_bits(pte_val(pte));
- pa = pte_pfn(pte) << PAGE_SHIFT;
+ pte = lookup_linux_pte(pgdir, hva, writing, &pte_size, &hugepage);
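+ /*
+ * On the hugepage path "pte" actually carries the hugepage PMD
+ * contents; convert it back and treat it as a PMD.
+ */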
+ if (hugepage) {
+ pmd_t pmd = __pmd(pte_val(pte));
+ if (pmd_large(pmd)) {
+ if (writing && !pmd_write(pmd))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ /* we support hugepages only for RAM, so no I/O attributes */
+ is_io = 0;
+ pa = pmd_pfn(pmd) << PAGE_SHIFT;
+ }
+ } else {
+ if (pte_present(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ is_io = hpte_cache_bits(pte_val(pte));
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ }
}
}
@@ -645,10 +674,18 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
if (memslot) {
+ int hugepage = 0;
hva = __gfn_to_hva_memslot(memslot, gfn);
- pte = lookup_linux_pte(pgdir, hva, 1, &psize);
- if (pte_present(pte) && !pte_write(pte))
- r = hpte_make_readonly(r);
+ pte = lookup_linux_pte(pgdir, hva, 1,
+ &psize, &hugepage);
+ if (hugepage) {
+ pmd_t pmd = __pmd(pte_val(pte));
+ if (pmd_large(pmd) && !pmd_write(pmd))
+ r = hpte_make_readonly(r);
+ } else {
+ if (pte_present(pte) && !pte_write(pte))
+ r = hpte_make_readonly(r);
+ }
}
}
}
--
1.7.10