[PATCH 4/4] kvm: powerpc: set cache coherency only for RAM pages
Bhushan Bharat-R65777
R65777 at freescale.com
Wed Jul 31 02:22:54 EST 2013
> -----Original Message-----
> From: Benjamin Herrenschmidt [mailto:benh at kernel.crashing.org]
> Sent: Saturday, July 27, 2013 3:57 AM
> To: Bhushan Bharat-R65777
> Cc: Alexander Graf; kvm-ppc at vger.kernel.org; kvm at vger.kernel.org; linuxppc-dev at lists.ozlabs.org; Wood Scott-B07421
> Subject: Re: [PATCH 4/4] kvm: powerpc: set cache coherency only for RAM pages
>
> On Fri, 2013-07-26 at 15:03 +0000, Bhushan Bharat-R65777 wrote:
> > Won't searching the Linux PTE be overkill?
>
> That's the best approach. Besides, we are already searching it to resolve the
> page fault. That does mean we search twice, but on the other hand it also
> means the PTE is hot in the cache.
Below is an early git diff (not a properly cleaned-up patch), posted to confirm that this is what we want on PowerPC and to gather early feedback. I also ran some benchmarks to understand the overhead, if any.
Using kvm_is_mmio_pfn() (what the current patch does):
Real: 0m46.616s + 0m49.517s + 0m49.510s + 0m46.936s + 0m46.889s + 0m46.684s = Avg: 47.692s
User: 0m31.636s + 0m31.816s + 0m31.456s + 0m31.752s + 0m32.028s + 0m31.848s = Avg: 31.756s
Sys:  0m11.596s + 0m11.868s + 0m12.244s + 0m11.672s + 0m11.356s + 0m11.432s = Avg: 11.695s

Using a kernel page table search (the changes below):
Real: 0m46.431s + 0m50.269s + 0m46.724s + 0m46.645s + 0m46.670s + 0m50.259s = Avg: 47.833s
User: 0m31.568s + 0m31.816s + 0m31.444s + 0m31.808s + 0m31.312s + 0m31.740s = Avg: 31.614s
Sys:  0m11.516s + 0m12.060s + 0m11.872s + 0m11.476s + 0m12.000s + 0m12.152s = Avg: 11.846s
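(For reference, each average above is the plain arithmetic mean of the six runs, e.g. for the first Real row: (46.616 + 49.517 + 49.510 + 46.936 + 46.889 + 46.684) / 6 = 47.692. The ~0.14s Real difference between the two approaches is well within the several-second run-to-run spread, so the page table search adds no measurable overhead.)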
------------------
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3328353..d6d0dac 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -532,6 +532,7 @@ struct kvm_vcpu_arch {
u32 epr;
u32 crit_save;
struct kvmppc_booke_debug_reg dbg_reg;
+ pgd_t *pgdir;
#endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 17722d8..ebcccc2 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -697,7 +697,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
kvmppc_fix_ee_before_entry();
-
+ vcpu->arch.pgdir = current->mm->pgd;
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
/* No need for kvm_guest_exit. It's done in handle_exit.
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 4fd9650..fc4b2f6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -31,11 +31,13 @@ enum vcpu_ftr {
#define E500_TLB_NUM 2
/* entry is mapped somewhere in host TLB */
-#define E500_TLB_VALID (1 << 0)
+#define E500_TLB_VALID (1 << 31)
/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */
-#define E500_TLB_BITMAP (1 << 1)
+#define E500_TLB_BITMAP (1 << 30)
/* TLB1 entry is mapped by host TLB0 */
-#define E500_TLB_TLB0 (1 << 2)
+#define E500_TLB_TLB0 (1 << 29)
+/* The low 5 bits hold the WIMGE value */
+#define E500_TLB_WIMGE_MASK (0x1f)
struct tlbe_ref {
pfn_t pfn; /* valid only for TLB0, except briefly */
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 5cbdc8f..a48c13f 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -40,6 +40,84 @@
static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
+/*
+ * find_linux_pte returns the address of a Linux PTE for a given
+ * effective address and page directory. If no PTE is found, it
+ * returns NULL.
+ */
+static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
+{
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ pte_t *pt = NULL;
+
+ pg = pgdir + pgd_index(ea);
+ if (!pgd_none(*pg)) {
+ pu = pud_offset(pg, ea);
+ if (!pud_none(*pu)) {
+ pm = pmd_offset(pu, ea);
+ if (pmd_present(*pm))
+ pt = pte_offset_kernel(pm, ea);
+ }
+ }
+ return pt;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+ unsigned *shift);
+#else
+static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+ unsigned *shift)
+{
+ if (shift)
+ *shift = 0;
+ return find_linux_pte(pgdir, ea);
+}
+#endif /* !CONFIG_HUGETLB_PAGE */
+
+/*
+ * Read a Linux PTE and update it in place: if the PTE is present, set
+ * the referenced bit, and also the dirty bit for a write access to a
+ * writable page, then write it back. The (possibly updated) PTE is
+ * returned; callers check pte_present() on the result. Note that this
+ * plain read-modify-write is not atomic.
+ */
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
+{
+ pte_t pte = *p;
+
+ if (pte_present(pte)) {
+ pte = pte_mkyoung(pte);
+ if (writing && pte_write(pte))
+ pte = pte_mkdirty(pte);
+ }
+
+ *p = pte;
+
+ return pte;
+}
+
+static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
+ int writing, unsigned long *pte_sizep)
+{
+ pte_t *ptep;
+ unsigned long ps = *pte_sizep;
+ unsigned int shift;
+
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+ if (!ptep)
+ return __pte(0);
+ if (shift)
+ *pte_sizep = 1ul << shift;
+ else
+ *pte_sizep = PAGE_SIZE;
+
+ if (ps > *pte_sizep)
+ return __pte(0);
+ if (!pte_present(*ptep))
+ return __pte(0);
+
+ return kvmppc_read_update_linux_pte(ptep, writing);
+}
+
static inline unsigned int tlb1_max_shadow_size(void)
{
/* reserve one entry for magic page */
@@ -262,10 +340,11 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
struct kvm_book3e_206_tlb_entry *gtlbe,
- pfn_t pfn)
+ pfn_t pfn, int wimg)
{
ref->pfn = pfn;
ref->flags |= E500_TLB_VALID;
+ ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
if (tlbe_is_writable(gtlbe))
kvm_set_pfn_dirty(pfn);
@@ -326,8 +405,8 @@ static void kvmppc_e500_setup_stlbe(
/* Force IPROT=0 for all guest mappings. */
stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
- stlbe->mas2 = (gvaddr & MAS2_EPN) |
- e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
+ stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_WIMGE_MASK);
stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
@@ -346,6 +425,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
unsigned long hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
+ pte_t pte;
+ int wimg = 0;
/*
* Translate guest physical to true physical, acquiring
@@ -451,6 +532,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
if (likely(!pfnmap)) {
unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+ unsigned long pte_size;
+ pgd_t *pgdir;
+
pfn = gfn_to_pfn_memslot(slot, gfn);
if (is_error_noslot_pfn(pfn)) {
printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
@@ -461,9 +544,15 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
/* Align guest and physical address to page map boundaries */
pfn &= ~(tsize_pages - 1);
gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+ pgdir = vcpu_e500->vcpu.arch.pgdir;
+ /* lookup_linux_pte() takes the required mapping size in bytes */
+ pte_size = tsize_pages << PAGE_SHIFT;
+ pte = lookup_linux_pte(pgdir, hva, 1, &pte_size);
+ if (pte_present(pte))
+ wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
+ else
+ wimg = MAS2_I | MAS2_G;
}
- kvmppc_e500_ref_setup(ref, gtlbe, pfn);
+ kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
ref, gvaddr, stlbe);
---------------------------------------
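To make the WIMGE plumbing above easier to follow outside the kernel tree, here is a minimal, self-contained user-space sketch of the core policy: cache the WIMGE bits from the host Linux PTE in the low five bits of the tlbe_ref flags word (which is why E500_TLB_VALID and friends moved up to the high bits), and fall back to cache-inhibited + guarded when no usable host PTE is found. The MAS2_* values mirror the Book3E layout, but PTE_WIMGE_SHIFT and this flags layout are illustrative assumptions, not the kernel's definitions.

/* Standalone sketch (user space, not kernel code). PTE_WIMGE_SHIFT and
 * the flag layout are assumptions that mirror the diff above. */
#include <stdio.h>

#define MAS2_E 0x01 /* little-endian */
#define MAS2_G 0x02 /* guarded */
#define MAS2_M 0x04 /* memory-coherent */
#define MAS2_I 0x08 /* cache-inhibited */
#define MAS2_W 0x10 /* write-through */
#define MAS2_WIMGE_MASK 0x1f

#define PTE_WIMGE_SHIFT 6 /* assumed position of WIMGE within the PTE word */

#define E500_TLB_VALID (1u << 31) /* status bits live in the high bits... */
#define E500_TLB_WIMGE_MASK 0x1fu /* ...so WIMGE can sit in the low five */

/* Take WIMG from the host PTE when one is present; otherwise map
 * conservatively as uncached and guarded, i.e. treat it like MMIO. */
static unsigned int shadow_wimg(unsigned long pte_word, int present)
{
	if (present)
		return (pte_word >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
	return MAS2_I | MAS2_G;
}

int main(void)
{
	/* Normal RAM: host PTE has only M (coherent) set. */
	unsigned long ram_pte = (unsigned long)MAS2_M << PTE_WIMGE_SHIFT;
	/* MMIO: the host maps it cache-inhibited and guarded. */
	unsigned long mmio_pte = (unsigned long)(MAS2_I | MAS2_G) << PTE_WIMGE_SHIFT;
	unsigned int flags = E500_TLB_VALID | shadow_wimg(ram_pte, 1);

	printf("RAM page    -> WIMG %#x\n", shadow_wimg(ram_pte, 1));  /* 0x4 */
	printf("MMIO page   -> WIMG %#x\n", shadow_wimg(mmio_pte, 1)); /* 0xa */
	printf("no host PTE -> WIMG %#x\n", shadow_wimg(0UL, 0));      /* 0xa */
	printf("WIMGE cached in flags -> %#x\n", flags & E500_TLB_WIMGE_MASK);
	return 0;
}

The point of the fallback is the same as the patch title: only pages the host actually maps as normal, coherent RAM inherit cacheable attributes; anything without a usable host PTE keeps the safe MAS2_I | MAS2_G mapping.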
>
> Cheers,
> Ben
>
>