[PATCH 4/4] kvm: powerpc: set cache coherency only for RAM pages

Scott Wood scottwood at freescale.com
Wed Jul 31 04:49:26 EST 2013


On 07/30/2013 11:22:54 AM, Bhushan Bharat-R65777 wrote:
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 5cbdc8f..a48c13f 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -40,6 +40,84 @@
> 
>  static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
> 
> +/*
> + * find_linux_pte returns the address of a linux pte for a given
> + * effective address and directory.  If not found, it returns zero.
> + */
> +static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
> +{
> +        pgd_t *pg;
> +        pud_t *pu;
> +        pmd_t *pm;
> +        pte_t *pt = NULL;
> +
> +        pg = pgdir + pgd_index(ea);
> +        if (!pgd_none(*pg)) {
> +                pu = pud_offset(pg, ea);
> +                if (!pud_none(*pu)) {
> +                        pm = pmd_offset(pu, ea);
> +                        if (pmd_present(*pm))
> +                                pt = pte_offset_kernel(pm, ea);
> +                }
> +        }
> +        return pt;
> +}

How is this specific to KVM or e500?

> +#ifdef CONFIG_HUGETLB_PAGE
> +pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
> +                                 unsigned *shift);
> +#else
> +static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
> +                                               unsigned *shift)
> +{
> +        if (shift)
> +                *shift = 0;
> +        return find_linux_pte(pgdir, ea);
> +}
> +#endif /* !CONFIG_HUGETLB_PAGE */

This is already declared in asm/pgtable.h.  If we need a non-hugepage  
alternative, that should also go in asm/pgtable.h.
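
Something along these lines in asm/pgtable.h is what I mean (untested
sketch; it keeps the prototype from your patch and assumes the generic
find_linux_pte() walker lives next to it rather than in e500 code):

#ifndef CONFIG_HUGETLB_PAGE
/* No hugepages configured, so a plain page table walk is enough. */
static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
                                               unsigned *shift)
{
        if (shift)
                *shift = 0;
        return find_linux_pte(pgdir, ea);
}
#endif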

> +/*
> + * Lock and read a linux PTE.  If it's present and writable, atomically
> + * set dirty and referenced bits and return the PTE, otherwise return 0.
> + */
> +static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
> +{
> +       pte_t pte = pte_val(*p);
> +
> +       if (pte_present(pte)) {
> +               pte = pte_mkyoung(pte);
> +               if (writing && pte_write(pte))
> +                       pte = pte_mkdirty(pte);
> +       }
> +
> +       *p = pte;
> +
> +       return pte;
> +}
> +
> +static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
> +                             int writing, unsigned long *pte_sizep)
> +{
> +       pte_t *ptep;
> +       unsigned long ps = *pte_sizep;
> +       unsigned int shift;
> +
> +       ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
> +       if (!ptep)
> +               return __pte(0);
> +       if (shift)
> +               *pte_sizep = 1ul << shift;
> +       else
> +               *pte_sizep = PAGE_SIZE;
> +
> +       if (ps > *pte_sizep)
> +               return __pte(0);
> +       if (!pte_present(*ptep))
> +               return __pte(0);
> +
> +       return kvmppc_read_update_linux_pte(ptep, writing);
> +}
> +

None of this belongs in this file either.

> @@ -326,8 +405,8 @@ static void kvmppc_e500_setup_stlbe(
> 
>         /* Force IPROT=0 for all guest mappings. */
>         stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
> -       stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -                     e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
> +       stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_WIMGE_MASK);
> +//                   e500_shadow_mas2_attrib(gtlbe->mas2, pfn);

MAS2_E and MAS2_G should be safe to come from the guest.

How does this work for TLB1?  One ref corresponds to one guest entry,  
which may correspond to multiple host entries, potentially each with  
different WIM settings.
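
To make the first point concrete, something like this (untested sketch,
using the MAS2 bit names from asm/mmu-book3e.h and assuming ref->flags
keeps WIMGE at the MAS2 bit positions, as your patch does):

        /* W/I/M from the host pte (via ref->flags), E/G from the guest MAS2 */
        stlbe->mas2 = (gvaddr & MAS2_EPN) |
                      (ref->flags & (MAS2_W | MAS2_I | MAS2_M)) |
                      (gtlbe->mas2 & (MAS2_E | MAS2_G));

That still leaves the TLB1 question above open.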

>         stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>                         e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
> 
> @@ -346,6 +425,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
>         unsigned long hva;
>         int pfnmap = 0;
>         int tsize = BOOK3E_PAGESZ_4K;
> +       pte_t pte;
> +       int wimg = 0;
> 
>         /*
>          * Translate guest physical to true physical, acquiring
> @@ -451,6 +532,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
> 
>         if (likely(!pfnmap)) {
>                 unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
> +               pgd_t *pgdir;
> +
>                 pfn = gfn_to_pfn_memslot(slot, gfn);
>                 if (is_error_noslot_pfn(pfn)) {
>                         printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
> @@ -461,9 +544,15 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
>                 /* Align guest and physical address to page map boundaries */
>                 pfn &= ~(tsize_pages - 1);
>                 gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
> +               pgdir = vcpu_e500->vcpu.arch.pgdir;
> +               pte = lookup_linux_pte(pgdir, hva, 1, &tsize_pages);
> +               if (pte_present(pte))
> +                       wimg = (pte >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
> +               else
> +                       wimg = MAS2_I | MAS2_G;

If the PTE is not present, then we can't map it, right?  So why I+G?
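
Untested sketch of what I'd expect instead (the cleanup on the error
path is illustrative; whatever the existing failure paths in
kvmppc_e500_shadow_map() do should apply here too):

                pgdir = vcpu_e500->vcpu.arch.pgdir;
                pte = lookup_linux_pte(pgdir, hva, 1, &tsize_pages);
                if (!pte_present(pte)) {
                        /* No host PTE means there is nothing to map;
                         * fail instead of guessing I+G. */
                        kvm_release_pfn_clean(pfn);
                        return -EINVAL;
                }
                wimg = (pte >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;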

-Scott

