[PATCH kernel] KVM: PPC: Avoid mapping compound pages to TCEs in real mode
David Gibson
david at gibson.dropbear.id.au
Mon Sep 3 13:28:44 AEST 2018
On Fri, Aug 31, 2018 at 04:08:50PM +1000, Alexey Kardashevskiy wrote:
> At the moment the real mode handler of H_PUT_TCE calls iommu_tce_xchg_rm()
> which in turn reads the old TCE and, if it was a valid entry, marks
> the physical page dirty if it was mapped for writing. Since this is
> real mode, realmode_pfn_to_page() is used instead of pfn_to_page()
> to get the page struct. However, SetPageDirty() itself looks up the
> compound page head, which is a virtual address for the head page struct,
> and setting the dirty bit on that kills the system.
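(For reference, to make the failure mode concrete: SetPageDirty() uses the
PF_HEAD page-flag policy, so for a tail page it first resolves the head,
roughly like this - paraphrased from page-flags.h of this era, not quoted
verbatim:

	static inline struct page *compound_head(struct page *page)
	{
		unsigned long head = READ_ONCE(page->compound_head);

		if (unlikely(head & 1))
			return (struct page *)(head - 1);
		return page;
	}

and page->compound_head of a tail page holds a kernel virtual address,
which can't be safely dereferenced with the MMU off, hence the crash.)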
>
> This moves dirty bit setting before updating the hardware table
Um.. but now you're setting DIRTY based on the *new* TCE's
permissions, instead of the old TCE's permissions, which I don't think
is correct.
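Just to spell out what I mean: ->exchange() takes *hpa/*direction
describing the new TCE and returns the old TCE's values in them, so the
two orderings decide "dirty" on different things. A rough sketch of the
two variants (not the exact code):

	/* before this patch: check *after* the exchange, i.e. against
	 * the direction of the TCE that is being replaced */
	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
	if (!ret && (*direction == DMA_FROM_DEVICE ||
			*direction == DMA_BIDIRECTIONAL))
		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));

	/* with this patch: check *before* the exchange, i.e. against
	 * the direction of the TCE that is being installed */
	if (*direction == DMA_FROM_DEVICE || *direction == DMA_BIDIRECTIONAL)
		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);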
> to make
> sure compound pages are never mapped in real mode, so that when H_PUT_TCE
> or H_STUFF_TCE try to clear a TCE, they won't get a compound page to mark
> dirty.
>
> This changes kvmppc_rm_tce_validate() to check whether the preregistered
> memory is backed by pages bigger than the hardware IOMMU pages; if this is
> the case, we forward the request to the virtual mode handlers where it
> can be safely processed.
>
> The second check makes the first check rather unnecessary, but since
> the actual crash happened at the SetPageDirty() call site, this marks
> the spot with WARN_ON_ONCE.
>
> In order to keep virtual and real mode handlers in sync, this adjusts
> iommu_tce_xchg() as well.
>
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
>
> This is made on top of:
> [PATCH kernel 0/4] KVM: PPC: Some error handling rework
> KVM: PPC: Validate all tces before updating tables
> KVM: PPC: Inform the userspace about TCE update failures
> KVM: PPC: Validate TCEs against preregistered memory page sizes
> KVM: PPC: Propagate errors to the guest when failed instead of
> ignoring
> ---
> arch/powerpc/include/asm/mmu_context.h | 3 ++-
> arch/powerpc/kernel/iommu.c | 19 ++++++++-----------
> arch/powerpc/kvm/book3s_64_vio_hv.c | 15 +++++++++++----
> arch/powerpc/mm/mmu_context_iommu.c | 4 +++-
> 4 files changed, 24 insertions(+), 17 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index b2f89b6..073d72f9b 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -31,7 +31,8 @@ extern void mm_iommu_cleanup(struct mm_struct *mm);
> extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
> unsigned long ua, unsigned long size);
> extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
> - struct mm_struct *mm, unsigned long ua, unsigned long size);
> + struct mm_struct *mm, unsigned long ua, unsigned long size,
> + unsigned int *pshift);
> extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
> unsigned long ua, unsigned long entries);
> extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index af7a20d..62e014d 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -998,12 +998,11 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
> {
> long ret;
>
> - ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
> -
> - if (!ret && ((*direction == DMA_FROM_DEVICE) ||
> - (*direction == DMA_BIDIRECTIONAL)))
> + if (*direction == DMA_FROM_DEVICE || *direction == DMA_BIDIRECTIONAL)
> SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
>
> + ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
> +
> /* if (unlikely(ret))
> pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
> __func__, hwaddr, entry << tbl->it_page_shift,
> @@ -1019,20 +1018,18 @@ long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
> {
> long ret;
>
> - ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> -
> - if (!ret && ((*direction == DMA_FROM_DEVICE) ||
> - (*direction == DMA_BIDIRECTIONAL))) {
> + if (*direction == DMA_FROM_DEVICE || *direction == DMA_BIDIRECTIONAL) {
> struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
>
> if (likely(pg)) {
> + if (WARN_ON_ONCE(PageCompound(pg)))
> + return -EPERM;
> SetPageDirty(pg);
> - } else {
> - tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> - ret = -EFAULT;
> }
> }
>
> + ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> +
> return ret;
> }
> EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
> diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
> index 8d82133..ce4d2f4 100644
> --- a/arch/powerpc/kvm/book3s_64_vio_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
> @@ -118,11 +118,17 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
> unsigned long hpa = 0;
> struct mm_iommu_table_group_mem_t *mem;
> long shift = stit->tbl->it_page_shift;
> + unsigned int memshift = 0;
>
> - mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
> + mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift,
> + &memshift);
> if (!mem)
> return H_TOO_HARD;
>
> + /* Marking compound pages dirty in real mode is too complex */
> + if (memshift > shift)
> + return H_TOO_HARD;
> +
> if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
> return H_TOO_HARD;
> }
> @@ -222,7 +228,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
> /* it_userspace allocation might be delayed */
> return H_TOO_HARD;
>
> - mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
> + mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize, NULL);
> if (!mem)
> return H_TOO_HARD;
>
> @@ -287,7 +293,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
> /* it_userspace allocation might be delayed */
> return H_TOO_HARD;
>
> - mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift);
> + mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift, NULL);
> if (!mem)
> return H_TOO_HARD;
>
> @@ -472,7 +478,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
> if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
> return H_TOO_HARD;
>
> - mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
> + mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K,
> + NULL);
> if (mem)
> prereg = mm_iommu_ua_to_hpa_rm(mem, ua,
> IOMMU_PAGE_SHIFT_4K, &tces) == 0;
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> index c9ee9e2..5c31fa5 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> @@ -344,7 +344,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
> EXPORT_SYMBOL_GPL(mm_iommu_lookup);
>
> struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
> - unsigned long ua, unsigned long size)
> + unsigned long ua, unsigned long size, unsigned int *pshift)
> {
> struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
>
> @@ -354,6 +354,8 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
> (ua + size <= mem->ua +
> (mem->entries << PAGE_SHIFT))) {
> ret = mem;
> + if (pshift)
> + *pshift = mem->pageshift;
> break;
> }
> }
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson