[PATCH kernel] KVM: PPC: Avoid mapping compound pages to TCEs in real mode

David Gibson david at gibson.dropbear.id.au
Mon Sep 3 13:28:44 AEST 2018


On Fri, Aug 31, 2018 at 04:08:50PM +1000, Alexey Kardashevskiy wrote:
> At the moment the real mode handler of H_PUT_TCE calls iommu_tce_xchg_rm(),
> which in turn reads the old TCE and, if it was a valid entry mapped for
> writing, marks the physical page dirty. Since this is real mode,
> realmode_pfn_to_page() is used instead of pfn_to_page() to get the page
> struct. However, SetPageDirty() itself reads the compound page head, which
> yields a virtual address for the head page struct, and setting the dirty
> bit through that address kills the system.
> 
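For reference, a rough sketch of the failure path described above, based on
the generic mm definitions of that era (illustrative, not the exact call
chain):

	/* compound_head() follows page->compound_head when bit 0 is set;
	 * the stored pointer is a kernel *virtual* address of the head
	 * page struct.
	 */
	static inline struct page *compound_head(struct page *page)
	{
		unsigned long head = READ_ONCE(page->compound_head);

		if (unlikely(head & 1))
			return (struct page *)(head - 1);
		return page;
	}

	/* SetPageDirty(page) expands to roughly: */
	set_bit(PG_dirty, &compound_head(page)->flags);

With the MMU off in real mode, dereferencing that virtual head pointer is
what kills the system.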
> This moves the dirty bit setting to before the hardware table update

Um.. but now you're setting DIRTY based on the *new* TCE's
permissions, instead of the old TCE's permissions, which I don't think
is correct.
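
To spell that out: as I read the exchange() contract, *hpa and *direction
describe the new TCE on entry and the old (replaced) TCE on return, so the
dirty marking has to happen after the call and look at the returned values,
roughly:

	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);

	/* *direction and *hpa now describe the TCE that was replaced */
	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
			(*direction == DMA_BIDIRECTIONAL)))
		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));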

> and makes sure compound pages are never mapped in real mode, so that when
> H_PUT_TCE or H_STUFF_TCE try to clear a TCE they won't get a compound page
> to mark dirty.
> 
> This changes kvmppc_rm_tce_validate() to check whether the preregistered
> memory is backed by pages bigger than the hardware IOMMU pages; if it is,
> we forward the request to the virtual mode handlers, where it can be
> processed safely.
> 
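A concrete example of the sizes involved (made-up numbers, just to show the
check): if the preregistered memory is backed by 16MB huge pages, then
mem->pageshift == 24, while 4K hardware TCEs give tbl->it_page_shift == 12.
Every backing page is then a compound page, and:

	/* memshift = 24, shift = 12 */
	if (memshift > shift)
		return H_TOO_HARD;	/* punt to the virtual mode handler */
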
> The second check makes the first check rather unnecessary, but since the
> actual crash happened at the SetPageDirty() call site, this marks the
> spot with a WARN_ON_ONCE().
> 
> In order to keep virtual and real mode handlers in sync, this adjusts
> iommu_tce_xchg() as well.
> 
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
> 
> This is made on top of:
> [PATCH kernel 0/4] KVM: PPC: Some error handling rework
>   KVM: PPC: Validate all tces before updating tables
>   KVM: PPC: Inform the userspace about TCE update failures
>   KVM: PPC: Validate TCEs against preregistered memory page sizes
>   KVM: PPC: Propagate errors to the guest when failed instead of
>     ignoring
> ---
>  arch/powerpc/include/asm/mmu_context.h |  3 ++-
>  arch/powerpc/kernel/iommu.c            | 19 ++++++++-----------
>  arch/powerpc/kvm/book3s_64_vio_hv.c    | 15 +++++++++++----
>  arch/powerpc/mm/mmu_context_iommu.c    |  4 +++-
>  4 files changed, 24 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index b2f89b6..073d72f9b 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -31,7 +31,8 @@ extern void mm_iommu_cleanup(struct mm_struct *mm);
>  extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
>  		unsigned long ua, unsigned long size);
>  extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
> -		struct mm_struct *mm, unsigned long ua, unsigned long size);
> +		struct mm_struct *mm, unsigned long ua, unsigned long size,
> +		unsigned int *pshift);
>  extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
>  		unsigned long ua, unsigned long entries);
>  extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index af7a20d..62e014d 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -998,12 +998,11 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
>  {
>  	long ret;
>  
> -	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
> -
> -	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
> -			(*direction == DMA_BIDIRECTIONAL)))
> +	if (*direction == DMA_FROM_DEVICE || *direction == DMA_BIDIRECTIONAL)
>  		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
>  
> +	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
> +
>  	/* if (unlikely(ret))
>  		pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
>  			__func__, hwaddr, entry << tbl->it_page_shift,
> @@ -1019,20 +1018,18 @@ long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
>  {
>  	long ret;
>  
> -	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> -
> -	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
> -			(*direction == DMA_BIDIRECTIONAL))) {
> +	if (*direction == DMA_FROM_DEVICE || *direction == DMA_BIDIRECTIONAL) {
>  		struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
>  
>  		if (likely(pg)) {
> +			if (WARN_ON_ONCE(PageCompound(pg)))
> +				return -EPERM;
>  			SetPageDirty(pg);
> -		} else {
> -			tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> -			ret = -EFAULT;
>  		}
>  	}
>  
> +	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
> diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
> index 8d82133..ce4d2f4 100644
> --- a/arch/powerpc/kvm/book3s_64_vio_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
> @@ -118,11 +118,17 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
>  		unsigned long hpa = 0;
>  		struct mm_iommu_table_group_mem_t *mem;
>  		long shift = stit->tbl->it_page_shift;
> +		unsigned int memshift = 0;
>  
> -		mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
> +		mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift,
> +				&memshift);
>  		if (!mem)
>  			return H_TOO_HARD;
>  
> +		/* Marking compound pages dirty in real mode is too complex */
> +		if (memshift > shift)
> +			return H_TOO_HARD;
> +
>  		if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
>  			return H_TOO_HARD;
>  	}
> @@ -222,7 +228,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
>  		/* it_userspace allocation might be delayed */
>  		return H_TOO_HARD;
>  
> -	mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
> +	mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize, NULL);
>  	if (!mem)
>  		return H_TOO_HARD;
>  
> @@ -287,7 +293,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
>  		/* it_userspace allocation might be delayed */
>  		return H_TOO_HARD;
>  
> -	mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift);
> +	mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift, NULL);
>  	if (!mem)
>  		return H_TOO_HARD;
>  
> @@ -472,7 +478,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
>  		if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
>  			return H_TOO_HARD;
>  
> -		mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
> +		mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K,
> +				NULL);
>  		if (mem)
>  			prereg = mm_iommu_ua_to_hpa_rm(mem, ua,
>  					IOMMU_PAGE_SHIFT_4K, &tces) == 0;
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> index c9ee9e2..5c31fa5 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> @@ -344,7 +344,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
>  EXPORT_SYMBOL_GPL(mm_iommu_lookup);
>  
>  struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
> -		unsigned long ua, unsigned long size)
> +		unsigned long ua, unsigned long size, unsigned int *pshift)
>  {
>  	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
>  
> @@ -354,6 +354,8 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
>  				(ua + size <= mem->ua +
>  				 (mem->entries << PAGE_SHIFT))) {
>  			ret = mem;
> +			if (pshift)
> +				*pshift = mem->pageshift;
>  			break;
>  		}
>  	}

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson