[RFC PATCH 2/4] KVM: PPC: Book3E: Handle LRAT error exception

Alexander Graf agraf at suse.de
Fri Jul 4 18:15:07 EST 2014


On 03.07.14 16:45, Mihai Caraman wrote:
> Handle LRAT error exception with support for lrat mapping and invalidation.
>
> Signed-off-by: Mihai Caraman <mihai.caraman at freescale.com>
> ---
>   arch/powerpc/include/asm/kvm_host.h   |   1 +
>   arch/powerpc/include/asm/kvm_ppc.h    |   2 +
>   arch/powerpc/include/asm/mmu-book3e.h |   3 +
>   arch/powerpc/include/asm/reg_booke.h  |  13 ++++
>   arch/powerpc/kernel/asm-offsets.c     |   1 +
>   arch/powerpc/kvm/booke.c              |  40 +++++++++++
>   arch/powerpc/kvm/bookehv_interrupts.S |   9 ++-
>   arch/powerpc/kvm/e500_mmu_host.c      | 125 ++++++++++++++++++++++++++++++++++
>   arch/powerpc/kvm/e500mc.c             |   2 +
>   9 files changed, 195 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index bb66d8b..7b6b2ec 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -433,6 +433,7 @@ struct kvm_vcpu_arch {
>   	u32 eplc;
>   	u32 epsc;
>   	u32 oldpir;
> +	u64 fault_lper;
>   #endif
>   
>   #if defined(CONFIG_BOOKE)
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 9c89cdd..2730a29 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -86,6 +86,8 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
>                                 gva_t eaddr);
>   extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
>   extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
> +extern void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn);
> +extern void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu);
>   
>   extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
>                                                   unsigned int id);
> diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
> index 088fd9f..ac6acf7 100644
> --- a/arch/powerpc/include/asm/mmu-book3e.h
> +++ b/arch/powerpc/include/asm/mmu-book3e.h
> @@ -40,6 +40,8 @@
>   
>   /* MAS registers bit definitions */
>   
> +#define MAS0_ATSEL		0x80000000
> +#define MAS0_ATSEL_SHIFT	31
>   #define MAS0_TLBSEL_MASK        0x30000000
>   #define MAS0_TLBSEL_SHIFT       28
>   #define MAS0_TLBSEL(x)          (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK)
> @@ -53,6 +55,7 @@
>   #define MAS0_WQ_CLR_RSRV       	0x00002000
>   
>   #define MAS1_VALID		0x80000000
> +#define MAS1_VALID_SHIFT	31
>   #define MAS1_IPROT		0x40000000
>   #define MAS1_TID(x)		(((x) << 16) & 0x3FFF0000)
>   #define MAS1_IND		0x00002000
> diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
> index 75bda23..783d617 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -43,6 +43,8 @@
>   
>   /* Special Purpose Registers (SPRNs)*/
>   #define SPRN_DECAR	0x036	/* Decrementer Auto Reload Register */
> +#define SPRN_LPER	0x038	/* Logical Page Exception Register */
> +#define SPRN_LPERU	0x039	/* Logical Page Exception Register Upper */
>   #define SPRN_IVPR	0x03F	/* Interrupt Vector Prefix Register */
>   #define SPRN_USPRG0	0x100	/* User Special Purpose Register General 0 */
>   #define SPRN_SPRG3R	0x103	/* Special Purpose Register General 3 Read */
> @@ -358,6 +360,9 @@
>   #define ESR_ILK		0x00100000	/* Instr. Cache Locking */
>   #define ESR_PUO		0x00040000	/* Unimplemented Operation exception */
>   #define ESR_BO		0x00020000	/* Byte Ordering */
> +#define ESR_DATA	0x00000400	/* Page Table Data Access */
> +#define ESR_TLBI	0x00000200	/* Page Table TLB Ineligible */
> +#define ESR_PT		0x00000100	/* Page Table Translation */
>   #define ESR_SPV		0x00000080	/* Signal Processing operation */
>   
>   /* Bit definitions related to the DBCR0. */
> @@ -649,6 +654,14 @@
>   #define EPC_EPID	0x00003fff
>   #define EPC_EPID_SHIFT	0
>   
> +/* Bit definitions for LPER */
> +#define LPER_ALPN		0x000FFFFFFFFFF000ULL
> +#define LPER_ALPN_SHIFT		12
> +#define LPER_WIMGE		0x00000F80
> +#define LPER_WIMGE_SHIFT	7
> +#define LPER_LPS		0x0000000F
> +#define LPER_LPS_SHIFT		0
> +
>   /*
>    * The IBM-403 is an even more odd special case, as it is much
>    * older than the IBM-405 series.  We put these down here incase someone
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index f5995a9..be6e329 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -713,6 +713,7 @@ int main(void)
>   	DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
>   	DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
>   	DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
> +	DEFINE(VCPU_FAULT_LPER, offsetof(struct kvm_vcpu, arch.fault_lper));
>   #endif
>   
>   #ifdef CONFIG_KVM_EXIT_TIMING
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index a192975..ab1077f 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -1286,6 +1286,46 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
>   		break;
>   	}
>   
> +#ifdef CONFIG_KVM_BOOKE_HV
> +	case BOOKE_INTERRUPT_LRAT_ERROR:
> +	{
> +		gfn_t gfn;
> +
> +		/*
> +		 * Guest TLB management instructions (EPCR.DGTMI == 0) are not
> +		 * supported for now
> +		 */
> +		if (!(vcpu->arch.fault_esr & ESR_PT)) {
> +			WARN(1, "%s: Guest TLB management instructions not supported!\n", __func__);

Wouldn't this allow a guest to flood the host's kernel log?
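
Something one-shot or ratelimited would be safer here, e.g. (untested
sketch, WARN_ONCE is just one option):

   if (!(vcpu->arch.fault_esr & ESR_PT)) {
   	/* warn only once instead of on every guest-triggered exit */
   	WARN_ONCE(1, "%s: Guest TLB management instructions not supported!\n",
   		  __func__);
   	break;
   }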

> +			break;
> +		}
> +
> +		gfn = (vcpu->arch.fault_lper & LPER_ALPN) >> LPER_ALPN_SHIFT;

Maybe add an #ifdef and #error check to make sure that LPER_ALPN_SHIFT 
== PAGE_SHIFT?
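
Something along these lines (sketch only; assumes both are plain
preprocessor constants at that point, which they are today):

   /*
    * gfn is derived with LPER_ALPN_SHIFT but consumed as a PAGE_SHIFT
    * based frame number, so catch a mismatch at build time.
    */
   #if LPER_ALPN_SHIFT != PAGE_SHIFT
   #error "LPER_ALPN_SHIFT must match PAGE_SHIFT"
   #endif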

> +
> +		idx = srcu_read_lock(&vcpu->kvm->srcu);
> +
> +		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
> +			kvmppc_lrat_map(vcpu, gfn);
> +			r = RESUME_GUEST;
> +		} else if (vcpu->arch.fault_esr & ESR_DATA) {
> +			vcpu->arch.paddr_accessed = (gfn << PAGE_SHIFT)
> +				| (vcpu->arch.fault_dear & (PAGE_SIZE - 1));
> +			vcpu->arch.vaddr_accessed =
> +				vcpu->arch.fault_dear;
> +
> +			r = kvmppc_emulate_mmio(run, vcpu);
> +			kvmppc_account_exit(vcpu, MMIO_EXITS);

It's a shame we have to duplicate that logic from the normal TLB miss 
path, but I can't see any good way to combine them either.

> +		} else {
> +			kvmppc_booke_queue_irqprio(vcpu,
> +						BOOKE_IRQPRIO_MACHINE_CHECK);
> +			r = RESUME_GUEST;
> +		}
> +
> +		srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +		break;
> +	}
> +#endif
> +
>   	case BOOKE_INTERRUPT_DEBUG: {
>   		r = kvmppc_handle_debug(run, vcpu);
>   		if (r == RESUME_HOST)
> diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
> index b3ecdd6..341c3a8 100644
> --- a/arch/powerpc/kvm/bookehv_interrupts.S
> +++ b/arch/powerpc/kvm/bookehv_interrupts.S
> @@ -64,6 +64,7 @@
>   #define NEED_EMU		0x00000001 /* emulation -- save nv regs */
>   #define NEED_DEAR		0x00000002 /* save faulting DEAR */
>   #define NEED_ESR		0x00000004 /* save faulting ESR */
> +#define NEED_LPER		0x00000008 /* save faulting LPER */
>   
>   /*
>    * On entry:
> @@ -203,6 +204,12 @@
>   	PPC_STL	r9, VCPU_FAULT_DEAR(r4)
>   	.endif
>   
> +	/* Only supported on 64-bit cores for now */
> +	.if	\flags & NEED_LPER
> +	mfspr	r7, SPRN_LPER
> +	std	r7, VCPU_FAULT_LPER(r4)
> +	.endif
> +
>   	b	kvmppc_resume_host
>   .endm
>   
> @@ -325,7 +332,7 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
>   kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
>   	SPRN_CSRR0, SPRN_CSRR1, 0
>   kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \
> -	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
> +	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR | NEED_LPER)
>   #else
>   /*
>    * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 79677d7..be1454b 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -95,6 +95,131 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
>   	                              stlbe->mas2, stlbe->mas7_3);
>   }
>   
> +#ifdef CONFIG_KVM_BOOKE_HV
> +#ifdef CONFIG_64BIT
> +static inline int lrat_next(void)

No inline in .c files please. Just make them "static".

> +{
> +	int this, next;
> +
> +	this = local_paca->tcd.lrat_next;
> +	next = (this + 1) % local_paca->tcd.lrat_max;

Can we assume that lrat_max is always a power of 2? IIRC modulo 
functions with variables can be quite expensive. So if we can instead do

   next = (this + 1) & local_paca->tcd.lrat_mask;

we should be faster and not rely on division helpers.
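
For reference, a possible lrat_next() along those lines (assuming a
tcd.lrat_mask field holding lrat_max - 1 gets added; it doesn't exist
yet):

   static int lrat_next(void)
   {
   	int this = local_paca->tcd.lrat_next;

   	/* lrat_max is a power of 2, so wrap with a mask instead of % */
   	local_paca->tcd.lrat_next = (this + 1) & local_paca->tcd.lrat_mask;
   	return this;
   }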

> +	local_paca->tcd.lrat_next = next;
> +
> +	return this;
> +}
> +
> +static inline int lrat_size(void)
> +{
> +	return local_paca->tcd.lrat_max;
> +}
> +#else
> +/* LRAT is only supported in 64-bit kernel for now */
> +static inline int lrat_next(void)
> +{
> +	BUG();
> +}
> +
> +static inline int lrat_size(void)
> +{
> +	return 0;
> +}
> +#endif
> +
> +void write_host_lrate(int tsize, gfn_t gfn, unsigned long pfn, uint32_t lpid,
> +		      int valid, int lrat_entry)
> +{
> +	struct kvm_book3e_206_tlb_entry stlbe;
> +	int esel = lrat_entry;
> +	unsigned long flags;
> +
> +	stlbe.mas1 = (valid ? MAS1_VALID : 0) | MAS1_TSIZE(tsize);
> +	stlbe.mas2 = ((u64)gfn << PAGE_SHIFT);
> +	stlbe.mas7_3 = ((u64)pfn << PAGE_SHIFT);
> +	stlbe.mas8 = MAS8_TGS | lpid;
> +
> +	local_irq_save(flags);
> +	/* book3e_tlb_lock(); */

Hm?

> +
> +	if (esel == -1)
> +		esel = lrat_next();
> +	__write_host_tlbe(&stlbe, MAS0_ATSEL | MAS0_ESEL(esel));
> +
> +	/* book3e_tlb_unlock(); */
> +	local_irq_restore(flags);
> +}
> +
> +void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
> +{
> +	struct kvm_memory_slot *slot;
> +	unsigned long pfn;
> +	unsigned long hva;
> +	struct vm_area_struct *vma;
> +	unsigned long psize;
> +	int tsize;
> +	unsigned long tsize_pages;
> +
> +	slot = gfn_to_memslot(vcpu->kvm, gfn);
> +	if (!slot) {
> +		pr_err_ratelimited("%s: couldn't find memslot for gfn %lx!\n",
> +				   __func__, (long)gfn);
> +		return;
> +	}
> +
> +	hva = slot->userspace_addr;
> +
> +	down_read(&current->mm->mmap_sem);
> +	vma = find_vma(current->mm, hva);
> +	if (vma && (hva >= vma->vm_start)) {
> +		psize = vma_kernel_pagesize(vma);
> +	} else {
> +		pr_err_ratelimited("%s: couldn't find virtual memory address for gfn %lx!\n", __func__, (long)gfn);
> +		return;
> +	}
> +	up_read(&current->mm->mmap_sem);
> +
> +	pfn = gfn_to_pfn_memslot(slot, gfn);
> +	if (is_error_noslot_pfn(pfn)) {
> +		pr_err_ratelimited("%s: couldn't get real page for gfn %lx!\n",
> +				   __func__, (long)gfn);
> +		return;
> +	}
> +
> +	tsize = __ilog2(psize) - 10;
> +	tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
> +	gfn &= ~(tsize_pages - 1);
> +	pfn &= ~(tsize_pages - 1);
> +
> +	write_host_lrate(tsize, gfn, pfn, vcpu->kvm->arch.lpid, 1, -1);
> +	kvm_release_pfn_clean(pfn);

Don't we have to keep the page locked so it doesn't get swapped away?


Alex

> +}
> +
> +void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu)
> +{
> +	uint32_t mas0, mas1 = 0;
> +	int esel;
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	/* book3e_tlb_lock(); */
> +
> +	/* LRAT does not have a dedicated instruction for invalidation */
> +	for (esel = 0; esel < lrat_size(); esel++) {
> +		mas0 = MAS0_ATSEL | MAS0_ESEL(esel);
> +		mtspr(SPRN_MAS0, mas0);
> +		asm volatile("isync; tlbre" : : : "memory");
> +		mas1 = mfspr(SPRN_MAS1) & ~MAS1_VALID;
> +		mtspr(SPRN_MAS1, mas1);
> +		asm volatile("isync; tlbwe" : : : "memory");
> +	}
> +	/* Must clear mas8 for other host tlbwe's */
> +	mtspr(SPRN_MAS8, 0);
> +	isync();
> +
> +	/* book3e_tlb_unlock(); */
> +	local_irq_restore(flags);
> +}
> +#endif
> +
>   /*
>    * Acquire a mas0 with victim hint, as if we just took a TLB miss.
>    *
> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
> index b1d9939..5622d9a 100644
> --- a/arch/powerpc/kvm/e500mc.c
> +++ b/arch/powerpc/kvm/e500mc.c
> @@ -99,6 +99,8 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
>   	asm volatile("tlbilxlpid");
>   	mtspr(SPRN_MAS5, 0);
>   	local_irq_restore(flags);
> +
> +	kvmppc_lrat_invalidate(&vcpu_e500->vcpu);
>   }
>   
>   void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)


