[PATCH kernel] KVM: PPC: Book3s: Remove real mode interrupt controller hcalls handlers
Alexey Kardashevskiy
aik at ozlabs.ru
Wed May 11 11:23:27 AEST 2022
On 5/11/22 03:58, Cédric Le Goater wrote:
> Hello Alexey,
>
> On 5/9/22 09:11, Alexey Kardashevskiy wrote:
>> Currently we have 2 sets of interrupt controller hypercall handlers
>> for real and virtual modes; this dates from POWER8 times when switching
>> the MMU on was considered an expensive operation.
>>
>> POWER9, however, does not have dependent threads and the MMU is enabled
>> for handling hcalls, so the XIVE native
>
> XIVE native does not have any real-mode hcall handlers. In fact, all
> are handled at the QEMU level.
>
>> or XICS-on-XIVE real mode handlers never execute on real P9 and
>> later CPUs.
>
> They are not? I am surprised. It must be a "recent" change. Anyhow,
> if you can remove them safely, this is good news and you should be able
> to clean up some more code in the PowerNV native interface.
Yes, this is the result of Nick's massive work to move KVM's asm to C
for P9. It may have been the case even before that, but it was harder
to see in that asm code :)
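
For anyone reading along: the templating being removed is plain token
pasting. book3s_xive_template.c is included twice with a different X_PFX
and different MMIO accessors, roughly like this (a simplified sketch of
the pattern, not the exact kernel code):

	#define XGLUE(a, b)	a##b
	#define GLUE(a, b)	XGLUE(a, b)

	/* book3s_hv_rm_xive.c (real mode instance, deleted by this patch) */
	#define X_PFX		xive_rm_
	#define __x_readq	__raw_rm_readq

	/* book3s_xive.c (virtual mode instance, now open-coded below) */
	/* #define X_PFX	xive_vm_   */
	/* #define __x_readq	__raw_readq */

	/* in the template */
	X_STATIC unsigned long GLUE(X_PFX, h_xirr)(struct kvm_vcpu *vcpu);
	/* expands to xive_rm_h_xirr() or xive_vm_h_xirr(), depending on the includer */

With the real mode instance gone, only the open-coded xive_vm_* variants
remain in book3s_xive.c.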
>>
>> This untemplates the handlers, keeps only the real mode handlers for
>> XICS native (up to POWER8), and removes the rest of the dead code. The
>> changes in the functions are mechanical, except for a few empty lines
>> added to make checkpatch.pl happy.
>>
>> The default list of implemented hcalls already contains the XICS hcalls,
>> so there is no change there.
>>
>> This should not cause any behavioral change.
>
> In the worst case, it impacts performance a bit, but only on "old" distros
> (kernel < 4.14); I doubt anyone will complain.
>
>> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
>
> Acked-by: Cédric Le Goater <clg at kaod.org>
Thanks!
>
> Thanks,
>
> C.
>
>
>> ---
>> arch/powerpc/kvm/Makefile | 2 +-
>> arch/powerpc/include/asm/kvm_ppc.h | 7 -
>> arch/powerpc/kvm/book3s_xive.h | 7 -
>> arch/powerpc/kvm/book3s_hv_builtin.c | 64 ---
>> arch/powerpc/kvm/book3s_hv_rm_xics.c | 5 +
>> arch/powerpc/kvm/book3s_hv_rm_xive.c | 46 --
>> arch/powerpc/kvm/book3s_xive.c | 638 +++++++++++++++++++++++-
>> arch/powerpc/kvm/book3s_xive_template.c | 636 -----------------------
>> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 12 +-
>> 9 files changed, 632 insertions(+), 785 deletions(-)
>> delete mode 100644 arch/powerpc/kvm/book3s_hv_rm_xive.c
>> delete mode 100644 arch/powerpc/kvm/book3s_xive_template.c
>>
>> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
>> index 8e3681a86074..f17379b0f161 100644
>> --- a/arch/powerpc/kvm/Makefile
>> +++ b/arch/powerpc/kvm/Makefile
>> @@ -73,7 +73,7 @@ kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
>> book3s_hv_tm.o
>> kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
>> - book3s_hv_rm_xics.o book3s_hv_rm_xive.o
>> + book3s_hv_rm_xics.o
>> kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
>> book3s_hv_tm_builtin.o
>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
>> b/arch/powerpc/include/asm/kvm_ppc.h
>> index 44200a27371b..a775377a570e 100644
>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>> @@ -787,13 +787,6 @@ long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu,
>> unsigned long flags,
>> unsigned long dest, unsigned long src);
>> long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
>> unsigned long slb_v, unsigned int status,
>> bool data);
>> -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
>> -unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
>> -unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long
>> server);
>> -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
>> - unsigned long mfrr);
>> -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
>> -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
>> void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
>> /*
>> diff --git a/arch/powerpc/kvm/book3s_xive.h
>> b/arch/powerpc/kvm/book3s_xive.h
>> index 09d0657596c3..1e48f72e8aa5 100644
>> --- a/arch/powerpc/kvm/book3s_xive.h
>> +++ b/arch/powerpc/kvm/book3s_xive.h
>> @@ -285,13 +285,6 @@ static inline u32 __xive_read_eq(__be32 *qpage,
>> u32 msk, u32 *idx, u32 *toggle)
>> return cur & 0x7fffffff;
>> }
>> -extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
>> -extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned
>> long server);
>> -extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
>> - unsigned long mfrr);
>> -extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
>> -extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
>> -
>> /*
>> * Common Xive routines for XICS-over-XIVE and XIVE native
>> */
>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c
>> b/arch/powerpc/kvm/book3s_hv_builtin.c
>> index 7e52d0beee77..88a8f6473c4e 100644
>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>> @@ -489,70 +489,6 @@ static long kvmppc_read_one_intr(bool *again)
>> return kvmppc_check_passthru(xisr, xirr, again);
>> }
>> -#ifdef CONFIG_KVM_XICS
>> -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
>> -{
>> - if (!kvmppc_xics_enabled(vcpu))
>> - return H_TOO_HARD;
>> - if (xics_on_xive())
>> - return xive_rm_h_xirr(vcpu);
>> - else
>> - return xics_rm_h_xirr(vcpu);
>> -}
>> -
>> -unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
>> -{
>> - if (!kvmppc_xics_enabled(vcpu))
>> - return H_TOO_HARD;
>> - vcpu->arch.regs.gpr[5] = get_tb();
>> - if (xics_on_xive())
>> - return xive_rm_h_xirr(vcpu);
>> - else
>> - return xics_rm_h_xirr(vcpu);
>> -}
>> -
>> -unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long
>> server)
>> -{
>> - if (!kvmppc_xics_enabled(vcpu))
>> - return H_TOO_HARD;
>> - if (xics_on_xive())
>> - return xive_rm_h_ipoll(vcpu, server);
>> - else
>> - return H_TOO_HARD;
>> -}
>> -
>> -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
>> - unsigned long mfrr)
>> -{
>> - if (!kvmppc_xics_enabled(vcpu))
>> - return H_TOO_HARD;
>> - if (xics_on_xive())
>> - return xive_rm_h_ipi(vcpu, server, mfrr);
>> - else
>> - return xics_rm_h_ipi(vcpu, server, mfrr);
>> -}
>> -
>> -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
>> -{
>> - if (!kvmppc_xics_enabled(vcpu))
>> - return H_TOO_HARD;
>> - if (xics_on_xive())
>> - return xive_rm_h_cppr(vcpu, cppr);
>> - else
>> - return xics_rm_h_cppr(vcpu, cppr);
>> -}
>> -
>> -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
>> -{
>> - if (!kvmppc_xics_enabled(vcpu))
>> - return H_TOO_HARD;
>> - if (xics_on_xive())
>> - return xive_rm_h_eoi(vcpu, xirr);
>> - else
>> - return xics_rm_h_eoi(vcpu, xirr);
>> -}
>> -#endif /* CONFIG_KVM_XICS */
>> -
>> void kvmppc_bad_interrupt(struct pt_regs *regs)
>> {
>> /*
>> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c
>> b/arch/powerpc/kvm/book3s_hv_rm_xics.c
>> index 587c33fc4564..e2246b715f68 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
>> +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
>> @@ -479,6 +479,11 @@ static void icp_rm_down_cppr(struct kvmppc_xics
>> *xics, struct kvmppc_icp *icp,
>> }
>> }
>> +unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
>> +{
>> + vcpu->arch.regs.gpr[5] = get_tb();
>> + return xics_rm_h_xirr(vcpu);
>> +}
>> unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
>> {
>> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c
>> b/arch/powerpc/kvm/book3s_hv_rm_xive.c
>> deleted file mode 100644
>> index dd9880731bd6..000000000000
>> --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c
>> +++ /dev/null
>> @@ -1,46 +0,0 @@
>> -// SPDX-License-Identifier: GPL-2.0
>> -#include <linux/kernel.h>
>> -#include <linux/kvm_host.h>
>> -#include <linux/err.h>
>> -#include <linux/kernel_stat.h>
>> -#include <linux/pgtable.h>
>> -
>> -#include <asm/kvm_book3s.h>
>> -#include <asm/kvm_ppc.h>
>> -#include <asm/hvcall.h>
>> -#include <asm/xics.h>
>> -#include <asm/debug.h>
>> -#include <asm/synch.h>
>> -#include <asm/cputhreads.h>
>> -#include <asm/ppc-opcode.h>
>> -#include <asm/pnv-pci.h>
>> -#include <asm/opal.h>
>> -#include <asm/smp.h>
>> -#include <asm/xive.h>
>> -#include <asm/xive-regs.h>
>> -
>> -#include "book3s_xive.h"
>> -
>> -/* XXX */
>> -#include <asm/udbg.h>
>> -//#define DBG(fmt...) udbg_printf(fmt)
>> -#define DBG(fmt...) do { } while(0)
>> -
>> -static inline void __iomem *get_tima_phys(void)
>> -{
>> - return local_paca->kvm_hstate.xive_tima_phys;
>> -}
>> -
>> -#undef XIVE_RUNTIME_CHECKS
>> -#define X_PFX xive_rm_
>> -#define X_STATIC
>> -#define X_STAT_PFX stat_rm_
>> -#define __x_tima get_tima_phys()
>> -#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_page))
>> -#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_page))
>> -#define __x_writeb __raw_rm_writeb
>> -#define __x_readw __raw_rm_readw
>> -#define __x_readq __raw_rm_readq
>> -#define __x_writeq __raw_rm_writeq
>> -
>> -#include "book3s_xive_template.c"
>> diff --git a/arch/powerpc/kvm/book3s_xive.c
>> b/arch/powerpc/kvm/book3s_xive.c
>> index c0ce5531d9bc..65515a96498a 100644
>> --- a/arch/powerpc/kvm/book3s_xive.c
>> +++ b/arch/powerpc/kvm/book3s_xive.c
>> @@ -30,27 +30,629 @@
>> #include "book3s_xive.h"
>> -
>> -/*
>> - * Virtual mode variants of the hcalls for use on radix/radix
>> - * with AIL. They require the VCPU's VP to be "pushed"
>> - *
>> - * We still instantiate them here because we use some of the
>> - * generated utility functions as well in this file.
>> - */
>> -#define XIVE_RUNTIME_CHECKS
>> -#define X_PFX xive_vm_
>> -#define X_STATIC static
>> -#define X_STAT_PFX stat_vm_
>> -#define __x_tima xive_tima
>> #define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
>> #define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
>> -#define __x_writeb __raw_writeb
>> -#define __x_readw __raw_readw
>> -#define __x_readq __raw_readq
>> -#define __x_writeq __raw_writeq
>> -#include "book3s_xive_template.c"
>> +/* Dummy interrupt used when taking interrupts out of a queue in
>> H_CPPR */
>> +#define XICS_DUMMY 1
>> +
>> +static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
>> +{
>> + u8 cppr;
>> + u16 ack;
>> +
>> + /*
>> + * Ensure any previous store to CPPR is ordered vs.
>> + * the subsequent loads from PIPR or ACK.
>> + */
>> + eieio();
>> +
>> + /* Perform the acknowledge OS to register cycle. */
>> + ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
>> +
>> + /* Synchronize subsequent queue accesses */
>> + mb();
>> +
>> + /* XXX Check grouping level */
>> +
>> + /* Anything ? */
>> + if (!((ack >> 8) & TM_QW1_NSR_EO))
>> + return;
>> +
>> + /* Grab CPPR of the most favored pending interrupt */
>> + cppr = ack & 0xff;
>> + if (cppr < 8)
>> + xc->pending |= 1 << cppr;
>> +
>> + /* Check consistency */
>> + if (cppr >= xc->hw_cppr)
>> + pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
>> + smp_processor_id(), cppr, xc->hw_cppr);
>> +
>> + /*
>> + * Update our image of the HW CPPR. We don't yet modify
>> + * xc->cppr, this will be done as we scan for interrupts
>> + * in the queues.
>> + */
>> + xc->hw_cppr = cppr;
>> +}
>> +
>> +static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
>> +{
>> + u64 val;
>> +
>> + if (offset == XIVE_ESB_SET_PQ_10 && xd->flags &
>> XIVE_IRQ_FLAG_STORE_EOI)
>> + offset |= XIVE_ESB_LD_ST_MO;
>> +
>> + val = __raw_readq(__x_eoi_page(xd) + offset);
>> +#ifdef __LITTLE_ENDIAN__
>> + val >>= 64-8;
>> +#endif
>> + return (u8)val;
>> +}
>> +
>> +
>> +static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
>> +{
>> + /* If the XIVE supports the new "store EOI facility, use it */
>> + if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
>> + __raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
>> + else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
>> + /*
>> + * For LSIs the HW EOI cycle is used rather than PQ bits,
>> + * as they are automatically re-triggred in HW when still
>> + * pending.
>> + */
>> + __raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
>> + } else {
>> + uint64_t eoi_val;
>> +
>> + /*
>> + * Otherwise for EOI, we use the special MMIO that does
>> + * a clear of both P and Q and returns the old Q,
>> + * except for LSIs where we use the "EOI cycle" special
>> + * load.
>> + *
>> + * This allows us to then do a re-trigger if Q was set
>> + * rather than synthetizing an interrupt in software
>> + */
>> + eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
>> +
>> + /* Re-trigger if needed */
>> + if ((eoi_val & 1) && __x_trig_page(xd))
>> + __raw_writeq(0, __x_trig_page(xd));
>> + }
>> +}
>> +
>> +enum {
>> + scan_fetch,
>> + scan_poll,
>> + scan_eoi,
>> +};
>> +
>> +static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
>> + u8 pending, int scan_type)
>> +{
>> + u32 hirq = 0;
>> + u8 prio = 0xff;
>> +
>> + /* Find highest pending priority */
>> + while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
>> + struct xive_q *q;
>> + u32 idx, toggle;
>> + __be32 *qpage;
>> +
>> + /*
>> + * If pending is 0 this will return 0xff which is what
>> + * we want
>> + */
>> + prio = ffs(pending) - 1;
>> +
>> + /* Don't scan past the guest cppr */
>> + if (prio >= xc->cppr || prio > 7) {
>> + if (xc->mfrr < xc->cppr) {
>> + prio = xc->mfrr;
>> + hirq = XICS_IPI;
>> + }
>> + break;
>> + }
>> +
>> + /* Grab queue and pointers */
>> + q = &xc->queues[prio];
>> + idx = q->idx;
>> + toggle = q->toggle;
>> +
>> + /*
>> + * Snapshot the queue page. The test further down for EOI
>> + * must use the same "copy" that was used by __xive_read_eq
>> + * since qpage can be set concurrently and we don't want
>> + * to miss an EOI.
>> + */
>> + qpage = READ_ONCE(q->qpage);
>> +
>> +skip_ipi:
>> + /*
>> + * Try to fetch from the queue. Will return 0 for a
>> + * non-queueing priority (ie, qpage = 0).
>> + */
>> + hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
>> +
>> + /*
>> + * If this was a signal for an MFFR change done by
>> + * H_IPI we skip it. Additionally, if we were fetching
>> + * we EOI it now, thus re-enabling reception of a new
>> + * such signal.
>> + *
>> + * We also need to do that if prio is 0 and we had no
>> + * page for the queue. In this case, we have non-queued
>> + * IPI that needs to be EOId.
>> + *
>> + * This is safe because if we have another pending MFRR
>> + * change that wasn't observed above, the Q bit will have
>> + * been set and another occurrence of the IPI will trigger.
>> + */
>> + if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
>> + if (scan_type == scan_fetch) {
>> + xive_vm_source_eoi(xc->vp_ipi,
>> + &xc->vp_ipi_data);
>> + q->idx = idx;
>> + q->toggle = toggle;
>> + }
>> + /* Loop back on same queue with updated idx/toggle */
>> + WARN_ON(hirq && hirq != XICS_IPI);
>> + if (hirq)
>> + goto skip_ipi;
>> + }
>> +
>> + /* If it's the dummy interrupt, continue searching */
>> + if (hirq == XICS_DUMMY)
>> + goto skip_ipi;
>> +
>> + /* Clear the pending bit if the queue is now empty */
>> + if (!hirq) {
>> + pending &= ~(1 << prio);
>> +
>> + /*
>> + * Check if the queue count needs adjusting due to
>> + * interrupts being moved away.
>> + */
>> + if (atomic_read(&q->pending_count)) {
>> + int p = atomic_xchg(&q->pending_count, 0);
>> +
>> + if (p) {
>> + WARN_ON(p > atomic_read(&q->count));
>> + atomic_sub(p, &q->count);
>> + }
>> + }
>> + }
>> +
>> + /*
>> + * If the most favoured prio we found pending is less
>> + * favored (or equal) than a pending IPI, we return
>> + * the IPI instead.
>> + */
>> + if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
>> + prio = xc->mfrr;
>> + hirq = XICS_IPI;
>> + break;
>> + }
>> +
>> + /* If fetching, update queue pointers */
>> + if (scan_type == scan_fetch) {
>> + q->idx = idx;
>> + q->toggle = toggle;
>> + }
>> + }
>> +
>> + /* If we are just taking a "peek", do nothing else */
>> + if (scan_type == scan_poll)
>> + return hirq;
>> +
>> + /* Update the pending bits */
>> + xc->pending = pending;
>> +
>> + /*
>> + * If this is an EOI that's it, no CPPR adjustment done here,
>> + * all we needed was cleanup the stale pending bits and check
>> + * if there's anything left.
>> + */
>> + if (scan_type == scan_eoi)
>> + return hirq;
>> +
>> + /*
>> + * If we found an interrupt, adjust what the guest CPPR should
>> + * be as if we had just fetched that interrupt from HW.
>> + *
>> + * Note: This can only make xc->cppr smaller as the previous
>> + * loop will only exit with hirq != 0 if prio is lower than
>> + * the current xc->cppr. Thus we don't need to re-check xc->mfrr
>> + * for pending IPIs.
>> + */
>> + if (hirq)
>> + xc->cppr = prio;
>> + /*
>> + * If it was an IPI the HW CPPR might have been lowered too much
>> + * as the HW interrupt we use for IPIs is routed to priority 0.
>> + *
>> + * We re-sync it here.
>> + */
>> + if (xc->cppr != xc->hw_cppr) {
>> + xc->hw_cppr = xc->cppr;
>> + __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
>> + }
>> +
>> + return hirq;
>> +}
>> +
>> +static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
>> +{
>> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> + u8 old_cppr;
>> + u32 hirq;
>> +
>> + pr_devel("H_XIRR\n");
>> +
>> + xc->stat_vm_h_xirr++;
>> +
>> + /* First collect pending bits from HW */
>> + xive_vm_ack_pending(xc);
>> +
>> + pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
>> + xc->pending, xc->hw_cppr, xc->cppr);
>> +
>> + /* Grab previous CPPR and reverse map it */
>> + old_cppr = xive_prio_to_guest(xc->cppr);
>> +
>> + /* Scan for actual interrupts */
>> + hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
>> +
>> + pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
>> + hirq, xc->hw_cppr, xc->cppr);
>> +
>> + /* That should never hit */
>> + if (hirq & 0xff000000)
>> + pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
>> +
>> + /*
>> + * XXX We could check if the interrupt is masked here and
>> + * filter it. If we chose to do so, we would need to do:
>> + *
>> + * if (masked) {
>> + * lock();
>> + * if (masked) {
>> + * old_Q = true;
>> + * hirq = 0;
>> + * }
>> + * unlock();
>> + * }
>> + */
>> +
>> + /* Return interrupt and old CPPR in GPR4 */
>> + vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
>> +
>> + return H_SUCCESS;
>> +}
>> +
>> +static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned
>> long server)
>> +{
>> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> + u8 pending = xc->pending;
>> + u32 hirq;
>> +
>> + pr_devel("H_IPOLL(server=%ld)\n", server);
>> +
>> + xc->stat_vm_h_ipoll++;
>> +
>> + /* Grab the target VCPU if not the current one */
>> + if (xc->server_num != server) {
>> + vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
>> + if (!vcpu)
>> + return H_PARAMETER;
>> + xc = vcpu->arch.xive_vcpu;
>> +
>> + /* Scan all priorities */
>> + pending = 0xff;
>> + } else {
>> + /* Grab pending interrupt if any */
>> + __be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
>> + u8 pipr = be64_to_cpu(qw1) & 0xff;
>> +
>> + if (pipr < 8)
>> + pending |= 1 << pipr;
>> + }
>> +
>> + hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
>> +
>> + /* Return interrupt and old CPPR in GPR4 */
>> + vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
>> +
>> + return H_SUCCESS;
>> +}
>> +
>> +static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
>> +{
>> + u8 pending, prio;
>> +
>> + pending = xc->pending;
>> + if (xc->mfrr != 0xff) {
>> + if (xc->mfrr < 8)
>> + pending |= 1 << xc->mfrr;
>> + else
>> + pending |= 0x80;
>> + }
>> + if (!pending)
>> + return;
>> + prio = ffs(pending) - 1;
>> +
>> + __raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
>> +}
>> +
>> +static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
>> + struct kvmppc_xive_vcpu *xc)
>> +{
>> + unsigned int prio;
>> +
>> + /* For each priority that is now masked */
>> + for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
>> + struct xive_q *q = &xc->queues[prio];
>> + struct kvmppc_xive_irq_state *state;
>> + struct kvmppc_xive_src_block *sb;
>> + u32 idx, toggle, entry, irq, hw_num;
>> + struct xive_irq_data *xd;
>> + __be32 *qpage;
>> + u16 src;
>> +
>> + idx = q->idx;
>> + toggle = q->toggle;
>> + qpage = READ_ONCE(q->qpage);
>> + if (!qpage)
>> + continue;
>> +
>> + /* For each interrupt in the queue */
>> + for (;;) {
>> + entry = be32_to_cpup(qpage + idx);
>> +
>> + /* No more ? */
>> + if ((entry >> 31) == toggle)
>> + break;
>> + irq = entry & 0x7fffffff;
>> +
>> + /* Skip dummies and IPIs */
>> + if (irq == XICS_DUMMY || irq == XICS_IPI)
>> + goto next;
>> + sb = kvmppc_xive_find_source(xive, irq, &src);
>> + if (!sb)
>> + goto next;
>> + state = &sb->irq_state[src];
>> +
>> + /* Has it been rerouted ? */
>> + if (xc->server_num == state->act_server)
>> + goto next;
>> +
>> + /*
>> + * Allright, it *has* been re-routed, kill it from
>> + * the queue.
>> + */
>> + qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
>> +
>> + /* Find the HW interrupt */
>> + kvmppc_xive_select_irq(state, &hw_num, &xd);
>> +
>> + /* If it's not an LSI, set PQ to 11 the EOI will force a
>> resend */
>> + if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
>> + xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
>> +
>> + /* EOI the source */
>> + xive_vm_source_eoi(hw_num, xd);
>> +
>> +next:
>> + idx = (idx + 1) & q->msk;
>> + if (idx == 0)
>> + toggle ^= 1;
>> + }
>> + }
>> +}
>> +
>> +static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
>> +{
>> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> + struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> + u8 old_cppr;
>> +
>> + pr_devel("H_CPPR(cppr=%ld)\n", cppr);
>> +
>> + xc->stat_vm_h_cppr++;
>> +
>> + /* Map CPPR */
>> + cppr = xive_prio_from_guest(cppr);
>> +
>> + /* Remember old and update SW state */
>> + old_cppr = xc->cppr;
>> + xc->cppr = cppr;
>> +
>> + /*
>> + * Order the above update of xc->cppr with the subsequent
>> + * read of xc->mfrr inside push_pending_to_hw()
>> + */
>> + smp_mb();
>> +
>> + if (cppr > old_cppr) {
>> + /*
>> + * We are masking less, we need to look for pending things
>> + * to deliver and set VP pending bits accordingly to trigger
>> + * a new interrupt otherwise we might miss MFRR changes for
>> + * which we have optimized out sending an IPI signal.
>> + */
>> + xive_vm_push_pending_to_hw(xc);
>> + } else {
>> + /*
>> + * We are masking more, we need to check the queue for any
>> + * interrupt that has been routed to another CPU, take
>> + * it out (replace it with the dummy) and retrigger it.
>> + *
>> + * This is necessary since those interrupts may otherwise
>> + * never be processed, at least not until this CPU restores
>> + * its CPPR.
>> + *
>> + * This is in theory racy vs. HW adding new interrupts to
>> + * the queue. In practice this works because the interesting
>> + * cases are when the guest has done a set_xive() to move the
>> + * interrupt away, which flushes the xive, followed by the
>> + * target CPU doing a H_CPPR. So any new interrupt coming into
>> + * the queue must still be routed to us and isn't a source
>> + * of concern.
>> + */
>> + xive_vm_scan_for_rerouted_irqs(xive, xc);
>> + }
>> +
>> + /* Apply new CPPR */
>> + xc->hw_cppr = cppr;
>> + __raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
>> +
>> + return H_SUCCESS;
>> +}
>> +
>> +static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
>> +{
>> + struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> + struct kvmppc_xive_src_block *sb;
>> + struct kvmppc_xive_irq_state *state;
>> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> + struct xive_irq_data *xd;
>> + u8 new_cppr = xirr >> 24;
>> + u32 irq = xirr & 0x00ffffff, hw_num;
>> + u16 src;
>> + int rc = 0;
>> +
>> + pr_devel("H_EOI(xirr=%08lx)\n", xirr);
>> +
>> + xc->stat_vm_h_eoi++;
>> +
>> + xc->cppr = xive_prio_from_guest(new_cppr);
>> +
>> + /*
>> + * IPIs are synthetized from MFRR and thus don't need
>> + * any special EOI handling. The underlying interrupt
>> + * used to signal MFRR changes is EOId when fetched from
>> + * the queue.
>> + */
>> + if (irq == XICS_IPI || irq == 0) {
>> + /*
>> + * This barrier orders the setting of xc->cppr vs.
>> + * subsquent test of xc->mfrr done inside
>> + * scan_interrupts and push_pending_to_hw
>> + */
>> + smp_mb();
>> + goto bail;
>> + }
>> +
>> + /* Find interrupt source */
>> + sb = kvmppc_xive_find_source(xive, irq, &src);
>> + if (!sb) {
>> + pr_devel(" source not found !\n");
>> + rc = H_PARAMETER;
>> + /* Same as above */
>> + smp_mb();
>> + goto bail;
>> + }
>> + state = &sb->irq_state[src];
>> + kvmppc_xive_select_irq(state, &hw_num, &xd);
>> +
>> + state->in_eoi = true;
>> +
>> + /*
>> + * This barrier orders both setting of in_eoi above vs,
>> + * subsequent test of guest_priority, and the setting
>> + * of xc->cppr vs. subsquent test of xc->mfrr done inside
>> + * scan_interrupts and push_pending_to_hw
>> + */
>> + smp_mb();
>> +
>> +again:
>> + if (state->guest_priority == MASKED) {
>> + arch_spin_lock(&sb->lock);
>> + if (state->guest_priority != MASKED) {
>> + arch_spin_unlock(&sb->lock);
>> + goto again;
>> + }
>> + pr_devel(" EOI on saved P...\n");
>> +
>> + /* Clear old_p, that will cause unmask to perform an EOI */
>> + state->old_p = false;
>> +
>> + arch_spin_unlock(&sb->lock);
>> + } else {
>> + pr_devel(" EOI on source...\n");
>> +
>> + /* Perform EOI on the source */
>> + xive_vm_source_eoi(hw_num, xd);
>> +
>> + /* If it's an emulated LSI, check level and resend */
>> + if (state->lsi && state->asserted)
>> + __raw_writeq(0, __x_trig_page(xd));
>> +
>> + }
>> +
>> + /*
>> + * This barrier orders the above guest_priority check
>> + * and spin_lock/unlock with clearing in_eoi below.
>> + *
>> + * It also has to be a full mb() as it must ensure
>> + * the MMIOs done in source_eoi() are completed before
>> + * state->in_eoi is visible.
>> + */
>> + mb();
>> + state->in_eoi = false;
>> +bail:
>> +
>> + /* Re-evaluate pending IRQs and update HW */
>> + xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
>> + xive_vm_push_pending_to_hw(xc);
>> + pr_devel(" after scan pending=%02x\n", xc->pending);
>> +
>> + /* Apply new CPPR */
>> + xc->hw_cppr = xc->cppr;
>> + __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
>> +
>> + return rc;
>> +}
>> +
>> +static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
>> + unsigned long mfrr)
>> +{
>> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +
>> + pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
>> +
>> + xc->stat_vm_h_ipi++;
>> +
>> + /* Find target */
>> + vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
>> + if (!vcpu)
>> + return H_PARAMETER;
>> + xc = vcpu->arch.xive_vcpu;
>> +
>> + /* Locklessly write over MFRR */
>> + xc->mfrr = mfrr;
>> +
>> + /*
>> + * The load of xc->cppr below and the subsequent MMIO store
>> + * to the IPI must happen after the above mfrr update is
>> + * globally visible so that:
>> + *
>> + * - Synchronize with another CPU doing an H_EOI or a H_CPPR
>> + * updating xc->cppr then reading xc->mfrr.
>> + *
>> + * - The target of the IPI sees the xc->mfrr update
>> + */
>> + mb();
>> +
>> + /* Shoot the IPI if most favored than target cppr */
>> + if (mfrr < xc->cppr)
>> + __raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
>> +
>> + return H_SUCCESS;
>> +}
>> /*
>> * We leave a gap of a couple of interrupts in the queue to
>> diff --git a/arch/powerpc/kvm/book3s_xive_template.c
>> b/arch/powerpc/kvm/book3s_xive_template.c
>> deleted file mode 100644
>> index b0015e05d99a..000000000000
>> --- a/arch/powerpc/kvm/book3s_xive_template.c
>> +++ /dev/null
>> @@ -1,636 +0,0 @@
>> -// SPDX-License-Identifier: GPL-2.0-only
>> -/*
>> - * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
>> - */
>> -
>> -/* File to be included by other .c files */
>> -
>> -#define XGLUE(a,b) a##b
>> -#define GLUE(a,b) XGLUE(a,b)
>> -
>> -/* Dummy interrupt used when taking interrupts out of a queue in
>> H_CPPR */
>> -#define XICS_DUMMY 1
>> -
>> -static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
>> -{
>> - u8 cppr;
>> - u16 ack;
>> -
>> - /*
>> - * Ensure any previous store to CPPR is ordered vs.
>> - * the subsequent loads from PIPR or ACK.
>> - */
>> - eieio();
>> -
>> - /* Perform the acknowledge OS to register cycle. */
>> - ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
>> -
>> - /* Synchronize subsequent queue accesses */
>> - mb();
>> -
>> - /* XXX Check grouping level */
>> -
>> - /* Anything ? */
>> - if (!((ack >> 8) & TM_QW1_NSR_EO))
>> - return;
>> -
>> - /* Grab CPPR of the most favored pending interrupt */
>> - cppr = ack & 0xff;
>> - if (cppr < 8)
>> - xc->pending |= 1 << cppr;
>> -
>> -#ifdef XIVE_RUNTIME_CHECKS
>> - /* Check consistency */
>> - if (cppr >= xc->hw_cppr)
>> - pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
>> - smp_processor_id(), cppr, xc->hw_cppr);
>> -#endif
>> -
>> - /*
>> - * Update our image of the HW CPPR. We don't yet modify
>> - * xc->cppr, this will be done as we scan for interrupts
>> - * in the queues.
>> - */
>> - xc->hw_cppr = cppr;
>> -}
>> -
>> -static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
>> -{
>> - u64 val;
>> -
>> - if (offset == XIVE_ESB_SET_PQ_10 && xd->flags &
>> XIVE_IRQ_FLAG_STORE_EOI)
>> - offset |= XIVE_ESB_LD_ST_MO;
>> -
>> - val =__x_readq(__x_eoi_page(xd) + offset);
>> -#ifdef __LITTLE_ENDIAN__
>> - val >>= 64-8;
>> -#endif
>> - return (u8)val;
>> -}
>> -
>> -
>> -static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
>> -{
>> - /* If the XIVE supports the new "store EOI facility, use it */
>> - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
>> - __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
>> - else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
>> - /*
>> - * For LSIs the HW EOI cycle is used rather than PQ bits,
>> - * as they are automatically re-triggred in HW when still
>> - * pending.
>> - */
>> - __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
>> - } else {
>> - uint64_t eoi_val;
>> -
>> - /*
>> - * Otherwise for EOI, we use the special MMIO that does
>> - * a clear of both P and Q and returns the old Q,
>> - * except for LSIs where we use the "EOI cycle" special
>> - * load.
>> - *
>> - * This allows us to then do a re-trigger if Q was set
>> - * rather than synthetizing an interrupt in software
>> - */
>> - eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
>> -
>> - /* Re-trigger if needed */
>> - if ((eoi_val & 1) && __x_trig_page(xd))
>> - __x_writeq(0, __x_trig_page(xd));
>> - }
>> -}
>> -
>> -enum {
>> - scan_fetch,
>> - scan_poll,
>> - scan_eoi,
>> -};
>> -
>> -static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
>> - u8 pending, int scan_type)
>> -{
>> - u32 hirq = 0;
>> - u8 prio = 0xff;
>> -
>> - /* Find highest pending priority */
>> - while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
>> - struct xive_q *q;
>> - u32 idx, toggle;
>> - __be32 *qpage;
>> -
>> - /*
>> - * If pending is 0 this will return 0xff which is what
>> - * we want
>> - */
>> - prio = ffs(pending) - 1;
>> -
>> - /* Don't scan past the guest cppr */
>> - if (prio >= xc->cppr || prio > 7) {
>> - if (xc->mfrr < xc->cppr) {
>> - prio = xc->mfrr;
>> - hirq = XICS_IPI;
>> - }
>> - break;
>> - }
>> -
>> - /* Grab queue and pointers */
>> - q = &xc->queues[prio];
>> - idx = q->idx;
>> - toggle = q->toggle;
>> -
>> - /*
>> - * Snapshot the queue page. The test further down for EOI
>> - * must use the same "copy" that was used by __xive_read_eq
>> - * since qpage can be set concurrently and we don't want
>> - * to miss an EOI.
>> - */
>> - qpage = READ_ONCE(q->qpage);
>> -
>> -skip_ipi:
>> - /*
>> - * Try to fetch from the queue. Will return 0 for a
>> - * non-queueing priority (ie, qpage = 0).
>> - */
>> - hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
>> -
>> - /*
>> - * If this was a signal for an MFFR change done by
>> - * H_IPI we skip it. Additionally, if we were fetching
>> - * we EOI it now, thus re-enabling reception of a new
>> - * such signal.
>> - *
>> - * We also need to do that if prio is 0 and we had no
>> - * page for the queue. In this case, we have non-queued
>> - * IPI that needs to be EOId.
>> - *
>> - * This is safe because if we have another pending MFRR
>> - * change that wasn't observed above, the Q bit will have
>> - * been set and another occurrence of the IPI will trigger.
>> - */
>> - if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
>> - if (scan_type == scan_fetch) {
>> - GLUE(X_PFX,source_eoi)(xc->vp_ipi,
>> - &xc->vp_ipi_data);
>> - q->idx = idx;
>> - q->toggle = toggle;
>> - }
>> - /* Loop back on same queue with updated idx/toggle */
>> -#ifdef XIVE_RUNTIME_CHECKS
>> - WARN_ON(hirq && hirq != XICS_IPI);
>> -#endif
>> - if (hirq)
>> - goto skip_ipi;
>> - }
>> -
>> - /* If it's the dummy interrupt, continue searching */
>> - if (hirq == XICS_DUMMY)
>> - goto skip_ipi;
>> -
>> - /* Clear the pending bit if the queue is now empty */
>> - if (!hirq) {
>> - pending &= ~(1 << prio);
>> -
>> - /*
>> - * Check if the queue count needs adjusting due to
>> - * interrupts being moved away.
>> - */
>> - if (atomic_read(&q->pending_count)) {
>> - int p = atomic_xchg(&q->pending_count, 0);
>> - if (p) {
>> -#ifdef XIVE_RUNTIME_CHECKS
>> - WARN_ON(p > atomic_read(&q->count));
>> -#endif
>> - atomic_sub(p, &q->count);
>> - }
>> - }
>> - }
>> -
>> - /*
>> - * If the most favoured prio we found pending is less
>> - * favored (or equal) than a pending IPI, we return
>> - * the IPI instead.
>> - */
>> - if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
>> - prio = xc->mfrr;
>> - hirq = XICS_IPI;
>> - break;
>> - }
>> -
>> - /* If fetching, update queue pointers */
>> - if (scan_type == scan_fetch) {
>> - q->idx = idx;
>> - q->toggle = toggle;
>> - }
>> - }
>> -
>> - /* If we are just taking a "peek", do nothing else */
>> - if (scan_type == scan_poll)
>> - return hirq;
>> -
>> - /* Update the pending bits */
>> - xc->pending = pending;
>> -
>> - /*
>> - * If this is an EOI that's it, no CPPR adjustment done here,
>> - * all we needed was cleanup the stale pending bits and check
>> - * if there's anything left.
>> - */
>> - if (scan_type == scan_eoi)
>> - return hirq;
>> -
>> - /*
>> - * If we found an interrupt, adjust what the guest CPPR should
>> - * be as if we had just fetched that interrupt from HW.
>> - *
>> - * Note: This can only make xc->cppr smaller as the previous
>> - * loop will only exit with hirq != 0 if prio is lower than
>> - * the current xc->cppr. Thus we don't need to re-check xc->mfrr
>> - * for pending IPIs.
>> - */
>> - if (hirq)
>> - xc->cppr = prio;
>> - /*
>> - * If it was an IPI the HW CPPR might have been lowered too much
>> - * as the HW interrupt we use for IPIs is routed to priority 0.
>> - *
>> - * We re-sync it here.
>> - */
>> - if (xc->cppr != xc->hw_cppr) {
>> - xc->hw_cppr = xc->cppr;
>> - __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
>> - }
>> -
>> - return hirq;
>> -}
>> -
>> -X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
>> -{
>> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> - u8 old_cppr;
>> - u32 hirq;
>> -
>> - pr_devel("H_XIRR\n");
>> -
>> - xc->GLUE(X_STAT_PFX,h_xirr)++;
>> -
>> - /* First collect pending bits from HW */
>> - GLUE(X_PFX,ack_pending)(xc);
>> -
>> - pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
>> - xc->pending, xc->hw_cppr, xc->cppr);
>> -
>> - /* Grab previous CPPR and reverse map it */
>> - old_cppr = xive_prio_to_guest(xc->cppr);
>> -
>> - /* Scan for actual interrupts */
>> - hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
>> -
>> - pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
>> - hirq, xc->hw_cppr, xc->cppr);
>> -
>> -#ifdef XIVE_RUNTIME_CHECKS
>> - /* That should never hit */
>> - if (hirq & 0xff000000)
>> - pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
>> -#endif
>> -
>> - /*
>> - * XXX We could check if the interrupt is masked here and
>> - * filter it. If we chose to do so, we would need to do:
>> - *
>> - * if (masked) {
>> - * lock();
>> - * if (masked) {
>> - * old_Q = true;
>> - * hirq = 0;
>> - * }
>> - * unlock();
>> - * }
>> - */
>> -
>> - /* Return interrupt and old CPPR in GPR4 */
>> - vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
>> -
>> - return H_SUCCESS;
>> -}
>> -
>> -X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu,
>> unsigned long server)
>> -{
>> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> - u8 pending = xc->pending;
>> - u32 hirq;
>> -
>> - pr_devel("H_IPOLL(server=%ld)\n", server);
>> -
>> - xc->GLUE(X_STAT_PFX,h_ipoll)++;
>> -
>> - /* Grab the target VCPU if not the current one */
>> - if (xc->server_num != server) {
>> - vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
>> - if (!vcpu)
>> - return H_PARAMETER;
>> - xc = vcpu->arch.xive_vcpu;
>> -
>> - /* Scan all priorities */
>> - pending = 0xff;
>> - } else {
>> - /* Grab pending interrupt if any */
>> - __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
>> - u8 pipr = be64_to_cpu(qw1) & 0xff;
>> - if (pipr < 8)
>> - pending |= 1 << pipr;
>> - }
>> -
>> - hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
>> -
>> - /* Return interrupt and old CPPR in GPR4 */
>> - vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
>> -
>> - return H_SUCCESS;
>> -}
>> -
>> -static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
>> -{
>> - u8 pending, prio;
>> -
>> - pending = xc->pending;
>> - if (xc->mfrr != 0xff) {
>> - if (xc->mfrr < 8)
>> - pending |= 1 << xc->mfrr;
>> - else
>> - pending |= 0x80;
>> - }
>> - if (!pending)
>> - return;
>> - prio = ffs(pending) - 1;
>> -
>> - __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
>> -}
>> -
>> -static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
>> - struct kvmppc_xive_vcpu *xc)
>> -{
>> - unsigned int prio;
>> -
>> - /* For each priority that is now masked */
>> - for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
>> - struct xive_q *q = &xc->queues[prio];
>> - struct kvmppc_xive_irq_state *state;
>> - struct kvmppc_xive_src_block *sb;
>> - u32 idx, toggle, entry, irq, hw_num;
>> - struct xive_irq_data *xd;
>> - __be32 *qpage;
>> - u16 src;
>> -
>> - idx = q->idx;
>> - toggle = q->toggle;
>> - qpage = READ_ONCE(q->qpage);
>> - if (!qpage)
>> - continue;
>> -
>> - /* For each interrupt in the queue */
>> - for (;;) {
>> - entry = be32_to_cpup(qpage + idx);
>> -
>> - /* No more ? */
>> - if ((entry >> 31) == toggle)
>> - break;
>> - irq = entry & 0x7fffffff;
>> -
>> - /* Skip dummies and IPIs */
>> - if (irq == XICS_DUMMY || irq == XICS_IPI)
>> - goto next;
>> - sb = kvmppc_xive_find_source(xive, irq, &src);
>> - if (!sb)
>> - goto next;
>> - state = &sb->irq_state[src];
>> -
>> - /* Has it been rerouted ? */
>> - if (xc->server_num == state->act_server)
>> - goto next;
>> -
>> - /*
>> - * Allright, it *has* been re-routed, kill it from
>> - * the queue.
>> - */
>> - qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
>> -
>> - /* Find the HW interrupt */
>> - kvmppc_xive_select_irq(state, &hw_num, &xd);
>> -
>> - /* If it's not an LSI, set PQ to 11 the EOI will force a
>> resend */
>> - if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
>> - GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
>> -
>> - /* EOI the source */
>> - GLUE(X_PFX,source_eoi)(hw_num, xd);
>> -
>> - next:
>> - idx = (idx + 1) & q->msk;
>> - if (idx == 0)
>> - toggle ^= 1;
>> - }
>> - }
>> -}
>> -
>> -X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long
>> cppr)
>> -{
>> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> - struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> - u8 old_cppr;
>> -
>> - pr_devel("H_CPPR(cppr=%ld)\n", cppr);
>> -
>> - xc->GLUE(X_STAT_PFX,h_cppr)++;
>> -
>> - /* Map CPPR */
>> - cppr = xive_prio_from_guest(cppr);
>> -
>> - /* Remember old and update SW state */
>> - old_cppr = xc->cppr;
>> - xc->cppr = cppr;
>> -
>> - /*
>> - * Order the above update of xc->cppr with the subsequent
>> - * read of xc->mfrr inside push_pending_to_hw()
>> - */
>> - smp_mb();
>> -
>> - if (cppr > old_cppr) {
>> - /*
>> - * We are masking less, we need to look for pending things
>> - * to deliver and set VP pending bits accordingly to trigger
>> - * a new interrupt otherwise we might miss MFRR changes for
>> - * which we have optimized out sending an IPI signal.
>> - */
>> - GLUE(X_PFX,push_pending_to_hw)(xc);
>> - } else {
>> - /*
>> - * We are masking more, we need to check the queue for any
>> - * interrupt that has been routed to another CPU, take
>> - * it out (replace it with the dummy) and retrigger it.
>> - *
>> - * This is necessary since those interrupts may otherwise
>> - * never be processed, at least not until this CPU restores
>> - * its CPPR.
>> - *
>> - * This is in theory racy vs. HW adding new interrupts to
>> - * the queue. In practice this works because the interesting
>> - * cases are when the guest has done a set_xive() to move the
>> - * interrupt away, which flushes the xive, followed by the
>> - * target CPU doing a H_CPPR. So any new interrupt coming into
>> - * the queue must still be routed to us and isn't a source
>> - * of concern.
>> - */
>> - GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
>> - }
>> -
>> - /* Apply new CPPR */
>> - xc->hw_cppr = cppr;
>> - __x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
>> -
>> - return H_SUCCESS;
>> -}
>> -
>> -X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long
>> xirr)
>> -{
>> - struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> - struct kvmppc_xive_src_block *sb;
>> - struct kvmppc_xive_irq_state *state;
>> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> - struct xive_irq_data *xd;
>> - u8 new_cppr = xirr >> 24;
>> - u32 irq = xirr & 0x00ffffff, hw_num;
>> - u16 src;
>> - int rc = 0;
>> -
>> - pr_devel("H_EOI(xirr=%08lx)\n", xirr);
>> -
>> - xc->GLUE(X_STAT_PFX,h_eoi)++;
>> -
>> - xc->cppr = xive_prio_from_guest(new_cppr);
>> -
>> - /*
>> - * IPIs are synthetized from MFRR and thus don't need
>> - * any special EOI handling. The underlying interrupt
>> - * used to signal MFRR changes is EOId when fetched from
>> - * the queue.
>> - */
>> - if (irq == XICS_IPI || irq == 0) {
>> - /*
>> - * This barrier orders the setting of xc->cppr vs.
>> - * subsquent test of xc->mfrr done inside
>> - * scan_interrupts and push_pending_to_hw
>> - */
>> - smp_mb();
>> - goto bail;
>> - }
>> -
>> - /* Find interrupt source */
>> - sb = kvmppc_xive_find_source(xive, irq, &src);
>> - if (!sb) {
>> - pr_devel(" source not found !\n");
>> - rc = H_PARAMETER;
>> - /* Same as above */
>> - smp_mb();
>> - goto bail;
>> - }
>> - state = &sb->irq_state[src];
>> - kvmppc_xive_select_irq(state, &hw_num, &xd);
>> -
>> - state->in_eoi = true;
>> -
>> - /*
>> - * This barrier orders both setting of in_eoi above vs,
>> - * subsequent test of guest_priority, and the setting
>> - * of xc->cppr vs. subsquent test of xc->mfrr done inside
>> - * scan_interrupts and push_pending_to_hw
>> - */
>> - smp_mb();
>> -
>> -again:
>> - if (state->guest_priority == MASKED) {
>> - arch_spin_lock(&sb->lock);
>> - if (state->guest_priority != MASKED) {
>> - arch_spin_unlock(&sb->lock);
>> - goto again;
>> - }
>> - pr_devel(" EOI on saved P...\n");
>> -
>> - /* Clear old_p, that will cause unmask to perform an EOI */
>> - state->old_p = false;
>> -
>> - arch_spin_unlock(&sb->lock);
>> - } else {
>> - pr_devel(" EOI on source...\n");
>> -
>> - /* Perform EOI on the source */
>> - GLUE(X_PFX,source_eoi)(hw_num, xd);
>> -
>> - /* If it's an emulated LSI, check level and resend */
>> - if (state->lsi && state->asserted)
>> - __x_writeq(0, __x_trig_page(xd));
>> -
>> - }
>> -
>> - /*
>> - * This barrier orders the above guest_priority check
>> - * and spin_lock/unlock with clearing in_eoi below.
>> - *
>> - * It also has to be a full mb() as it must ensure
>> - * the MMIOs done in source_eoi() are completed before
>> - * state->in_eoi is visible.
>> - */
>> - mb();
>> - state->in_eoi = false;
>> -bail:
>> -
>> - /* Re-evaluate pending IRQs and update HW */
>> - GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
>> - GLUE(X_PFX,push_pending_to_hw)(xc);
>> - pr_devel(" after scan pending=%02x\n", xc->pending);
>> -
>> - /* Apply new CPPR */
>> - xc->hw_cppr = xc->cppr;
>> - __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
>> -
>> - return rc;
>> -}
>> -
>> -X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long
>> server,
>> - unsigned long mfrr)
>> -{
>> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> -
>> - pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
>> -
>> - xc->GLUE(X_STAT_PFX,h_ipi)++;
>> -
>> - /* Find target */
>> - vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
>> - if (!vcpu)
>> - return H_PARAMETER;
>> - xc = vcpu->arch.xive_vcpu;
>> -
>> - /* Locklessly write over MFRR */
>> - xc->mfrr = mfrr;
>> -
>> - /*
>> - * The load of xc->cppr below and the subsequent MMIO store
>> - * to the IPI must happen after the above mfrr update is
>> - * globally visible so that:
>> - *
>> - * - Synchronize with another CPU doing an H_EOI or a H_CPPR
>> - * updating xc->cppr then reading xc->mfrr.
>> - *
>> - * - The target of the IPI sees the xc->mfrr update
>> - */
>> - mb();
>> -
>> - /* Shoot the IPI if most favored than target cppr */
>> - if (mfrr < xc->cppr)
>> - __x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
>> -
>> - return H_SUCCESS;
>> -}
>> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> index 44d74bfe05df..5003563ca38f 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> @@ -1803,11 +1803,11 @@ hcall_real_table:
>> .long 0 /* 0x5c */
>> .long 0 /* 0x60 */
>> #ifdef CONFIG_KVM_XICS
>> - .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
>> - .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
>> - .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
>> - .long DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
>> - .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
>> + .long DOTSYM(xics_rm_h_eoi) - hcall_real_table
>> + .long DOTSYM(xics_rm_h_cppr) - hcall_real_table
>> + .long DOTSYM(xics_rm_h_ipi) - hcall_real_table
>> + .long 0 /* 0x70 - H_IPOLL */
>> + .long DOTSYM(xics_rm_h_xirr) - hcall_real_table
>> #else
>> .long 0 /* 0x64 - H_EOI */
>> .long 0 /* 0x68 - H_CPPR */
>> @@ -1977,7 +1977,7 @@ hcall_real_table:
>> .long 0 /* 0x2f4 */
>> .long 0 /* 0x2f8 */
>> #ifdef CONFIG_KVM_XICS
>> - .long DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
>> + .long DOTSYM(xics_rm_h_xirr_x) - hcall_real_table
>> #else
>> .long 0 /* 0x2fc - H_XIRR_X*/
>> #endif
>