[PATCH kernel] KVM: PPC: Book3s: Remove real mode interrupt controller hcalls handlers
Cédric Le Goater
clg at kaod.org
Wed May 11 03:58:44 AEST 2022
Hello Alexey,
On 5/9/22 09:11, Alexey Kardashevskiy wrote:
> Currently we have 2 sets of interrupt controller hypercalls handlers
> for real and virtual modes, this is from POWER8 times when switching
> MMU on was considered an expensive operation.
>
> POWER9 however does not have dependent threads and MMU is enabled for
> handling hcalls so the XIVE native
XIVE native does not have any real-mode hcall handlers. In fact, all
are handled at the QEMU level.
> or XICS-on-XIVE real mode handlers never execute on real P9 and
> later CPUs.
They are not? I am surprised. It must be a "recent" change. Anyhow,
if you can remove them safely, this is good news and you should be able
to clean up some more code in the PowerNV native interface.
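
For readers skimming the diff below: the wrappers being deleted from
book3s_hv_builtin.c were thin runtime dispatchers that picked the XIVE or
XICS real-mode backend for each hcall. A standalone toy model of that
dispatch shape (stubbed types and values, not kernel code; the H_TOO_HARD
value here is an assumption standing in for the kernel's constant):

#include <stdbool.h>
#include <stdio.h>

#define H_TOO_HARD 9999UL  /* stand-in for the kernel's hvcall return code */

/* Stand-ins for kvmppc_xics_enabled() and xics_on_xive(); on POWER9 with
 * XIVE the latter is true, yet the whole real-mode path is never reached
 * because hcalls are handled with the MMU on. */
static bool xics_enabled = true;
static bool on_xive = true;

static unsigned long xive_rm_h_xirr(void) { return 0; } /* dead on P9+, removed */
static unsigned long xics_rm_h_xirr(void) { return 0; } /* native XICS, kept for P8 */

/* Shape of the removed kvmppc_rm_h_xirr() wrapper from book3s_hv_builtin.c */
static unsigned long rm_h_xirr(void)
{
	if (!xics_enabled)
		return H_TOO_HARD; /* punt to the virtual-mode handler */
	return on_xive ? xive_rm_h_xirr() : xics_rm_h_xirr();
}

int main(void)
{
	printf("real-mode H_XIRR dispatch returned %lu\n", rm_h_xirr());
	return 0;
}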
>
> This untemplates the handlers and keeps only the real mode handlers for
> XICS native (up to POWER8), removing the rest of the dead code. The
> changes in the functions are mechanical, except for a few empty lines
> added to keep checkpatch.pl happy.
>
> The default implemented hcalls list already contains XICS hcalls so
> no change there.
>
> This should not cause any behavioral change.
In the worst case, it impacts performance a bit but only on "old" distros
(kernel < 4.14), I doubt anyone will complain.
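
As background on the "untemplate" wording: book3s_xive_template.c was a
common body that each includer compiled with its own X_PFX prefix and MMIO
accessor macros, stamping out the xive_rm_* and xive_vm_* variants via
token pasting. A minimal standalone illustration of that mechanism (toy
names, not the kernel code):

#include <stdio.h>

#define XGLUE(a, b) a##b
#define GLUE(a, b)  XGLUE(a, b)

/* "Template" body: each inclusion defines X_PFX first, producing a
 * differently named copy of the same function, as the deleted
 * book3s_xive_template.c did for the xive_rm_ and xive_vm_ prefixes. */
#define X_PFX variant_rm_
static void GLUE(X_PFX, hello)(void) { printf("real-mode variant\n"); }
#undef X_PFX

#define X_PFX variant_vm_
static void GLUE(X_PFX, hello)(void) { printf("virtual-mode variant\n"); }
#undef X_PFX

int main(void)
{
	variant_rm_hello();
	variant_vm_hello();
	return 0;
}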
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
Acked-by: Cédric Le Goater <clg at kaod.org>
Thanks,
C.
> ---
> arch/powerpc/kvm/Makefile | 2 +-
> arch/powerpc/include/asm/kvm_ppc.h | 7 -
> arch/powerpc/kvm/book3s_xive.h | 7 -
> arch/powerpc/kvm/book3s_hv_builtin.c | 64 ---
> arch/powerpc/kvm/book3s_hv_rm_xics.c | 5 +
> arch/powerpc/kvm/book3s_hv_rm_xive.c | 46 --
> arch/powerpc/kvm/book3s_xive.c | 638 +++++++++++++++++++++++-
> arch/powerpc/kvm/book3s_xive_template.c | 636 -----------------------
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 12 +-
> 9 files changed, 632 insertions(+), 785 deletions(-)
> delete mode 100644 arch/powerpc/kvm/book3s_hv_rm_xive.c
> delete mode 100644 arch/powerpc/kvm/book3s_xive_template.c
>
> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
> index 8e3681a86074..f17379b0f161 100644
> --- a/arch/powerpc/kvm/Makefile
> +++ b/arch/powerpc/kvm/Makefile
> @@ -73,7 +73,7 @@ kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
> book3s_hv_tm.o
>
> kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
> - book3s_hv_rm_xics.o book3s_hv_rm_xive.o
> + book3s_hv_rm_xics.o
>
> kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
> book3s_hv_tm_builtin.o
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 44200a27371b..a775377a570e 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -787,13 +787,6 @@ long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
> unsigned long dest, unsigned long src);
> long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
> unsigned long slb_v, unsigned int status, bool data);
> -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
> -unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
> -unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
> -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
> - unsigned long mfrr);
> -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
> -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
> void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
>
> /*
> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
> index 09d0657596c3..1e48f72e8aa5 100644
> --- a/arch/powerpc/kvm/book3s_xive.h
> +++ b/arch/powerpc/kvm/book3s_xive.h
> @@ -285,13 +285,6 @@ static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
> return cur & 0x7fffffff;
> }
>
> -extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
> -extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
> -extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
> - unsigned long mfrr);
> -extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
> -extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
> -
> /*
> * Common Xive routines for XICS-over-XIVE and XIVE native
> */
> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
> index 7e52d0beee77..88a8f6473c4e 100644
> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
> @@ -489,70 +489,6 @@ static long kvmppc_read_one_intr(bool *again)
> return kvmppc_check_passthru(xisr, xirr, again);
> }
>
> -#ifdef CONFIG_KVM_XICS
> -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
> -{
> - if (!kvmppc_xics_enabled(vcpu))
> - return H_TOO_HARD;
> - if (xics_on_xive())
> - return xive_rm_h_xirr(vcpu);
> - else
> - return xics_rm_h_xirr(vcpu);
> -}
> -
> -unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
> -{
> - if (!kvmppc_xics_enabled(vcpu))
> - return H_TOO_HARD;
> - vcpu->arch.regs.gpr[5] = get_tb();
> - if (xics_on_xive())
> - return xive_rm_h_xirr(vcpu);
> - else
> - return xics_rm_h_xirr(vcpu);
> -}
> -
> -unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
> -{
> - if (!kvmppc_xics_enabled(vcpu))
> - return H_TOO_HARD;
> - if (xics_on_xive())
> - return xive_rm_h_ipoll(vcpu, server);
> - else
> - return H_TOO_HARD;
> -}
> -
> -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
> - unsigned long mfrr)
> -{
> - if (!kvmppc_xics_enabled(vcpu))
> - return H_TOO_HARD;
> - if (xics_on_xive())
> - return xive_rm_h_ipi(vcpu, server, mfrr);
> - else
> - return xics_rm_h_ipi(vcpu, server, mfrr);
> -}
> -
> -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
> -{
> - if (!kvmppc_xics_enabled(vcpu))
> - return H_TOO_HARD;
> - if (xics_on_xive())
> - return xive_rm_h_cppr(vcpu, cppr);
> - else
> - return xics_rm_h_cppr(vcpu, cppr);
> -}
> -
> -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
> -{
> - if (!kvmppc_xics_enabled(vcpu))
> - return H_TOO_HARD;
> - if (xics_on_xive())
> - return xive_rm_h_eoi(vcpu, xirr);
> - else
> - return xics_rm_h_eoi(vcpu, xirr);
> -}
> -#endif /* CONFIG_KVM_XICS */
> -
> void kvmppc_bad_interrupt(struct pt_regs *regs)
> {
> /*
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
> index 587c33fc4564..e2246b715f68 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
> @@ -479,6 +479,11 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
> }
> }
>
> +unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
> +{
> + vcpu->arch.regs.gpr[5] = get_tb();
> + return xics_rm_h_xirr(vcpu);
> +}
>
> unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
> {
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
> deleted file mode 100644
> index dd9880731bd6..000000000000
> --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c
> +++ /dev/null
> @@ -1,46 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -#include <linux/kernel.h>
> -#include <linux/kvm_host.h>
> -#include <linux/err.h>
> -#include <linux/kernel_stat.h>
> -#include <linux/pgtable.h>
> -
> -#include <asm/kvm_book3s.h>
> -#include <asm/kvm_ppc.h>
> -#include <asm/hvcall.h>
> -#include <asm/xics.h>
> -#include <asm/debug.h>
> -#include <asm/synch.h>
> -#include <asm/cputhreads.h>
> -#include <asm/ppc-opcode.h>
> -#include <asm/pnv-pci.h>
> -#include <asm/opal.h>
> -#include <asm/smp.h>
> -#include <asm/xive.h>
> -#include <asm/xive-regs.h>
> -
> -#include "book3s_xive.h"
> -
> -/* XXX */
> -#include <asm/udbg.h>
> -//#define DBG(fmt...) udbg_printf(fmt)
> -#define DBG(fmt...) do { } while(0)
> -
> -static inline void __iomem *get_tima_phys(void)
> -{
> - return local_paca->kvm_hstate.xive_tima_phys;
> -}
> -
> -#undef XIVE_RUNTIME_CHECKS
> -#define X_PFX xive_rm_
> -#define X_STATIC
> -#define X_STAT_PFX stat_rm_
> -#define __x_tima get_tima_phys()
> -#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_page))
> -#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_page))
> -#define __x_writeb __raw_rm_writeb
> -#define __x_readw __raw_rm_readw
> -#define __x_readq __raw_rm_readq
> -#define __x_writeq __raw_rm_writeq
> -
> -#include "book3s_xive_template.c"
> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index c0ce5531d9bc..65515a96498a 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -30,27 +30,629 @@
>
> #include "book3s_xive.h"
>
> -
> -/*
> - * Virtual mode variants of the hcalls for use on radix/radix
> - * with AIL. They require the VCPU's VP to be "pushed"
> - *
> - * We still instantiate them here because we use some of the
> - * generated utility functions as well in this file.
> - */
> -#define XIVE_RUNTIME_CHECKS
> -#define X_PFX xive_vm_
> -#define X_STATIC static
> -#define X_STAT_PFX stat_vm_
> -#define __x_tima xive_tima
> #define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
> #define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
> -#define __x_writeb __raw_writeb
> -#define __x_readw __raw_readw
> -#define __x_readq __raw_readq
> -#define __x_writeq __raw_writeq
>
> -#include "book3s_xive_template.c"
> +/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
> +#define XICS_DUMMY 1
> +
> +static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
> +{
> + u8 cppr;
> + u16 ack;
> +
> + /*
> + * Ensure any previous store to CPPR is ordered vs.
> + * the subsequent loads from PIPR or ACK.
> + */
> + eieio();
> +
> + /* Perform the acknowledge OS to register cycle. */
> + ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
> +
> + /* Synchronize subsequent queue accesses */
> + mb();
> +
> + /* XXX Check grouping level */
> +
> + /* Anything ? */
> + if (!((ack >> 8) & TM_QW1_NSR_EO))
> + return;
> +
> + /* Grab CPPR of the most favored pending interrupt */
> + cppr = ack & 0xff;
> + if (cppr < 8)
> + xc->pending |= 1 << cppr;
> +
> + /* Check consistency */
> + if (cppr >= xc->hw_cppr)
> + pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
> + smp_processor_id(), cppr, xc->hw_cppr);
> +
> + /*
> + * Update our image of the HW CPPR. We don't yet modify
> + * xc->cppr, this will be done as we scan for interrupts
> + * in the queues.
> + */
> + xc->hw_cppr = cppr;
> +}
> +
> +static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
> +{
> + u64 val;
> +
> + if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
> + offset |= XIVE_ESB_LD_ST_MO;
> +
> + val = __raw_readq(__x_eoi_page(xd) + offset);
> +#ifdef __LITTLE_ENDIAN__
> + val >>= 64-8;
> +#endif
> + return (u8)val;
> +}
> +
> +
> +static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
> +{
> + /* If the XIVE supports the new "store EOI facility, use it */
> + if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
> + __raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
> + else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
> + /*
> + * For LSIs the HW EOI cycle is used rather than PQ bits,
> + * as they are automatically re-triggred in HW when still
> + * pending.
> + */
> + __raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
> + } else {
> + uint64_t eoi_val;
> +
> + /*
> + * Otherwise for EOI, we use the special MMIO that does
> + * a clear of both P and Q and returns the old Q,
> + * except for LSIs where we use the "EOI cycle" special
> + * load.
> + *
> + * This allows us to then do a re-trigger if Q was set
> + * rather than synthetizing an interrupt in software
> + */
> + eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
> +
> + /* Re-trigger if needed */
> + if ((eoi_val & 1) && __x_trig_page(xd))
> + __raw_writeq(0, __x_trig_page(xd));
> + }
> +}
> +
> +enum {
> + scan_fetch,
> + scan_poll,
> + scan_eoi,
> +};
> +
> +static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
> + u8 pending, int scan_type)
> +{
> + u32 hirq = 0;
> + u8 prio = 0xff;
> +
> + /* Find highest pending priority */
> + while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
> + struct xive_q *q;
> + u32 idx, toggle;
> + __be32 *qpage;
> +
> + /*
> + * If pending is 0 this will return 0xff which is what
> + * we want
> + */
> + prio = ffs(pending) - 1;
> +
> + /* Don't scan past the guest cppr */
> + if (prio >= xc->cppr || prio > 7) {
> + if (xc->mfrr < xc->cppr) {
> + prio = xc->mfrr;
> + hirq = XICS_IPI;
> + }
> + break;
> + }
> +
> + /* Grab queue and pointers */
> + q = &xc->queues[prio];
> + idx = q->idx;
> + toggle = q->toggle;
> +
> + /*
> + * Snapshot the queue page. The test further down for EOI
> + * must use the same "copy" that was used by __xive_read_eq
> + * since qpage can be set concurrently and we don't want
> + * to miss an EOI.
> + */
> + qpage = READ_ONCE(q->qpage);
> +
> +skip_ipi:
> + /*
> + * Try to fetch from the queue. Will return 0 for a
> + * non-queueing priority (ie, qpage = 0).
> + */
> + hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
> +
> + /*
> + * If this was a signal for an MFFR change done by
> + * H_IPI we skip it. Additionally, if we were fetching
> + * we EOI it now, thus re-enabling reception of a new
> + * such signal.
> + *
> + * We also need to do that if prio is 0 and we had no
> + * page for the queue. In this case, we have non-queued
> + * IPI that needs to be EOId.
> + *
> + * This is safe because if we have another pending MFRR
> + * change that wasn't observed above, the Q bit will have
> + * been set and another occurrence of the IPI will trigger.
> + */
> + if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
> + if (scan_type == scan_fetch) {
> + xive_vm_source_eoi(xc->vp_ipi,
> + &xc->vp_ipi_data);
> + q->idx = idx;
> + q->toggle = toggle;
> + }
> + /* Loop back on same queue with updated idx/toggle */
> + WARN_ON(hirq && hirq != XICS_IPI);
> + if (hirq)
> + goto skip_ipi;
> + }
> +
> + /* If it's the dummy interrupt, continue searching */
> + if (hirq == XICS_DUMMY)
> + goto skip_ipi;
> +
> + /* Clear the pending bit if the queue is now empty */
> + if (!hirq) {
> + pending &= ~(1 << prio);
> +
> + /*
> + * Check if the queue count needs adjusting due to
> + * interrupts being moved away.
> + */
> + if (atomic_read(&q->pending_count)) {
> + int p = atomic_xchg(&q->pending_count, 0);
> +
> + if (p) {
> + WARN_ON(p > atomic_read(&q->count));
> + atomic_sub(p, &q->count);
> + }
> + }
> + }
> +
> + /*
> + * If the most favoured prio we found pending is less
> + * favored (or equal) than a pending IPI, we return
> + * the IPI instead.
> + */
> + if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
> + prio = xc->mfrr;
> + hirq = XICS_IPI;
> + break;
> + }
> +
> + /* If fetching, update queue pointers */
> + if (scan_type == scan_fetch) {
> + q->idx = idx;
> + q->toggle = toggle;
> + }
> + }
> +
> + /* If we are just taking a "peek", do nothing else */
> + if (scan_type == scan_poll)
> + return hirq;
> +
> + /* Update the pending bits */
> + xc->pending = pending;
> +
> + /*
> + * If this is an EOI that's it, no CPPR adjustment done here,
> + * all we needed was cleanup the stale pending bits and check
> + * if there's anything left.
> + */
> + if (scan_type == scan_eoi)
> + return hirq;
> +
> + /*
> + * If we found an interrupt, adjust what the guest CPPR should
> + * be as if we had just fetched that interrupt from HW.
> + *
> + * Note: This can only make xc->cppr smaller as the previous
> + * loop will only exit with hirq != 0 if prio is lower than
> + * the current xc->cppr. Thus we don't need to re-check xc->mfrr
> + * for pending IPIs.
> + */
> + if (hirq)
> + xc->cppr = prio;
> + /*
> + * If it was an IPI the HW CPPR might have been lowered too much
> + * as the HW interrupt we use for IPIs is routed to priority 0.
> + *
> + * We re-sync it here.
> + */
> + if (xc->cppr != xc->hw_cppr) {
> + xc->hw_cppr = xc->cppr;
> + __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
> + }
> +
> + return hirq;
> +}
> +
> +static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
> +{
> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> + u8 old_cppr;
> + u32 hirq;
> +
> + pr_devel("H_XIRR\n");
> +
> + xc->stat_vm_h_xirr++;
> +
> + /* First collect pending bits from HW */
> + xive_vm_ack_pending(xc);
> +
> + pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
> + xc->pending, xc->hw_cppr, xc->cppr);
> +
> + /* Grab previous CPPR and reverse map it */
> + old_cppr = xive_prio_to_guest(xc->cppr);
> +
> + /* Scan for actual interrupts */
> + hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
> +
> + pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
> + hirq, xc->hw_cppr, xc->cppr);
> +
> + /* That should never hit */
> + if (hirq & 0xff000000)
> + pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
> +
> + /*
> + * XXX We could check if the interrupt is masked here and
> + * filter it. If we chose to do so, we would need to do:
> + *
> + * if (masked) {
> + * lock();
> + * if (masked) {
> + * old_Q = true;
> + * hirq = 0;
> + * }
> + * unlock();
> + * }
> + */
> +
> + /* Return interrupt and old CPPR in GPR4 */
> + vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
> +
> + return H_SUCCESS;
> +}
> +
> +static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
> +{
> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> + u8 pending = xc->pending;
> + u32 hirq;
> +
> + pr_devel("H_IPOLL(server=%ld)\n", server);
> +
> + xc->stat_vm_h_ipoll++;
> +
> + /* Grab the target VCPU if not the current one */
> + if (xc->server_num != server) {
> + vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
> + if (!vcpu)
> + return H_PARAMETER;
> + xc = vcpu->arch.xive_vcpu;
> +
> + /* Scan all priorities */
> + pending = 0xff;
> + } else {
> + /* Grab pending interrupt if any */
> + __be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
> + u8 pipr = be64_to_cpu(qw1) & 0xff;
> +
> + if (pipr < 8)
> + pending |= 1 << pipr;
> + }
> +
> + hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
> +
> + /* Return interrupt and old CPPR in GPR4 */
> + vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
> +
> + return H_SUCCESS;
> +}
> +
> +static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
> +{
> + u8 pending, prio;
> +
> + pending = xc->pending;
> + if (xc->mfrr != 0xff) {
> + if (xc->mfrr < 8)
> + pending |= 1 << xc->mfrr;
> + else
> + pending |= 0x80;
> + }
> + if (!pending)
> + return;
> + prio = ffs(pending) - 1;
> +
> + __raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
> +}
> +
> +static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
> + struct kvmppc_xive_vcpu *xc)
> +{
> + unsigned int prio;
> +
> + /* For each priority that is now masked */
> + for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
> + struct xive_q *q = &xc->queues[prio];
> + struct kvmppc_xive_irq_state *state;
> + struct kvmppc_xive_src_block *sb;
> + u32 idx, toggle, entry, irq, hw_num;
> + struct xive_irq_data *xd;
> + __be32 *qpage;
> + u16 src;
> +
> + idx = q->idx;
> + toggle = q->toggle;
> + qpage = READ_ONCE(q->qpage);
> + if (!qpage)
> + continue;
> +
> + /* For each interrupt in the queue */
> + for (;;) {
> + entry = be32_to_cpup(qpage + idx);
> +
> + /* No more ? */
> + if ((entry >> 31) == toggle)
> + break;
> + irq = entry & 0x7fffffff;
> +
> + /* Skip dummies and IPIs */
> + if (irq == XICS_DUMMY || irq == XICS_IPI)
> + goto next;
> + sb = kvmppc_xive_find_source(xive, irq, &src);
> + if (!sb)
> + goto next;
> + state = &sb->irq_state[src];
> +
> + /* Has it been rerouted ? */
> + if (xc->server_num == state->act_server)
> + goto next;
> +
> + /*
> + * Allright, it *has* been re-routed, kill it from
> + * the queue.
> + */
> + qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
> +
> + /* Find the HW interrupt */
> + kvmppc_xive_select_irq(state, &hw_num, &xd);
> +
> + /* If it's not an LSI, set PQ to 11 the EOI will force a resend */
> + if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
> + xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
> +
> + /* EOI the source */
> + xive_vm_source_eoi(hw_num, xd);
> +
> +next:
> + idx = (idx + 1) & q->msk;
> + if (idx == 0)
> + toggle ^= 1;
> + }
> + }
> +}
> +
> +static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
> +{
> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> + struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> + u8 old_cppr;
> +
> + pr_devel("H_CPPR(cppr=%ld)\n", cppr);
> +
> + xc->stat_vm_h_cppr++;
> +
> + /* Map CPPR */
> + cppr = xive_prio_from_guest(cppr);
> +
> + /* Remember old and update SW state */
> + old_cppr = xc->cppr;
> + xc->cppr = cppr;
> +
> + /*
> + * Order the above update of xc->cppr with the subsequent
> + * read of xc->mfrr inside push_pending_to_hw()
> + */
> + smp_mb();
> +
> + if (cppr > old_cppr) {
> + /*
> + * We are masking less, we need to look for pending things
> + * to deliver and set VP pending bits accordingly to trigger
> + * a new interrupt otherwise we might miss MFRR changes for
> + * which we have optimized out sending an IPI signal.
> + */
> + xive_vm_push_pending_to_hw(xc);
> + } else {
> + /*
> + * We are masking more, we need to check the queue for any
> + * interrupt that has been routed to another CPU, take
> + * it out (replace it with the dummy) and retrigger it.
> + *
> + * This is necessary since those interrupts may otherwise
> + * never be processed, at least not until this CPU restores
> + * its CPPR.
> + *
> + * This is in theory racy vs. HW adding new interrupts to
> + * the queue. In practice this works because the interesting
> + * cases are when the guest has done a set_xive() to move the
> + * interrupt away, which flushes the xive, followed by the
> + * target CPU doing a H_CPPR. So any new interrupt coming into
> + * the queue must still be routed to us and isn't a source
> + * of concern.
> + */
> + xive_vm_scan_for_rerouted_irqs(xive, xc);
> + }
> +
> + /* Apply new CPPR */
> + xc->hw_cppr = cppr;
> + __raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
> +
> + return H_SUCCESS;
> +}
> +
> +static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
> +{
> + struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> + struct kvmppc_xive_src_block *sb;
> + struct kvmppc_xive_irq_state *state;
> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> + struct xive_irq_data *xd;
> + u8 new_cppr = xirr >> 24;
> + u32 irq = xirr & 0x00ffffff, hw_num;
> + u16 src;
> + int rc = 0;
> +
> + pr_devel("H_EOI(xirr=%08lx)\n", xirr);
> +
> + xc->stat_vm_h_eoi++;
> +
> + xc->cppr = xive_prio_from_guest(new_cppr);
> +
> + /*
> + * IPIs are synthetized from MFRR and thus don't need
> + * any special EOI handling. The underlying interrupt
> + * used to signal MFRR changes is EOId when fetched from
> + * the queue.
> + */
> + if (irq == XICS_IPI || irq == 0) {
> + /*
> + * This barrier orders the setting of xc->cppr vs.
> + * subsquent test of xc->mfrr done inside
> + * scan_interrupts and push_pending_to_hw
> + */
> + smp_mb();
> + goto bail;
> + }
> +
> + /* Find interrupt source */
> + sb = kvmppc_xive_find_source(xive, irq, &src);
> + if (!sb) {
> + pr_devel(" source not found !\n");
> + rc = H_PARAMETER;
> + /* Same as above */
> + smp_mb();
> + goto bail;
> + }
> + state = &sb->irq_state[src];
> + kvmppc_xive_select_irq(state, &hw_num, &xd);
> +
> + state->in_eoi = true;
> +
> + /*
> + * This barrier orders both setting of in_eoi above vs,
> + * subsequent test of guest_priority, and the setting
> + * of xc->cppr vs. subsquent test of xc->mfrr done inside
> + * scan_interrupts and push_pending_to_hw
> + */
> + smp_mb();
> +
> +again:
> + if (state->guest_priority == MASKED) {
> + arch_spin_lock(&sb->lock);
> + if (state->guest_priority != MASKED) {
> + arch_spin_unlock(&sb->lock);
> + goto again;
> + }
> + pr_devel(" EOI on saved P...\n");
> +
> + /* Clear old_p, that will cause unmask to perform an EOI */
> + state->old_p = false;
> +
> + arch_spin_unlock(&sb->lock);
> + } else {
> + pr_devel(" EOI on source...\n");
> +
> + /* Perform EOI on the source */
> + xive_vm_source_eoi(hw_num, xd);
> +
> + /* If it's an emulated LSI, check level and resend */
> + if (state->lsi && state->asserted)
> + __raw_writeq(0, __x_trig_page(xd));
> +
> + }
> +
> + /*
> + * This barrier orders the above guest_priority check
> + * and spin_lock/unlock with clearing in_eoi below.
> + *
> + * It also has to be a full mb() as it must ensure
> + * the MMIOs done in source_eoi() are completed before
> + * state->in_eoi is visible.
> + */
> + mb();
> + state->in_eoi = false;
> +bail:
> +
> + /* Re-evaluate pending IRQs and update HW */
> + xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
> + xive_vm_push_pending_to_hw(xc);
> + pr_devel(" after scan pending=%02x\n", xc->pending);
> +
> + /* Apply new CPPR */
> + xc->hw_cppr = xc->cppr;
> + __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
> +
> + return rc;
> +}
> +
> +static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
> + unsigned long mfrr)
> +{
> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +
> + pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
> +
> + xc->stat_vm_h_ipi++;
> +
> + /* Find target */
> + vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
> + if (!vcpu)
> + return H_PARAMETER;
> + xc = vcpu->arch.xive_vcpu;
> +
> + /* Locklessly write over MFRR */
> + xc->mfrr = mfrr;
> +
> + /*
> + * The load of xc->cppr below and the subsequent MMIO store
> + * to the IPI must happen after the above mfrr update is
> + * globally visible so that:
> + *
> + * - Synchronize with another CPU doing an H_EOI or a H_CPPR
> + * updating xc->cppr then reading xc->mfrr.
> + *
> + * - The target of the IPI sees the xc->mfrr update
> + */
> + mb();
> +
> + /* Shoot the IPI if most favored than target cppr */
> + if (mfrr < xc->cppr)
> + __raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
> +
> + return H_SUCCESS;
> +}
>
> /*
> * We leave a gap of a couple of interrupts in the queue to
> diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
> deleted file mode 100644
> index b0015e05d99a..000000000000
> --- a/arch/powerpc/kvm/book3s_xive_template.c
> +++ /dev/null
> @@ -1,636 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0-only
> -/*
> - * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
> - */
> -
> -/* File to be included by other .c files */
> -
> -#define XGLUE(a,b) a##b
> -#define GLUE(a,b) XGLUE(a,b)
> -
> -/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
> -#define XICS_DUMMY 1
> -
> -static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
> -{
> - u8 cppr;
> - u16 ack;
> -
> - /*
> - * Ensure any previous store to CPPR is ordered vs.
> - * the subsequent loads from PIPR or ACK.
> - */
> - eieio();
> -
> - /* Perform the acknowledge OS to register cycle. */
> - ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
> -
> - /* Synchronize subsequent queue accesses */
> - mb();
> -
> - /* XXX Check grouping level */
> -
> - /* Anything ? */
> - if (!((ack >> 8) & TM_QW1_NSR_EO))
> - return;
> -
> - /* Grab CPPR of the most favored pending interrupt */
> - cppr = ack & 0xff;
> - if (cppr < 8)
> - xc->pending |= 1 << cppr;
> -
> -#ifdef XIVE_RUNTIME_CHECKS
> - /* Check consistency */
> - if (cppr >= xc->hw_cppr)
> - pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
> - smp_processor_id(), cppr, xc->hw_cppr);
> -#endif
> -
> - /*
> - * Update our image of the HW CPPR. We don't yet modify
> - * xc->cppr, this will be done as we scan for interrupts
> - * in the queues.
> - */
> - xc->hw_cppr = cppr;
> -}
> -
> -static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
> -{
> - u64 val;
> -
> - if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
> - offset |= XIVE_ESB_LD_ST_MO;
> -
> - val =__x_readq(__x_eoi_page(xd) + offset);
> -#ifdef __LITTLE_ENDIAN__
> - val >>= 64-8;
> -#endif
> - return (u8)val;
> -}
> -
> -
> -static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
> -{
> - /* If the XIVE supports the new "store EOI facility, use it */
> - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
> - __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
> - else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
> - /*
> - * For LSIs the HW EOI cycle is used rather than PQ bits,
> - * as they are automatically re-triggred in HW when still
> - * pending.
> - */
> - __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
> - } else {
> - uint64_t eoi_val;
> -
> - /*
> - * Otherwise for EOI, we use the special MMIO that does
> - * a clear of both P and Q and returns the old Q,
> - * except for LSIs where we use the "EOI cycle" special
> - * load.
> - *
> - * This allows us to then do a re-trigger if Q was set
> - * rather than synthetizing an interrupt in software
> - */
> - eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
> -
> - /* Re-trigger if needed */
> - if ((eoi_val & 1) && __x_trig_page(xd))
> - __x_writeq(0, __x_trig_page(xd));
> - }
> -}
> -
> -enum {
> - scan_fetch,
> - scan_poll,
> - scan_eoi,
> -};
> -
> -static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
> - u8 pending, int scan_type)
> -{
> - u32 hirq = 0;
> - u8 prio = 0xff;
> -
> - /* Find highest pending priority */
> - while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
> - struct xive_q *q;
> - u32 idx, toggle;
> - __be32 *qpage;
> -
> - /*
> - * If pending is 0 this will return 0xff which is what
> - * we want
> - */
> - prio = ffs(pending) - 1;
> -
> - /* Don't scan past the guest cppr */
> - if (prio >= xc->cppr || prio > 7) {
> - if (xc->mfrr < xc->cppr) {
> - prio = xc->mfrr;
> - hirq = XICS_IPI;
> - }
> - break;
> - }
> -
> - /* Grab queue and pointers */
> - q = &xc->queues[prio];
> - idx = q->idx;
> - toggle = q->toggle;
> -
> - /*
> - * Snapshot the queue page. The test further down for EOI
> - * must use the same "copy" that was used by __xive_read_eq
> - * since qpage can be set concurrently and we don't want
> - * to miss an EOI.
> - */
> - qpage = READ_ONCE(q->qpage);
> -
> -skip_ipi:
> - /*
> - * Try to fetch from the queue. Will return 0 for a
> - * non-queueing priority (ie, qpage = 0).
> - */
> - hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
> -
> - /*
> - * If this was a signal for an MFFR change done by
> - * H_IPI we skip it. Additionally, if we were fetching
> - * we EOI it now, thus re-enabling reception of a new
> - * such signal.
> - *
> - * We also need to do that if prio is 0 and we had no
> - * page for the queue. In this case, we have non-queued
> - * IPI that needs to be EOId.
> - *
> - * This is safe because if we have another pending MFRR
> - * change that wasn't observed above, the Q bit will have
> - * been set and another occurrence of the IPI will trigger.
> - */
> - if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
> - if (scan_type == scan_fetch) {
> - GLUE(X_PFX,source_eoi)(xc->vp_ipi,
> - &xc->vp_ipi_data);
> - q->idx = idx;
> - q->toggle = toggle;
> - }
> - /* Loop back on same queue with updated idx/toggle */
> -#ifdef XIVE_RUNTIME_CHECKS
> - WARN_ON(hirq && hirq != XICS_IPI);
> -#endif
> - if (hirq)
> - goto skip_ipi;
> - }
> -
> - /* If it's the dummy interrupt, continue searching */
> - if (hirq == XICS_DUMMY)
> - goto skip_ipi;
> -
> - /* Clear the pending bit if the queue is now empty */
> - if (!hirq) {
> - pending &= ~(1 << prio);
> -
> - /*
> - * Check if the queue count needs adjusting due to
> - * interrupts being moved away.
> - */
> - if (atomic_read(&q->pending_count)) {
> - int p = atomic_xchg(&q->pending_count, 0);
> - if (p) {
> -#ifdef XIVE_RUNTIME_CHECKS
> - WARN_ON(p > atomic_read(&q->count));
> -#endif
> - atomic_sub(p, &q->count);
> - }
> - }
> - }
> -
> - /*
> - * If the most favoured prio we found pending is less
> - * favored (or equal) than a pending IPI, we return
> - * the IPI instead.
> - */
> - if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
> - prio = xc->mfrr;
> - hirq = XICS_IPI;
> - break;
> - }
> -
> - /* If fetching, update queue pointers */
> - if (scan_type == scan_fetch) {
> - q->idx = idx;
> - q->toggle = toggle;
> - }
> - }
> -
> - /* If we are just taking a "peek", do nothing else */
> - if (scan_type == scan_poll)
> - return hirq;
> -
> - /* Update the pending bits */
> - xc->pending = pending;
> -
> - /*
> - * If this is an EOI that's it, no CPPR adjustment done here,
> - * all we needed was cleanup the stale pending bits and check
> - * if there's anything left.
> - */
> - if (scan_type == scan_eoi)
> - return hirq;
> -
> - /*
> - * If we found an interrupt, adjust what the guest CPPR should
> - * be as if we had just fetched that interrupt from HW.
> - *
> - * Note: This can only make xc->cppr smaller as the previous
> - * loop will only exit with hirq != 0 if prio is lower than
> - * the current xc->cppr. Thus we don't need to re-check xc->mfrr
> - * for pending IPIs.
> - */
> - if (hirq)
> - xc->cppr = prio;
> - /*
> - * If it was an IPI the HW CPPR might have been lowered too much
> - * as the HW interrupt we use for IPIs is routed to priority 0.
> - *
> - * We re-sync it here.
> - */
> - if (xc->cppr != xc->hw_cppr) {
> - xc->hw_cppr = xc->cppr;
> - __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
> - }
> -
> - return hirq;
> -}
> -
> -X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
> -{
> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> - u8 old_cppr;
> - u32 hirq;
> -
> - pr_devel("H_XIRR\n");
> -
> - xc->GLUE(X_STAT_PFX,h_xirr)++;
> -
> - /* First collect pending bits from HW */
> - GLUE(X_PFX,ack_pending)(xc);
> -
> - pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
> - xc->pending, xc->hw_cppr, xc->cppr);
> -
> - /* Grab previous CPPR and reverse map it */
> - old_cppr = xive_prio_to_guest(xc->cppr);
> -
> - /* Scan for actual interrupts */
> - hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
> -
> - pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
> - hirq, xc->hw_cppr, xc->cppr);
> -
> -#ifdef XIVE_RUNTIME_CHECKS
> - /* That should never hit */
> - if (hirq & 0xff000000)
> - pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
> -#endif
> -
> - /*
> - * XXX We could check if the interrupt is masked here and
> - * filter it. If we chose to do so, we would need to do:
> - *
> - * if (masked) {
> - * lock();
> - * if (masked) {
> - * old_Q = true;
> - * hirq = 0;
> - * }
> - * unlock();
> - * }
> - */
> -
> - /* Return interrupt and old CPPR in GPR4 */
> - vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
> -
> - return H_SUCCESS;
> -}
> -
> -X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
> -{
> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> - u8 pending = xc->pending;
> - u32 hirq;
> -
> - pr_devel("H_IPOLL(server=%ld)\n", server);
> -
> - xc->GLUE(X_STAT_PFX,h_ipoll)++;
> -
> - /* Grab the target VCPU if not the current one */
> - if (xc->server_num != server) {
> - vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
> - if (!vcpu)
> - return H_PARAMETER;
> - xc = vcpu->arch.xive_vcpu;
> -
> - /* Scan all priorities */
> - pending = 0xff;
> - } else {
> - /* Grab pending interrupt if any */
> - __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
> - u8 pipr = be64_to_cpu(qw1) & 0xff;
> - if (pipr < 8)
> - pending |= 1 << pipr;
> - }
> -
> - hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
> -
> - /* Return interrupt and old CPPR in GPR4 */
> - vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
> -
> - return H_SUCCESS;
> -}
> -
> -static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
> -{
> - u8 pending, prio;
> -
> - pending = xc->pending;
> - if (xc->mfrr != 0xff) {
> - if (xc->mfrr < 8)
> - pending |= 1 << xc->mfrr;
> - else
> - pending |= 0x80;
> - }
> - if (!pending)
> - return;
> - prio = ffs(pending) - 1;
> -
> - __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
> -}
> -
> -static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
> - struct kvmppc_xive_vcpu *xc)
> -{
> - unsigned int prio;
> -
> - /* For each priority that is now masked */
> - for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
> - struct xive_q *q = &xc->queues[prio];
> - struct kvmppc_xive_irq_state *state;
> - struct kvmppc_xive_src_block *sb;
> - u32 idx, toggle, entry, irq, hw_num;
> - struct xive_irq_data *xd;
> - __be32 *qpage;
> - u16 src;
> -
> - idx = q->idx;
> - toggle = q->toggle;
> - qpage = READ_ONCE(q->qpage);
> - if (!qpage)
> - continue;
> -
> - /* For each interrupt in the queue */
> - for (;;) {
> - entry = be32_to_cpup(qpage + idx);
> -
> - /* No more ? */
> - if ((entry >> 31) == toggle)
> - break;
> - irq = entry & 0x7fffffff;
> -
> - /* Skip dummies and IPIs */
> - if (irq == XICS_DUMMY || irq == XICS_IPI)
> - goto next;
> - sb = kvmppc_xive_find_source(xive, irq, &src);
> - if (!sb)
> - goto next;
> - state = &sb->irq_state[src];
> -
> - /* Has it been rerouted ? */
> - if (xc->server_num == state->act_server)
> - goto next;
> -
> - /*
> - * Allright, it *has* been re-routed, kill it from
> - * the queue.
> - */
> - qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
> -
> - /* Find the HW interrupt */
> - kvmppc_xive_select_irq(state, &hw_num, &xd);
> -
> - /* If it's not an LSI, set PQ to 11 the EOI will force a resend */
> - if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
> - GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
> -
> - /* EOI the source */
> - GLUE(X_PFX,source_eoi)(hw_num, xd);
> -
> - next:
> - idx = (idx + 1) & q->msk;
> - if (idx == 0)
> - toggle ^= 1;
> - }
> - }
> -}
> -
> -X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
> -{
> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> - struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> - u8 old_cppr;
> -
> - pr_devel("H_CPPR(cppr=%ld)\n", cppr);
> -
> - xc->GLUE(X_STAT_PFX,h_cppr)++;
> -
> - /* Map CPPR */
> - cppr = xive_prio_from_guest(cppr);
> -
> - /* Remember old and update SW state */
> - old_cppr = xc->cppr;
> - xc->cppr = cppr;
> -
> - /*
> - * Order the above update of xc->cppr with the subsequent
> - * read of xc->mfrr inside push_pending_to_hw()
> - */
> - smp_mb();
> -
> - if (cppr > old_cppr) {
> - /*
> - * We are masking less, we need to look for pending things
> - * to deliver and set VP pending bits accordingly to trigger
> - * a new interrupt otherwise we might miss MFRR changes for
> - * which we have optimized out sending an IPI signal.
> - */
> - GLUE(X_PFX,push_pending_to_hw)(xc);
> - } else {
> - /*
> - * We are masking more, we need to check the queue for any
> - * interrupt that has been routed to another CPU, take
> - * it out (replace it with the dummy) and retrigger it.
> - *
> - * This is necessary since those interrupts may otherwise
> - * never be processed, at least not until this CPU restores
> - * its CPPR.
> - *
> - * This is in theory racy vs. HW adding new interrupts to
> - * the queue. In practice this works because the interesting
> - * cases are when the guest has done a set_xive() to move the
> - * interrupt away, which flushes the xive, followed by the
> - * target CPU doing a H_CPPR. So any new interrupt coming into
> - * the queue must still be routed to us and isn't a source
> - * of concern.
> - */
> - GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
> - }
> -
> - /* Apply new CPPR */
> - xc->hw_cppr = cppr;
> - __x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
> -
> - return H_SUCCESS;
> -}
> -
> -X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
> -{
> - struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> - struct kvmppc_xive_src_block *sb;
> - struct kvmppc_xive_irq_state *state;
> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> - struct xive_irq_data *xd;
> - u8 new_cppr = xirr >> 24;
> - u32 irq = xirr & 0x00ffffff, hw_num;
> - u16 src;
> - int rc = 0;
> -
> - pr_devel("H_EOI(xirr=%08lx)\n", xirr);
> -
> - xc->GLUE(X_STAT_PFX,h_eoi)++;
> -
> - xc->cppr = xive_prio_from_guest(new_cppr);
> -
> - /*
> - * IPIs are synthetized from MFRR and thus don't need
> - * any special EOI handling. The underlying interrupt
> - * used to signal MFRR changes is EOId when fetched from
> - * the queue.
> - */
> - if (irq == XICS_IPI || irq == 0) {
> - /*
> - * This barrier orders the setting of xc->cppr vs.
> - * subsquent test of xc->mfrr done inside
> - * scan_interrupts and push_pending_to_hw
> - */
> - smp_mb();
> - goto bail;
> - }
> -
> - /* Find interrupt source */
> - sb = kvmppc_xive_find_source(xive, irq, &src);
> - if (!sb) {
> - pr_devel(" source not found !\n");
> - rc = H_PARAMETER;
> - /* Same as above */
> - smp_mb();
> - goto bail;
> - }
> - state = &sb->irq_state[src];
> - kvmppc_xive_select_irq(state, &hw_num, &xd);
> -
> - state->in_eoi = true;
> -
> - /*
> - * This barrier orders both setting of in_eoi above vs,
> - * subsequent test of guest_priority, and the setting
> - * of xc->cppr vs. subsquent test of xc->mfrr done inside
> - * scan_interrupts and push_pending_to_hw
> - */
> - smp_mb();
> -
> -again:
> - if (state->guest_priority == MASKED) {
> - arch_spin_lock(&sb->lock);
> - if (state->guest_priority != MASKED) {
> - arch_spin_unlock(&sb->lock);
> - goto again;
> - }
> - pr_devel(" EOI on saved P...\n");
> -
> - /* Clear old_p, that will cause unmask to perform an EOI */
> - state->old_p = false;
> -
> - arch_spin_unlock(&sb->lock);
> - } else {
> - pr_devel(" EOI on source...\n");
> -
> - /* Perform EOI on the source */
> - GLUE(X_PFX,source_eoi)(hw_num, xd);
> -
> - /* If it's an emulated LSI, check level and resend */
> - if (state->lsi && state->asserted)
> - __x_writeq(0, __x_trig_page(xd));
> -
> - }
> -
> - /*
> - * This barrier orders the above guest_priority check
> - * and spin_lock/unlock with clearing in_eoi below.
> - *
> - * It also has to be a full mb() as it must ensure
> - * the MMIOs done in source_eoi() are completed before
> - * state->in_eoi is visible.
> - */
> - mb();
> - state->in_eoi = false;
> -bail:
> -
> - /* Re-evaluate pending IRQs and update HW */
> - GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
> - GLUE(X_PFX,push_pending_to_hw)(xc);
> - pr_devel(" after scan pending=%02x\n", xc->pending);
> -
> - /* Apply new CPPR */
> - xc->hw_cppr = xc->cppr;
> - __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
> -
> - return rc;
> -}
> -
> -X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
> - unsigned long mfrr)
> -{
> - struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> -
> - pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
> -
> - xc->GLUE(X_STAT_PFX,h_ipi)++;
> -
> - /* Find target */
> - vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
> - if (!vcpu)
> - return H_PARAMETER;
> - xc = vcpu->arch.xive_vcpu;
> -
> - /* Locklessly write over MFRR */
> - xc->mfrr = mfrr;
> -
> - /*
> - * The load of xc->cppr below and the subsequent MMIO store
> - * to the IPI must happen after the above mfrr update is
> - * globally visible so that:
> - *
> - * - Synchronize with another CPU doing an H_EOI or a H_CPPR
> - * updating xc->cppr then reading xc->mfrr.
> - *
> - * - The target of the IPI sees the xc->mfrr update
> - */
> - mb();
> -
> - /* Shoot the IPI if most favored than target cppr */
> - if (mfrr < xc->cppr)
> - __x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
> -
> - return H_SUCCESS;
> -}
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 44d74bfe05df..5003563ca38f 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1803,11 +1803,11 @@ hcall_real_table:
> .long 0 /* 0x5c */
> .long 0 /* 0x60 */
> #ifdef CONFIG_KVM_XICS
> - .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
> - .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
> - .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
> - .long DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
> - .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
> + .long DOTSYM(xics_rm_h_eoi) - hcall_real_table
> + .long DOTSYM(xics_rm_h_cppr) - hcall_real_table
> + .long DOTSYM(xics_rm_h_ipi) - hcall_real_table
> + .long 0 /* 0x70 - H_IPOLL */
> + .long DOTSYM(xics_rm_h_xirr) - hcall_real_table
> #else
> .long 0 /* 0x64 - H_EOI */
> .long 0 /* 0x68 - H_CPPR */
> @@ -1977,7 +1977,7 @@ hcall_real_table:
> .long 0 /* 0x2f4 */
> .long 0 /* 0x2f8 */
> #ifdef CONFIG_KVM_XICS
> - .long DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
> + .long DOTSYM(xics_rm_h_xirr_x) - hcall_real_table
> #else
> .long 0 /* 0x2fc - H_XIRR_X*/
> #endif