[PATCH v2 03/16] KVM: PPC: Book3S HV: XIVE: introduce a new capability KVM_CAP_PPC_IRQ_XIVE
Cédric Le Goater
clg at kaod.org
Fri Feb 22 22:28:27 AEDT 2019
The user interface exposes a new capability to let QEMU connect the
vCPU to the XIVE KVM device if required. The capability is only
advertised on a PowerNV Hypervisor as support for nested guests
(pseries KVM Hypervisor) is not yet available.
Internally, the interface to the new KVM device is protected with a
new interrupt mode: KVMPPC_IRQ_XIVE.
Signed-off-by: Cédric Le Goater <clg at kaod.org>
---
arch/powerpc/include/asm/kvm_host.h | 1 +
arch/powerpc/include/asm/kvm_ppc.h | 13 +++
arch/powerpc/kvm/book3s_xive.h | 6 ++
include/uapi/linux/kvm.h | 1 +
arch/powerpc/kvm/book3s_xive.c | 67 +++++++-----
arch/powerpc/kvm/book3s_xive_native.c | 144 ++++++++++++++++++++++++++
arch/powerpc/kvm/powerpc.c | 33 ++++++
Documentation/virtual/kvm/api.txt | 9 ++
8 files changed, 246 insertions(+), 28 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9f75a75a07f2..eb8581be0ee8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -448,6 +448,7 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_DEFAULT 0
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
+#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */
#define MMIO_HPTE_CACHE_SIZE 4
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 4b72ddde7dc1..1e61877fe147 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -594,6 +594,14 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
+}
+
+extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
extern void kvmppc_xive_native_init_module(void);
extern void kvmppc_xive_native_exit_module(void);
@@ -621,6 +629,11 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+ { return 0; }
+static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
static inline void kvmppc_xive_native_init_module(void) { }
static inline void kvmppc_xive_native_exit_module(void) { }
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index a08ae6fd4c51..bcb1bbcf0359 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -248,5 +248,11 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
+/*
+ * Common Xive routines for XICS-over-XIVE and XIVE native
+ */
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu);
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu);
+
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e6368163d3a0..52bf74a1616e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_VM_IPA_SIZE 165
#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
#define KVM_CAP_HYPERV_CPUID 167
+#define KVM_CAP_PPC_IRQ_XIVE 168
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f78d002f0fe0..d1cc18a5b1c4 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1049,7 +1049,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
}
EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
-static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvm *kvm = vcpu->kvm;
@@ -1883,6 +1883,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int i;
+
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+ u32 i0, i1, idx;
+
+ if (!q->qpage && !xc->esc_virq[i])
+ continue;
+
+ seq_printf(m, " [q%d]: ", i);
+
+ if (q->qpage) {
+ idx = q->idx;
+ i0 = be32_to_cpup(q->qpage + idx);
+ idx = (idx + 1) & q->msk;
+ i1 = be32_to_cpup(q->qpage + idx);
+ seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
+ i0, i1);
+ }
+ if (xc->esc_virq[i]) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+ struct xive_irq_data *xd =
+ irq_data_get_irq_handler_data(d);
+ u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+ seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+ (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+ (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+ xc->esc_virq[i], pq, xd->eoi_page);
+ seq_puts(m, "\n");
+ }
+ }
+ return 0;
+}
static int xive_debug_show(struct seq_file *m, void *private)
{
@@ -1908,7 +1945,6 @@ static int xive_debug_show(struct seq_file *m, void *private)
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
- unsigned int i;
if (!xc)
continue;
@@ -1918,33 +1954,8 @@ static int xive_debug_show(struct seq_file *m, void *private)
xc->server_num, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
- for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
- struct xive_q *q = &xc->queues[i];
- u32 i0, i1, idx;
- if (!q->qpage && !xc->esc_virq[i])
- continue;
-
- seq_printf(m, " [q%d]: ", i);
-
- if (q->qpage) {
- idx = q->idx;
- i0 = be32_to_cpup(q->qpage + idx);
- idx = (idx + 1) & q->msk;
- i1 = be32_to_cpup(q->qpage + idx);
- seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
- }
- if (xc->esc_virq[i]) {
- struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
- struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
- seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
- (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
- (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
- xc->esc_virq[i], pq, xd->eoi_page);
- seq_printf(m, "\n");
- }
- }
+ kvmppc_xive_debug_show_queues(m, vcpu);
t_rm_h_xirr += xc->stat_rm_h_xirr;
t_rm_h_ipoll += xc->stat_rm_h_ipoll;
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index e475ce83ad14..1f3da47a4a6a 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -31,6 +31,128 @@
#include "book3s_xive.h"
+static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct xive_q *q = &xc->queues[prio];
+
+ xive_native_disable_queue(xc->vp_id, q, prio);
+ if (q->qpage) {
+ put_page(virt_to_page(q->qpage));
+ q->qpage = NULL;
+ }
+}
+
+void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ int i;
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return;
+
+ if (!xc)
+ return;
+
+ pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+ /* Ensure no interrupt is still routed to that VP */
+ xc->valid = false;
+ kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Free the queues & associated interrupts */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ /* Free the escalation irq */
+ if (xc->esc_virq[i]) {
+ free_irq(xc->esc_virq[i], vcpu);
+ irq_dispose_mapping(xc->esc_virq[i]);
+ kfree(xc->esc_virq_names[i]);
+ xc->esc_virq[i] = 0;
+ }
+
+ /* Free the queue */
+ kvmppc_xive_native_cleanup_queue(vcpu, i);
+ }
+
+ /* Free the VP */
+ kfree(xc);
+
+ /* Cleanup the vcpu */
+ vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+ vcpu->arch.xive_vcpu = NULL;
+}
+
+int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu)
+{
+ struct kvmppc_xive *xive = dev->private;
+ struct kvmppc_xive_vcpu *xc;
+ int rc;
+
+ pr_devel("native_connect_vcpu(cpu=%d)\n", cpu);
+
+ if (dev->ops != &kvm_xive_native_ops) {
+ pr_devel("Wrong ops !\n");
+ return -EPERM;
+ }
+ if (xive->kvm != vcpu->kvm)
+ return -EPERM;
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+ return -EBUSY;
+ if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
+ pr_devel("Duplicate !\n");
+ return -EEXIST;
+ }
+ if (cpu >= KVM_MAX_VCPUS) {
+ pr_devel("Out of bounds !\n");
+ return -EINVAL;
+ }
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+ if (!xc)
+ return -ENOMEM;
+
+ mutex_lock(&vcpu->kvm->lock);
+ vcpu->arch.xive_vcpu = xc;
+ xc->xive = xive;
+ xc->vcpu = vcpu;
+ xc->server_num = cpu;
+ xc->vp_id = xive->vp_base + cpu;
+ xc->valid = true;
+
+ rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+ if (rc) {
+ pr_err("Failed to get VP info from OPAL: %d\n", rc);
+ goto bail;
+ }
+
+ /*
+ * Enable the VP first as the single escalation mode will
+ * affect escalation interrupts numbering
+ */
+ rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ if (rc) {
+ pr_err("Failed to enable VP in OPAL: %d\n", rc);
+ goto bail;
+ }
+
+ /* Configure VCPU fields for use by assembly push/pull */
+ vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+ vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+ /* TODO: initialize queues ? */
+
+bail:
+ vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
+ mutex_unlock(&vcpu->kvm->lock);
+ if (rc)
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+
+ return rc;
+}
+
static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
@@ -126,10 +248,32 @@ static int xive_native_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xive *xive = m->private;
struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned int i;
if (!kvm)
return 0;
+ seq_puts(m, "=========\nVCPU state\n=========\n");
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ if (!xc)
+ continue;
+
+ seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+ xc->server_num,
+ vcpu->arch.xive_saved_state.nsr,
+ vcpu->arch.xive_saved_state.cppr,
+ vcpu->arch.xive_saved_state.ipb,
+ vcpu->arch.xive_saved_state.pipr,
+ vcpu->arch.xive_saved_state.w01,
+ (u32) vcpu->arch.xive_cam_word);
+
+ kvmppc_xive_debug_show_queues(m, vcpu);
+ }
+
return 0;
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8c69af10f91d..a38a643a24dd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -570,6 +570,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_CPU_CHAR:
r = 1;
break;
+#ifdef CONFIG_KVM_XIVE
+ case KVM_CAP_PPC_IRQ_XIVE:
+ /* only for PowerNV */
+ r = !!cpu_has_feature(CPU_FTR_HVMODE);
+ break;
+#endif
case KVM_CAP_PPC_ALLOC_HTAB:
r = hv_enabled;
@@ -753,6 +759,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
else
kvmppc_xics_free_icp(vcpu);
break;
+ case KVMPPC_IRQ_XIVE:
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+ break;
}
kvmppc_core_vcpu_free(vcpu);
@@ -1941,6 +1950,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_CAP_PPC_IRQ_XIVE: {
+ struct fd f;
+ struct kvm_device *dev;
+
+ r = -EBADF;
+ f = fdget(cap->args[0]);
+ if (!f.file)
+ break;
+
+ r = -ENXIO;
+ if (!xive_enabled())
+ break;
+
+ r = -EPERM;
+ dev = kvm_device_from_filp(f.file);
+ if (dev)
+ r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
+ cap->args[1]);
+
+ fdput(f);
+ break;
+ }
+#endif /* CONFIG_KVM_XIVE */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_FWNMI:
r = -EINVAL;
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 356156f5c52d..1db1435769b4 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4458,6 +4458,15 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
+6.75 KVM_CAP_PPC_IRQ_XIVE
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the XIVE device fd
+ args[1] is the XIVE CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XIVE device.
+
7. Capabilities that can be enabled on VMs
------------------------------------------
--
2.20.1
More information about the Linuxppc-dev
mailing list