[PATCH 06/19] KVM: PPC: Book3S HV: add a GET_ESB_FD control to the XIVE native device

David Gibson david at gibson.dropbear.id.au
Tue Feb 5 16:28:22 AEDT 2019


On Mon, Feb 04, 2019 at 12:30:39PM +0100, Cédric Le Goater wrote:
> On 2/4/19 5:45 AM, David Gibson wrote:
> > On Mon, Jan 07, 2019 at 07:43:18PM +0100, Cédric Le Goater wrote:
> >> This will let the guest create a memory mapping to expose the ESB MMIO
> >> regions used to control the interrupt sources, to trigger events, to
> >> EOI or to turn off the sources.
> >>
> >> Signed-off-by: Cédric Le Goater <clg at kaod.org>
> >> ---
> >>  arch/powerpc/include/uapi/asm/kvm.h   |  4 ++
> >>  arch/powerpc/kvm/book3s_xive_native.c | 97 +++++++++++++++++++++++++++
> >>  2 files changed, 101 insertions(+)
> >>
> >> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> >> index 8c876c166ef2..6bb61ba141c2 100644
> >> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >> @@ -675,4 +675,8 @@ struct kvm_ppc_cpu_char {
> >>  #define  KVM_XICS_PRESENTED		(1ULL << 43)
> >>  #define  KVM_XICS_QUEUED		(1ULL << 44)
> >>  
> >> +/* POWER9 XIVE Native Interrupt Controller */
> >> +#define KVM_DEV_XIVE_GRP_CTRL		1
> >> +#define   KVM_DEV_XIVE_GET_ESB_FD	1
> > 
> > Introducing a new FD for ESB and TIMA seems overkill.  Can't you get
> > to both with an mmap() directly on the xive device fd?  Using the
> > offset to distinguish which one to map, obviously.
> 
> The page offset would define some sort of user API. It seems feasible.
> But I am not sure this would be practical in the future if we need to 
> tune the length.

Um.. why not?  I mean, yes the XIVE supports rather a lot of
interrupts, but we have 64 bits of offset we can play with - we can
leave room for billions of ESB slots and still have room for billions
of VPs.

> The TIMA has two pages that can be exposed at guest level for interrupt 
> management: the OS and the USER pages. That should be OK.
> 
> But we might want to map only portions of the interrupt ESB space, for 
> PCI passthrough for instance as Paul proposed. I am still looking at that.
> 
> Thanks,
> 
> C.
> 
> >>  #endif /* __LINUX_KVM_POWERPC_H */
> >> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> >> index 115143e76c45..e20081f0c8d4 100644
> >> --- a/arch/powerpc/kvm/book3s_xive_native.c
> >> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> >> @@ -153,6 +153,85 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
> >>  	return rc;
> >>  }
> >>  
> >> +static int xive_native_esb_fault(struct vm_fault *vmf)
> >> +{
> >> +	struct vm_area_struct *vma = vmf->vma;
> >> +	struct kvmppc_xive *xive = vma->vm_file->private_data;
> >> +	struct kvmppc_xive_src_block *sb;
> >> +	struct kvmppc_xive_irq_state *state;
> >> +	struct xive_irq_data *xd;
> >> +	u32 hw_num;
> >> +	u16 src;
> >> +	u64 page;
> >> +	unsigned long irq;
> >> +
> >> +	/*
> >> +	 * Linux/KVM uses a two pages ESB setting, one for trigger and
> >> +	 * one for EOI
> >> +	 */
> >> +	irq = vmf->pgoff / 2;
> >> +
> >> +	sb = kvmppc_xive_find_source(xive, irq, &src);
> >> +	if (!sb) {
> >> +		pr_err("%s: source %lx not found !\n", __func__, irq);
> >> +		return VM_FAULT_SIGBUS;
> >> +	}
> >> +
> >> +	state = &sb->irq_state[src];
> >> +	kvmppc_xive_select_irq(state, &hw_num, &xd);
> >> +
> >> +	arch_spin_lock(&sb->lock);
> >> +
> >> +	/*
> >> +	 * first/even page is for trigger
> >> +	 * second/odd page is for EOI and management.
> >> +	 */
> >> +	page = vmf->pgoff % 2 ? xd->eoi_page : xd->trig_page;
> >> +	arch_spin_unlock(&sb->lock);
> >> +
> >> +	if (!page) {
> >> +		pr_err("%s: acessing invalid ESB page for source %lx !\n",
> >> +		       __func__, irq);
> >> +		return VM_FAULT_SIGBUS;
> >> +	}
> >> +
> >> +	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
> >> +	return VM_FAULT_NOPAGE;
> >> +}
> >> +
> >> +static const struct vm_operations_struct xive_native_esb_vmops = {
> >> +	.fault = xive_native_esb_fault,
> >> +};
> >> +
> >> +static int xive_native_esb_mmap(struct file *file, struct vm_area_struct *vma)
> >> +{
> >> +	/* There are two ESB pages (trigger and EOI) per IRQ */
> >> +	if (vma_pages(vma) + vma->vm_pgoff > KVMPPC_XIVE_NR_IRQS * 2)
> >> +		return -EINVAL;
> >> +
> >> +	vma->vm_flags |= VM_IO | VM_PFNMAP;
> >> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> >> +	vma->vm_ops = &xive_native_esb_vmops;
> >> +	return 0;
> >> +}
> >> +
> >> +static const struct file_operations xive_native_esb_fops = {
> >> +	.mmap = xive_native_esb_mmap,
> >> +};
> >> +
> >> +static int kvmppc_xive_native_get_esb_fd(struct kvmppc_xive *xive, u64 addr)
> >> +{
> >> +	u64 __user *ubufp = (u64 __user *) addr;
> >> +	int ret;
> >> +
> >> +	ret = anon_inode_getfd("[xive-esb]", &xive_native_esb_fops, xive,
> >> +				O_RDWR | O_CLOEXEC);
> >> +	if (ret < 0)
> >> +		return ret;
> >> +
> >> +	return put_user(ret, ubufp);
> >> +}
> >> +
> >>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
> >>  				       struct kvm_device_attr *attr)
> >>  {
> >> @@ -162,12 +241,30 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
> >>  static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
> >>  				       struct kvm_device_attr *attr)
> >>  {
> >> +	struct kvmppc_xive *xive = dev->private;
> >> +
> >> +	switch (attr->group) {
> >> +	case KVM_DEV_XIVE_GRP_CTRL:
> >> +		switch (attr->attr) {
> >> +		case KVM_DEV_XIVE_GET_ESB_FD:
> >> +			return kvmppc_xive_native_get_esb_fd(xive, attr->addr);
> >> +		}
> >> +		break;
> >> +	}
> >>  	return -ENXIO;
> >>  }
> >>  
> >>  static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
> >>  				       struct kvm_device_attr *attr)
> >>  {
> >> +	switch (attr->group) {
> >> +	case KVM_DEV_XIVE_GRP_CTRL:
> >> +		switch (attr->attr) {
> >> +		case KVM_DEV_XIVE_GET_ESB_FD:
> >> +			return 0;
> >> +		}
> >> +		break;
> >> +	}
> >>  	return -ENXIO;
> >>  }
> >>  
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20190205/c1c08065/attachment-0001.sig>


More information about the Linuxppc-dev mailing list