[PATCH 06/19] KVM: PPC: Book3S HV: add a GET_ESB_FD control to the XIVE native device

Cédric Le Goater clg at kaod.org
Mon Feb 4 22:30:39 AEDT 2019


On 2/4/19 5:45 AM, David Gibson wrote:
> On Mon, Jan 07, 2019 at 07:43:18PM +0100, Cédric Le Goater wrote:
>> This will let the guest create a memory mapping to expose the ESB MMIO
>> regions used to control the interrupt sources, to trigger events, to
>> EOI or to turn off the sources.
>>
>> Signed-off-by: Cédric Le Goater <clg at kaod.org>
>> ---
>>  arch/powerpc/include/uapi/asm/kvm.h   |  4 ++
>>  arch/powerpc/kvm/book3s_xive_native.c | 97 +++++++++++++++++++++++++++
>>  2 files changed, 101 insertions(+)
>>
>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>> index 8c876c166ef2..6bb61ba141c2 100644
>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>> @@ -675,4 +675,8 @@ struct kvm_ppc_cpu_char {
>>  #define  KVM_XICS_PRESENTED		(1ULL << 43)
>>  #define  KVM_XICS_QUEUED		(1ULL << 44)
>>  
>> +/* POWER9 XIVE Native Interrupt Controller */
>> +#define KVM_DEV_XIVE_GRP_CTRL		1
>> +#define   KVM_DEV_XIVE_GET_ESB_FD	1
> 
> Introducing a new FD for ESB and TIMA seems overkill.  Can't you get
> to both with an mmap() directly on the xive device fd?  Using the
> offset to distinguish which one to map, obviously.

The page offset would then become part of the user API. That seems
feasible, but I am not sure it would be practical in the future if we
need to tune the length.

The TIMA has two pages that can be exposed at guest level for interrupt
management: the OS page and the USER page. That should be OK.

But we might want to map only portions of the interrupt ESB space, for
PCI passthrough for instance, as Paul proposed. I am still looking at that.

Thanks,

C.

>>  #endif /* __LINUX_KVM_POWERPC_H */
>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>> index 115143e76c45..e20081f0c8d4 100644
>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>> @@ -153,6 +153,85 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
>>  	return rc;
>>  }
>>  
>> +static int xive_native_esb_fault(struct vm_fault *vmf)
>> +{
>> +	struct vm_area_struct *vma = vmf->vma;
>> +	struct kvmppc_xive *xive = vma->vm_file->private_data;
>> +	struct kvmppc_xive_src_block *sb;
>> +	struct kvmppc_xive_irq_state *state;
>> +	struct xive_irq_data *xd;
>> +	u32 hw_num;
>> +	u16 src;
>> +	u64 page;
>> +	unsigned long irq;
>> +
>> +	/*
>> +	 * Linux/KVM uses a two pages ESB setting, one for trigger and
>> +	 * one for EOI
>> +	 */
>> +	irq = vmf->pgoff / 2;
>> +
>> +	sb = kvmppc_xive_find_source(xive, irq, &src);
>> +	if (!sb) {
>> +		pr_err("%s: source %lx not found !\n", __func__, irq);
>> +		return VM_FAULT_SIGBUS;
>> +	}
>> +
>> +	state = &sb->irq_state[src];
>> +	kvmppc_xive_select_irq(state, &hw_num, &xd);
>> +
>> +	arch_spin_lock(&sb->lock);
>> +
>> +	/*
>> +	 * first/even page is for trigger
>> +	 * second/odd page is for EOI and management.
>> +	 */
>> +	page = vmf->pgoff % 2 ? xd->eoi_page : xd->trig_page;
>> +	arch_spin_unlock(&sb->lock);
>> +
>> +	if (!page) {
>> +		pr_err("%s: acessing invalid ESB page for source %lx !\n",
>> +		       __func__, irq);
>> +		return VM_FAULT_SIGBUS;
>> +	}
>> +
>> +	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
>> +	return VM_FAULT_NOPAGE;
>> +}
>> +
>> +static const struct vm_operations_struct xive_native_esb_vmops = {
>> +	.fault = xive_native_esb_fault,
>> +};
>> +
>> +static int xive_native_esb_mmap(struct file *file, struct vm_area_struct *vma)
>> +{
>> +	/* There are two ESB pages (trigger and EOI) per IRQ */
>> +	if (vma_pages(vma) + vma->vm_pgoff > KVMPPC_XIVE_NR_IRQS * 2)
>> +		return -EINVAL;
>> +
>> +	vma->vm_flags |= VM_IO | VM_PFNMAP;
>> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>> +	vma->vm_ops = &xive_native_esb_vmops;
>> +	return 0;
>> +}
>> +
>> +static const struct file_operations xive_native_esb_fops = {
>> +	.mmap = xive_native_esb_mmap,
>> +};
>> +
>> +static int kvmppc_xive_native_get_esb_fd(struct kvmppc_xive *xive, u64 addr)
>> +{
>> +	u64 __user *ubufp = (u64 __user *) addr;
>> +	int ret;
>> +
>> +	ret = anon_inode_getfd("[xive-esb]", &xive_native_esb_fops, xive,
>> +				O_RDWR | O_CLOEXEC);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	return put_user(ret, ubufp);
>> +}
>> +
>>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>>  				       struct kvm_device_attr *attr)
>>  {
>> @@ -162,12 +241,30 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>>  static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
>>  				       struct kvm_device_attr *attr)
>>  {
>> +	struct kvmppc_xive *xive = dev->private;
>> +
>> +	switch (attr->group) {
>> +	case KVM_DEV_XIVE_GRP_CTRL:
>> +		switch (attr->attr) {
>> +		case KVM_DEV_XIVE_GET_ESB_FD:
>> +			return kvmppc_xive_native_get_esb_fd(xive, attr->addr);
>> +		}
>> +		break;
>> +	}
>>  	return -ENXIO;
>>  }
>>  
>>  static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>>  				       struct kvm_device_attr *attr)
>>  {
>> +	switch (attr->group) {
>> +	case KVM_DEV_XIVE_GRP_CTRL:
>> +		switch (attr->attr) {
>> +		case KVM_DEV_XIVE_GET_ESB_FD:
>> +			return 0;
>> +		}
>> +		break;
>> +	}
>>  	return -ENXIO;
>>  }
>>  
> 



More information about the Linuxppc-dev mailing list