[RFC PATCH v5 5/5] vfio-pci: Allow to expose MSI-X table to userspace when safe

David Gibson david at gibson.dropbear.id.au
Wed Aug 9 16:59:55 AEST 2017


On Mon, Aug 07, 2017 at 05:25:48PM +1000, Alexey Kardashevskiy wrote:
> Some devices have a MSIX BAR not aligned to the system page size
> greater than 4K (like 64k for ppc64) which at the moment prevents
> such MMIO pages from being mapped to the userspace for the sake of
> the MSIX BAR content protection. If such page happens to share
> the same system page with some frequently accessed registers,
> the entire system page will be emulated which can seriously affect
> performance.
> 
> This allows mapping of MSI-X tables to userspace if hardware provides
> MSIX isolation via interrupt remapping or filtering; in other words
> allowing direct access to the MSIX BAR won't do any harm to other devices
> or cause spurious interrupts visible to the kernel.
> 
> This adds a wrapping helper to check if a capability is supported by
> an IOMMU group.
> 
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>

Reviewed-by: David Gibson <david at gibson.dropbear.id.au>

> ---
>  include/linux/vfio.h             |  1 +
>  drivers/vfio/pci/vfio_pci.c      | 20 +++++++++++++++++---
>  drivers/vfio/pci/vfio_pci_rdwr.c |  5 ++++-
>  drivers/vfio/vfio.c              | 15 +++++++++++++++
>  4 files changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 586809abb273..7110bca2fb60 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -46,6 +46,7 @@ struct vfio_device_ops {
>  
>  extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
>  extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
> +extern bool vfio_iommu_group_is_capable(struct device *dev, unsigned long cap);

This diff probably belongs in the earlier patch adding the function,
rather than here where it's first used.  Not worth respinning just for
that, though.

>  extern int vfio_add_group_dev(struct device *dev,
>  			      const struct vfio_device_ops *ops,
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index d87a0a3cda14..c4c39ed64b1e 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -561,11 +561,17 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>  	struct vfio_region_info_cap_sparse_mmap *sparse;
>  	size_t end, size;
>  	int nr_areas = 2, i = 0, ret;
> +	bool is_msix_isolated = vfio_iommu_group_is_capable(&vdev->pdev->dev,
> +			IOMMU_GROUP_CAP_ISOLATE_MSIX);
>  
>  	end = pci_resource_len(vdev->pdev, vdev->msix_bar);
>  
> -	/* If MSI-X table is aligned to the start or end, only one area */
> -	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
> +	/*
> +	 * If MSI-X table is allowed to mmap because of the capability
> +	 * of IRQ remapping or aligned to the start or end, only one area
> +	 */
> +	if (is_msix_isolated ||
> +	    ((vdev->msix_offset & PAGE_MASK) == 0) ||
>  	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
>  		nr_areas = 1;
>  
> @@ -577,6 +583,12 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>  
>  	sparse->nr_areas = nr_areas;
>  
> +	if (is_msix_isolated) {
> +		sparse->areas[i].offset = 0;
> +		sparse->areas[i].size = end;
> +		return 0;
> +	}
> +
>  	if (vdev->msix_offset & PAGE_MASK) {
>  		sparse->areas[i].offset = 0;
>  		sparse->areas[i].size = vdev->msix_offset & PAGE_MASK;
> @@ -1094,6 +1106,8 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
>  	unsigned int index;
>  	u64 phys_len, req_len, pgoff, req_start;
>  	int ret;
> +	bool is_msix_isolated = vfio_iommu_group_is_capable(&vdev->pdev->dev,
> +			IOMMU_GROUP_CAP_ISOLATE_MSIX);
>  
>  	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
>  
> @@ -1115,7 +1129,7 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
>  	if (req_start + req_len > phys_len)
>  		return -EINVAL;
>  
> -	if (index == vdev->msix_bar) {
> +	if (index == vdev->msix_bar && !is_msix_isolated) {
>  		/*
>  		 * Disallow mmaps overlapping the MSI-X table; users don't
>  		 * get to touch this directly.  We could find somewhere
> diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
> index 357243d76f10..7514206a5ea7 100644
> --- a/drivers/vfio/pci/vfio_pci_rdwr.c
> +++ b/drivers/vfio/pci/vfio_pci_rdwr.c
> @@ -18,6 +18,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/io.h>
>  #include <linux/vgaarb.h>
> +#include <linux/vfio.h>
>  
>  #include "vfio_pci_private.h"
>  
> @@ -123,6 +124,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
>  	resource_size_t end;
>  	void __iomem *io;
>  	ssize_t done;
> +	bool is_msix_isolated = vfio_iommu_group_is_capable(&vdev->pdev->dev,
> +			IOMMU_GROUP_CAP_ISOLATE_MSIX);
>  
>  	if (pci_resource_start(pdev, bar))
>  		end = pci_resource_len(pdev, bar);
> @@ -164,7 +167,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
>  	} else
>  		io = vdev->barmap[bar];
>  
> -	if (bar == vdev->msix_bar) {
> +	if (bar == vdev->msix_bar && !is_msix_isolated) {
>  		x_start = vdev->msix_offset;
>  		x_end = vdev->msix_offset + vdev->msix_size;
>  	}
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 330d50582f40..5292c4a5ae8f 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -169,6 +169,21 @@ void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
>  
> +bool vfio_iommu_group_is_capable(struct device *dev, unsigned long cap)
> +{
> +	bool ret = false;
> +	struct iommu_group *group = vfio_iommu_group_get(dev);
> +
> +	if (group) {
> +		ret = iommu_group_is_capable(group, cap);
> +
> +		vfio_iommu_group_put(group, dev);
> +	}
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(vfio_iommu_group_is_capable);
> +
>  #ifdef CONFIG_VFIO_NOIOMMU
>  static void *vfio_noiommu_open(unsigned long arg)
>  {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20170809/04a0560e/attachment.sig>


More information about the Linuxppc-dev mailing list