[PATCH kernel] powerpc/iommu: Add simple iommu_ops to report capabilities

Robin Murphy robin.murphy at arm.com
Tue Jul 5 19:41:50 AEST 2022


On 2022-07-05 07:22, Alexey Kardashevskiy wrote:
> Historically PPC64 managed to avoid using iommu_ops. The VFIO driver
> uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in
> the Type1 VFIO driver. Recent development though has added a coherency
> capability check to the generic part of VFIO and essentially disabled
> VFIO on PPC64.
> 
> This adds a simple iommu_ops stub which reports support for cache
> coherency. Because bus_set_iommu() triggers IOMMU probing of PCI devices,
> this provides minimum code for the probing to not crash.

No more bus_set_iommu() please - I'll be sending a new version of this 
series very soon:

https://lore.kernel.org/linux-iommu/cover.1650890638.git.robin.murphy@arm.com/

Cheers,
Robin.

> The previous discussion is here:
> https://patchwork.ozlabs.org/project/kvm-ppc/patch/20220701061751.1955857-1-aik@ozlabs.ru/
> 
> Fixes: e8ae0e140c05 ("vfio: Require that devices support DMA cache coherence")
> Fixes: 70693f470848 ("vfio: Set DMA ownership for VFIO devices")
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
> 
> I have not looked into the domains for ages, what is missing here? With this
> on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks,
> 
> ---
>   arch/powerpc/include/asm/iommu.h |  2 +
>   arch/powerpc/kernel/iommu.c      | 70 ++++++++++++++++++++++++++++++++
>   arch/powerpc/kernel/pci_64.c     |  3 ++
>   3 files changed, 75 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
> index 7e29c73e3dd4..4bdae0ee29d0 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -215,6 +215,8 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
>   		enum dma_data_direction *direction);
>   extern void iommu_tce_kill(struct iommu_table *tbl,
>   		unsigned long entry, unsigned long pages);
> +
> +extern const struct iommu_ops spapr_tce_iommu_ops;
>   #else
>   static inline void iommu_register_group(struct iommu_table_group *table_group,
>   					int pci_domain_number,
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index 7e56ddb3e0b9..2205b448f7d5 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -1176,4 +1176,74 @@ void iommu_del_device(struct device *dev)
>   	iommu_group_remove_device(dev);
>   }
>   EXPORT_SYMBOL_GPL(iommu_del_device);
> +
> +/*
> + * A simple iommu_ops to allow less cruft in generic VFIO code.
> + */
> +static bool spapr_tce_iommu_capable(enum iommu_cap cap)
> +{
> +	switch (cap) {
> +	case IOMMU_CAP_CACHE_COHERENCY:
> +		return true;
> +	default:
> +		break;
> +	}
> +
> +	return false;
> +}
> +
> +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
> +{
> +	struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> +
> +	if (!domain)
> +		return NULL;
> +
> +	domain->geometry.aperture_start = 0;
> +	domain->geometry.aperture_end = ~0ULL;
> +	domain->geometry.force_aperture = true;
> +
> +	return domain;
> +}
> +
> +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
> +{
> +	struct iommu_device *iommu_dev = kzalloc(sizeof(struct iommu_device), GFP_KERNEL);
> +
> +	iommu_dev->dev = dev;
> +	iommu_dev->ops = &spapr_tce_iommu_ops;
> +
> +	return iommu_dev;
> +}
> +
> +static void spapr_tce_iommu_release_device(struct device *dev)
> +{
> +}
> +
> +static int spapr_tce_iommu_attach_dev(struct iommu_domain *dom,
> +				      struct device *dev)
> +{
> +	return 0;
> +}
> +
> +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
> +{
> +	struct iommu_group *grp = dev->iommu_group;
> +
> +	if (!grp)
> +		grp = ERR_PTR(-ENODEV);
> +	return grp;
> +}
> +
> +const struct iommu_ops spapr_tce_iommu_ops = {
> +	.capable = spapr_tce_iommu_capable,
> +	.domain_alloc = spapr_tce_iommu_domain_alloc,
> +	.probe_device = spapr_tce_iommu_probe_device,
> +	.release_device = spapr_tce_iommu_release_device,
> +	.device_group = spapr_tce_iommu_device_group,
> +	.default_domain_ops = &(const struct iommu_domain_ops) {
> +		.attach_dev = spapr_tce_iommu_attach_dev,
> +	}
> +};
> +
>   #endif /* CONFIG_IOMMU_API */
> diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
> index 19b03ddf5631..04bc0c52e45c 100644
> --- a/arch/powerpc/kernel/pci_64.c
> +++ b/arch/powerpc/kernel/pci_64.c
> @@ -20,6 +20,7 @@
>   #include <linux/irq.h>
>   #include <linux/vmalloc.h>
>   #include <linux/of.h>
> +#include <linux/iommu.h>
>   
>   #include <asm/processor.h>
>   #include <asm/io.h>
> @@ -27,6 +28,7 @@
>   #include <asm/byteorder.h>
>   #include <asm/machdep.h>
>   #include <asm/ppc-pci.h>
> +#include <asm/iommu.h>
>   
>   /* pci_io_base -- the base address from which io bars are offsets.
>    * This is the lowest I/O base address (so bar values are always positive),
> @@ -69,6 +71,7 @@ static int __init pcibios_init(void)
>   		ppc_md.pcibios_fixup();
>   
>   	printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
> +	bus_set_iommu(&pci_bus_type, &spapr_tce_iommu_ops);
>   
>   	return 0;
>   }


More information about the Linuxppc-dev mailing list