[PATCH kernel v3 18/22] powerpc/powernv/npu: Add compound IOMMU groups

David Gibson david at gibson.dropbear.id.au
Mon Nov 19 12:12:42 AEDT 2018


On Tue, Nov 13, 2018 at 07:28:19PM +1100, Alexey Kardashevskiy wrote:
> At the moment powernv registers an IOMMU group for each PE. There is
> an exception though - NPU (an emulated PCI bridge representing an NVLink);
> powernv attaches these bridges to the GPU IOMMU group which becomes
> a master.
> 
> Now we have POWER9 systems with GPUs connected to each other directly,
> bypassing PCI. At the moment powernv does not control these links so
> it has to put such interconnected GPUs to the same IOMMU group which
> means that the old scheme with a GPU as a master won't work - there will
> be up to 3 GPUs in such group.
> 
> This introduces a npu_comp struct which represents a compound IOMMU
> group made of multiple PEs. This converts the existing NVLink1 code to
> use the new scheme. From now on, each PE must have a valid
> iommu_table_group_ops which will either be called directly (a single PE
> group) or indirectly from a compound group.
> 
> This moves IOMMU group registration for NPU-connected GPUs to npu-dma.c.
> For POWER8, this stores a new compound group pointer in a PE (so a GPU
> is still a master); for POWER9 the new group pointer is stored in an NPU.
> 
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
>  arch/powerpc/include/asm/pci.h            |   1 +
>  arch/powerpc/platforms/powernv/pci.h      |   7 +
>  arch/powerpc/platforms/powernv/npu-dma.c  | 286 ++++++++++++++++++++--
>  arch/powerpc/platforms/powernv/pci-ioda.c | 173 +++----------
>  4 files changed, 308 insertions(+), 159 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
> index baf2886..0c72f18 100644
> --- a/arch/powerpc/include/asm/pci.h
> +++ b/arch/powerpc/include/asm/pci.h
> @@ -132,5 +132,6 @@ extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
>  extern int pnv_npu2_init(struct pci_controller *hose);
>  extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
>  		unsigned long msr);
> +extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev);
>  
>  #endif /* __ASM_POWERPC_PCI_H */
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index cf9f748..aef4bb5 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -62,6 +62,7 @@ struct pnv_ioda_pe {
>  
>  	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
>  	struct iommu_table_group table_group;
> +	struct npu_comp		*npucomp;
>  
>  	/* 64-bit TCE bypass region */
>  	bool			tce_bypass_enabled;
> @@ -201,6 +202,8 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
>  extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
>  extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
>  extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
> +extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
> +		__u64 window_size, __u32 levels);
>  extern int pnv_eeh_post_init(void);
>  
>  extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
> @@ -216,6 +219,10 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
>  extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
>  extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
>  extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe);
> +extern struct iommu_table_group *pnv_try_setup_npu_table_group(
> +		struct pnv_ioda_pe *pe);
> +extern struct iommu_table_group *pnv_npu_compound_attach(
> +		struct pnv_ioda_pe *pe);
>  
>  /* pci-ioda-tce.c */
>  #define POWERNV_IOMMU_DEFAULT_LEVELS	1
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> index 1792c7e..2231f4c 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -317,31 +317,6 @@ static struct iommu_table_group_ops pnv_pci_npu_ops = {
>  	.unset_window = pnv_npu_unset_window,
>  	.take_ownership = pnv_npu_take_ownership,
>  };
> -
> -struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
> -{
> -	struct pnv_phb *phb = npe->phb;
> -	struct pci_bus *pbus = phb->hose->bus;
> -	struct pci_dev *npdev, *gpdev = NULL, *gptmp;
> -	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
> -
> -	if (!gpe || !gpdev)
> -		return NULL;
> -
> -	npe->table_group.ops = &pnv_pci_npu_ops;
> -
> -	list_for_each_entry(npdev, &pbus->devices, bus_list) {
> -		gptmp = pnv_pci_get_gpu_dev(npdev);
> -
> -		if (gptmp != gpdev)
> -			continue;
> -
> -		pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
> -		iommu_group_add_device(gpe->table_group.group, &npdev->dev);
> -	}
> -
> -	return gpe;
> -}
>  #endif /* !CONFIG_IOMMU_API */
>  
>  /*
> @@ -349,6 +324,17 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
>   */
>  /* Maximum possible number of ATSD MMIO registers per NPU */
>  #define NV_NMMU_ATSD_REGS 8
> +#define NV_NPU_MAX_PE_NUM	16
> +
> +/*
> + * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
> + * up to 3 x (GPU + 2xNPUs) (POWER9).
> + */
> +struct npu_comp {
> +	struct iommu_table_group table_group;
> +	int pe_num;
> +	struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
> +};
>  
>  /* An NPU descriptor, valid for POWER9 only */
>  struct npu {
> @@ -365,6 +351,8 @@ struct npu {
>  	struct list_head next;
>  
>  	struct pci_controller *hose;
> +
> +	struct npu_comp npucomp;
>  };

I'm confused by this.  The comment implies there are multiple NPUs in a
single composite-group, but the npu_comp structure is embedded in the
npu structure, implying there's a copy per-NPU.


>  static LIST_HEAD(npu2_devices);
> @@ -382,6 +370,254 @@ static struct npu *npdev_to_npu(struct pci_dev *npdev)
>  	return NULL;
>  }
>  
> +#ifdef CONFIG_IOMMU_API
> +static long pnv_npu_peers_create_table_userspace(
> +		struct iommu_table_group *table_group,
> +		int num, __u32 page_shift, __u64 window_size, __u32 levels,
> +		struct iommu_table **ptbl)
> +{
> +	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
> +			table_group);
> +
> +	if (!npucomp->pe_num || !npucomp->pe[0] ||
> +			!npucomp->pe[0]->table_group.ops ||
> +			!npucomp->pe[0]->table_group.ops->create_table)
> +		return -EFAULT;
> +
> +	return npucomp->pe[0]->table_group.ops->create_table(
> +			&npucomp->pe[0]->table_group, num, page_shift,
> +			window_size, levels, ptbl);
> +}
> +
> +static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
> +		int num, struct iommu_table *tbl)
> +{
> +	int i, j;
> +	long ret = 0;
> +	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
> +			table_group);
> +
> +	for (i = 0; i < npucomp->pe_num; ++i) {
> +		struct pnv_ioda_pe *pe = npucomp->pe[i];
> +
> +		if (!pe->table_group.ops->set_window)
> +			continue;
> +
> +		ret = pe->table_group.ops->set_window(&pe->table_group,
> +				num, tbl);
> +		if (ret)
> +			break;
> +	}
> +
> +	if (ret) {
> +		for (j = 0; j < i; ++j) {
> +			struct pnv_ioda_pe *pe = npucomp->pe[j];
> +
> +			if (!pe->table_group.ops->unset_window)
> +				continue;
> +
> +			ret = pe->table_group.ops->unset_window(
> +					&pe->table_group, num);
> +			if (ret)
> +				break;
> +		}
> +	} else {
> +		table_group->tables[num] = iommu_tce_table_get(tbl);
> +	}
> +
> +	return ret;
> +}
> +
> +static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
> +		int num)
> +{
> +	int i, j;
> +	long ret = 0;
> +	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
> +			table_group);
> +
> +	for (i = 0; i < npucomp->pe_num; ++i) {
> +		struct pnv_ioda_pe *pe = npucomp->pe[i];
> +
> +		WARN_ON(npucomp->table_group.tables[num] !=
> +				table_group->tables[num]);
> +		if (!npucomp->table_group.tables[num])
> +			continue;
> +
> +		if (!pe->table_group.ops->unset_window)
> +			continue;
> +
> +		ret = pe->table_group.ops->unset_window(&pe->table_group, num);
> +		if (ret)
> +			break;
> +	}
> +
> +	if (ret) {
> +		for (j = 0; j < i; ++j) {
> +			struct pnv_ioda_pe *pe = npucomp->pe[j];
> +
> +			if (!npucomp->table_group.tables[num])
> +				continue;
> +
> +			if (!pe->table_group.ops->set_window)
> +				continue;
> +
> +			ret = pe->table_group.ops->set_window(&pe->table_group,
> +					num, table_group->tables[num]);
> +			if (ret)
> +				break;
> +		}
> +	} else if (table_group->tables[num]) {
> +		iommu_tce_table_put(table_group->tables[num]);
> +		table_group->tables[num] = NULL;
> +	}
> +
> +	return ret;
> +}
> +
> +static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
> +{
> +	int i;
> +	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
> +			table_group);
> +
> +	for (i = 0; i < npucomp->pe_num; ++i) {
> +		struct pnv_ioda_pe *pe = npucomp->pe[i];
> +
> +		if (!pe->table_group.ops->take_ownership)
> +			continue;
> +		pe->table_group.ops->take_ownership(&pe->table_group);
> +	}
> +}
> +
> +static void pnv_npu_peers_release_ownership(
> +		struct iommu_table_group *table_group)
> +{
> +	int i;
> +	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
> +			table_group);
> +
> +	for (i = 0; i < npucomp->pe_num; ++i) {
> +		struct pnv_ioda_pe *pe = npucomp->pe[i];
> +
> +		if (!pe->table_group.ops->release_ownership)
> +			continue;
> +		pe->table_group.ops->release_ownership(&pe->table_group);
> +	}
> +}
> +
> +static struct iommu_table_group_ops pnv_npu_peers_ops = {
> +	.get_table_size = pnv_pci_ioda2_get_table_size,
> +	.create_table = pnv_npu_peers_create_table_userspace,
> +	.set_window = pnv_npu_peers_set_window,
> +	.unset_window = pnv_npu_peers_unset_window,
> +	.take_ownership = pnv_npu_peers_take_ownership,
> +	.release_ownership = pnv_npu_peers_release_ownership,
> +};
> +
> +static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
> +		struct pnv_ioda_pe *pe)
> +{
> +	if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
> +		return;
> +
> +	npucomp->pe[npucomp->pe_num] = pe;
> +	++npucomp->pe_num;
> +}
> +
> +struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
> +{
> +	struct iommu_table_group *table_group;
> +	struct npu *npu;
> +	struct npu_comp *npucomp;
> +	struct pci_dev *gpdev = NULL;
> +	struct pci_controller *hose;
> +	struct pci_dev *npdev;
> +
> +	list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
> +		npdev = pnv_pci_get_npu_dev(gpdev, 0);
> +		if (npdev)
> +			break;
> +	}
> +
> +	if (!npdev)
> +		/* It is not an NPU attached device, skip */
> +		return NULL;
> +
> +	hose = pci_bus_to_host(gpdev->bus);
> +	npu = npdev_to_npu(npdev);
> +	if (npu) {
> +		table_group = &npu->npucomp.table_group;
> +
> +		if (!table_group->group) {
> +			table_group->ops = &pnv_npu_peers_ops;
> +			iommu_register_group(table_group,
> +					hose->global_number,
> +					pe->pe_number);
> +		}
> +	} else {
> +		/* Create a group for 1 GPU and attached NPUs */
> +		pe->npucomp = kzalloc(sizeof(pe->npucomp), GFP_KERNEL);
> +		table_group = &pe->npucomp->table_group;
> +		table_group->ops = &pnv_npu_peers_ops;
> +		iommu_register_group(table_group, hose->global_number,
> +				pe->pe_number);
> +	}
> +
> +	/* Steal capabilities from a GPU PE */
> +	table_group->max_dynamic_windows_supported =
> +		pe->table_group.max_dynamic_windows_supported;
> +	table_group->tce32_start = pe->table_group.tce32_start;
> +	table_group->tce32_size = pe->table_group.tce32_size;
> +	table_group->max_levels = pe->table_group.max_levels;
> +	table_group->pgsizes = pe->table_group.pgsizes;
> +
> +	npucomp = container_of(table_group, struct npu_comp, table_group);
> +	pnv_comp_attach_table_group(npucomp, pe);
> +
> +	return table_group;
> +}
> +
> +struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
> +{
> +	struct iommu_table_group *table_group;
> +	struct npu_comp *npucomp;
> +	struct pci_dev *gpdev = NULL;
> +	struct pci_dev *npdev;
> +	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
> +
> +	WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
> +	if (!gpe)
> +		return NULL;
> +
> +	/*
> +	 * IODA2 bridges get this set up from
> +	 * pci_controller_ops::setup_bridge but NPU bridges do not
> +	 * have this hook defined so we do it here.
> +	 */
> +	pe->table_group.max_dynamic_windows_supported =
> +		IOMMU_TABLE_GROUP_MAX_TABLES;
> +	pe->table_group.ops = &pnv_pci_npu_ops;
> +
> +	table_group = iommu_group_get_iommudata(
> +			iommu_group_get(&gpdev->dev));
> +
> +	npucomp = container_of(table_group, struct npu_comp, table_group);
> +	pnv_comp_attach_table_group(npucomp, pe);
> +
> +	list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
> +		struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
> +
> +		if (gpdevtmp != gpdev)
> +			continue;
> +
> +		iommu_add_device(table_group, &npdev->dev);
> +	}
> +
> +	return table_group;
> +}
> +#endif /* CONFIG_IOMMU_API */
> +
>  /* Maximum number of nvlinks per npu */
>  #define NV_MAX_LINKS 6
>  
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 04639ae..0e8ada5 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -190,7 +190,8 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
>  	unsigned int pe_num = pe->pe_number;
>  
>  	WARN_ON(pe->pdev);
> -
> +	WARN_ON(pe->npucomp);
> +	kfree(pe->npucomp);
>  	memset(pe, 0, sizeof(struct pnv_ioda_pe));
>  	clear_bit(pe_num, phb->ioda.pe_alloc);
>  }
> @@ -1269,7 +1270,8 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
>  		pnv_ioda_setup_npu_PE(pdev);
>  }
>  
> -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe);
> +static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
> +		struct iommu_table_group *table_group, struct pci_bus *bus);
>  
>  static void pnv_pci_ioda_setup_PEs(void)
>  {
> @@ -1593,7 +1595,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  		mutex_unlock(&phb->ioda.pe_list_mutex);
>  
>  		pnv_pci_ioda2_setup_dma_pe(phb, pe);
> -		pnv_ioda_setup_bus_iommu_group(pe);
> +		pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
>  	}
>  }
>  
> @@ -2554,7 +2556,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
>  #endif
>  
>  #ifdef CONFIG_IOMMU_API
> -static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
> +unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
>  		__u64 window_size, __u32 levels)
>  {
>  	unsigned long bytes = 0;
> @@ -2628,147 +2630,38 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
>  	.release_ownership = pnv_ioda2_release_ownership,
>  };
>  
> -static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
> -{
> -	struct pci_controller *hose;
> -	struct pnv_phb *phb;
> -	struct pnv_ioda_pe **ptmppe = opaque;
> -	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
> -	struct pci_dn *pdn = pci_get_pdn(pdev);
> -
> -	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
> -		return 0;
> -
> -	hose = pci_bus_to_host(pdev->bus);
> -	phb = hose->private_data;
> -	if (phb->type != PNV_PHB_NPU_NVLINK)
> -		return 0;
> -
> -	*ptmppe = &phb->ioda.pe_array[pdn->pe_number];
> -
> -	return 1;
> -}
> -
> -/*
> - * This returns PE of associated NPU.
> - * This assumes that NPU is in the same IOMMU group with GPU and there is
> - * no other PEs.
> - */
> -static struct pnv_ioda_pe *gpe_table_group_to_npe(
> -		struct iommu_table_group *table_group)
> -{
> -	struct pnv_ioda_pe *npe = NULL;
> -	int ret = iommu_group_for_each_dev(table_group->group, &npe,
> -			gpe_table_group_to_npe_cb);
> -
> -	BUG_ON(!ret || !npe);
> -
> -	return npe;
> -}
> -
> -static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
> -		int num, struct iommu_table *tbl)
> -{
> -	struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
> -	int num2 = (num == 0) ? 1 : 0;
> -	long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
> -
> -	if (ret)
> -		return ret;
> -
> -	if (table_group->tables[num2])
> -		npe->table_group.ops->unset_window(&npe->table_group, num2);
> -
> -	ret = npe->table_group.ops->set_window(&npe->table_group, num, tbl);
> -	if (ret) {
> -		pnv_pci_ioda2_unset_window(table_group, num);
> -		if (table_group->tables[num2])
> -			npe->table_group.ops->set_window(&npe->table_group,
> -					num2, table_group->tables[num2]);
> -	}
> -
> -	return ret;
> -}
> -
> -static long pnv_pci_ioda2_npu_unset_window(
> -		struct iommu_table_group *table_group,
> -		int num)
> -{
> -	struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
> -	int num2 = (num == 0) ? 1 : 0;
> -	long ret = pnv_pci_ioda2_unset_window(table_group, num);
> -
> -	if (ret)
> -		return ret;
> -
> -	if (!npe->table_group.tables[num])
> -		return 0;
> -
> -	ret = npe->table_group.ops->unset_window(&npe->table_group, num);
> -	if (ret)
> -		return ret;
> -
> -	if (table_group->tables[num2])
> -		ret = npe->table_group.ops->set_window(&npe->table_group, num2,
> -				table_group->tables[num2]);
> -
> -	return ret;
> -}
> -
> -static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
> -{
> -	struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
> -
> -	npe->table_group.ops->take_ownership(&npe->table_group);
> -	pnv_ioda2_take_ownership(table_group);
> -}
> -
> -static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
> -	.get_table_size = pnv_pci_ioda2_get_table_size,
> -	.create_table = pnv_pci_ioda2_create_table_userspace,
> -	.set_window = pnv_pci_ioda2_npu_set_window,
> -	.unset_window = pnv_pci_ioda2_npu_unset_window,
> -	.take_ownership = pnv_ioda2_npu_take_ownership,
> -	.release_ownership = pnv_ioda2_release_ownership,
> -};
> -
>  static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe,
> +		struct iommu_table_group *table_group,
>  		struct pci_bus *bus)
>  {
>  	struct pci_dev *dev;
>  
>  	list_for_each_entry(dev, &bus->devices, bus_list) {
> -		iommu_add_device(&pe->table_group, &dev->dev);
> +		iommu_add_device(table_group, &dev->dev);
>  
>  		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
>  			pnv_ioda_setup_bus_iommu_group_add_devices(pe,
> -					dev->subordinate);
> +					table_group, dev->subordinate);
>  	}
>  }
>  
> -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe)
> +static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
> +		struct iommu_table_group *table_group, struct pci_bus *bus)
>  {
> -	if (!pnv_pci_ioda_pe_dma_weight(pe))
> -		return;
>  
> -	iommu_register_group(&pe->table_group, pe->phb->hose->global_number,
> -			pe->pe_number);
> -
> -	/*
> -	 * set_iommu_table_base(&pe->pdev->dev, tbl) should have been called
> -	 * by now
> -	 */
>  	if (pe->flags & PNV_IODA_PE_DEV)
> -		iommu_add_device(&pe->table_group, &pe->pdev->dev);
> -	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
> -		pnv_ioda_setup_bus_iommu_group_add_devices(pe, pe->pbus);
> +		iommu_add_device(table_group, &pe->pdev->dev);
> +
> +	if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus)
> +		pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group,
> +				bus);
>  }
>  
>  static void pnv_pci_ioda_setup_iommu_api(void)
>  {
> -	struct pci_controller *hose, *tmp;
> +	struct pci_controller *hose;
>  	struct pnv_phb *phb;
> -	struct pnv_ioda_pe *pe, *gpe;
> +	struct pnv_ioda_pe *pe;
>  
>  	/*
>  	 * There are 4 types of PEs:
> @@ -2790,29 +2683,41 @@ static void pnv_pci_ioda_setup_iommu_api(void)
>  		if (phb->type == PNV_PHB_NPU_NVLINK)
>  			continue;
>  
> -		list_for_each_entry(pe, &phb->ioda.pe_list, list)
> -			pnv_ioda_setup_bus_iommu_group(pe);
> +		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
> +			struct iommu_table_group *table_group;
> +
> +			table_group = pnv_try_setup_npu_table_group(pe);
> +			if (!table_group) {
> +				if (!pnv_pci_ioda_pe_dma_weight(pe))
> +					continue;
> +
> +				table_group = &pe->table_group;
> +				iommu_register_group(&pe->table_group,
> +						pe->phb->hose->global_number,
> +						pe->pe_number);
> +			}
> +			pnv_ioda_setup_bus_iommu_group(pe, table_group,
> +					pe->pbus);
> +		}
>  	}
>  
>  	/*
>  	 * Now we have all PHBs discovered, time to add NPU devices to
>  	 * the corresponding IOMMU groups.
>  	 */
> -	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
> +	list_for_each_entry(hose, &hose_list, list_node) {
>  		phb = hose->private_data;
>  
>  		if (phb->type != PNV_PHB_NPU_NVLINK)
>  			continue;
>  
> -		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
> -			gpe = pnv_pci_npu_setup_iommu(pe);
> -			if (gpe)
> -				gpe->table_group.ops = &pnv_pci_ioda2_npu_ops;
> -		}
> +		list_for_each_entry(pe, &phb->ioda.pe_list, list)
> +			pnv_npu_compound_attach(pe);
>  	}
>  }
>  #else /* !CONFIG_IOMMU_API */
> -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe) { }
> +static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
> +		struct iommu_table_group *table_group, struct pci_bus *bus){}
>  static void pnv_pci_ioda_setup_iommu_api(void) { };
>  #endif
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20181119/a28c8bcc/attachment.sig>


More information about the Linuxppc-dev mailing list