[Skiboot] [PATCH V2 02/15] pau: introduce support

Frederic Barrat fbarrat at linux.ibm.com
Mon Oct 4 20:00:36 AEDT 2021



On 23/09/2021 11:03, Christophe Lombard wrote:
> OpenCapi for P10 is included in the P10 chip. This requires OCAPI capable
> PHYs, Datalink Layer Logic and Transaction Layer Logic to be included.
> The PHYs are the physical connection to the OCAPI interconnect.
> The Datalink Layer provides link training.
> The Transaction Layer executes the cache coherent and data movement
> commands on the P10 chip.
> The PAU provides the Transaction Layer functionality for the OCAPI
> link(s) on the P10 chip.
> 
> The P10 PAU supports two OCAPI links. Six PAU accelerator units are
> instantiated on the P10 chip, for a total of twelve OCAPI links.
> 
> This patch adds PAU opencapi structure for supporting OpenCapi5.
> The hw/pau.c file contains the main PAU management functions.
> 
> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
> ---


This patch introduces 3 new files (pau.c, pau.h, pau-regs.h). The header 
files are dual-licensed but pau.c is not. We should probably correct 
that before merging.
Vasant: do we need a new revision, or is it something you can take care 
of before merging?

   Fred




>   core/init.c        |   3 +
>   hdata/spira.c      |  31 +++++--
>   hdata/spira.h      |   2 +-
>   hw/Makefile.inc    |   2 +-
>   hw/pau.c           | 225 +++++++++++++++++++++++++++++++++++++++++++++
>   include/pau-regs.h |  30 ++++++
>   include/pau.h      |  94 +++++++++++++++++++
>   include/platform.h |   4 +
>   include/skiboot.h  |   1 +
>   9 files changed, 383 insertions(+), 9 deletions(-)
>   create mode 100644 hw/pau.c
>   create mode 100644 include/pau-regs.h
>   create mode 100644 include/pau.h
> 
> diff --git a/core/init.c b/core/init.c
> index 235f9055..056431ee 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -1371,6 +1371,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>   	probe_npu();
>   	probe_npu2();
>   
> +	/* Probe PAUs */
> +	probe_pau();
> +
>   	/* Initialize PCI */
>   	pci_init_slots();
>   
> diff --git a/hdata/spira.c b/hdata/spira.c
> index baa23751..2c7a6a6c 100644
> --- a/hdata/spira.c
> +++ b/hdata/spira.c
> @@ -966,21 +966,38 @@ static void add_nx(void)
>   static void add_nmmu(void)
>   {
>   	struct dt_node *xscom, *nmmu;
> -	u32 scom;
> +	u32 scom1, scom2;
> +	u32 chip_id;
>   
>   	/* Nest MMU only exists on POWER9 or later */
>   	if (proc_gen < proc_gen_p9)
>   		return;
>   
> -	if (proc_gen == proc_gen_p9)
> -		scom = 0x5012c40;
> -	else
> -		scom = 0x2010c40;
> +	if (proc_gen == proc_gen_p10) {
> +		scom1 = 0x2010c40;
> +		scom2 = 0x3010c40;
> +	} else
> +		scom1 = 0x5012c40;
>   
>   	dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
> -		nmmu = dt_new_addr(xscom, "nmmu", scom);
> +		nmmu = dt_new_addr(xscom, "nmmu", scom1);
> +		dt_add_property_strings(nmmu, "compatible", "ibm,power9-nest-mmu");
> +		dt_add_property_cells(nmmu, "reg", scom1, 0x20);
> +
> +		/*
> +		 * P10 has a second nMMU, a.k.a "south" nMMU.
> +		 * It exists only on P1 and P3
> +		 */
> +		if (proc_gen < proc_gen_p10)
> +			return;
> +
> +		chip_id = __dt_get_chip_id(xscom);
> +		if (chip_id != 2 && chip_id != 6)
> +			continue;
> +
> +		nmmu = dt_new_addr(xscom, "nmmu", scom2);
>   		dt_add_property_strings(nmmu, "compatible", "ibm,power9-nest-mmu");
> -		dt_add_property_cells(nmmu, "reg", scom, 0x20);
> +		dt_add_property_cells(nmmu, "reg", scom2, 0x20);
>   	}
>   }
>   
> diff --git a/hdata/spira.h b/hdata/spira.h
> index afdc9228..8def23bd 100644
> --- a/hdata/spira.h
> +++ b/hdata/spira.h
> @@ -1152,7 +1152,7 @@ struct sppcrd_smp_link {
>   	__be16 pci_sideband_slot_idx;
>   
>   	__be16 slca_idx; /* SLCA index of the *external* port */
> -	__be16 reserved;
> +	__be16 opt_id;
>   
>   	/* nvlink/ocapi detection devices */
>   	__be32 i2c_link_cable;
> diff --git a/hw/Makefile.inc b/hw/Makefile.inc
> index c254fcbd..0b12f0be 100644
> --- a/hw/Makefile.inc
> +++ b/hw/Makefile.inc
> @@ -8,7 +8,7 @@ HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o
>   HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o
>   HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o
>   HW_OBJS += occ-sensor.o vas.o sbe-p8.o dio-p9.o lpc-port80h.o cache-p9.o
> -HW_OBJS += npu-opal.o ocmb.o xive2.o
> +HW_OBJS += npu-opal.o ocmb.o xive2.o pau.o
>   HW=hw/built-in.a
>   
>   include $(SRC)/hw/fsp/Makefile.inc
> diff --git a/hw/pau.c b/hw/pau.c
> new file mode 100644
> index 00000000..6a5eacad
> --- /dev/null
> +++ b/hw/pau.c
> @@ -0,0 +1,225 @@
> +// SPDX-License-Identifier: Apache-2.0
> +/*
> + * Copyright 2020 IBM Corp.
> + */
> +
> +#include <phys-map.h>
> +#include <pau.h>
> +#include <pau-regs.h>
> +
> +struct pau_dev *pau_next_dev(struct pau *pau, struct pau_dev *dev,
> +			       enum pau_dev_type type)
> +{
> +	uint32_t i = 0;
> +
> +	if (dev)
> +		i = dev->index + 1;
> +
> +	for (; i < pau->links; i++) {
> +		dev = &pau->devices[i];
> +
> +		if (dev->type == type || type == PAU_DEV_TYPE_ANY)
> +			return dev;
> +	}
> +
> +	return NULL;
> +}
> +
> +static void pau_dt_create_link(struct dt_node *pau, uint32_t pau_index,
> +			       uint32_t dev_index)
> +{
> +	struct dt_node *link;
> +	uint32_t phy_lane_mask = 0, pau_unit = 0;
> +	uint32_t op_unit = 0, odl_index = 0;
> +
> +	link = dt_new_addr(pau, "link", dev_index);
> +
> +	dt_add_property_string(link, "compatible", "ibm,pau-link");
> +	dt_add_property_cells(link, "reg", dev_index);
> +	dt_add_property_cells(link, "ibm,pau-link-index", dev_index);
> +
> +	/* pau_index	Interface Link - OPxA/B
> +	 * 0		OPT0 -- PAU0
> +	 *		OPT1 -- no PAU, SMP only
> +	 *		OPT2 -- no PAU, SMP only
> +	 * 1		OPT3 -- PAU3
> +	 * 2		OPT4 -- PAU4 by default, but can be muxed to use PAU5
> +	 * 3		OPT5 -- PAU5 by default, but can be muxed to use PAU4
> +	 * 4		OPT6 -- PAU6 by default, but can be muxed to use PAU7
> +	 * 5		OPT7 -- PAU7 by default, but can be muxed to use PAU6
> +	 */
> +	switch (pau_index) {
> +	case 0:
> +		/* OP0A - OP0B */
> +		pau_unit = 0;
> +		op_unit = 0;
> +		break;
> +	case 1:
> +		/* OP3A - OP3B */
> +		pau_unit = 3;
> +		op_unit = 3;
> +		break;
> +	case 2:
> +		/* OP4A - OP4B or OP5A - OP5B (TO DO) */
> +		pau_unit = 4;
> +		op_unit = 4;
> +		break;
> +	case 3:
> +		/* OP5A - OP5B or OP4A - OP4B (TO DO) */
> +		pau_unit = 5;
> +		op_unit = 5;
> +		break;
> +	case 4:
> +		/* OP6A - OP6B or OP7A - OP7B (TO DO) */
> +		pau_unit = 6;
> +		op_unit = 6;
> +		break;
> +	case 5:
> +		/* OP7A - OP7B or OP6A - OP6B (TO DO) */
> +		pau_unit = 7;
> +		op_unit = 7;
> +		break;
> +	default:
> +		return;
> +	}
> +
> +	/* ODL0 is hooked up to OTL0 */
> +	if (dev_index == 0) {
> +		odl_index = 0;
> +		phy_lane_mask = PPC_BITMASK32(0, 3);
> +		phy_lane_mask |= PPC_BITMASK32(5, 8);
> +	} else if (dev_index == 1) {
> +		odl_index = 1;
> +		phy_lane_mask = PPC_BITMASK32(9, 12);
> +		phy_lane_mask |= PPC_BITMASK32(14, 17);
> +	}
> +
> +	dt_add_property_cells(link, "ibm,odl-index", odl_index);
> +	dt_add_property_cells(link, "ibm,pau-unit", pau_unit);
> +	dt_add_property_cells(link, "ibm,op-unit", op_unit);
> +	dt_add_property_cells(link, "ibm,pau-lane-mask", phy_lane_mask);
> +	dt_add_property_cells(link, "ibm,phb-index", pau_get_phb_index(pau_index, dev_index));
> +}
> +
> +static void pau_dt_create_pau(struct dt_node *xscom, uint32_t pau_index)
> +{
> +	const uint32_t pau_base[] = { 0x10010800, 0x11010800,
> +				      0x12010800, 0x12011000,
> +				      0x13010800, 0x13011000};
> +	struct dt_node *pau;
> +	uint32_t links;
> +
> +	assert(pau_index < PAU_NBR);
> +	pau = dt_new_addr(xscom, "pau", pau_base[pau_index]);
> +
> +	dt_add_property_cells(pau, "#size-cells", 0);
> +	dt_add_property_cells(pau, "#address-cells", 1);
> +	dt_add_property_cells(pau, "reg", pau_base[pau_index], 0x2c);
> +	dt_add_property_string(pau, "compatible", "ibm,power10-pau");
> +	dt_add_property_cells(pau, "ibm,pau-index", pau_index);
> +
> +	links = PAU_LINKS_OPENCAPI_PER_PAU;
> +	for (uint32_t i = 0; i < links; i++)
> +		pau_dt_create_link(pau, pau_index, i);
> +}
> +
> +static bool pau_dt_create(void)
> +{
> +	struct dt_node *xscom;
> +
> +	/* P10 chips only */
> +	if (proc_gen < proc_gen_p10)
> +		return false;
> +
> +	dt_for_each_compatible(dt_root, xscom, "ibm,xscom")
> +		for (uint32_t i = 0; i < PAU_NBR; i++)
> +			pau_dt_create_pau(xscom, i);
> +
> +	return true;
> +}
> +
> +static struct pau *pau_create(struct dt_node *dn)
> +{
> +	struct pau *pau;
> +	struct dt_node *link;
> +	struct pau_dev *dev;
> +	char *path;
> +	uint32_t i;
> +
> +	pau = zalloc(sizeof(*pau));
> +	assert(pau);
> +
> +	init_lock(&pau->lock);
> +
> +	pau->dt_node = dn;
> +	pau->index = dt_prop_get_u32(dn, "ibm,pau-index");
> +	pau->xscom_base = dt_get_address(dn, 0, NULL);
> +
> +	pau->chip_id = dt_get_chip_id(dn);
> +	assert(get_chip(pau->chip_id));
> +
> +	pau->links = PAU_LINKS_OPENCAPI_PER_PAU;
> +	dt_for_each_compatible(dn, link, "ibm,pau-link") {
> +		i = dt_prop_get_u32(link, "ibm,pau-link-index");
> +		assert(i < PAU_LINKS_OPENCAPI_PER_PAU);
> +
> +		dev = &pau->devices[i];
> +		dev->index = i;
> +		dev->pau = pau;
> +		dev->dn = link;
> +		dev->odl_index = dt_prop_get_u32(link, "ibm,odl-index");
> +		dev->op_unit = dt_prop_get_u32(link, "ibm,op-unit");
> +		dev->phy_lane_mask = dt_prop_get_u32(link, "ibm,pau-lane-mask");
> +	};
> +
> +	path = dt_get_path(dn);
> +	PAUINF(pau, "Found %s\n", path);
> +	PAUINF(pau, "SCOM base: 0x%llx\n", pau->xscom_base);
> +	free(path);
> +
> +	return pau;
> +}
> +
> +static void pau_device_detect_fixup(struct pau_dev *dev)
> +{
> +	struct dt_node *dn = dev->dn;
> +
> +	if (dev->type == PAU_DEV_TYPE_OPENCAPI) {
> +		PAUDEVDBG(dev, "Link type opencapi\n");
> +		dt_add_property_strings(dn, "ibm,pau-link-type", "opencapi");
> +		return;
> +	}
> +
> +	PAUDEVDBG(dev, "Link type unknown\n");
> +	dt_add_property_strings(dn, "ibm,pau-link-type", "unknown");
> +}
> +
> +static void pau_init(struct pau *pau)
> +{
> +	struct pau_dev *dev;
> +
> +	platform.pau_device_detect(pau);
> +	pau_for_each_dev(dev, pau)
> +		pau_device_detect_fixup(dev);
> +
> +}
> +
> +void probe_pau(void)
> +{
> +	struct dt_node *dn;
> +	struct pau *pau;
> +
> +	/* This can be removed when/if we decide to use HDAT instead */
> +	if (!pau_dt_create())
> +		return;
> +
> +	if (!platform.pau_device_detect) {
> +		prlog(PR_INFO, "PAU: Platform does not support PAU\n");
> +		return;
> +	}
> +
> +	dt_for_each_compatible(dt_root, dn, "ibm,power10-pau") {
> +		pau = pau_create(dn);
> +		pau_init(pau);
> +	}
> +}
> diff --git a/include/pau-regs.h b/include/pau-regs.h
> new file mode 100644
> index 00000000..a35668f1
> --- /dev/null
> +++ b/include/pau-regs.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
> + * Copyright 2021 IBM Corp.
> + */
> +
> +#ifndef __PAU_REGS_H
> +#define __PAU_REGS_H
> +
> +/* PAU FIR registers */
> +#define PAU_FIR(n)				(0x400 + (n) * 0x40)
> +#define PAU_FIR_MASK(n)				(0x403 + (n) * 0x40)
> +#define PAU_FIR_ACTION0(n)			(0x406 + (n) * 0x40)
> +#define PAU_FIR_ACTION1(n)			(0x407 + (n) * 0x40)
> +#define PAU_FIR_MAX				3
> +
> +/* PAU RING: Indirect address/data port */
> +#define PAU_MISC_SCOM_IND_SCOM_ADDR		0x33e
> +#define   PAU_MISC_DA_ADDR			PPC_BITMASK(0, 23)
> +#define   PAU_MISC_DA_LEN			PPC_BITMASK(24, 25)
> +#define     PAU_MISC_DA_LEN_4B			2
> +#define     PAU_MISC_DA_LEN_8B			3
> +#define PAU_MISC_SCOM_IND_SCOM_DATA		0x33f
> +
> +/* PAU RING: Indirect register blocks */
> +#define PAU_BLOCK(nib0, nib1)			((nib0) << 20 | (nib1) << 16)
> +#define PAU_REG_BLOCK(reg)			((reg) & 0xff0000)
> +#define PAU_REG_OFFSET(reg)			((reg) & 0xffff)
> +
> +#define PAU_BLOCK_CQ_SM(n)			PAU_BLOCK(4, (n))
> +
> +#endif /* __PAU_REGS_H */
> diff --git a/include/pau.h b/include/pau.h
> new file mode 100644
> index 00000000..2a26a65f
> --- /dev/null
> +++ b/include/pau.h
> @@ -0,0 +1,94 @@
> +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
> + * Copyright 2021 IBM Corp.
> + */
> +
> +#ifndef __PAU_H
> +#define __PAU_H
> +
> +#include <io.h>
> +#include <pci.h>
> +#include <xscom.h>
> +#include <pau-regs.h>
> +
> +#define PAU_NBR 6
> +#define PAU_LINKS_OPENCAPI_PER_PAU 2
> +
> +enum pau_dev_type {
> +	PAU_DEV_TYPE_UNKNOWN = 0,
> +	PAU_DEV_TYPE_OPENCAPI,
> +	PAU_DEV_TYPE_ANY = INT_MAX
> +};
> +
> +struct pau_dev {
> +	enum pau_dev_type	type;
> +	uint32_t		index;
> +	struct dt_node		*dn;
> +
> +	/* Associated PHY information */
> +	uint32_t		pau_unit; /* 0,3,4,5,6,7 */
> +	uint32_t		odl_index;
> +	uint32_t		op_unit; /* 0 -> 7 */
> +	uint32_t		phy_lane_mask;
> +
> +	struct pau		*pau;
> +};
> +
> +struct pau {
> +	uint32_t		index;
> +	struct dt_node		*dt_node;
> +	uint32_t		chip_id;
> +	uint64_t		xscom_base;
> +
> +	/* Global MMIO window (all PAU regs) */
> +	uint64_t		regs[2];
> +
> +	struct lock		lock;
> +
> +	uint32_t		links;
> +	struct pau_dev		devices[PAU_LINKS_OPENCAPI_PER_PAU];
> +};
> +
> +#define PAUDBG(pau, fmt, a...) PAULOG(PR_DEBUG, pau, fmt, ##a)
> +#define PAUINF(pau, fmt, a...) PAULOG(PR_INFO, pau, fmt, ##a)
> +#define PAUERR(pau, fmt, a...) PAULOG(PR_ERR, pau, fmt, ##a)
> +
> +#define PAUDEVDBG(dev, fmt, a...) PAUDEVLOG(PR_DEBUG, dev, fmt, ##a)
> +#define PAUDEVINF(dev, fmt, a...) PAUDEVLOG(PR_INFO, dev, fmt, ##a)
> +#define PAUDEVERR(dev, fmt, a...) PAUDEVLOG(PR_ERR, dev, fmt, ##a)
> +
> +#define PAULOG(l, pau, fmt, a...) \
> +	prlog(l, "PAU[%d:%d]: " fmt, (pau)->chip_id, (pau)->index, ##a)
> +
> +#define PAUDEVLOG(l, dev, fmt, a...)		\
> +	prlog(l, "PAU[%d:%d:%d]: " fmt,		\
> +	      (dev)->pau->chip_id,		\
> +	      (dev)->pau->index,		\
> +	      (dev)->index, ##a)
> +
> +
> +/* pau-scope index of the link */
> +static inline uint32_t pau_dev_index(struct pau_dev *dev, int links)
> +{
> +	return dev->pau->index * links + dev->index;
> +}
> +
> +struct pau_dev *pau_next_dev(struct pau *pau, struct pau_dev *dev,
> +			       enum pau_dev_type type);
> +
> +#define pau_for_each_dev_type(dev, pau, type) \
> +	for (dev = NULL; (dev = pau_next_dev(pau, dev, type));)
> +
> +#define pau_for_each_opencapi_dev(dev, pau) \
> +	pau_for_each_dev_type(dev, pau, PAU_DEV_TYPE_OPENCAPI)
> +
> +#define pau_for_each_dev(dev, pau) \
> +	pau_for_each_dev_type(dev, pau, PAU_DEV_TYPE_ANY)
> +
> +#define PAU_PHB_INDEX_BASE	6 /* immediately after real PHBs */
> +static inline int pau_get_phb_index(unsigned int pau_index,
> +				    unsigned int link_index)
> +{
> +	return PAU_PHB_INDEX_BASE + pau_index * 2 + link_index;
> +}
> +
> +#endif /* __PAU_H */
> diff --git a/include/platform.h b/include/platform.h
> index 27a3afa0..6fafddbf 100644
> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -10,6 +10,7 @@ struct pci_device;
>   struct pci_slot;
>   struct errorlog;
>   struct npu2;
> +struct pau;
>   
>   enum resource_id {
>   	RESOURCE_ID_KERNEL,
> @@ -126,6 +127,9 @@ struct platform {
>   	/* NPU device detection */
>   	void		(*npu2_device_detect)(struct npu2 *npu);
>   
> +	/* PAU device detection */
> +	void		(*pau_device_detect)(struct pau *pau);
> +
>   	/*
>   	 * Probe platform, return true on a match, called before
>   	 * any allocation has been performed outside of the heap
> diff --git a/include/skiboot.h b/include/skiboot.h
> index df11934f..595094a7 100644
> --- a/include/skiboot.h
> +++ b/include/skiboot.h
> @@ -209,6 +209,7 @@ extern int preload_capp_ucode(void);
>   extern void preload_io_vpd(void);
>   extern void probe_npu(void);
>   extern void probe_npu2(void);
> +extern void probe_pau(void);
>   extern void uart_init(void);
>   extern void mbox_init(void);
>   extern void early_uart_init(void);
> 


More information about the Skiboot mailing list