[Skiboot] [PATCH v3 4/5] npu2: Add OPAL calls for nvlink2 address translation services

Alistair Popple alistair at popple.id.au
Fri Mar 24 12:22:25 AEDT 2017


Adds three OPAL calls for interacting with NPU2 devices:
opal_npu_init_context, opal_npu_destroy_context and opal_npu_map_lpar.

These are used to setup and configure address translation services
(ATS) for a process/partition on a given NVLink2 device.

Signed-off-by: Alistair Popple <alistair at popple.id.au>
---
 doc/opal-api/opal-npu2-141-142-143.rst |  66 ++++++++++
 hw/npu2.c                              | 230 ++++++++++++++++++++++++++++++++-
 include/opal-api.h                     |   5 +-
 3 files changed, 299 insertions(+), 2 deletions(-)
 create mode 100644 doc/opal-api/opal-npu2-141-142-143.rst

diff --git a/doc/opal-api/opal-npu2-141-142-143.rst b/doc/opal-api/opal-npu2-141-142-143.rst
new file mode 100644
index 0000000..541051c
--- /dev/null
+++ b/doc/opal-api/opal-npu2-141-142-143.rst
@@ -0,0 +1,66 @@
+OPAL NPU2 calls
+================
+
+There are three OPAL calls for interacting with NPU2 devices: ::
+
+#define OPAL_NPU_INIT_CONTEXT			146
+#define OPAL_NPU_DESTROY_CONTEXT		147
+#define OPAL_NPU_MAP_LPAR			148
+
+These are used to setup and configure address translation services
+(ATS) for a given NVLink2 device. Note that in some documentation this
+is also referred to as extended translation services (XTS).
+
+Each NVLink2 supports multiple processes running on a GPU which issues
+requests for address translation. The NPU2 is responsible for
+completing the request by forwarding it to the Nest MMU (NMMU) along
+with the appropriate translation context (MSR/LPCR) bits. These bits
+are keyed off a 20-bit process ID (PASID/PID) which is identical to
+the PID used on the processor.
+
+The OPAL calls documented here are used to setup/destroy the
+appropriate context for a given process on a given NVLink2 device.
+
+OPAL_NPU_INIT_CONTEXT
+---------------------
+
+Parameters: ::
+
+	uint64_t phb_id
+	int pasid
+	uint64_t msr
+	uint64_t bdf
+
+Allocates a new context ID and sets up the given PASID/PID to be
+associated with the supplied MSR for the LPAR mapped to the given GPU
+BDF. MSR should only contain the bits required for NPU2 address
+lookups - i.e. MSR DR/HV/PR/SF.
+
+Returns the context ID on success, ``OPAL_RESOURCE`` if no contexts are
+free, ``OPAL_UNSUPPORTED`` for unsupported MSR bits, or
+``OPAL_PARAMETER`` for a bad PHB or a BDF with no LPAR mapping.
+
+OPAL_NPU_DESTROY_CONTEXT
+------------------------
+
+Parameters: ::
+
+	uint64_t phb_id
+	uint64_t id
+
+Destroys a previously allocated context ID. This may cause further
+translation requests from the GPU to fail.
+
+OPAL_NPU_MAP_LPAR
+-----------------
+
+Parameters: ::
+
+	uint64_t phb_id
+	uint64_t bdf
+	uint64_t lparid
+	uint64_t lpcr
+
+Associates the given GPU BDF with a particular LPAR and LPCR
+bits. Hash mode ATS is currently unsupported so lpcr should be set
+to 0.
diff --git a/hw/npu2.c b/hw/npu2.c
index 0215570..9857539 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -763,7 +763,7 @@ static void npu2_hw_init(struct npu2 *p)
 
 	/* Enable XTS retry mode */
 	val = npu2_read(p, NPU2_XTS_CFG);
-	npu2_write(p, NPU2_XTS_CFG, val | NPU2_XTS_CFG_TRY_ATR_RO);
+	npu2_write(p, NPU2_XTS_CFG, val | NPU2_XTS_CFG_MMIOSD | NPU2_XTS_CFG_TRY_ATR_RO);
 }
 
 static int64_t npu2_map_pe_dma_window_real(struct phb *phb,
@@ -1582,3 +1582,231 @@ void probe_npu2(void)
 	dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex")
 		npu2_create_phb(np);
 }
+
+/*
+ * Walk table_size entries starting at table_addr, stride apart, looking
+ * for the first whose masked contents equal *value. On a hit the raw entry
+ * is stored in *value and its index returned; otherwise returns -1.
+ */
+static int npu_table_search(struct npu2 *p, uint64_t table_addr, int stride,
+			    int table_size, uint64_t *value, uint64_t mask)
+{
+	int idx = 0;
+
+	assert(value);
+	while (idx < table_size) {
+		uint64_t entry = npu2_read(p, table_addr + idx*stride);
+
+		if ((entry & mask) == *value) {
+			*value = entry;
+			return idx;
+		}
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * Set up a translation context for pasid on the GPU at bdf: pick a
+ * free XTS_PID_MAP slot and program it with the LPARSHORT mapped to
+ * bdf and the given MSR bits. Returns the slot index or an OPAL error.
+ */
+#define NPU2_VALID_ATS_MSR_BITS (MSR_DR | MSR_HV | MSR_PR | MSR_SF)
+static int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
+				     uint64_t bdf)
+{
+	struct phb *phb = pci_get_phb(phb_id);
+	struct npu2 *p = phb_to_npu2(phb);
+	uint64_t bdf_entry, free_entry = 0, ctx_entry;
+	int ctx_id, lparshort;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2)
+		return OPAL_PARAMETER;
+
+	/*
+	 * Callers must mask msr down to the supported bits; anything else
+	 * is rejected so the remaining bits can be given meaning later.
+	 */
+	if (msr & ~NPU2_VALID_ATS_MSR_BITS)
+		return OPAL_UNSUPPORTED;
+
+	lock(&p->lock);
+
+	/* The LPARSHORT lives in the XTS_BDF_MAP entry whose BDF field
+	 * matches bdf, so look that entry up first. */
+	bdf_entry = SETFIELD(NPU2_XTS_BDF_MAP_BDF, 0ul, bdf);
+	if (npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
+			     &bdf_entry, NPU2_XTS_BDF_MAP_BDF) < 0) {
+		NPU2ERR(p, "LPARID not associated with any GPU\n");
+		ctx_id = OPAL_PARAMETER;
+		goto out;
+	}
+
+	lparshort = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, bdf_entry);
+	NPU2DBG(p, "Found LPARSHORT = 0x%x for BDF = 0x%03llx\n", lparshort,
+		bdf);
+
+	/*
+	 * An all-zero XTS_PID_MAP entry is unused; claim the first one.
+	 */
+	ctx_id = npu_table_search(p, NPU2_XTS_PID_MAP, 0x20,
+				  NPU2_XTS_PID_MAP_SIZE, &free_entry, -1UL);
+	if (ctx_id < 0) {
+		NPU2ERR(p, "No XTS contexts available\n");
+		ctx_id = OPAL_RESOURCE;
+		goto out;
+	}
+
+	/* Mark the entry valid for both real and virtual addresses */
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_VALID_ATRGPA0, 0UL, 1);
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_VALID_ATRGPA1, ctx_entry, 1);
+
+	/* Have TLBIE/MMIOSD forwarded for this entry */
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_VALID_ATSD, ctx_entry, 1);
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_LPARSHORT, ctx_entry,
+			     lparshort);
+
+	/* Reduce each supported MSR bit to 0/1 and store it in its field */
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_MSR_DR, ctx_entry,
+			     (msr & MSR_DR) != 0);
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_MSR_HV, ctx_entry,
+			     (msr & MSR_HV) != 0);
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_MSR_PR, ctx_entry,
+			     (msr & MSR_PR) != 0);
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_MSR_SF, ctx_entry,
+			     (msr & MSR_SF) != 0);
+
+	/* Last field: the PID/PASID this context belongs to */
+	ctx_entry = SETFIELD(NPU2_XTS_PID_MAP_PASID, ctx_entry, pasid);
+
+	/* Program the slot; its index is what gets returned to the caller */
+	NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", ctx_id, ctx_entry);
+	npu2_write(p, NPU2_XTS_PID_MAP + ctx_id*0x20, ctx_entry);
+
+out:
+	unlock(&p->lock);
+	return ctx_id;
+}
+opal_call(OPAL_NPU_INIT_CONTEXT, opal_npu_init_context, 4);
+
+/* Destroy the context of PASID pid on the GPU at bdf by zeroing its
+ * XTS_PID_MAP entry. Returns 0, or OPAL_PARAMETER if nothing matches. */
+static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf)
+{
+	struct phb *phb = pci_get_phb(phb_id);
+	struct npu2 *p = phb_to_npu2(phb);
+	uint64_t xts_bdf, xts_bdf_pid, lparshort;
+	int id, rc = 0;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2)
+		return OPAL_PARAMETER;
+
+	lock(&p->lock);
+	/* Need to find lparshort for this bdf */
+	xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_BDF, 0ul, bdf);
+	if (npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
+			     &xts_bdf, NPU2_XTS_BDF_MAP_BDF) < 0) {
+		NPU2ERR(p, "LPARID not associated with any GPU\n");
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+
+	lparshort = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
+	NPU2DBG(p, "Found LPARSHORT = 0x%llx destroy context for BDF = 0x%03llx PID = 0x%llx\n",
+		lparshort, bdf, pid);
+
+	/* Now find the entry in the bdf/pid table */
+	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_LPARSHORT, 0ul, lparshort);
+	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_PASID, xts_bdf_pid, pid);
+	id = npu_table_search(p, NPU2_XTS_PID_MAP, 0x20, NPU2_XTS_PID_MAP_SIZE, &xts_bdf_pid,
+			      NPU2_XTS_PID_MAP_LPARSHORT | NPU2_XTS_PID_MAP_PASID);
+	if (id < 0) {
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+
+	/* And zero the entry */
+	npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0);
+out:	/* reached with p->lock held on success and failure alike */
+	unlock(&p->lock);
+	return rc;
+}
+opal_call(OPAL_NPU_DESTROY_CONTEXT, opal_npu_destroy_context, 3);
+
+/* Map GPU bdf to lparid in XTS_BDF_MAP (radix only, so lpcr must be 0).
+ * Returns OPAL_SUCCESS, OPAL_PARAMETER (bad PHB or no NVLink for bdf),
+ * OPAL_UNSUPPORTED (lpcr != 0) or OPAL_RESOURCE (no free table entry). */
+static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
+			     uint64_t lpcr)
+{
+	struct phb *phb = pci_get_phb(phb_id);
+	struct npu2 *p = phb_to_npu2(phb);
+	struct npu2_dev *ndev = NULL;
+	uint64_t xts_bdf_lpar;
+	/* rc holds negative OPAL error codes, so it is int like the return */
+	int i, id, rc = OPAL_SUCCESS;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2)
+		return OPAL_PARAMETER;
+
+	if (lpcr)
+		/* The LPCR bits are only required for hash based ATS,
+		 * which we don't currently support but may need to in
+		 * future. */
+		return OPAL_UNSUPPORTED;
+
+	lock(&p->lock);
+
+	/* Reuse the valid entry already holding this lparid, if there is one */
+	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_VALID, 0UL, 1);
+	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_LPARID, xts_bdf_lpar, lparid);
+	id = npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
+			      &xts_bdf_lpar,
+			      NPU2_XTS_BDF_MAP_VALID |
+			      NPU2_XTS_BDF_MAP_LPARID);
+	if (id < 0) {
+		/* No existing mapping found, find space for a new one */
+		xts_bdf_lpar = 0;
+		id = npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
+				      &xts_bdf_lpar, -1UL);
+	}
+
+	if (id < 0) {
+		/* Unable to find a free mapping */
+		NPU2ERR(p, "No free XTS_BDF[] entry\n");
+		rc = OPAL_RESOURCE;
+		goto out;
+	}
+
+	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_VALID, 0UL, 1);
+	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BDF, xts_bdf_lpar, bdf);
+
+	/* We only support radix for the moment */
+	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_XLAT, xts_bdf_lpar, 0x3);
+	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_LPARID, xts_bdf_lpar, lparid);
+
+	/* Need to find an NVLink to send the ATSDs for this device over */
+	for (i = 0; i < p->total_devices; i++) {
+		if (p->devices[i].gpu_bdfn == bdf) {
+			ndev = &p->devices[i];
+			break;
+		}
+	}
+
+	if (!ndev) {
+		NPU2ERR(p, "Unable to find nvlink for bdf %llx\n", bdf);
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+
+	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_STACK, xts_bdf_lpar, 0x4 >> (ndev->index / 2));
+	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BRICK, xts_bdf_lpar, (ndev->index % 2));
+
+	NPU2DBG(p, "XTS_BDF_MAP[%03d] = 0x%08llx\n", id, xts_bdf_lpar);
+	npu2_write(p, NPU2_XTS_BDF_MAP + id*8, xts_bdf_lpar);
+
+out:
+	unlock(&p->lock);
+	return rc;
+}
+opal_call(OPAL_NPU_MAP_LPAR, opal_npu_map_lpar, 4);
diff --git a/include/opal-api.h b/include/opal-api.h
index 6adb564..7966200 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -201,7 +201,10 @@
 #define OPAL_XIVE_RESERVED3			143
 #define OPAL_XIVE_RESERVED4			144
 #define OPAL_SIGNAL_SYSTEM_RESET		145
-#define OPAL_LAST				145
+#define OPAL_NPU_INIT_CONTEXT			146
+#define OPAL_NPU_DESTROY_CONTEXT		147
+#define OPAL_NPU_MAP_LPAR			148
+#define OPAL_LAST				148
 
 /* Device tree flags */
 
-- 
2.1.4



More information about the Skiboot mailing list