[Skiboot] [PATCH v2 2/3] core/pci: Support virtual device

Gavin Shan gwshan at linux.vnet.ibm.com
Thu Aug 11 12:12:37 AEST 2016


The NVLinks (v1, and v2 to be supported in the future) are exposed to
the Linux kernel as emulated PCI devices (aka PCI virtual devices).
Currently, the emulation is implemented in the NVLink driver (npu.c),
which means npu2.c would need a similar implementation that largely
duplicates the one in npu.c.

This supports PCI virtual device in the generic layer so that it
can be shared by all NVLink drivers. The design is highlighted as:

   * There are 3 config spaces for every PCI virtual device, corresponding
     to the cached config space, read-only space and write-1-clear space.
   * Reuse PCI config register filter mechanism to allow NVLink driver
     to emulate the access to the designated config registers. The config
     values are fetched from or written to the cached config space when
     the config registers aren't covered by filter.

Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
---
 core/Makefile.inc  |   6 +-
 core/pci-virt.c    | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/pci-virt.h |  85 ++++++++++++++++++
 include/pci.h      |   1 +
 4 files changed, 349 insertions(+), 3 deletions(-)
 create mode 100644 core/pci-virt.c
 create mode 100644 include/pci-virt.h

diff --git a/core/Makefile.inc b/core/Makefile.inc
index 13b287c..be59bca 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -3,9 +3,9 @@
 SUBDIRS += core
 CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
 CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o
-CORE_OBJS += timebase.o opal-msg.o pci.o pci-slot.o pcie-slot.o pci-opal.o
-CORE_OBJS += fast-reboot.o device.o exceptions.o trace.o affinity.o vpd.o
-CORE_OBJS += hostservices.o platform.o nvram.o nvram-format.o hmi.o
+CORE_OBJS += timebase.o opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
+CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
+CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
 CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
 CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
 
diff --git a/core/pci-virt.c b/core/pci-virt.c
new file mode 100644
index 0000000..570c6e8
--- /dev/null
+++ b/core/pci-virt.c
@@ -0,0 +1,260 @@
+/* Copyright 2013-2016 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <pci.h>
+#include <pci-virt.h>
+
+/* Little-endian read of 1-4 bytes from a config space; no-op when invalid */
+void pci_virt_cfg_read_raw(struct pci_virt_device *pvd, uint32_t space,
+			   uint32_t offset, uint32_t size, uint32_t *data)
+{
+	uint32_t i;
+
+	if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space] || size > 4 ||
+	    offset > pvd->cfg_size || size > pvd->cfg_size - offset)
+		return;
+	for (*data = 0, i = 0; i < size; i++)
+		*data |= ((uint32_t)(pvd->config[space][offset + i]) << (i * 8));
+}
+
+/* Little-endian write of the low 1-4 bytes of data into a config space.
+ * Nothing is written when the space or the byte range is invalid.
+ */
+void pci_virt_cfg_write_raw(struct pci_virt_device *pvd, uint32_t space,
+			    uint32_t offset, uint32_t size, uint32_t data)
+{
+	uint32_t i;
+
+	if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space] || size > 4 ||
+	    offset > pvd->cfg_size || size > pvd->cfg_size - offset)
+		return;
+	for (i = 0; i < size; i++, data >>= 8)
+		pvd->config[space][offset + i] = (uint8_t)data;
+}
+
+/* Return the filter whose range fully contains [start, start + len) */
+static struct pci_cfg_reg_filter *pci_virt_find_filter(
+					struct pci_virt_device *pvd,
+					uint32_t start, uint32_t len)
+{
+	struct pci_cfg_reg_filter *pcrf;
+
+	if (!pvd || !len || start >= pvd->cfg_size)
+		return NULL;
+
+	list_for_each(&pvd->pcrf, pcrf, link) {
+		if (start >= pcrf->start &&
+		    (start + len) <= (pcrf->start + pcrf->len))
+			return pcrf;
+	}
+	return NULL;
+}
+
+/* Add a filter on [start, start + len); NULL on bad args, overlap or OOM */
+struct pci_cfg_reg_filter *pci_virt_add_filter(struct pci_virt_device *pvd,
+					       uint32_t start, uint32_t len,
+					       uint32_t flags,
+					       pci_cfg_reg_func func,
+					       void *data)
+{
+	struct pci_cfg_reg_filter *pcrf;
+
+	/* A filter may end exactly at cfg_size; avoid start + len overflow */
+	if (!pvd || !len || !(flags & PCI_REG_FLAG_MASK) ||
+	    start >= pvd->cfg_size || len > pvd->cfg_size - start)
+		return NULL;
+
+	list_for_each(&pvd->pcrf, pcrf, link) {
+		if (start >= pcrf->start + pcrf->len ||
+		    pcrf->start >= start + len)
+			continue;	/* disjoint ranges */
+		prlog(PR_ERR, "%s: Filter [%x, %x] overlapped with [%x, %x]\n",
+		      __func__, start, len, pcrf->start, pcrf->len);
+		return NULL;
+	}
+
+	pcrf = zalloc(sizeof(*pcrf));
+	if (!pcrf) {
+		prlog(PR_ERR, "%s: Out of memory!\n", __func__);
+		return NULL;
+	}
+	pcrf->start = start;
+	pcrf->len   = len;
+	pcrf->flags = flags;
+	pcrf->func  = func;
+	pcrf->data  = data;
+	list_add_tail(&pvd->pcrf, &pcrf->link);
+	return pcrf;
+}
+
+/* Look up the virtual device registered on phb with the given bdfn */
+struct pci_virt_device *pci_virt_find_device(struct phb *phb,
+					     uint32_t bdfn)
+{
+	struct pci_virt_device *cur;
+
+	list_for_each(&phb->virt_devices, cur, node)
+		if (cur->bdfn == bdfn)
+			return cur;
+
+	return NULL;	/* no device carries this bdfn */
+}
+
+/* Accept only naturally aligned 1/2/4-byte accesses inside config space */
+static inline bool pci_virt_cfg_valid(struct pci_virt_device *pvd,
+				      uint32_t offset, uint32_t size)
+{
+	/* size must be 1, 2 or 4: non-zero, at most 4, and a power of two */
+	if (!size || (size > 4) || (size & (size - 1)))
+		return false;
+
+	/* Compare without computing offset + size, which could wrap around */
+	if (offset >= pvd->cfg_size || size > pvd->cfg_size - offset)
+		return false;
+
+	return !(offset & (size - 1));
+}
+
+/* Read a config register of the virtual device bdfn on phb. A read filter
+ * returned by pci_virt_find_filter() may supply the value; the cached
+ * (normal) config space is used otherwise or when it returns OPAL_PARTIAL.
+ */
+int64_t pci_virt_cfg_read(struct phb *phb, uint32_t bdfn,
+			  uint32_t offset, uint32_t size,
+			  uint32_t *data)
+{
+	struct pci_virt_device *vdev;
+	struct pci_cfg_reg_filter *filter;
+	int64_t rc;
+
+	/* All-ones is what the caller gets back when the access is rejected */
+	*data = 0xffffffff;
+
+	/* Reject unknown devices and malformed config addresses */
+	vdev = pci_virt_find_device(phb, bdfn);
+	if (!vdev)
+		return OPAL_PARAMETER;
+	if (!pci_virt_cfg_valid(vdev, offset, size))
+		return OPAL_PARAMETER;
+
+	/* Give a readable filter the first chance to produce the value */
+	filter = pci_virt_find_filter(vdev, offset, size);
+	if (filter && filter->func && (filter->flags & PCI_REG_FLAG_READ)) {
+		rc = filter->func(vdev, filter, offset, size, data, false);
+		if (rc != OPAL_PARTIAL)
+			return rc;
+	}
+
+	/* No usable filter, or it deferred to the cached config space */
+	pci_virt_cfg_read_raw(vdev, PCI_VIRT_CFG_NORMAL, offset, size, data);
+	return OPAL_SUCCESS;
+}
+
+/* Write a config register of the virtual device bdfn on phb. A write filter
+ * returned by pci_virt_find_filter() runs first and its result is final
+ * unless it is OPAL_PARTIAL; the value is then merged into the cached
+ * (normal) config space under the read-only and write-1-to-clear masks.
+ */
+int64_t pci_virt_cfg_write(struct phb *phb, uint32_t bdfn,
+			   uint32_t offset, uint32_t size,
+			   uint32_t data)
+{
+	struct pci_virt_device *pvd;
+	struct pci_cfg_reg_filter *pcrf;
+	uint32_t val, v, r, c, i, shift;
+	int64_t ret;
+
+	/* Reject unknown devices and malformed config addresses */
+	pvd = pci_virt_find_device(phb, bdfn);
+	if (!pvd || !pci_virt_cfg_valid(pvd, offset, size))
+		return OPAL_PARAMETER;
+
+	/* The filter gets &data, so it may adjust what is merged below */
+	pcrf = pci_virt_find_filter(pvd, offset, size);
+	if (pcrf && pcrf->func && (pcrf->flags & PCI_REG_FLAG_WRITE)) {
+		ret = pcrf->func(pvd, pcrf, offset, size, &data, true);
+		if (ret != OPAL_PARTIAL)
+			return ret;
+	}
+
+	/* Merge byte by byte, as the masks are stored per config byte */
+	val = data;
+	for (i = 0; i < size; i++) {
+		shift = i * 8;
+		PCI_VIRT_CFG_NORMAL_RD(pvd, offset + i, 1, &v);
+		PCI_VIRT_CFG_RDONLY_RD(pvd, offset + i, 1, &r);
+		PCI_VIRT_CFG_W1CLR_RD(pvd, offset + i, 1, &c);
+
+		/* Read-only bits keep their current value */
+		val &= ~(r << shift);
+		val |= (r & v) << shift;
+
+		/* W1C bits: writing 1 clears, writing 0 keeps the current bit */
+		val &= ~(c << shift);
+		val |= (c & v & ~(data >> shift)) << shift;
+	}
+
+	PCI_VIRT_CFG_NORMAL_WR(pvd, offset, size, val);
+	return OPAL_SUCCESS;
+}
+
+/* Create a virtual device on phb with zalloc'd config spaces.
+ * Returns NULL on bad arguments, an already used bdfn or out of memory.
+ */
+struct pci_virt_device *pci_virt_add_device(struct phb *phb, uint32_t bdfn,
+					    uint32_t cfg_size, void *data)
+{
+	struct pci_virt_device *pvd;
+	uint8_t *cfg;
+	uint32_t i;
+
+	/* The standard config header size is 64 bytes */
+	if (!phb || (bdfn & 0xffff0000) || (cfg_size < 64))
+		return NULL;
+
+	/* Check if the bdfn is available */
+	if (pci_virt_find_device(phb, bdfn)) {
+		prlog(PR_ERR, "%s: bdfn 0x%x was reserved\n", __func__, bdfn);
+		return NULL;
+	}
+
+	/* Populate the PCI virtual device */
+	pvd = zalloc(sizeof(*pvd));
+	if (!pvd) {
+		prlog(PR_ERR, "%s: Cannot allocate PCI virtual device (0x%x)\n",
+		      __func__, bdfn);
+		return NULL;
+	}
+
+	/* One buffer backs all spaces; widen so the size cannot wrap */
+	cfg = zalloc((uint64_t)cfg_size * PCI_VIRT_CFG_MAX);
+	if (!cfg) {
+		prlog(PR_ERR, "%s: Cannot allocate config space (0x%x)\n",
+		      __func__, bdfn);
+		free(pvd);
+		return NULL;
+	}
+	for (i = 0; i < PCI_VIRT_CFG_MAX; i++, cfg += cfg_size)
+		pvd->config[i] = cfg;
+
+	pvd->bdfn     = bdfn;
+	pvd->cfg_size = cfg_size;
+	pvd->data     = data;
+	list_head_init(&pvd->pcrf);
+	list_add_tail(&phb->virt_devices, &pvd->node);
+	return pvd;
+}
diff --git a/include/pci-virt.h b/include/pci-virt.h
new file mode 100644
index 0000000..7c787cf
--- /dev/null
+++ b/include/pci-virt.h
@@ -0,0 +1,85 @@
+/* Copyright 2013-2016 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PCI_VIRT_H
+#define __PCI_VIRT_H
+
+#include <ccan/list/list.h>
+
+enum {
+	PCI_VIRT_CFG_NORMAL,	/* Cached config values returned on reads */
+	PCI_VIRT_CFG_RDONLY,	/* Mask of bits kept unchanged on writes */
+	PCI_VIRT_CFG_W1CLR,	/* Mask of write-1-to-clear bits */
+	PCI_VIRT_CFG_MAX	/* Number of config spaces per device */
+};
+
+struct pci_virt_device {
+	uint32_t		bdfn;		/* Lookup key; upper 16 bits zero */
+	uint32_t		cfg_size;	/* Bytes in each config space */
+	uint8_t			*config[PCI_VIRT_CFG_MAX]; /* One buffer per space */
+	struct list_head	pcrf;		/* Config register filters */
+	struct list_node	node;		/* Link in phb->virt_devices */
+	void			*data;		/* Pointer given to add_device */
+};
+
+/* Raw accessors: byte-wise little-endian access to one config space */
+void pci_virt_cfg_read_raw(struct pci_virt_device *pvd, uint32_t space,
+			   uint32_t offset, uint32_t size, uint32_t *data);
+void pci_virt_cfg_write_raw(struct pci_virt_device *pvd, uint32_t space,
+			    uint32_t offset, uint32_t size, uint32_t data);
+
+/* Register a config register filter on a virtual device */
+struct pci_cfg_reg_filter *pci_virt_add_filter(struct pci_virt_device *pvd,
+					       uint32_t start, uint32_t len,
+					       uint32_t flags,
+					       pci_cfg_reg_func func,
+					       void *data);
+
+/* Filtered config accessors, looked up by PHB and bdfn */
+int64_t pci_virt_cfg_read(struct phb *phb, uint32_t bdfn, uint32_t offset,
+			  uint32_t size, uint32_t *data);
+int64_t pci_virt_cfg_write(struct phb *phb, uint32_t bdfn, uint32_t offset,
+			   uint32_t size, uint32_t data);
+
+/* Device lookup and creation */
+struct pci_virt_device *pci_virt_find_device(struct phb *phb, uint32_t bdfn);
+struct pci_virt_device *pci_virt_add_device(struct phb *phb, uint32_t bdfn,
+					    uint32_t cfg_size, void *data);
+
+/* Config space accessors: raw read/write of one of the three spaces */
+#define PCI_VIRT_CFG_NORMAL_RD(d, o, s, v)	\
+	pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_NORMAL, o, s, v)
+#define PCI_VIRT_CFG_NORMAL_WR(d, o, s, v)	\
+	pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_NORMAL, o, s, v)
+#define PCI_VIRT_CFG_RDONLY_RD(d, o, s, v)	\
+	pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_RDONLY, o, s, v)
+#define PCI_VIRT_CFG_RDONLY_WR(d, o, s, v)	\
+	pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_RDONLY, o, s, v)
+#define PCI_VIRT_CFG_W1CLR_RD(d, o, s, v)	\
+	pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_W1CLR, o, s, v)
+#define PCI_VIRT_CFG_W1CLR_WR(d, o, s, v)	\
+	pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_W1CLR, o, s, v)
+/* Set value (v), read-only mask (r) and W1C mask (w); _RO makes all bits RO */
+#define PCI_VIRT_CFG_INIT(d, o, s, v, r, w)		\
+	do {						\
+		PCI_VIRT_CFG_NORMAL_WR(d, o, s, v);	\
+		PCI_VIRT_CFG_RDONLY_WR(d, o, s, r);	\
+		PCI_VIRT_CFG_W1CLR_WR(d, o, s, w);	\
+	} while (0)
+#define PCI_VIRT_CFG_INIT_RO(d, o, s, v)		\
+	PCI_VIRT_CFG_INIT(d, o, s, v, 0xffffffff, 0)
+
+#endif /* __PCI_VIRT_H */
diff --git a/include/pci.h b/include/pci.h
index 7350f5c..9df5a69 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -322,6 +322,7 @@ struct phb {
 	enum phb_type		phb_type;
 	struct lock		lock;
 	struct list_head	devices;
+	struct list_head	virt_devices;
 	const struct phb_ops	*ops;
 	struct pci_lsi_state	lstate;
 	uint32_t		mps;
-- 
2.1.0



More information about the Skiboot mailing list