[Skiboot] [PATCH v2 2/3] core/pci: Support virtual device

Russell Currey ruscur at russell.cc
Fri Sep 2 13:15:48 AEST 2016


On Thu, 2016-08-11 at 12:12 +1000, Gavin Shan wrote:
> The NVLinks (v1 and v2 to be supported in future) are exposed to
> Linux kernel by emulated PCI devices (aka PCI virtual devices).
> Currently, the implementation is covered by NVLink driver (npu.c),
> meaning npu2.c will have similar implementation though it will be
> totally duplicated with that in npu.c.
> 
> This supports PCI virtual device in the generic layer so that it
> can be shared by all NVLink drivers. The design is highlighted as:
> 
>    * There are 3 config spaces for every PCI virtual device, corresponds
>      to the cached config space, readonly space, write-1-clear space.
>    * Reuse PCI config register filter mechanism to allow NVLink driver
>      to emulate the access to the designated config registers. The config
>      values are fetched from or written to the cached config space when
>      the config registers aren't covered by filter.
> 
> Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
> ---

A few minor comments, but otherwise:

Reviewed-by: Russell Currey <ruscur at russell.cc>

>  core/Makefile.inc  |   6 +-
>  core/pci-virt.c    | 260
> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/pci-virt.h |  85 ++++++++++++++++++
>  include/pci.h      |   1 +
>  4 files changed, 349 insertions(+), 3 deletions(-)
>  create mode 100644 core/pci-virt.c
>  create mode 100644 include/pci-virt.h
> 
> diff --git a/core/Makefile.inc b/core/Makefile.inc
> index 13b287c..be59bca 100644
> --- a/core/Makefile.inc
> +++ b/core/Makefile.inc
> @@ -3,9 +3,9 @@
>  SUBDIRS += core
>  CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
>  CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o
> -CORE_OBJS += timebase.o opal-msg.o pci.o pci-slot.o pcie-slot.o pci-opal.o
> -CORE_OBJS += fast-reboot.o device.o exceptions.o trace.o affinity.o vpd.o
> -CORE_OBJS += hostservices.o platform.o nvram.o nvram-format.o hmi.o
> +CORE_OBJS += timebase.o opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
> +CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o
> affinity.o
> +CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
>  CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
>  CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
>  
> diff --git a/core/pci-virt.c b/core/pci-virt.c
> new file mode 100644
> index 0000000..570c6e8
> --- /dev/null
> +++ b/core/pci-virt.c
> @@ -0,0 +1,260 @@
> +/* Copyright 2013-2016 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <skiboot.h>
> +#include <pci.h>
> +#include <pci-virt.h>
> +
> +void pci_virt_cfg_read_raw(struct pci_virt_device *pvd,
> +			   uint32_t space, uint32_t offset,
> +			   uint32_t size, uint32_t *data)
> +{
> +	uint32_t i;
> +
> +	if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space])
> +		return;
> +
> +	for (*data = 0, i = 0; i < size; i++)
> +		*data |= ((uint32_t)(pvd->config[space][offset + i]) << (i *
> 8));
> +}
> +
> +void pci_virt_cfg_write_raw(struct pci_virt_device *pvd,
> +			    uint32_t space, uint32_t offset,
> +			    uint32_t size, uint32_t data)
> +{
> +	int i;

This should probably be uint32_t like "i" in pci_virt_cfg_read_raw

> +
> +	if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space])
> +		return;
> +
> +	for (i = 0; i < size; i++) {
> +		pvd->config[space][offset + i] = data;
> +		data = (data >> 8);
> +	}
> +}
> +
> +static struct pci_cfg_reg_filter *pci_virt_find_filter(
> +					struct pci_virt_device *pvd,
> +					uint32_t start, uint32_t len)
> +{
> +	struct pci_cfg_reg_filter *pcrf;
> +
> +	if (!pvd || !len || start >= pvd->cfg_size)
> +		return NULL;
> +
> +	list_for_each(&pvd->pcrf, pcrf, link) {
> +		if (start >= pcrf->start &&
> +		    (start + len) <= (pcrf->start + len))
> +			return pcrf;
> +	}
> +
> +	return NULL;
> +}
> +
> +struct pci_cfg_reg_filter *pci_virt_add_filter(struct pci_virt_device *pvd,
> +					       uint32_t start,
> +					       uint32_t len,
> +					       uint32_t flags,
> +					       pci_cfg_reg_func func,
> +					       void *data)
> +{
> +	struct pci_cfg_reg_filter *pcrf;
> +
> +	if (!pvd || !len || (start + len) >= pvd->cfg_size)
> +		return NULL;
> +	if (!(flags & PCI_REG_FLAG_MASK))
> +		return NULL;
> +
> +	pcrf = pci_virt_find_filter(pvd, start, len);
> +	if (pcrf) {
> +		prlog(PR_ERR, "%s: Filter [%x, %x] overlapped with [%x,
> %x]\n",
> +		      __func__, start, len, pcrf->start, pcrf->len);
> +		return NULL;
> +	}
> +
> +	pcrf = zalloc(sizeof(*pcrf));
> +	if (!pcrf) {
> +		prlog(PR_ERR, "%s: Out of memory!\n", __func__);
> +		return NULL;
> +	}
> +
> +	pcrf->start = start;
> +	pcrf->len   = len;
> +	pcrf->flags = flags;
> +	pcrf->func  = func;
> +	pcrf->data  = data;
> +	list_add_tail(&pvd->pcrf, &pcrf->link);
> +
> +	return pcrf;
> +}
> +
> +struct pci_virt_device *pci_virt_find_device(struct phb *phb,
> +					     uint32_t bdfn)
> +{
> +	struct pci_virt_device *pvd;
> +
> +	list_for_each(&phb->virt_devices, pvd, node) {
> +		if (pvd->bdfn == bdfn)
> +			return pvd;
> +	}
> +
> +	return NULL;
> +}
> +
> +static inline bool pci_virt_cfg_valid(struct pci_virt_device *pvd,
> +				      uint32_t offset, uint32_t size)
> +{
> +	if ((offset + size) > pvd->cfg_size)
> +		return false;
> +
> +	if (!size || (size > 4))
> +		return false;
> +
> +	if ((size & (size - 1)) || (offset & (size - 1)))
> +		return false;
> +
> +	return true;
> +}
> +
> +int64_t pci_virt_cfg_read(struct phb *phb, uint32_t bdfn,
> +			  uint32_t offset, uint32_t size,
> +			  uint32_t *data)
> +{
> +	struct pci_virt_device *pvd;
> +	struct pci_cfg_reg_filter *pcrf;
> +	int64_t ret = OPAL_SUCCESS;
> +
> +	*data = 0xffffffff;

Out of curiosity what does setting this achieve?

> +
> +	/* Search for PCI virtual device */
> +	pvd = pci_virt_find_device(phb, bdfn);
> +	if (!pvd)
> +		return OPAL_PARAMETER;
> +
> +	/* Check if config address is valid or not */
> +	if (!pci_virt_cfg_valid(pvd, offset, size))
> +		return OPAL_PARAMETER;
> +
> +	/* The value is fetched from the normal config space when the
> +	 * trap handler returns OPAL_PARTIAL. Otherwise, the trap handler
> +	 * should provide the return value.
> +	 */
> +	pcrf = pci_virt_find_filter(pvd, offset, size);
> +	if (!pcrf || !pcrf->func || !(pcrf->flags & PCI_REG_FLAG_READ))
> +		goto out;
> +
> +	ret = pcrf->func(pvd, pcrf, offset, size, data, false);
> +	if (ret != OPAL_PARTIAL)
> +		return ret;
> +out:
> +	pci_virt_cfg_read_raw(pvd, PCI_VIRT_CFG_NORMAL, offset, size, data);
> +	return OPAL_SUCCESS;
> +}
> +
> +int64_t pci_virt_cfg_write(struct phb *phb, uint32_t bdfn,
> +			   uint32_t offset, uint32_t size,
> +			   uint32_t data)
> +{
> +	struct pci_virt_device *pvd;
> +	struct pci_cfg_reg_filter *pcrf;
> +	uint32_t val, v, r, c, i;
> +	int64_t ret = OPAL_SUCCESS;

I don't think this needs to be initialised

> +
> +	/* Search for PCI virtual device */
> +	pvd = pci_virt_find_device(phb, bdfn);
> +	if (!pvd)
> +		return OPAL_PARAMETER;
> +
> +	/* Check if config address is valid or not */
> +	if (!pci_virt_cfg_valid(pvd, offset, size))
> +		return OPAL_PARAMETER;
> +
> +	/* The value is written to the config space if the trap handler
> +	 * returns OPAL_PARTIAL. Otherwise, the value to be written is
> +	 * dropped.
> +	 */
> +	pcrf = pci_virt_find_filter(pvd, offset, size);
> +	if (!pcrf || !pcrf->func || !(pcrf->flags & PCI_REG_FLAG_WRITE))
> +		goto out;
> +
> +	ret = pcrf->func(pvd, pcrf, offset, size, &data, true);
> +	if (ret != OPAL_PARTIAL)
> +		return ret;
> +out:
> +	val = data;
> +	for (i = 0; i < size; i++) {
> +		PCI_VIRT_CFG_NORMAL_RD(pvd, offset + i, 1, &v);
> +		PCI_VIRT_CFG_RDONLY_RD(pvd, offset + i, 1, &r);
> +		PCI_VIRT_CFG_W1CLR_RD(pvd, offset + i, 1, &c);
> +
> +		/* Drop read-only bits */
> +		val &= ~(r << (i * 8));
> +		val |= (r & v) << (i * 8);
> +
> +		/* Drop W1C bits */
> +		val &= ~(val & ((c & v) << (i * 8)));
> +	}
> +
> +	PCI_VIRT_CFG_NORMAL_WR(pvd, offset, size, val);
> +	return OPAL_SUCCESS;
> +}
> +
> +struct pci_virt_device *pci_virt_add_device(struct phb *phb, uint32_t bdfn,
> +					    uint32_t cfg_size, void *data)
> +{
> +	struct pci_virt_device *pvd;
> +	uint8_t *cfg;
> +	uint32_t i;
> +
> +	/* The standard config header size is 64 bytes */
> +	if (!phb || (bdfn & 0xffff0000) || (cfg_size < 64))
> +		return NULL;
> +
> +	/* Check if the bdfn is available */
> +	pvd = pci_virt_find_device(phb, bdfn);
> +	if (pvd) {
> +		prlog(PR_ERR, "%s: bdfn 0x%x was reserved\n",
> +		      __func__, bdfn);
> +		return NULL;
> +	}
> +
> +	/* Populate the PCI virtual device */
> +	pvd = zalloc(sizeof(*pvd));
> +	if (!pvd) {
> +		prlog(PR_ERR, "%s: Cannot alloate PCI virtual device
> (0x%x)\n",
> +		      __func__, bdfn);

Typo here, "alloate" should be "allocate".

> +		return NULL;
> +	}
> +
> +	cfg = zalloc(cfg_size * PCI_VIRT_CFG_MAX);
> +	if (!cfg) {
> +		prlog(PR_ERR, "%s: Cannot allocate config space (0x%x)\n",
> +		      __func__, bdfn);
> +		free(pvd);
> +		return NULL;
> +	}
> +
> +	for (i = 0; i < PCI_VIRT_CFG_MAX; i++, cfg += cfg_size)
> +		pvd->config[i] = cfg;
> +
> +	pvd->bdfn     = bdfn;
> +	pvd->cfg_size = cfg_size;
> +	pvd->data     = data;
> +	list_head_init(&pvd->pcrf);
> +	list_add_tail(&phb->virt_devices, &pvd->node);
> +
> +	return pvd;
> +}
> diff --git a/include/pci-virt.h b/include/pci-virt.h
> new file mode 100644
> index 0000000..7c787cf
> --- /dev/null
> +++ b/include/pci-virt.h
> @@ -0,0 +1,85 @@
> +/* Copyright 2013-2016 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef __PCI_VIRT_H
> +#define __PCI_VIRT_H
> +
> +#include <ccan/list/list.h>
> +
> +enum {
> +	PCI_VIRT_CFG_NORMAL,
> +	PCI_VIRT_CFG_RDONLY,
> +	PCI_VIRT_CFG_W1CLR,
> +	PCI_VIRT_CFG_MAX
> +};
> +
> +struct pci_virt_device {
> +	uint32_t		bdfn;
> +	uint32_t		cfg_size;
> +	uint8_t			*config[PCI_VIRT_CFG_MAX];
> +	struct list_head	pcrf;
> +	struct list_node	node;
> +	void			*data;
> +};
> +
> +extern void pci_virt_cfg_read_raw(struct pci_virt_device *pvd,
> +				  uint32_t space, uint32_t offset,
> +				  uint32_t size, uint32_t *data);
> +extern void pci_virt_cfg_write_raw(struct pci_virt_device *pvd,
> +				   uint32_t space, uint32_t offset,
> +				   uint32_t size, uint32_t data);
> +extern struct pci_cfg_reg_filter *pci_virt_add_filter(
> +					struct pci_virt_device *pvd,
> +					uint32_t start, uint32_t len,
> +					uint32_t flags, pci_cfg_reg_func
> func,
> +					void *data);
> +extern int64_t pci_virt_cfg_read(struct phb *phb, uint32_t bdfn,
> +				 uint32_t offset, uint32_t size,
> +				 uint32_t *data);
> +extern int64_t pci_virt_cfg_write(struct phb *phb, uint32_t bdfn,
> +				  uint32_t offset, uint32_t size,
> +				  uint32_t data);
> +extern struct pci_virt_device *pci_virt_find_device(struct phb *phb,
> +						    uint32_t bdfn);
> +extern struct pci_virt_device *pci_virt_add_device(struct phb *phb,
> +						   uint32_t bdfn,
> +						   uint32_t cfg_size,
> +						   void *data);
> +
> +/* Config space accessors */
> +#define PCI_VIRT_CFG_NORMAL_RD(d, o, s, v)	\
> +	pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_NORMAL, o, s, v)
> +#define PCI_VIRT_CFG_NORMAL_WR(d, o, s, v)	\
> +	pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_NORMAL, o, s, v)
> +#define PCI_VIRT_CFG_RDONLY_RD(d, o, s, v)	\
> +	pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_RDONLY, o, s, v)
> +#define PCI_VIRT_CFG_RDONLY_WR(d, o, s, v)	\
> +	pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_RDONLY, o, s, v)
> +#define PCI_VIRT_CFG_W1CLR_RD(d, o, s, v)	\
> +	pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_W1CLR, o, s, v)
> +#define PCI_VIRT_CFG_W1CLR_WR(d, o, s, v)	\
> +	pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_W1CLR, o, s, v)
> +
> +#define PCI_VIRT_CFG_INIT(d, o, s, v, r, w)		\
> +	do {						\
> +		PCI_VIRT_CFG_NORMAL_WR(d, o, s, v);	\
> +		PCI_VIRT_CFG_RDONLY_WR(d, o, s, r);	\
> +		PCI_VIRT_CFG_W1CLR_WR(d, o, s, w);	\
> +	} while (0)
> +#define PCI_VIRT_CFG_INIT_RO(d, o, s, v)		\
> +	PCI_VIRT_CFG_INIT(d, o, s, v, 0xffffffff, 0)
> +
> +#endif /* __VIRT_PCI_H */

The ifdef is __PCI_VIRT_H but the endif refers to __VIRT_PCI_H

> diff --git a/include/pci.h b/include/pci.h
> index 7350f5c..9df5a69 100644
> --- a/include/pci.h
> +++ b/include/pci.h
> @@ -322,6 +322,7 @@ struct phb {
>  	enum phb_type		phb_type;
>  	struct lock		lock;
>  	struct list_head	devices;
> +	struct list_head	virt_devices;
>  	const struct phb_ops	*ops;
>  	struct pci_lsi_state	lstate;
>  	uint32_t		mps;



More information about the Skiboot mailing list