[Skiboot] [PATCH v2 2/3] core/pci: Support virtual device
Russell Currey
ruscur at russell.cc
Fri Sep 2 13:15:48 AEST 2016
On Thu, 2016-08-11 at 12:12 +1000, Gavin Shan wrote:
> The NVLinks (v1 and v2 to be supported in future) are exposed to
> Linux kernel by emulated PCI devices (aka PCI virtual devices).
> Currently, the implementation is covered by NVLink driver (npu.c),
> meaning npu2.c will have similar implementation though it will be
> totally duplicated with that in npu.c.
>
> This supports PCI virtual device in the generic layer so that it
> can be shared by all NVLink drivers. The design is highlighted as:
>
> * There are 3 config spaces for every PCI virtual device, corresponds
> to the cached config space, readonly space, write-1-clear space.
> * Reuse PCI config register filter mechanism to allow NVLink driver
> to emulate the access to the designated config registers. The config
> values are fetched from or written to the cached config space when
> the config registers aren't covered by filter.
>
> Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
> ---
A few minor comments, but otherwise:
Reviewed-by: Russell Currey <ruscur at russell.cc>
> core/Makefile.inc | 6 +-
> core/pci-virt.c | 260
> +++++++++++++++++++++++++++++++++++++++++++++++++++++
> include/pci-virt.h | 85 ++++++++++++++++++
> include/pci.h | 1 +
> 4 files changed, 349 insertions(+), 3 deletions(-)
> create mode 100644 core/pci-virt.c
> create mode 100644 include/pci-virt.h
>
> diff --git a/core/Makefile.inc b/core/Makefile.inc
> index 13b287c..be59bca 100644
> --- a/core/Makefile.inc
> +++ b/core/Makefile.inc
> @@ -3,9 +3,9 @@
> SUBDIRS += core
> CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
> CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o
> -CORE_OBJS += timebase.o opal-msg.o pci.o pci-slot.o pcie-slot.o pci-opal.o
> -CORE_OBJS += fast-reboot.o device.o exceptions.o trace.o affinity.o vpd.o
> -CORE_OBJS += hostservices.o platform.o nvram.o nvram-format.o hmi.o
> +CORE_OBJS += timebase.o opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
> +CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o
> affinity.o
> +CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
> CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
> CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
>
> diff --git a/core/pci-virt.c b/core/pci-virt.c
> new file mode 100644
> index 0000000..570c6e8
> --- /dev/null
> +++ b/core/pci-virt.c
> @@ -0,0 +1,260 @@
> +/* Copyright 2013-2016 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <skiboot.h>
> +#include <pci.h>
> +#include <pci-virt.h>
> +
> +void pci_virt_cfg_read_raw(struct pci_virt_device *pvd,
> + uint32_t space, uint32_t offset,
> + uint32_t size, uint32_t *data)
> +{
> + uint32_t i;
> +
> + if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space])
> + return;
> +
> + for (*data = 0, i = 0; i < size; i++)
> + *data |= ((uint32_t)(pvd->config[space][offset + i]) << (i *
> 8));
> +}
> +
> +void pci_virt_cfg_write_raw(struct pci_virt_device *pvd,
> + uint32_t space, uint32_t offset,
> + uint32_t size, uint32_t data)
> +{
> + int i;
This should probably be uint32_t like "i" in pci_virt_cfg_read_raw
> +
> + if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space])
> + return;
> +
> + for (i = 0; i < size; i++) {
> + pvd->config[space][offset + i] = data;
> + data = (data >> 8);
> + }
> +}
> +
> +static struct pci_cfg_reg_filter *pci_virt_find_filter(
> + struct pci_virt_device *pvd,
> + uint32_t start, uint32_t len)
> +{
> + struct pci_cfg_reg_filter *pcrf;
> +
> + if (!pvd || !len || start >= pvd->cfg_size)
> + return NULL;
> +
> + list_for_each(&pvd->pcrf, pcrf, link) {
> + if (start >= pcrf->start &&
> + (start + len) <= (pcrf->start + len))
> + return pcrf;
> + }
> +
> + return NULL;
> +}
> +
> +struct pci_cfg_reg_filter *pci_virt_add_filter(struct pci_virt_device *pvd,
> + uint32_t start,
> + uint32_t len,
> + uint32_t flags,
> + pci_cfg_reg_func func,
> + void *data)
> +{
> + struct pci_cfg_reg_filter *pcrf;
> +
> + if (!pvd || !len || (start + len) >= pvd->cfg_size)
> + return NULL;
> + if (!(flags & PCI_REG_FLAG_MASK))
> + return NULL;
> +
> + pcrf = pci_virt_find_filter(pvd, start, len);
> + if (pcrf) {
> + prlog(PR_ERR, "%s: Filter [%x, %x] overlapped with [%x,
> %x]\n",
> + __func__, start, len, pcrf->start, pcrf->len);
> + return NULL;
> + }
> +
> + pcrf = zalloc(sizeof(*pcrf));
> + if (!pcrf) {
> + prlog(PR_ERR, "%s: Out of memory!\n", __func__);
> + return NULL;
> + }
> +
> + pcrf->start = start;
> + pcrf->len = len;
> + pcrf->flags = flags;
> + pcrf->func = func;
> + pcrf->data = data;
> + list_add_tail(&pvd->pcrf, &pcrf->link);
> +
> + return pcrf;
> +}
> +
> +struct pci_virt_device *pci_virt_find_device(struct phb *phb,
> + uint32_t bdfn)
> +{
> + struct pci_virt_device *pvd;
> +
> + list_for_each(&phb->virt_devices, pvd, node) {
> + if (pvd->bdfn == bdfn)
> + return pvd;
> + }
> +
> + return NULL;
> +}
> +
> +static inline bool pci_virt_cfg_valid(struct pci_virt_device *pvd,
> + uint32_t offset, uint32_t size)
> +{
> + if ((offset + size) > pvd->cfg_size)
> + return false;
> +
> + if (!size || (size > 4))
> + return false;
> +
> + if ((size & (size - 1)) || (offset & (size - 1)))
> + return false;
> +
> + return true;
> +}
> +
> +int64_t pci_virt_cfg_read(struct phb *phb, uint32_t bdfn,
> + uint32_t offset, uint32_t size,
> + uint32_t *data)
> +{
> + struct pci_virt_device *pvd;
> + struct pci_cfg_reg_filter *pcrf;
> + int64_t ret = OPAL_SUCCESS;
> +
> + *data = 0xffffffff;
Out of curiosity what does setting this achieve?
> +
> + /* Search for PCI virtual device */
> + pvd = pci_virt_find_device(phb, bdfn);
> + if (!pvd)
> + return OPAL_PARAMETER;
> +
> + /* Check if config address is valid or not */
> + if (!pci_virt_cfg_valid(pvd, offset, size))
> + return OPAL_PARAMETER;
> +
> + /* The value is fetched from the normal config space when the
> + * trap handler returns OPAL_PARTIAL. Otherwise, the trap handler
> + * should provide the return value.
> + */
> + pcrf = pci_virt_find_filter(pvd, offset, size);
> + if (!pcrf || !pcrf->func || !(pcrf->flags & PCI_REG_FLAG_READ))
> + goto out;
> +
> + ret = pcrf->func(pvd, pcrf, offset, size, data, false);
> + if (ret != OPAL_PARTIAL)
> + return ret;
> +out:
> + pci_virt_cfg_read_raw(pvd, PCI_VIRT_CFG_NORMAL, offset, size, data);
> + return OPAL_SUCCESS;
> +}
> +
> +int64_t pci_virt_cfg_write(struct phb *phb, uint32_t bdfn,
> + uint32_t offset, uint32_t size,
> + uint32_t data)
> +{
> + struct pci_virt_device *pvd;
> + struct pci_cfg_reg_filter *pcrf;
> + uint32_t val, v, r, c, i;
> + int64_t ret = OPAL_SUCCESS;
I don't think this needs to be initialised
> +
> + /* Search for PCI virtual device */
> + pvd = pci_virt_find_device(phb, bdfn);
> + if (!pvd)
> + return OPAL_PARAMETER;
> +
> + /* Check if config address is valid or not */
> + if (!pci_virt_cfg_valid(pvd, offset, size))
> + return OPAL_PARAMETER;
> +
> + /* The value is written to the config space if the trap handler
> + * returns OPAL_PARTIAL. Otherwise, the value to be written is
> + * dropped.
> + */
> + pcrf = pci_virt_find_filter(pvd, offset, size);
> + if (!pcrf || !pcrf->func || !(pcrf->flags & PCI_REG_FLAG_WRITE))
> + goto out;
> +
> + ret = pcrf->func(pvd, pcrf, offset, size, &data, true);
> + if (ret != OPAL_PARTIAL)
> + return ret;
> +out:
> + val = data;
> + for (i = 0; i < size; i++) {
> + PCI_VIRT_CFG_NORMAL_RD(pvd, offset + i, 1, &v);
> + PCI_VIRT_CFG_RDONLY_RD(pvd, offset + i, 1, &r);
> + PCI_VIRT_CFG_W1CLR_RD(pvd, offset + i, 1, &c);
> +
> + /* Drop read-only bits */
> + val &= ~(r << (i * 8));
> + val |= (r & v) << (i * 8);
> +
> + /* Drop W1C bits */
> + val &= ~(val & ((c & v) << (i * 8)));
> + }
> +
> + PCI_VIRT_CFG_NORMAL_WR(pvd, offset, size, val);
> + return OPAL_SUCCESS;
> +}
> +
> +struct pci_virt_device *pci_virt_add_device(struct phb *phb, uint32_t bdfn,
> + uint32_t cfg_size, void *data)
> +{
> + struct pci_virt_device *pvd;
> + uint8_t *cfg;
> + uint32_t i;
> +
> + /* The standard config header size is 64 bytes */
> + if (!phb || (bdfn & 0xffff0000) || (cfg_size < 64))
> + return NULL;
> +
> + /* Check if the bdfn is available */
> + pvd = pci_virt_find_device(phb, bdfn);
> + if (pvd) {
> + prlog(PR_ERR, "%s: bdfn 0x%x was reserved\n",
> + __func__, bdfn);
> + return NULL;
> + }
> +
> + /* Populate the PCI virtual device */
> + pvd = zalloc(sizeof(*pvd));
> + if (!pvd) {
> + prlog(PR_ERR, "%s: Cannot alloate PCI virtual device
> (0x%x)\n",
> + __func__, bdfn);
Typo here, "alloate" should be "allocate".
> + return NULL;
> + }
> +
> + cfg = zalloc(cfg_size * PCI_VIRT_CFG_MAX);
> + if (!cfg) {
> + prlog(PR_ERR, "%s: Cannot allocate config space (0x%x)\n",
> + __func__, bdfn);
> + free(pvd);
> + return NULL;
> + }
> +
> + for (i = 0; i < PCI_VIRT_CFG_MAX; i++, cfg += cfg_size)
> + pvd->config[i] = cfg;
> +
> + pvd->bdfn = bdfn;
> + pvd->cfg_size = cfg_size;
> + pvd->data = data;
> + list_head_init(&pvd->pcrf);
> + list_add_tail(&phb->virt_devices, &pvd->node);
> +
> + return pvd;
> +}
> diff --git a/include/pci-virt.h b/include/pci-virt.h
> new file mode 100644
> index 0000000..7c787cf
> --- /dev/null
> +++ b/include/pci-virt.h
> @@ -0,0 +1,85 @@
> +/* Copyright 2013-2016 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef __PCI_VIRT_H
> +#define __PCI_VIRT_H
> +
> +#include <ccan/list/list.h>
> +
> +enum {
> + PCI_VIRT_CFG_NORMAL,
> + PCI_VIRT_CFG_RDONLY,
> + PCI_VIRT_CFG_W1CLR,
> + PCI_VIRT_CFG_MAX
> +};
> +
> +struct pci_virt_device {
> + uint32_t bdfn;
> + uint32_t cfg_size;
> + uint8_t *config[PCI_VIRT_CFG_MAX];
> + struct list_head pcrf;
> + struct list_node node;
> + void *data;
> +};
> +
> +extern void pci_virt_cfg_read_raw(struct pci_virt_device *pvd,
> + uint32_t space, uint32_t offset,
> + uint32_t size, uint32_t *data);
> +extern void pci_virt_cfg_write_raw(struct pci_virt_device *pvd,
> + uint32_t space, uint32_t offset,
> + uint32_t size, uint32_t data);
> +extern struct pci_cfg_reg_filter *pci_virt_add_filter(
> + struct pci_virt_device *pvd,
> + uint32_t start, uint32_t len,
> + uint32_t flags, pci_cfg_reg_func
> func,
> + void *data);
> +extern int64_t pci_virt_cfg_read(struct phb *phb, uint32_t bdfn,
> + uint32_t offset, uint32_t size,
> + uint32_t *data);
> +extern int64_t pci_virt_cfg_write(struct phb *phb, uint32_t bdfn,
> + uint32_t offset, uint32_t size,
> + uint32_t data);
> +extern struct pci_virt_device *pci_virt_find_device(struct phb *phb,
> + uint32_t bdfn);
> +extern struct pci_virt_device *pci_virt_add_device(struct phb *phb,
> + uint32_t bdfn,
> + uint32_t cfg_size,
> + void *data);
> +
> +/* Config space accessors */
> +#define PCI_VIRT_CFG_NORMAL_RD(d, o, s, v) \
> + pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_NORMAL, o, s, v)
> +#define PCI_VIRT_CFG_NORMAL_WR(d, o, s, v) \
> + pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_NORMAL, o, s, v)
> +#define PCI_VIRT_CFG_RDONLY_RD(d, o, s, v) \
> + pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_RDONLY, o, s, v)
> +#define PCI_VIRT_CFG_RDONLY_WR(d, o, s, v) \
> + pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_RDONLY, o, s, v)
> +#define PCI_VIRT_CFG_W1CLR_RD(d, o, s, v) \
> + pci_virt_cfg_read_raw(d, PCI_VIRT_CFG_W1CLR, o, s, v)
> +#define PCI_VIRT_CFG_W1CLR_WR(d, o, s, v) \
> + pci_virt_cfg_write_raw(d, PCI_VIRT_CFG_W1CLR, o, s, v)
> +
> +#define PCI_VIRT_CFG_INIT(d, o, s, v, r, w) \
> + do { \
> + PCI_VIRT_CFG_NORMAL_WR(d, o, s, v); \
> + PCI_VIRT_CFG_RDONLY_WR(d, o, s, r); \
> + PCI_VIRT_CFG_W1CLR_WR(d, o, s, w); \
> + } while (0)
> +#define PCI_VIRT_CFG_INIT_RO(d, o, s, v) \
> + PCI_VIRT_CFG_INIT(d, o, s, v, 0xffffffff, 0)
> +
> +#endif /* __VIRT_PCI_H */
The ifdef is __PCI_VIRT_H but the endif refers to __VIRT_PCI_H
> diff --git a/include/pci.h b/include/pci.h
> index 7350f5c..9df5a69 100644
> --- a/include/pci.h
> +++ b/include/pci.h
> @@ -322,6 +322,7 @@ struct phb {
> enum phb_type phb_type;
> struct lock lock;
> struct list_head devices;
> + struct list_head virt_devices;
> const struct phb_ops *ops;
> struct pci_lsi_state lstate;
> uint32_t mps;
More information about the Skiboot
mailing list