[Skiboot] [PATCH] pci/iov: Remove skiboot VF tracking
Oliver
oohall at gmail.com
Tue May 7 20:36:58 AEST 2019
On Wed, May 1, 2019 at 6:30 PM Oliver O'Halloran <oohall at gmail.com> wrote:
>
> This feature was added a few years ago in response to a request to make
> the MaxPayloadSize (MPS) field of a Virtual Function match the MPS of the
> Physical Function that hosts it.
>
> The SR-IOV specification states the the MPS field of the VF is "ResvP".
> This indicates the VF will use whatever MPS is configured on the PF and
> that the field should be treated as a reserved field in the config space
> of the VF. In other words, a SR-IOV spec compliant VF should always return
> zero in the MPS field. Adding hacks in OPAL to make it non-zero is...
> misguided at best.
>
> Additionally, there is a bug in the way pci_device structures are handled
> by VFs that results in a crash on fast-reboot that occurs if VFs are
> enabled and then disabled prior to rebooting. This patch fixes the bug by
> removing the code entirely. This patch has no impact on SR-IOV support on
> the host operating system.
>
> Cc: Sergey Miroshnichenko <s.miroshnichenko at yadro.com>
> Cc: skiboot-stable at lists.ozlabs.org
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
The test team smoke tested it and it came back clean. Merged to master
as of 22057f868f3b2b1fd02647a738f6da0858b5eb6c
Vasant, you can pick this up for stable now if you like.
> ---
> core/Makefile.inc | 2 +-
> core/pci-iov.c | 265 ----------------------------------------------
> core/pci.c | 2 -
> include/pci-iov.h | 37 -------
> 4 files changed, 1 insertion(+), 305 deletions(-)
> delete mode 100644 core/pci-iov.c
> delete mode 100644 include/pci-iov.h
>
> diff --git a/core/Makefile.inc b/core/Makefile.inc
> index 3b4387081a62..21c12fb8db31 100644
> --- a/core/Makefile.inc
> +++ b/core/Makefile.inc
> @@ -3,7 +3,7 @@
> SUBDIRS += core
> CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
> CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
> -CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o
> +CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
> CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
> CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
> CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
> diff --git a/core/pci-iov.c b/core/pci-iov.c
> deleted file mode 100644
> index 8fecebc1292a..000000000000
> --- a/core/pci-iov.c
> +++ /dev/null
> @@ -1,265 +0,0 @@
> -/* Copyright 2013-2016 IBM Corp.
> - *
> - * Licensed under the Apache License, Version 2.0 (the "License");
> - * you may not use this file except in compliance with the License.
> - * You may obtain a copy of the License at
> - *
> - * http://www.apache.org/licenses/LICENSE-2.0
> - *
> - * Unless required by applicable law or agreed to in writing, software
> - * distributed under the License is distributed on an "AS IS" BASIS,
> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> - * implied.
> - * See the License for the specific language governing permissions and
> - * limitations under the License.
> - */
> -
> -#include <skiboot.h>
> -#include <pci.h>
> -#include <pci-cfg.h>
> -#include <pci-slot.h>
> -#include <pci-iov.h>
> -
> -/*
> - * Tackle the VF's MPS in PCIe capability. The field is read only.
> - * This function caches what is written and returns the cached
> - * MPS on read.
> - */
> -static int64_t pci_iov_vf_devctl(void *dev, struct pci_cfg_reg_filter *pcrf,
> - uint32_t offset, uint32_t len,
> - uint32_t *data, bool write)
> -{
> - struct pci_device *vf = (struct pci_device *)dev;
> - uint32_t pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
> - uint8_t *pcache;
> -
> - if (offset != (pos + PCICAP_EXP_DEVCTL))
> - return OPAL_PARTIAL;
> -
> - pcache = &pcrf->data[0];
> - if (write) {
> - *pcache = ((uint8_t)(*data >> (8 * (4 - len)))) &
> - PCICAP_EXP_DEVCTL_MPS;
> - } else {
> - *data &= ~(PCICAP_EXP_DEVCTL_MPS << (8 * (4 - len)));
> - *data |= (((uint32_t)(*pcache & PCICAP_EXP_DEVCTL_MPS))
> - << (8 * (4 - len)));
> - }
> -
> - return OPAL_SUCCESS;
> -}
> -
> -static void pci_iov_vf_quirk(struct phb *phb, struct pci_device *vf)
> -{
> - struct pci_cfg_reg_filter *pcrf;
> - uint32_t pos;
> -
> - if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false))
> - return;
> -
> - /*
> - * On Mellanox MT27500 Family [ConnectX-3], its VF's MPS field in
> - * the corresponding config register is readonly. The MPS for PF/VF
> - * are usually different. We are introducing a quirk to make them
> - * look same to avoid confusion.
> - */
> - if (vf->vdid != 0x100315b3)
> - return;
> -
> - pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
> - pcrf = pci_add_cfg_reg_filter(vf, pos + PCICAP_EXP_DEVCTL, 4,
> - PCI_REG_FLAG_MASK, pci_iov_vf_devctl);
> - if (!pcrf)
> - prlog(PR_WARNING, "%s: Missed DEVCTL filter on %04x:%02x:%02x.%01x\n",
> - __func__, phb->opal_id, (vf->bdfn >> 8),
> - ((vf->bdfn >> 3) & 0x1f), (vf->bdfn & 0x7));
> -}
> -
> -/*
> - * Update the SRIOV parameters that change when the number of
> - * VFs is configured.
> - */
> -static bool pci_iov_update_parameters(struct pci_iov *iov)
> -{
> - struct phb *phb = iov->phb;
> - uint16_t bdfn = iov->pd->bdfn;
> - uint32_t pos = iov->pos;
> - uint16_t val;
> - bool enabled;
> -
> - pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_CTRL, &val);
> - enabled = !!(val & PCIECAP_SRIOV_CTRL_VFE);
> - if (iov->enabled == enabled)
> - return false;
> -
> - if (enabled) {
> - pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_INITIAL_VF,
> - &iov->init_VFs);
> - pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_NUM_VF,
> - &iov->num_VFs);
> - pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_OFFSET,
> - &iov->offset);
> - pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_STRIDE,
> - &iov->stride);
> - } else {
> - iov->init_VFs = 0;
> - iov->num_VFs = 0;
> - iov->offset = 0;
> - iov->stride = 0;
> - }
> -
> - iov->enabled = enabled;
> - return true;
> -}
> -
> -static int64_t pci_iov_change(void *dev __unused,
> - struct pci_cfg_reg_filter *pcrf,
> - uint32_t offset __unused,
> - uint32_t len __unused,
> - uint32_t *data __unused,
> - bool write __unused)
> -{
> - struct pci_iov *iov = (struct pci_iov *)pcrf->data;
> - struct phb *phb = iov->phb;
> - struct pci_device *pd = iov->pd;
> - struct pci_device *vf, *tmp;
> - uint32_t i;
> - bool changed;
> -
> - /* Update SRIOV variable parameters */
> - changed = pci_iov_update_parameters(iov);
> - if (!changed)
> - return OPAL_PARTIAL;
> -
> - /* Remove all VFs that have been attached to the parent */
> - if (!iov->enabled) {
> - list_for_each_safe(&pd->children, vf, tmp, link)
> - list_del(&vf->link);
> - return OPAL_PARTIAL;
> - }
> -
> - /* Initialize the VFs and attach them to parent */
> - for (changed = false, i = 0; i < iov->num_VFs; i++) {
> - vf = &iov->VFs[i];
> - vf->bdfn = pd->bdfn + iov->offset + iov->stride * i;
> - list_add_tail(&pd->children, &vf->link);
> -
> - /*
> - * We don't populate the capabilities again if they have
> - * been existing, to save time. Also, we need delay for
> - * 100ms before the VF's config space becomes ready.
> - */
> - if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false)) {
> - if (!changed) {
> - changed = !changed;
> - time_wait_ms(100);
> - }
> -
> - pci_init_capabilities(phb, vf);
> - pci_iov_vf_quirk(phb, vf);
> - }
> -
> - /* Call PHB hook */
> - if (phb->ops->device_init)
> - phb->ops->device_init(phb, pd, NULL);
> - }
> -
> - return OPAL_PARTIAL;
> -}
> -
> -/*
> - * This function is called with disabled SRIOV capability. So the VF's
> - * config address isn't finalized and its config space isn't accessible.
> - */
> -static void pci_iov_init_VF(struct pci_device *pd, struct pci_device *vf)
> -{
> - vf->is_bridge = false;
> - vf->is_multifunction = false;
> - vf->is_vf = true;
> - vf->dev_type = PCIE_TYPE_ENDPOINT;
> - vf->scan_map = -1;
> - vf->vdid = pd->vdid;
> - vf->sub_vdid = pd->sub_vdid;
> - vf->class = pd->class;
> - vf->dn = NULL;
> - vf->slot = NULL;
> - vf->parent = pd;
> - vf->phb = pd->phb;
> - list_head_init(&vf->pcrf);
> - list_head_init(&vf->children);
> -}
> -
> -static void pci_free_iov_cap(void *data)
> -{
> - struct pci_iov *iov = data;
> - free(iov->VFs);
> - free(iov);
> -}
> -
> -void pci_init_iov_cap(struct phb *phb, struct pci_device *pd)
> -{
> - int64_t pos;
> - struct pci_iov *iov;
> - struct pci_cfg_reg_filter *pcrf;
> - uint32_t i;
> -
> - /* Search for SRIOV capability */
> - if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
> - return;
> -
> - pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_SRIOV, NULL);
> - if (pos <= 0)
> - return;
> -
> - /* Allocate IOV */
> - iov = zalloc(sizeof(*iov));
> - if (!iov) {
> - prlog(PR_ERR, "%s: Cannot alloc IOV for %04x:%02x:%02x.%01x\n",
> - __func__, phb->opal_id, (pd->bdfn >> 8),
> - ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
> - return;
> - }
> -
> - /* Allocate VFs */
> - pci_cfg_read16(phb, pd->bdfn, pos + PCIECAP_SRIOV_TOTAL_VF,
> - &iov->total_VFs);
> - iov->VFs = zalloc(sizeof(*iov->VFs) * iov->total_VFs);
> - if (!iov->VFs) {
> - prlog(PR_ERR, "%s: Cannot alloc %d VFs for %04x:%02x:%02x.%01x\n",
> - __func__, iov->total_VFs, phb->opal_id,
> - (pd->bdfn >> 8), ((pd->bdfn >> 3) & 0x1f),
> - (pd->bdfn & 0x7));
> - free(iov);
> - return;
> - }
> -
> - /* Initialize VFs */
> - for (i = 0; i < iov->total_VFs; i++)
> - pci_iov_init_VF(pd, &iov->VFs[i]);
> -
> - /* Register filter for enabling or disabling SRIOV capability */
> - pcrf = pci_add_cfg_reg_filter(pd, pos + PCIECAP_SRIOV_CTRL, 2,
> - PCI_REG_FLAG_WRITE, pci_iov_change);
> - if (!pcrf) {
> - prlog(PR_ERR, "%s: Cannot set filter on %04x:%02x:%02x.%01x\n",
> - __func__, phb->opal_id, (pd->bdfn >> 8),
> - ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
> - free(iov->VFs);
> - free(iov);
> - return;
> - }
> -
> - /* Associate filter and IOV capability */
> - pcrf->data = (void *)iov;
> -
> - /*
> - * Retrieve the number of VFs and other information if applicable.
> - * Register the SRIOV capability in the mean while.
> - */
> - iov->phb = phb;
> - iov->pd = pd;
> - iov->pos = pos;
> - iov->enabled = false;
> - pci_iov_update_parameters(iov);
> - pci_set_cap(pd, PCIECAP_ID_SRIOV, pos, iov, pci_free_iov_cap, true);
> -}
> diff --git a/core/pci.c b/core/pci.c
> index ee563c2fc577..02f5caa8d901 100644
> --- a/core/pci.c
> +++ b/core/pci.c
> @@ -18,7 +18,6 @@
> #include <cpu.h>
> #include <pci.h>
> #include <pci-cfg.h>
> -#include <pci-iov.h>
> #include <pci-slot.h>
> #include <pci-quirk.h>
> #include <timebase.h>
> @@ -193,7 +192,6 @@ void pci_init_capabilities(struct phb *phb, struct pci_device *pd)
> {
> pci_init_pcie_cap(phb, pd);
> pci_init_aer_cap(phb, pd);
> - pci_init_iov_cap(phb, pd);
> pci_init_pm_cap(phb, pd);
> }
>
> diff --git a/include/pci-iov.h b/include/pci-iov.h
> deleted file mode 100644
> index 787b2cd1da69..000000000000
> --- a/include/pci-iov.h
> +++ /dev/null
> @@ -1,37 +0,0 @@
> -/* Copyright 2013-2016 IBM Corp.
> - *
> - * Licensed under the Apache License, Version 2.0 (the "License");
> - * you may not use this file except in compliance with the License.
> - * You may obtain a copy of the License at
> - *
> - * http://www.apache.org/licenses/LICENSE-2.0
> - *
> - * Unless required by applicable law or agreed to in writing, software
> - * distributed under the License is distributed on an "AS IS" BASIS,
> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> - * implied.
> - * See the License for the specific language governing permissions and
> - * limitations under the License.
> - */
> -
> -#ifndef __PCI_IOV_H
> -#define __PCI_IOV_H
> -
> -struct pci_iov {
> - struct phb *phb;
> - struct pci_device *pd;
> - struct pci_device *VFs;
> - uint32_t pos;
> - bool enabled;
> - struct pci_cfg_reg_filter pcrf;
> -
> - uint16_t init_VFs;
> - uint16_t total_VFs;
> - uint16_t num_VFs;
> - uint16_t offset;
> - uint16_t stride;
> -};
> -
> -extern void pci_init_iov_cap(struct phb *phb, struct pci_device *pd);
> -
> -#endif
> --
> 2.20.1
>
More information about the Skiboot
mailing list