[Skiboot] [PATCH] pci/iov: Remove skiboot VF tracking

Oliver oohall at gmail.com
Tue May 7 20:36:58 AEST 2019


On Wed, May 1, 2019 at 6:30 PM Oliver O'Halloran <oohall at gmail.com> wrote:
>
> This feature was added a few years ago in response to a request to make
> the MaxPayloadSize (MPS) field of a Virtual Function match the MPS of the
> Physical Function that hosts it.
>
> The SR-IOV specification states the the MPS field of the VF is "ResvP".
> This indicates the VF will use whatever MPS is configured on the PF and
> that the field should be treated as a reserved field in the config space
> of the VF. In other words, a SR-IOV spec compliant VF should always return
> zero in the MPS field.  Adding hacks in OPAL to make it non-zero is...
> misguided at best.
>
> Additionally, there is a bug in the way pci_device structures are handled
> by VFs that results in a crash on fast-reboot that occurs if VFs are
> enabled and then disabled prior to rebooting. This patch fixes the bug by
> removing the code entirely. This patch has no impact on SR-IOV support on
> the host operating system.
>
> Cc: Sergey Miroshnichenko <s.miroshnichenko at yadro.com>
> Cc: skiboot-stable at lists.ozlabs.org
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>

The test team smoke tested it and it came back clean. Merged to master
as of 22057f868f3b2b1fd02647a738f6da0858b5eb6c

Vasant, you can pick this up for stable now if you like.

> ---
>  core/Makefile.inc |   2 +-
>  core/pci-iov.c    | 265 ----------------------------------------------
>  core/pci.c        |   2 -
>  include/pci-iov.h |  37 -------
>  4 files changed, 1 insertion(+), 305 deletions(-)
>  delete mode 100644 core/pci-iov.c
>  delete mode 100644 include/pci-iov.h
>
> diff --git a/core/Makefile.inc b/core/Makefile.inc
> index 3b4387081a62..21c12fb8db31 100644
> --- a/core/Makefile.inc
> +++ b/core/Makefile.inc
> @@ -3,7 +3,7 @@
>  SUBDIRS += core
>  CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
>  CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
> -CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o
> +CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
>  CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
>  CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
>  CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
> diff --git a/core/pci-iov.c b/core/pci-iov.c
> deleted file mode 100644
> index 8fecebc1292a..000000000000
> --- a/core/pci-iov.c
> +++ /dev/null
> @@ -1,265 +0,0 @@
> -/* Copyright 2013-2016 IBM Corp.
> - *
> - * Licensed under the Apache License, Version 2.0 (the "License");
> - * you may not use this file except in compliance with the License.
> - * You may obtain a copy of the License at
> - *
> - *      http://www.apache.org/licenses/LICENSE-2.0
> - *
> - * Unless required by applicable law or agreed to in writing, software
> - * distributed under the License is distributed on an "AS IS" BASIS,
> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> - * implied.
> - * See the License for the specific language governing permissions and
> - * limitations under the License.
> - */
> -
> -#include <skiboot.h>
> -#include <pci.h>
> -#include <pci-cfg.h>
> -#include <pci-slot.h>
> -#include <pci-iov.h>
> -
> -/*
> - * Tackle the VF's MPS in PCIe capability. The field is read only.
> - * This function caches what is written and returns the cached
> - * MPS on read.
> - */
> -static int64_t pci_iov_vf_devctl(void *dev, struct pci_cfg_reg_filter *pcrf,
> -                                uint32_t offset, uint32_t len,
> -                                uint32_t *data, bool write)
> -{
> -       struct pci_device *vf = (struct pci_device *)dev;
> -       uint32_t pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
> -       uint8_t *pcache;
> -
> -       if (offset != (pos + PCICAP_EXP_DEVCTL))
> -               return OPAL_PARTIAL;
> -
> -       pcache = &pcrf->data[0];
> -       if (write) {
> -               *pcache = ((uint8_t)(*data >> (8 * (4 - len)))) &
> -                          PCICAP_EXP_DEVCTL_MPS;
> -       } else {
> -               *data &= ~(PCICAP_EXP_DEVCTL_MPS << (8 * (4 - len)));
> -               *data |= (((uint32_t)(*pcache & PCICAP_EXP_DEVCTL_MPS))
> -                         << (8 * (4 - len)));
> -       }
> -
> -       return OPAL_SUCCESS;
> -}
> -
> -static void pci_iov_vf_quirk(struct phb *phb, struct pci_device *vf)
> -{
> -       struct pci_cfg_reg_filter *pcrf;
> -       uint32_t pos;
> -
> -       if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false))
> -               return;
> -
> -       /*
> -        * On Mellanox MT27500 Family [ConnectX-3], its VF's MPS field in
> -        * the corresponding config register is readonly. The MPS for PF/VF
> -        * are usually different. We are introducing a quirk to make them
> -        * look same to avoid confusion.
> -        */
> -       if (vf->vdid != 0x100315b3)
> -               return;
> -
> -       pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
> -       pcrf = pci_add_cfg_reg_filter(vf, pos + PCICAP_EXP_DEVCTL, 4,
> -                                     PCI_REG_FLAG_MASK, pci_iov_vf_devctl);
> -       if (!pcrf)
> -               prlog(PR_WARNING, "%s: Missed DEVCTL filter on %04x:%02x:%02x.%01x\n",
> -                     __func__, phb->opal_id, (vf->bdfn >> 8),
> -                     ((vf->bdfn >> 3) & 0x1f), (vf->bdfn & 0x7));
> -}
> -
> -/*
> - * Update the SRIOV parameters that change when the number of
> - * VFs is configured.
> - */
> -static bool pci_iov_update_parameters(struct pci_iov *iov)
> -{
> -       struct phb *phb = iov->phb;
> -       uint16_t bdfn = iov->pd->bdfn;
> -       uint32_t pos = iov->pos;
> -       uint16_t val;
> -       bool enabled;
> -
> -       pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_CTRL, &val);
> -       enabled = !!(val & PCIECAP_SRIOV_CTRL_VFE);
> -       if (iov->enabled == enabled)
> -               return false;
> -
> -       if (enabled) {
> -               pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_INITIAL_VF,
> -                              &iov->init_VFs);
> -               pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_NUM_VF,
> -                              &iov->num_VFs);
> -               pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_OFFSET,
> -                              &iov->offset);
> -               pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_STRIDE,
> -                              &iov->stride);
> -       } else {
> -               iov->init_VFs   = 0;
> -               iov->num_VFs    = 0;
> -               iov->offset     = 0;
> -               iov->stride     = 0;
> -       }
> -
> -       iov->enabled = enabled;
> -       return true;
> -}
> -
> -static int64_t pci_iov_change(void *dev __unused,
> -                             struct pci_cfg_reg_filter *pcrf,
> -                             uint32_t offset __unused,
> -                             uint32_t len __unused,
> -                             uint32_t *data __unused,
> -                             bool write __unused)
> -{
> -       struct pci_iov *iov = (struct pci_iov *)pcrf->data;
> -       struct phb *phb = iov->phb;
> -       struct pci_device *pd = iov->pd;
> -       struct pci_device *vf, *tmp;
> -       uint32_t i;
> -       bool changed;
> -
> -       /* Update SRIOV variable parameters */
> -       changed = pci_iov_update_parameters(iov);
> -       if (!changed)
> -               return OPAL_PARTIAL;
> -
> -       /* Remove all VFs that have been attached to the parent */
> -       if (!iov->enabled) {
> -               list_for_each_safe(&pd->children, vf, tmp, link)
> -                       list_del(&vf->link);
> -               return OPAL_PARTIAL;
> -       }
> -
> -       /* Initialize the VFs and attach them to parent */
> -       for (changed = false, i = 0; i < iov->num_VFs; i++) {
> -               vf = &iov->VFs[i];
> -               vf->bdfn = pd->bdfn + iov->offset + iov->stride * i;
> -               list_add_tail(&pd->children, &vf->link);
> -
> -               /*
> -                * We don't populate the capabilities again if they have
> -                * been existing, to save time. Also, we need delay for
> -                * 100ms before the VF's config space becomes ready.
> -                */
> -               if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false)) {
> -                       if (!changed) {
> -                               changed = !changed;
> -                               time_wait_ms(100);
> -                       }
> -
> -                       pci_init_capabilities(phb, vf);
> -                       pci_iov_vf_quirk(phb, vf);
> -               }
> -
> -               /* Call PHB hook */
> -               if (phb->ops->device_init)
> -                       phb->ops->device_init(phb, pd, NULL);
> -       }
> -
> -       return OPAL_PARTIAL;
> -}
> -
> -/*
> - * This function is called with disabled SRIOV capability. So the VF's
> - * config address isn't finalized and its config space isn't accessible.
> - */
> -static void pci_iov_init_VF(struct pci_device *pd, struct pci_device *vf)
> -{
> -       vf->is_bridge           = false;
> -       vf->is_multifunction    = false;
> -       vf->is_vf               = true;
> -       vf->dev_type            = PCIE_TYPE_ENDPOINT;
> -       vf->scan_map            = -1;
> -       vf->vdid                = pd->vdid;
> -       vf->sub_vdid            = pd->sub_vdid;
> -       vf->class               = pd->class;
> -       vf->dn                  = NULL;
> -       vf->slot                = NULL;
> -       vf->parent              = pd;
> -       vf->phb                 = pd->phb;
> -       list_head_init(&vf->pcrf);
> -       list_head_init(&vf->children);
> -}
> -
> -static void pci_free_iov_cap(void *data)
> -{
> -       struct pci_iov *iov = data;
> -       free(iov->VFs);
> -       free(iov);
> -}
> -
> -void pci_init_iov_cap(struct phb *phb, struct pci_device *pd)
> -{
> -       int64_t pos;
> -       struct pci_iov *iov;
> -       struct pci_cfg_reg_filter *pcrf;
> -       uint32_t i;
> -
> -       /* Search for SRIOV capability */
> -       if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
> -               return;
> -
> -       pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_SRIOV, NULL);
> -       if (pos <= 0)
> -               return;
> -
> -       /* Allocate IOV */
> -       iov = zalloc(sizeof(*iov));
> -       if (!iov) {
> -               prlog(PR_ERR, "%s: Cannot alloc IOV for %04x:%02x:%02x.%01x\n",
> -                     __func__, phb->opal_id, (pd->bdfn >> 8),
> -                     ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
> -               return;
> -       }
> -
> -       /* Allocate VFs */
> -       pci_cfg_read16(phb, pd->bdfn, pos + PCIECAP_SRIOV_TOTAL_VF,
> -                      &iov->total_VFs);
> -       iov->VFs = zalloc(sizeof(*iov->VFs) * iov->total_VFs);
> -       if (!iov->VFs) {
> -               prlog(PR_ERR, "%s: Cannot alloc %d VFs for %04x:%02x:%02x.%01x\n",
> -                     __func__, iov->total_VFs, phb->opal_id,
> -                     (pd->bdfn >> 8), ((pd->bdfn >> 3) & 0x1f),
> -                     (pd->bdfn & 0x7));
> -               free(iov);
> -               return;
> -       }
> -
> -       /* Initialize VFs */
> -       for (i = 0; i < iov->total_VFs; i++)
> -               pci_iov_init_VF(pd, &iov->VFs[i]);
> -
> -       /* Register filter for enabling or disabling SRIOV capability */
> -       pcrf = pci_add_cfg_reg_filter(pd, pos + PCIECAP_SRIOV_CTRL, 2,
> -                                     PCI_REG_FLAG_WRITE, pci_iov_change);
> -       if (!pcrf) {
> -               prlog(PR_ERR, "%s: Cannot set filter on %04x:%02x:%02x.%01x\n",
> -                     __func__, phb->opal_id, (pd->bdfn >> 8),
> -                     ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
> -               free(iov->VFs);
> -               free(iov);
> -               return;
> -       }
> -
> -       /* Associate filter and IOV capability */
> -       pcrf->data = (void *)iov;
> -
> -       /*
> -        * Retrieve the number of VFs and other information if applicable.
> -        * Register the SRIOV capability in the mean while.
> -        */
> -       iov->phb = phb;
> -       iov->pd = pd;
> -       iov->pos = pos;
> -       iov->enabled = false;
> -       pci_iov_update_parameters(iov);
> -       pci_set_cap(pd, PCIECAP_ID_SRIOV, pos, iov, pci_free_iov_cap, true);
> -}
> diff --git a/core/pci.c b/core/pci.c
> index ee563c2fc577..02f5caa8d901 100644
> --- a/core/pci.c
> +++ b/core/pci.c
> @@ -18,7 +18,6 @@
>  #include <cpu.h>
>  #include <pci.h>
>  #include <pci-cfg.h>
> -#include <pci-iov.h>
>  #include <pci-slot.h>
>  #include <pci-quirk.h>
>  #include <timebase.h>
> @@ -193,7 +192,6 @@ void pci_init_capabilities(struct phb *phb, struct pci_device *pd)
>  {
>         pci_init_pcie_cap(phb, pd);
>         pci_init_aer_cap(phb, pd);
> -       pci_init_iov_cap(phb, pd);
>         pci_init_pm_cap(phb, pd);
>  }
>
> diff --git a/include/pci-iov.h b/include/pci-iov.h
> deleted file mode 100644
> index 787b2cd1da69..000000000000
> --- a/include/pci-iov.h
> +++ /dev/null
> @@ -1,37 +0,0 @@
> -/* Copyright 2013-2016 IBM Corp.
> - *
> - * Licensed under the Apache License, Version 2.0 (the "License");
> - * you may not use this file except in compliance with the License.
> - * You may obtain a copy of the License at
> - *
> - *      http://www.apache.org/licenses/LICENSE-2.0
> - *
> - * Unless required by applicable law or agreed to in writing, software
> - * distributed under the License is distributed on an "AS IS" BASIS,
> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> - * implied.
> - * See the License for the specific language governing permissions and
> - * limitations under the License.
> - */
> -
> -#ifndef __PCI_IOV_H
> -#define __PCI_IOV_H
> -
> -struct pci_iov {
> -       struct phb                      *phb;
> -       struct pci_device               *pd;
> -       struct pci_device               *VFs;
> -       uint32_t                        pos;
> -       bool                            enabled;
> -       struct pci_cfg_reg_filter       pcrf;
> -
> -       uint16_t                        init_VFs;
> -       uint16_t                        total_VFs;
> -       uint16_t                        num_VFs;
> -       uint16_t                        offset;
> -       uint16_t                        stride;
> -};
> -
> -extern void pci_init_iov_cap(struct phb *phb, struct pci_device *pd);
> -
> -#endif
> --
> 2.20.1
>


More information about the Skiboot mailing list