[Skiboot] [PATCH] pci/iov: Remove skiboot VF tracking

Sergey Miroshnichenko s.miroshnichenko at yadro.com
Wed May 15 00:44:24 AEST 2019


On 5/7/19 1:36 PM, Oliver wrote:
> On Wed, May 1, 2019 at 6:30 PM Oliver O'Halloran <oohall at gmail.com> wrote:
>>
>> This feature was added a few years ago in response to a request to make
>> the MaxPayloadSize (MPS) field of a Virtual Function match the MPS of the
>> Physical Function that hosts it.
>>
>> The SR-IOV specification states the the MPS field of the VF is "ResvP".
>> This indicates the VF will use whatever MPS is configured on the PF and
>> that the field should be treated as a reserved field in the config space
>> of the VF. In other words, a SR-IOV spec compliant VF should always return
>> zero in the MPS field.  Adding hacks in OPAL to make it non-zero is...
>> misguided at best.
>>
>> Additionally, there is a bug in the way pci_device structures are handled
>> by VFs that results in a crash on fast-reboot that occurs if VFs are
>> enabled and then disabled prior to rebooting. This patch fixes the bug by
>> removing the code entirely. This patch has no impact on SR-IOV support on
>> the host operating system.
>>
>> Cc: Sergey Miroshnichenko <s.miroshnichenko at yadro.com>
>> Cc: skiboot-stable at lists.ozlabs.org
>> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
> 
> The test team smoke tested it and it came back clean. Merged to master
> as of 22057f868f3b2b1fd02647a738f6da0858b5eb6c
> 

Works fine on our setup as well (platforms/vesnin).

And it lets me gladly drop two patches from the "core/pci: Sync VFs and the changes of
bdfns between the firmware and the OS" patchset.

Serge

> Vasant, you can pick this up for stable now if you like.
> 
>> ---
>>  core/Makefile.inc |   2 +-
>>  core/pci-iov.c    | 265 ----------------------------------------------
>>  core/pci.c        |   2 -
>>  include/pci-iov.h |  37 -------
>>  4 files changed, 1 insertion(+), 305 deletions(-)
>>  delete mode 100644 core/pci-iov.c
>>  delete mode 100644 include/pci-iov.h
>>
>> diff --git a/core/Makefile.inc b/core/Makefile.inc
>> index 3b4387081a62..21c12fb8db31 100644
>> --- a/core/Makefile.inc
>> +++ b/core/Makefile.inc
>> @@ -3,7 +3,7 @@
>>  SUBDIRS += core
>>  CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
>>  CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
>> -CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o
>> +CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
>>  CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
>>  CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
>>  CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
>> diff --git a/core/pci-iov.c b/core/pci-iov.c
>> deleted file mode 100644
>> index 8fecebc1292a..000000000000
>> --- a/core/pci-iov.c
>> +++ /dev/null
>> @@ -1,265 +0,0 @@
>> -/* Copyright 2013-2016 IBM Corp.
>> - *
>> - * Licensed under the Apache License, Version 2.0 (the "License");
>> - * you may not use this file except in compliance with the License.
>> - * You may obtain a copy of the License at
>> - *
>> - *      http://www.apache.org/licenses/LICENSE-2.0
>> - *
>> - * Unless required by applicable law or agreed to in writing, software
>> - * distributed under the License is distributed on an "AS IS" BASIS,
>> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> - * implied.
>> - * See the License for the specific language governing permissions and
>> - * limitations under the License.
>> - */
>> -
>> -#include <skiboot.h>
>> -#include <pci.h>
>> -#include <pci-cfg.h>
>> -#include <pci-slot.h>
>> -#include <pci-iov.h>
>> -
>> -/*
>> - * Tackle the VF's MPS in PCIe capability. The field is read only.
>> - * This function caches what is written and returns the cached
>> - * MPS on read.
>> - */
>> -static int64_t pci_iov_vf_devctl(void *dev, struct pci_cfg_reg_filter *pcrf,
>> -                                uint32_t offset, uint32_t len,
>> -                                uint32_t *data, bool write)
>> -{
>> -       struct pci_device *vf = (struct pci_device *)dev;
>> -       uint32_t pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
>> -       uint8_t *pcache;
>> -
>> -       if (offset != (pos + PCICAP_EXP_DEVCTL))
>> -               return OPAL_PARTIAL;
>> -
>> -       pcache = &pcrf->data[0];
>> -       if (write) {
>> -               *pcache = ((uint8_t)(*data >> (8 * (4 - len)))) &
>> -                          PCICAP_EXP_DEVCTL_MPS;
>> -       } else {
>> -               *data &= ~(PCICAP_EXP_DEVCTL_MPS << (8 * (4 - len)));
>> -               *data |= (((uint32_t)(*pcache & PCICAP_EXP_DEVCTL_MPS))
>> -                         << (8 * (4 - len)));
>> -       }
>> -
>> -       return OPAL_SUCCESS;
>> -}
>> -
>> -static void pci_iov_vf_quirk(struct phb *phb, struct pci_device *vf)
>> -{
>> -       struct pci_cfg_reg_filter *pcrf;
>> -       uint32_t pos;
>> -
>> -       if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false))
>> -               return;
>> -
>> -       /*
>> -        * On Mellanox MT27500 Family [ConnectX-3], its VF's MPS field in
>> -        * the corresponding config register is readonly. The MPS for PF/VF
>> -        * are usually different. We are introducing a quirk to make them
>> -        * look same to avoid confusion.
>> -        */
>> -       if (vf->vdid != 0x100315b3)
>> -               return;
>> -
>> -       pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
>> -       pcrf = pci_add_cfg_reg_filter(vf, pos + PCICAP_EXP_DEVCTL, 4,
>> -                                     PCI_REG_FLAG_MASK, pci_iov_vf_devctl);
>> -       if (!pcrf)
>> -               prlog(PR_WARNING, "%s: Missed DEVCTL filter on %04x:%02x:%02x.%01x\n",
>> -                     __func__, phb->opal_id, (vf->bdfn >> 8),
>> -                     ((vf->bdfn >> 3) & 0x1f), (vf->bdfn & 0x7));
>> -}
>> -
>> -/*
>> - * Update the SRIOV parameters that change when the number of
>> - * VFs is configured.
>> - */
>> -static bool pci_iov_update_parameters(struct pci_iov *iov)
>> -{
>> -       struct phb *phb = iov->phb;
>> -       uint16_t bdfn = iov->pd->bdfn;
>> -       uint32_t pos = iov->pos;
>> -       uint16_t val;
>> -       bool enabled;
>> -
>> -       pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_CTRL, &val);
>> -       enabled = !!(val & PCIECAP_SRIOV_CTRL_VFE);
>> -       if (iov->enabled == enabled)
>> -               return false;
>> -
>> -       if (enabled) {
>> -               pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_INITIAL_VF,
>> -                              &iov->init_VFs);
>> -               pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_NUM_VF,
>> -                              &iov->num_VFs);
>> -               pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_OFFSET,
>> -                              &iov->offset);
>> -               pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_STRIDE,
>> -                              &iov->stride);
>> -       } else {
>> -               iov->init_VFs   = 0;
>> -               iov->num_VFs    = 0;
>> -               iov->offset     = 0;
>> -               iov->stride     = 0;
>> -       }
>> -
>> -       iov->enabled = enabled;
>> -       return true;
>> -}
>> -
>> -static int64_t pci_iov_change(void *dev __unused,
>> -                             struct pci_cfg_reg_filter *pcrf,
>> -                             uint32_t offset __unused,
>> -                             uint32_t len __unused,
>> -                             uint32_t *data __unused,
>> -                             bool write __unused)
>> -{
>> -       struct pci_iov *iov = (struct pci_iov *)pcrf->data;
>> -       struct phb *phb = iov->phb;
>> -       struct pci_device *pd = iov->pd;
>> -       struct pci_device *vf, *tmp;
>> -       uint32_t i;
>> -       bool changed;
>> -
>> -       /* Update SRIOV variable parameters */
>> -       changed = pci_iov_update_parameters(iov);
>> -       if (!changed)
>> -               return OPAL_PARTIAL;
>> -
>> -       /* Remove all VFs that have been attached to the parent */
>> -       if (!iov->enabled) {
>> -               list_for_each_safe(&pd->children, vf, tmp, link)
>> -                       list_del(&vf->link);
>> -               return OPAL_PARTIAL;
>> -       }
>> -
>> -       /* Initialize the VFs and attach them to parent */
>> -       for (changed = false, i = 0; i < iov->num_VFs; i++) {
>> -               vf = &iov->VFs[i];
>> -               vf->bdfn = pd->bdfn + iov->offset + iov->stride * i;
>> -               list_add_tail(&pd->children, &vf->link);
>> -
>> -               /*
>> -                * We don't populate the capabilities again if they have
>> -                * been existing, to save time. Also, we need delay for
>> -                * 100ms before the VF's config space becomes ready.
>> -                */
>> -               if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false)) {
>> -                       if (!changed) {
>> -                               changed = !changed;
>> -                               time_wait_ms(100);
>> -                       }
>> -
>> -                       pci_init_capabilities(phb, vf);
>> -                       pci_iov_vf_quirk(phb, vf);
>> -               }
>> -
>> -               /* Call PHB hook */
>> -               if (phb->ops->device_init)
>> -                       phb->ops->device_init(phb, pd, NULL);
>> -       }
>> -
>> -       return OPAL_PARTIAL;
>> -}
>> -
>> -/*
>> - * This function is called with disabled SRIOV capability. So the VF's
>> - * config address isn't finalized and its config space isn't accessible.
>> - */
>> -static void pci_iov_init_VF(struct pci_device *pd, struct pci_device *vf)
>> -{
>> -       vf->is_bridge           = false;
>> -       vf->is_multifunction    = false;
>> -       vf->is_vf               = true;
>> -       vf->dev_type            = PCIE_TYPE_ENDPOINT;
>> -       vf->scan_map            = -1;
>> -       vf->vdid                = pd->vdid;
>> -       vf->sub_vdid            = pd->sub_vdid;
>> -       vf->class               = pd->class;
>> -       vf->dn                  = NULL;
>> -       vf->slot                = NULL;
>> -       vf->parent              = pd;
>> -       vf->phb                 = pd->phb;
>> -       list_head_init(&vf->pcrf);
>> -       list_head_init(&vf->children);
>> -}
>> -
>> -static void pci_free_iov_cap(void *data)
>> -{
>> -       struct pci_iov *iov = data;
>> -       free(iov->VFs);
>> -       free(iov);
>> -}
>> -
>> -void pci_init_iov_cap(struct phb *phb, struct pci_device *pd)
>> -{
>> -       int64_t pos;
>> -       struct pci_iov *iov;
>> -       struct pci_cfg_reg_filter *pcrf;
>> -       uint32_t i;
>> -
>> -       /* Search for SRIOV capability */
>> -       if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
>> -               return;
>> -
>> -       pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_SRIOV, NULL);
>> -       if (pos <= 0)
>> -               return;
>> -
>> -       /* Allocate IOV */
>> -       iov = zalloc(sizeof(*iov));
>> -       if (!iov) {
>> -               prlog(PR_ERR, "%s: Cannot alloc IOV for %04x:%02x:%02x.%01x\n",
>> -                     __func__, phb->opal_id, (pd->bdfn >> 8),
>> -                     ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
>> -               return;
>> -       }
>> -
>> -       /* Allocate VFs */
>> -       pci_cfg_read16(phb, pd->bdfn, pos + PCIECAP_SRIOV_TOTAL_VF,
>> -                      &iov->total_VFs);
>> -       iov->VFs = zalloc(sizeof(*iov->VFs) * iov->total_VFs);
>> -       if (!iov->VFs) {
>> -               prlog(PR_ERR, "%s: Cannot alloc %d VFs for %04x:%02x:%02x.%01x\n",
>> -                     __func__, iov->total_VFs, phb->opal_id,
>> -                     (pd->bdfn >> 8), ((pd->bdfn >> 3) & 0x1f),
>> -                     (pd->bdfn & 0x7));
>> -               free(iov);
>> -               return;
>> -       }
>> -
>> -       /* Initialize VFs */
>> -       for (i = 0; i < iov->total_VFs; i++)
>> -               pci_iov_init_VF(pd, &iov->VFs[i]);
>> -
>> -       /* Register filter for enabling or disabling SRIOV capability */
>> -       pcrf = pci_add_cfg_reg_filter(pd, pos + PCIECAP_SRIOV_CTRL, 2,
>> -                                     PCI_REG_FLAG_WRITE, pci_iov_change);
>> -       if (!pcrf) {
>> -               prlog(PR_ERR, "%s: Cannot set filter on %04x:%02x:%02x.%01x\n",
>> -                     __func__, phb->opal_id, (pd->bdfn >> 8),
>> -                     ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
>> -               free(iov->VFs);
>> -               free(iov);
>> -               return;
>> -       }
>> -
>> -       /* Associate filter and IOV capability */
>> -       pcrf->data = (void *)iov;
>> -
>> -       /*
>> -        * Retrieve the number of VFs and other information if applicable.
>> -        * Register the SRIOV capability in the mean while.
>> -        */
>> -       iov->phb = phb;
>> -       iov->pd = pd;
>> -       iov->pos = pos;
>> -       iov->enabled = false;
>> -       pci_iov_update_parameters(iov);
>> -       pci_set_cap(pd, PCIECAP_ID_SRIOV, pos, iov, pci_free_iov_cap, true);
>> -}
>> diff --git a/core/pci.c b/core/pci.c
>> index ee563c2fc577..02f5caa8d901 100644
>> --- a/core/pci.c
>> +++ b/core/pci.c
>> @@ -18,7 +18,6 @@
>>  #include <cpu.h>
>>  #include <pci.h>
>>  #include <pci-cfg.h>
>> -#include <pci-iov.h>
>>  #include <pci-slot.h>
>>  #include <pci-quirk.h>
>>  #include <timebase.h>
>> @@ -193,7 +192,6 @@ void pci_init_capabilities(struct phb *phb, struct pci_device *pd)
>>  {
>>         pci_init_pcie_cap(phb, pd);
>>         pci_init_aer_cap(phb, pd);
>> -       pci_init_iov_cap(phb, pd);
>>         pci_init_pm_cap(phb, pd);
>>  }
>>
>> diff --git a/include/pci-iov.h b/include/pci-iov.h
>> deleted file mode 100644
>> index 787b2cd1da69..000000000000
>> --- a/include/pci-iov.h
>> +++ /dev/null
>> @@ -1,37 +0,0 @@
>> -/* Copyright 2013-2016 IBM Corp.
>> - *
>> - * Licensed under the Apache License, Version 2.0 (the "License");
>> - * you may not use this file except in compliance with the License.
>> - * You may obtain a copy of the License at
>> - *
>> - *      http://www.apache.org/licenses/LICENSE-2.0
>> - *
>> - * Unless required by applicable law or agreed to in writing, software
>> - * distributed under the License is distributed on an "AS IS" BASIS,
>> - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> - * implied.
>> - * See the License for the specific language governing permissions and
>> - * limitations under the License.
>> - */
>> -
>> -#ifndef __PCI_IOV_H
>> -#define __PCI_IOV_H
>> -
>> -struct pci_iov {
>> -       struct phb                      *phb;
>> -       struct pci_device               *pd;
>> -       struct pci_device               *VFs;
>> -       uint32_t                        pos;
>> -       bool                            enabled;
>> -       struct pci_cfg_reg_filter       pcrf;
>> -
>> -       uint16_t                        init_VFs;
>> -       uint16_t                        total_VFs;
>> -       uint16_t                        num_VFs;
>> -       uint16_t                        offset;
>> -       uint16_t                        stride;
>> -};
>> -
>> -extern void pci_init_iov_cap(struct phb *phb, struct pci_device *pd);
>> -
>> -#endif
>> --
>> 2.20.1
>>


More information about the Skiboot mailing list