[Skiboot-stable] [PATCH] pci/iov: Remove skiboot VF tracking

Oliver O'Halloran oohall at gmail.com
Wed May 1 18:30:12 AEST 2019


This feature was added a few years ago in response to a request to make
the MaxPayloadSize (MPS) field of a Virtual Function match the MPS of the
Physical Function that hosts it.

The SR-IOV specification states the the MPS field of the VF is "ResvP".
This indicates the VF will use whatever MPS is configured on the PF and
that the field should be treated as a reserved field in the config space
of the VF. In other words, a SR-IOV spec compliant VF should always return
zero in the MPS field.  Adding hacks in OPAL to make it non-zero is...
misguided at best.

Additionally, there is a bug in the way pci_device structures are handled
by VFs that results in a crash on fast-reboot that occurs if VFs are
enabled and then disabled prior to rebooting. This patch fixes the bug by
removing the code entirely. This patch has no impact on SR-IOV support on
the host operating system.

Cc: Sergey Miroshnichenko <s.miroshnichenko at yadro.com>
Cc: skiboot-stable at lists.ozlabs.org
Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
---
 core/Makefile.inc |   2 +-
 core/pci-iov.c    | 265 ----------------------------------------------
 core/pci.c        |   2 -
 include/pci-iov.h |  37 -------
 4 files changed, 1 insertion(+), 305 deletions(-)
 delete mode 100644 core/pci-iov.c
 delete mode 100644 include/pci-iov.h

diff --git a/core/Makefile.inc b/core/Makefile.inc
index 3b4387081a62..21c12fb8db31 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -3,7 +3,7 @@
 SUBDIRS += core
 CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
 CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
-CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o
+CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
 CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
 CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
 CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
diff --git a/core/pci-iov.c b/core/pci-iov.c
deleted file mode 100644
index 8fecebc1292a..000000000000
--- a/core/pci-iov.c
+++ /dev/null
@@ -1,265 +0,0 @@
-/* Copyright 2013-2016 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <skiboot.h>
-#include <pci.h>
-#include <pci-cfg.h>
-#include <pci-slot.h>
-#include <pci-iov.h>
-
-/*
- * Tackle the VF's MPS in PCIe capability. The field is read only.
- * This function caches what is written and returns the cached
- * MPS on read.
- */
-static int64_t pci_iov_vf_devctl(void *dev, struct pci_cfg_reg_filter *pcrf,
-				 uint32_t offset, uint32_t len,
-				 uint32_t *data, bool write)
-{
-	struct pci_device *vf = (struct pci_device *)dev;
-	uint32_t pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
-	uint8_t *pcache;
-
-	if (offset != (pos + PCICAP_EXP_DEVCTL))
-		return OPAL_PARTIAL;
-
-	pcache = &pcrf->data[0];
-	if (write) {
-		*pcache = ((uint8_t)(*data >> (8 * (4 - len)))) &
-			   PCICAP_EXP_DEVCTL_MPS;
-	} else {
-		*data &= ~(PCICAP_EXP_DEVCTL_MPS << (8 * (4 - len)));
-		*data |= (((uint32_t)(*pcache & PCICAP_EXP_DEVCTL_MPS))
-			  << (8 * (4 - len)));
-	}
-
-	return OPAL_SUCCESS;
-}
-
-static void pci_iov_vf_quirk(struct phb *phb, struct pci_device *vf)
-{
-	struct pci_cfg_reg_filter *pcrf;
-	uint32_t pos;
-
-	if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false))
-		return;
-
-	/*
-	 * On Mellanox MT27500 Family [ConnectX-3], its VF's MPS field in
-	 * the corresponding config register is readonly. The MPS for PF/VF
-	 * are usually different. We are introducing a quirk to make them
-	 * look same to avoid confusion.
-	 */
-	if (vf->vdid != 0x100315b3)
-		return;
-
-	pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
-	pcrf = pci_add_cfg_reg_filter(vf, pos + PCICAP_EXP_DEVCTL, 4,
-				      PCI_REG_FLAG_MASK, pci_iov_vf_devctl);
-	if (!pcrf)
-		prlog(PR_WARNING, "%s: Missed DEVCTL filter on %04x:%02x:%02x.%01x\n",
-		      __func__, phb->opal_id, (vf->bdfn >> 8),
-		      ((vf->bdfn >> 3) & 0x1f), (vf->bdfn & 0x7));
-}
-
-/*
- * Update the SRIOV parameters that change when the number of
- * VFs is configured.
- */
-static bool pci_iov_update_parameters(struct pci_iov *iov)
-{
-	struct phb *phb = iov->phb;
-	uint16_t bdfn = iov->pd->bdfn;
-	uint32_t pos = iov->pos;
-	uint16_t val;
-	bool enabled;
-
-	pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_CTRL, &val);
-	enabled = !!(val & PCIECAP_SRIOV_CTRL_VFE);
-	if (iov->enabled == enabled)
-		return false;
-
-	if (enabled) {
-		pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_INITIAL_VF,
-			       &iov->init_VFs);
-		pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_NUM_VF,
-			       &iov->num_VFs);
-		pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_OFFSET,
-			       &iov->offset);
-		pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_STRIDE,
-			       &iov->stride);
-	} else {
-		iov->init_VFs	= 0;
-		iov->num_VFs	= 0;
-		iov->offset	= 0;
-		iov->stride	= 0;
-	}
-
-	iov->enabled = enabled;
-	return true;
-}
-
-static int64_t pci_iov_change(void *dev __unused,
-			      struct pci_cfg_reg_filter *pcrf,
-			      uint32_t offset __unused,
-			      uint32_t len __unused,
-			      uint32_t *data __unused,
-			      bool write __unused)
-{
-	struct pci_iov *iov = (struct pci_iov *)pcrf->data;
-	struct phb *phb = iov->phb;
-	struct pci_device *pd = iov->pd;
-	struct pci_device *vf, *tmp;
-	uint32_t i;
-	bool changed;
-
-	/* Update SRIOV variable parameters */
-	changed = pci_iov_update_parameters(iov);
-	if (!changed)
-		return OPAL_PARTIAL;
-
-	/* Remove all VFs that have been attached to the parent */
-	if (!iov->enabled) {
-		list_for_each_safe(&pd->children, vf, tmp, link)
-			list_del(&vf->link);
-		return OPAL_PARTIAL;
-	}
-
-	/* Initialize the VFs and attach them to parent */
-	for (changed = false, i = 0; i < iov->num_VFs; i++) {
-		vf = &iov->VFs[i];
-		vf->bdfn = pd->bdfn + iov->offset + iov->stride * i;
-		list_add_tail(&pd->children, &vf->link);
-
-		/*
-		 * We don't populate the capabilities again if they have
-		 * been existing, to save time. Also, we need delay for
-		 * 100ms before the VF's config space becomes ready.
-		 */
-		if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false)) {
-			if (!changed) {
-				changed = !changed;
-				time_wait_ms(100);
-			}
-
-			pci_init_capabilities(phb, vf);
-			pci_iov_vf_quirk(phb, vf);
-		}
-
-		/* Call PHB hook */
-		if (phb->ops->device_init)
-			phb->ops->device_init(phb, pd, NULL);
-	}
-
-	return OPAL_PARTIAL;
-}
-
-/*
- * This function is called with disabled SRIOV capability. So the VF's
- * config address isn't finalized and its config space isn't accessible.
- */
-static void pci_iov_init_VF(struct pci_device *pd, struct pci_device *vf)
-{
-	vf->is_bridge		= false;
-	vf->is_multifunction	= false;
-	vf->is_vf		= true;
-	vf->dev_type		= PCIE_TYPE_ENDPOINT;
-	vf->scan_map		= -1;
-	vf->vdid		= pd->vdid;
-	vf->sub_vdid		= pd->sub_vdid;
-	vf->class		= pd->class;
-	vf->dn			= NULL;
-	vf->slot		= NULL;
-	vf->parent		= pd;
-	vf->phb			= pd->phb;
-	list_head_init(&vf->pcrf);
-	list_head_init(&vf->children);
-}
-
-static void pci_free_iov_cap(void *data)
-{
-	struct pci_iov *iov = data;
-	free(iov->VFs);
-	free(iov);
-}
-
-void pci_init_iov_cap(struct phb *phb, struct pci_device *pd)
-{
-	int64_t pos;
-	struct pci_iov *iov;
-	struct pci_cfg_reg_filter *pcrf;
-	uint32_t i;
-
-	/* Search for SRIOV capability */
-	if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
-		return;
-
-	pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_SRIOV, NULL);
-	if (pos <= 0)
-		return;
-
-	/* Allocate IOV */
-	iov = zalloc(sizeof(*iov));
-	if (!iov) {
-		prlog(PR_ERR, "%s: Cannot alloc IOV for %04x:%02x:%02x.%01x\n",
-		      __func__, phb->opal_id, (pd->bdfn >> 8),
-		      ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
-		return;
-	}
-
-	/* Allocate VFs */
-	pci_cfg_read16(phb, pd->bdfn, pos + PCIECAP_SRIOV_TOTAL_VF,
-		       &iov->total_VFs);
-	iov->VFs = zalloc(sizeof(*iov->VFs) * iov->total_VFs);
-	if (!iov->VFs) {
-		prlog(PR_ERR, "%s: Cannot alloc %d VFs for %04x:%02x:%02x.%01x\n",
-		      __func__, iov->total_VFs, phb->opal_id,
-		      (pd->bdfn >> 8), ((pd->bdfn >> 3) & 0x1f),
-		      (pd->bdfn & 0x7));
-		free(iov);
-		return;
-	}
-
-	/* Initialize VFs */
-	for (i = 0; i < iov->total_VFs; i++)
-		pci_iov_init_VF(pd, &iov->VFs[i]);
-
-	/* Register filter for enabling or disabling SRIOV capability */
-	pcrf = pci_add_cfg_reg_filter(pd, pos + PCIECAP_SRIOV_CTRL, 2,
-				      PCI_REG_FLAG_WRITE, pci_iov_change);
-	if (!pcrf) {
-		prlog(PR_ERR, "%s: Cannot set filter on %04x:%02x:%02x.%01x\n",
-		      __func__, phb->opal_id, (pd->bdfn >> 8),
-		      ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
-		free(iov->VFs);
-		free(iov);
-		return;
-	}
-
-	/* Associate filter and IOV capability */
-	pcrf->data = (void *)iov;
-
-	/*
-	 * Retrieve the number of VFs and other information if applicable.
-	 * Register the SRIOV capability in the mean while.
-	 */
-	iov->phb = phb;
-	iov->pd = pd;
-	iov->pos = pos;
-	iov->enabled = false;
-	pci_iov_update_parameters(iov);
-	pci_set_cap(pd, PCIECAP_ID_SRIOV, pos, iov, pci_free_iov_cap, true);
-}
diff --git a/core/pci.c b/core/pci.c
index ee563c2fc577..02f5caa8d901 100644
--- a/core/pci.c
+++ b/core/pci.c
@@ -18,7 +18,6 @@
 #include <cpu.h>
 #include <pci.h>
 #include <pci-cfg.h>
-#include <pci-iov.h>
 #include <pci-slot.h>
 #include <pci-quirk.h>
 #include <timebase.h>
@@ -193,7 +192,6 @@ void pci_init_capabilities(struct phb *phb, struct pci_device *pd)
 {
 	pci_init_pcie_cap(phb, pd);
 	pci_init_aer_cap(phb, pd);
-	pci_init_iov_cap(phb, pd);
 	pci_init_pm_cap(phb, pd);
 }
 
diff --git a/include/pci-iov.h b/include/pci-iov.h
deleted file mode 100644
index 787b2cd1da69..000000000000
--- a/include/pci-iov.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright 2013-2016 IBM Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __PCI_IOV_H
-#define __PCI_IOV_H
-
-struct pci_iov {
-	struct phb			*phb;
-	struct pci_device		*pd;
-	struct pci_device		*VFs;
-	uint32_t			pos;
-	bool				enabled;
-	struct pci_cfg_reg_filter	pcrf;
-
-	uint16_t			init_VFs;
-	uint16_t			total_VFs;
-	uint16_t			num_VFs;
-	uint16_t			offset;
-	uint16_t			stride;
-};
-
-extern void pci_init_iov_cap(struct phb *phb, struct pci_device *pd);
-
-#endif
-- 
2.20.1



More information about the Skiboot-stable mailing list