[PATCH v7 2/3] powerpc/eeh: EEH support for VFIO PCI device
Gavin Shan
gwshan at linux.vnet.ibm.com
Tue May 27 18:40:51 EST 2014
The patch exports functions to be used by the new ioctl commands, which
will be introduced in the subsequent patch, to support EEH functionality
for VFIO PCI devices. A usage sketch of the exported interface follows
the diffstat below.
Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
---
arch/powerpc/include/asm/eeh.h | 15 +++
arch/powerpc/kernel/eeh.c | 286 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 301 insertions(+)
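Not part of the patch: a rough sketch, for review convenience, of how the
exported interface is expected to be driven from the VFIO SPAPR code added
in the next patch. The wrapper names (vfio_spapr_pci_eeh_open() and
friends) and the sub-command numbers are illustrative placeholders only,
not the actual ioctl definitions.

#include <linux/pci.h>
#include <asm/eeh.h>
#include <asm/iommu.h>

/*
 * Hypothetical: hand EEH ownership of a device to user space when the
 * device is bound to vfio-pci, and reclaim it when it is unbound.
 */
static int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
{
	return eeh_dev_open(pdev);
}

static void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
{
	eeh_dev_release(pdev);
}

/*
 * Hypothetical ioctl backend: resolve the PE from the container's
 * IOMMU table, then run the requested operation against it. The
 * sub-command numbering below is made up for illustration.
 */
static long vfio_spapr_pci_eeh_op(struct iommu_table *tbl, unsigned int op)
{
	struct eeh_pe *pe = eeh_iommu_table_to_pe(tbl);

	if (!pe)
		return -ENODEV;

	switch (op) {
	case 0:		/* e.g. "enable EEH" sub-command */
		return eeh_pe_set_option(pe, EEH_OPT_ENABLE);
	case 1:		/* e.g. "get PE state" sub-command */
		return eeh_pe_get_state(pe);
	case 2:		/* e.g. "hot reset" sub-command */
		return eeh_pe_reset(pe, EEH_RESET_HOT);
	case 3:		/* e.g. "configure bridges" sub-command */
		return eeh_pe_configure(pe);
	default:
		return -EINVAL;
	}
}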
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 34a2d83..ffc95e7 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -26,6 +26,7 @@
#include <linux/string.h>
#include <linux/time.h>
+struct iommu_table;
struct pci_dev;
struct pci_bus;
struct device_node;
@@ -191,6 +192,8 @@ enum {
#define EEH_OPT_ENABLE 1 /* EEH enable */
#define EEH_OPT_THAW_MMIO 2 /* MMIO enable */
#define EEH_OPT_THAW_DMA 3 /* DMA enable */
+#define EEH_OPT_GET_PE_ADDR 0 /* Get PE addr */
+#define EEH_OPT_GET_PE_MODE 1 /* Get PE mode */
#define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */
#define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */
#define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */
@@ -198,6 +201,11 @@ enum {
#define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */
#define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */
#define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */
+#define EEH_PE_STATE_NORMAL 0 /* Normal state */
+#define EEH_PE_STATE_RESET 1 /* PE reset */
+#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Stopped */
+#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA */
+#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */
#define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */
#define EEH_RESET_HOT 1 /* Hot reset */
#define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */
@@ -305,6 +313,13 @@ void eeh_add_device_late(struct pci_dev *);
void eeh_add_device_tree_late(struct pci_bus *);
void eeh_add_sysfs_files(struct pci_bus *);
void eeh_remove_device(struct pci_dev *);
+int eeh_dev_open(struct pci_dev *pdev);
+void eeh_dev_release(struct pci_dev *pdev);
+struct eeh_pe *eeh_iommu_table_to_pe(struct iommu_table *tbl);
+int eeh_pe_set_option(struct eeh_pe *pe, int option);
+int eeh_pe_get_state(struct eeh_pe *pe);
+int eeh_pe_reset(struct eeh_pe *pe, int option);
+int eeh_pe_configure(struct eeh_pe *pe);
/**
* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 3bc8b12..30693c1 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -40,6 +40,7 @@
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
+#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
@@ -108,6 +109,9 @@ struct eeh_ops *eeh_ops = NULL;
/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
+/* Lock to protect passed flags */
+static DEFINE_MUTEX(eeh_dev_mutex);
+
/* Buffer for reporting pci register dumps. Its here in BSS, and
* not dynamically alloced, so that it ends up in RMO where RTAS
* can access it.
@@ -1106,6 +1110,288 @@ void eeh_remove_device(struct pci_dev *dev)
edev->mode &= ~EEH_DEV_SYSFS;
}
+/**
+ * eeh_dev_open - Mark EEH device and PE as passed through
+ * @pdev: PCI device
+ *
+ * Mark the indicated EEH device and PE as passed through.
+ * As a result, the EEH errors detected on the PE won't be
+ * reported. The owner of the device is then responsible for
+ * error detection and recovery.
+ */
+int eeh_dev_open(struct pci_dev *pdev)
+{
+ struct eeh_dev *edev;
+
+ mutex_lock(&eeh_dev_mutex);
+
+ /* No PCI device ? */
+ if (!pdev) {
+ mutex_unlock(&eeh_dev_mutex);
+ return -ENODEV;
+ }
+
+ /* No EEH device ? */
+ edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev || !edev->pe) {
+ mutex_unlock(&eeh_dev_mutex);
+ return -ENODEV;
+ }
+
+ eeh_dev_set_passed(edev, true);
+ eeh_pe_set_passed(edev->pe, true);
+ mutex_unlock(&eeh_dev_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(eeh_dev_open);
+
+/**
+ * eeh_dev_release - Reclaim the ownership of EEH device
+ * @pdev: PCI device
+ *
+ * Reclaim ownership of the EEH device and, potentially, the
+ * corresponding PE. As a result, the EEH errors detected on the
+ * PE will be reported and handled as usual.
+ */
+void eeh_dev_release(struct pci_dev *pdev)
+{
+ bool release_pe = true;
+ struct eeh_pe *pe = NULL;
+ struct eeh_dev *tmp, *edev;
+
+ mutex_lock(&eeh_dev_mutex);
+
+ /* No PCI device ? */
+ if (!pdev) {
+ mutex_unlock(&eeh_dev_mutex);
+ return;
+ }
+
+ /* No EEH device ? */
+ edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev || !eeh_dev_passed(edev) ||
+ !edev->pe || !eeh_pe_passed(edev->pe)) {
+ mutex_unlock(&eeh_dev_mutex);
+ return;
+ }
+
+ /* Release device */
+ pe = edev->pe;
+ eeh_dev_set_passed(edev, false);
+
+ /* Release PE */
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ if (eeh_dev_passed(edev)) {
+ release_pe = false;
+ break;
+ }
+ }
+
+ if (release_pe)
+ eeh_pe_set_passed(pe, false);
+
+ mutex_unlock(&eeh_dev_mutex);
+}
+EXPORT_SYMBOL_GPL(eeh_dev_release);
+
+/**
+ * eeh_iommu_table_to_pe - Convert IOMMU table to EEH PE
+ * @tbl: IOMMU table
+ *
+ * The routine is called to convert an IOMMU table to its EEH PE.
+ */
+struct eeh_pe *eeh_iommu_table_to_pe(struct iommu_table *tbl)
+{
+ struct pci_dev *pdev = NULL;
+ struct eeh_dev *edev;
+ bool found = false;
+
+ /* No IOMMU table ? */
+ if (!tbl)
+ return NULL;
+
+ /* Find the PCI device using this IOMMU table */
+ for_each_pci_dev(pdev) {
+ if (get_iommu_table_base(&pdev->dev) == tbl) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return NULL;
+
+ /* No EEH device or PE ? */
+ edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev || !edev->pe)
+ return NULL;
+
+ return edev->pe;
+}
+
+/**
+ * eeh_pe_set_option - Set options for the indicated PE
+ * @pe: EEH PE
+ * @option: requested option
+ *
+ * The routine is called to enable or disable EEH functionality
+ * on the indicated PE, or to re-enable IO or DMA on a frozen PE.
+ */
+int eeh_pe_set_option(struct eeh_pe *pe, int option)
+{
+ int ret = 0;
+
+ /* Invalid PE ? */
+ if (!pe)
+ return -ENODEV;
+
+ /*
+ * EEH could have been disabled globally; enabling it for one PE
+ * then fails with an error. EEH isn't expected to be disabled on
+ * one specific PE, so EEH_OPT_DISABLE is accepted as a no-op.
+ */
+ switch (option) {
+ case EEH_OPT_ENABLE:
+ if (eeh_enabled())
+ break;
+ ret = -EIO;
+ break;
+ case EEH_OPT_DISABLE:
+ break;
+ case EEH_OPT_THAW_MMIO:
+ case EEH_OPT_THAW_DMA:
+ if (!eeh_ops || !eeh_ops->set_option) {
+ ret = -ENOENT;
+ break;
+ }
+
+ ret = eeh_ops->set_option(pe, option);
+ break;
+ default:
+ pr_debug("%s: Option %d out of range (%d, %d)\n",
+ __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_set_option);
+
+/**
+ * eeh_pe_get_state - Retrieve PE's state
+ * @pe: EEH PE
+ *
+ * Retrieve the PE's state, which covers three aspects: whether DMA
+ * is enabled, whether IO is enabled, and whether a reset is asserted.
+ */
+int eeh_pe_get_state(struct eeh_pe *pe)
+{
+ int result, ret = 0;
+
+ /* Existing PE ? */
+ if (!pe)
+ return -ENODEV;
+
+ if (!eeh_ops || !eeh_ops->get_state)
+ return -ENOENT;
+
+ result = eeh_ops->get_state(pe, NULL);
+ if (!(result & EEH_STATE_RESET_ACTIVE) &&
+ (result & EEH_STATE_DMA_ENABLED) &&
+ (result & EEH_STATE_MMIO_ENABLED))
+ ret = EEH_PE_STATE_NORMAL;
+ else if (result & EEH_STATE_RESET_ACTIVE)
+ ret = EEH_PE_STATE_RESET;
+ else if (!(result & EEH_STATE_RESET_ACTIVE) &&
+ !(result & EEH_STATE_DMA_ENABLED) &&
+ !(result & EEH_STATE_MMIO_ENABLED))
+ ret = EEH_PE_STATE_STOPPED_IO_DMA;
+ else if (!(result & EEH_STATE_RESET_ACTIVE) &&
+ (result & EEH_STATE_DMA_ENABLED) &&
+ !(result & EEH_STATE_MMIO_ENABLED))
+ ret = EEH_PE_STATE_STOPPED_DMA;
+ else
+ ret = EEH_PE_STATE_UNAVAIL;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_get_state);
+
+/**
+ * eeh_pe_reset - Issue PE reset according to specified type
+ * @pe: EEH PE
+ * @option: reset type
+ *
+ * The routine is called to reset the specified PE with the
+ * indicated type: hot reset, fundamental reset, or deactivation of
+ * a previously asserted reset. PE reset is central to error recovery.
+ */
+int eeh_pe_reset(struct eeh_pe *pe, int option)
+{
+ int ret = 0;
+
+ /* Invalid PE ? */
+ if (!pe)
+ return -ENODEV;
+
+ if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
+ return -ENOENT;
+
+ switch (option) {
+ case EEH_RESET_DEACTIVATE:
+ ret = eeh_ops->reset(pe, option);
+ if (ret)
+ break;
+
+ /*
+ * The PE is still in the frozen state and we need to clear that.
+ * Clearing the frozen state only after the reset has been
+ * deasserted avoids stray IO accesses during the reset, which
+ * could freeze the PE again.
+ */
+ ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
+ if (!ret)
+ ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ break;
+ case EEH_RESET_HOT:
+ case EEH_RESET_FUNDAMENTAL:
+ ret = eeh_ops->reset(pe, option);
+ break;
+ default:
+ pr_debug("%s: Unsupported option %d\n",
+ __func__, option);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_reset);
+
+/**
+ * eeh_pe_configure - Configure PCI bridges after PE reset
+ * @pe: EEH PE
+ *
+ * The routine is called to restore the PCI config space for
+ * the PCI devices, especially the PCI bridges, affected by the
+ * PE reset issued previously.
+ */
+int eeh_pe_configure(struct eeh_pe *pe)
+{
+ int ret = 0;
+
+ /* Invalid PE ? */
+ if (!pe)
+ return -ENODEV;
+
+ /* Restore config space for the affected devices */
+ eeh_pe_restore_bars(pe);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_configure);
+
static int proc_eeh_show(struct seq_file *m, void *v)
{
if (!eeh_enabled()) {
--
1.8.3.2
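Also for illustration only: the ordering in which the PE-level helpers are
expected to be used during a guest-driven recovery, assuming user space
(e.g. QEMU) issues the corresponding ioctl commands introduced in the next
patch. example_recover_pe() is a made-up name, not part of this series.

static int example_recover_pe(struct eeh_pe *pe)
{
	int state, ret;

	/* Negative values mean no PE or no platform backend */
	state = eeh_pe_get_state(pe);
	if (state < 0)
		return state;
	if (state == EEH_PE_STATE_NORMAL)
		return 0;

	/*
	 * Assert, then deassert a hot reset on the PE. Deassertion
	 * also thaws MMIO/DMA and clears the isolated state.
	 */
	ret = eeh_pe_reset(pe, EEH_RESET_HOT);
	if (!ret)
		ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
	if (ret)
		return ret;

	/* Restore config space (BARs) for the devices behind the PE */
	return eeh_pe_configure(pe);
}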