2.6: PATCH for multiple EEH bugs
linas at austin.ibm.com
linas at austin.ibm.com
Wed Feb 4 12:18:39 EST 2004
Dohhh,
On Tue, Feb 03, 2004 at 06:45:05PM -0600, David Boutcher wrote:
>
>
> owner-linuxppc64-dev at lists.linuxppc.org wrote on 02/03/2004 06:34:59 PM:
> > Patch for multiple EEH-related bugs. Please review this patch,
> > & if appropriate, please apply. It should apply cleanly to
> > the current ameslab tree (Feb 03 2004 2.6.2-rc3).
>
> By the way....no patch :-)
>
> Dave Boutcher
> IBM Linux Technology Center
>
-------------- next part --------------
===== arch/ppc64/kernel/chrp_setup.c 1.49 vs edited =====
--- 1.49/arch/ppc64/kernel/chrp_setup.c Mon Jan 19 20:07:02 2004
+++ edited/arch/ppc64/kernel/chrp_setup.c Tue Feb 3 16:35:44 2004
@@ -71,6 +71,7 @@
extern void openpic_init_irq_desc(irq_desc_t *);
extern void find_and_init_phbs(void);
+extern void __init eeh_init(void);
extern void pSeries_get_boot_time(struct rtc_time *rtc_time);
extern void pSeries_get_rtc_time(struct rtc_time *rtc_time);
===== arch/ppc64/kernel/eeh.c 1.17 vs edited =====
--- 1.17/arch/ppc64/kernel/eeh.c Tue Feb 3 11:03:04 2004
+++ edited/arch/ppc64/kernel/eeh.c Tue Feb 3 16:38:19 2004
@@ -38,44 +38,68 @@
#define BUID_LO(buid) ((buid) & 0xffffffff)
#define CONFIG_ADDR(busno, devfn) (((((busno) & 0xff) << 8) | ((devfn) & 0xf8)) << 8)
-unsigned long eeh_total_mmio_ffs;
-unsigned long eeh_false_positives;
/* RTAS tokens */
static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
static int ibm_read_slot_reset_state;
-static int eeh_implemented;
+static int eeh_subsystem_enabled;
#define EEH_MAX_OPTS 4096
static char *eeh_opts;
static int eeh_opts_last;
-unsigned char slot_err_buf[RTAS_ERROR_LOG_MAX];
+/* System monitoring statistics */
+unsigned long eeh_total_mmio_ffs;
+unsigned long eeh_false_positives;
+unsigned long eeh_ignored_failures;
pte_t *find_linux_pte(pgd_t *pgdir, unsigned long va); /* from htab.c */
static int eeh_check_opts_config(struct device_node *dn,
int class_code, int vendor_id, int device_id,
int default_state);
-unsigned long eeh_token_to_phys(unsigned long token)
+
+/**
+ * eeh_token_to_phys - convert EEH address token to phys address
+ * @token i/o token, should be address in the form 0xA....
+ *
+ * Converts EEH address tokens into physical addresses. Note that
+ * this routine does *not* convert I/O BAR addresses (which start
+ * with 0xE...) to phys addresses!
+ */
+unsigned long
+eeh_token_to_phys(unsigned long token)
{
+ pte_t *ptep;
+ unsigned long pa, vaddr;
if (REGION_ID(token) == EEH_REGION_ID) {
- unsigned long vaddr = IO_TOKEN_TO_ADDR(token);
- pte_t *ptep = find_linux_pte(ioremap_mm.pgd, vaddr);
- unsigned long pa = pte_pfn(*ptep) << PAGE_SHIFT;
- return pa | (vaddr & (PAGE_SIZE-1));
- } else
+ vaddr = IO_TOKEN_TO_ADDR(token);
+ } else {
return token;
+ }
+
+ ptep = find_linux_pte(ioremap_mm.pgd, vaddr);
+ pa = pte_pfn(*ptep) << PAGE_SHIFT;
+ return pa | (vaddr & (PAGE_SIZE-1));
}
-/* Check for an eeh failure at the given token address.
+/**
+ * eeh_check_failure - check if all 1's data is due to EEH slot freeze
+ * @token i/o token, should be address in the form 0xA....
+ * @val value, should be all 1's (XXX why do we need this arg??)
+ * @who arbitrary ID, useful for debugging
+ *
+ * Check for an eeh failure at the given token address.
* The given value has been read and it should be 1's (0xff, 0xffff or
* 0xffffffff).
*
* Probe to determine if an error actually occurred. If not return val.
* Otherwise panic.
+ *
+ * Note this routine might be called in an interrupt context ...
*/
-unsigned long eeh_check_failure(void *token, unsigned long val)
+unsigned long
+eeh_check_failure(void *token, unsigned long val, int who)
{
unsigned long addr;
struct pci_dev *dev;
@@ -85,28 +109,27 @@
/* IO BAR access could get us here...or if we manually force EEH
* operation on even if the hardware won't support it.
*/
- if (!eeh_implemented || ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE)
+ if (!eeh_subsystem_enabled || ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE)
return val;
- /* Finding the phys addr + pci device is quite expensive.
- * However, the RTAS call is MUCH slower.... :(
- */
+ /* Finding the phys addr + pci device; this is pretty quick. */
addr = eeh_token_to_phys((unsigned long)token);
- dev = pci_find_dev_by_addr(addr);
- if (!dev) {
- printk("EEH: no pci dev found for addr=0x%lx\n", addr);
- return val;
- }
+ dev = pci_get_device_by_addr(addr);
+
+ if (!dev) return val;
+
dn = pci_device_to_OF_node(dev);
if (!dn) {
- printk("EEH: no pci dn found for addr=0x%lx\n", addr);
+ pci_dev_put (dev);
return val;
}
/* Access to IO BARs might get this far and still not want checking. */
- if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) || dn->eeh_mode & EEH_MODE_NOCHECK)
+ if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) ||
+ dn->eeh_mode & EEH_MODE_NOCHECK) {
+ pci_dev_put (dev);
return val;
-
+ }
/* Now test for an EEH failure. This is VERY expensive.
* Note that the eeh_config_addr may be a parent device
@@ -119,6 +142,7 @@
dn->eeh_config_addr, BUID_HI(dn->phb->buid),
BUID_LO(dn->phb->buid));
if (ret == 0 && rets[1] == 1 && rets[0] >= 2) {
+ unsigned char slot_err_buf[RTAS_ERROR_LOG_MAX];
unsigned long slot_err_ret;
memset(slot_err_buf, 0, RTAS_ERROR_LOG_MAX);
@@ -139,23 +163,36 @@
* the system in light of potential corruption, we
* can use it here.
*/
- if (panic_on_oops)
- panic("EEH: MMIO failure (%ld) on device:\n%s\n",
- rets[0], pci_name(dev));
- else
- printk("EEH: MMIO failure (%ld) on device:\n%s\n",
- rets[0], pci_name(dev));
+ if (panic_on_oops) {
+ panic("EEH: MMIO failure (%ld) on device:\n%s\n", rets[0], pci_name(dev));
+ } else {
+ eeh_ignored_failures ++;
+ if (!in_interrupt()) { /* XXX this will be replaced by eehdaemon */
+ printk(KERN_INFO "EEH: MMIO failure (%ld) on device:%s %s\n",
+ rets[0], pci_name(dev), pci_pretty_name(dev));
+ }
+ }
+ } else {
+ eeh_false_positives++;
}
}
- eeh_false_positives++;
+ pci_dev_put (dev);
return val; /* good case */
-
}
struct eeh_early_enable_info {
unsigned int buid_hi;
unsigned int buid_lo;
- int adapters_enabled;
+
+ /* Handy-dandy statistics help us understand what's going on */
+ int num_phbs_found;
+ int num_of_nodes;
+ int num_devices;
+ int num_devices_bad_status;
+ int num_devices_graphics;
+ int num_devices_w_eeh_parent;
+ int num_devices_w_eeh_disabled;
+ int num_adapters_enabled;
};
/* Enable eeh for the given device node. */
@@ -170,12 +207,17 @@
u32 *regs;
int enable;
- if (status && strcmp(status, "ok") != 0)
+ info->num_devices ++;
+ if (status && strcmp(status, "ok") != 0) {
+ info->num_devices_bad_status ++;
return NULL; /* ignore devices with bad status */
+ }
/* Weed out PHBs or other bad nodes. */
- if (!class_code || !vendor_id || !device_id)
+ if (!class_code || !vendor_id || !device_id) {
+ info->num_devices_bad_status ++;
return NULL;
+ }
/* Ignore known PHBs and EADs bridges */
if (*vendor_id == PCI_VENDOR_ID_IBM &&
@@ -191,28 +233,36 @@
* But there are a few cases like display devices that make sense.
*/
enable = 1; /* i.e. we will do checking */
- if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
+ if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ printk (KERN_INFO "EEH: %s: display device, disabling EEH checking.\n", dn->full_name);
+ info->num_devices_graphics ++;
enable = 0;
+ }
if (!eeh_check_opts_config(dn, *class_code, *vendor_id, *device_id, enable)) {
if (enable) {
- printk(KERN_INFO "EEH: %s user requested to run without EEH.\n", dn->full_name);
+ printk(KERN_NOTICE "EEH: %s user requested to run without EEH.\n", dn->full_name);
enable = 0;
}
}
if (!enable)
+ {
dn->eeh_mode = EEH_MODE_NOCHECK;
+ info->num_devices_w_eeh_disabled ++;
+ return NULL;
+ }
/* This device may already have an EEH parent. */
if (dn->parent && (dn->parent->eeh_mode & EEH_MODE_SUPPORTED)) {
/* Parent supports EEH. */
dn->eeh_mode |= EEH_MODE_SUPPORTED;
dn->eeh_config_addr = dn->parent->eeh_config_addr;
+ info->num_devices_w_eeh_parent ++;
return NULL;
}
- /* Ok..see if this device supports EEH. */
+ /* Ok... see if this device supports EEH. */
regs = (u32 *)get_property(dn, "reg", 0);
if (regs) {
/* First register entry is addr (00BBSS00) */
@@ -221,16 +271,21 @@
regs[0], info->buid_hi, info->buid_lo,
EEH_ENABLE);
if (ret == 0) {
- info->adapters_enabled++;
+ info->num_adapters_enabled++;
dn->eeh_mode |= EEH_MODE_SUPPORTED;
dn->eeh_config_addr = regs[0];
+ printk (KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
+ } else {
+ printk (KERN_WARNING "EEH: %s: rtas_call failed.\n", dn->full_name);
}
+ } else {
+ printk (KERN_WARNING "EEH: %s: unable to get reg property.\n", dn->full_name);
}
return NULL;
}
/*
- * Initialize eeh by trying to enable it for all of the adapters in the system.
+ * Initialize EEH by trying to enable it for all of the adapters in the system.
* As a side effect we can determine here if eeh is supported at all.
* Note that we leave EEH on so failed config cycles won't cause a machine
* check. If a user turns off EEH for a particular adapter they are really
@@ -243,7 +298,7 @@
* The eeh-force-off/on option does literally what it says, so if Linux must
* avoid enabling EEH this must be done.
*/
-void eeh_init(void)
+void __init eeh_init(void)
{
struct device_node *phb;
struct eeh_early_enable_info info;
@@ -261,26 +316,37 @@
* of I/O macros even if we can't actually test for EEH failure.
*/
if (eeh_force_on > eeh_force_off)
- eeh_implemented = 1;
+ eeh_subsystem_enabled = 1;
else if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
return;
if (eeh_force_off > eeh_force_on) {
/* User is forcing EEH off. Be noisy if it is implemented. */
- if (eeh_implemented)
+ if (eeh_subsystem_enabled)
printk(KERN_WARNING "EEH: WARNING: PCI Enhanced I/O Error Handling is user disabled\n");
- eeh_implemented = 0;
+ eeh_subsystem_enabled = 0;
return;
}
-
/* Enable EEH for all adapters. Note that eeh requires buid's */
- info.adapters_enabled = 0;
+ info.num_adapters_enabled = 0;
+ info.num_of_nodes = 0;
+ info.num_phbs_found = 0;
+ info.num_devices = 0;
+ info.num_devices_bad_status = 0;
+ info.num_devices_graphics = 0;
+ info.num_devices_w_eeh_parent = 0;
+ info.num_devices_w_eeh_disabled = 0;
for (phb = of_find_node_by_name(NULL, "pci"); phb; phb = of_find_node_by_name(phb, "pci")) {
+
int len;
- int *buid_vals = (int *) get_property(phb, "ibm,fw-phb-id", &len);
+ int *buid_vals;
+
+ info.num_of_nodes ++;
+ buid_vals = (int *) get_property(phb, "ibm,fw-phb-id", &len);
if (!buid_vals)
continue;
+ info.num_phbs_found ++;
if (len == sizeof(int)) {
info.buid_lo = buid_vals[0];
info.buid_hi = 0;
@@ -288,24 +354,88 @@
info.buid_hi = buid_vals[0];
info.buid_lo = buid_vals[1];
} else {
- printk("EEH: odd ibm,fw-phb-id len returned: %d\n", len);
+ printk(KERN_INFO "EEH: odd ibm,fw-phb-id len returned: %d\n", len);
continue;
}
traverse_pci_devices(phb, early_enable_eeh, NULL, &info);
}
- if (info.adapters_enabled) {
+ if (info.num_adapters_enabled) {
printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
- eeh_implemented = 1;
+ eeh_subsystem_enabled = 1;
+ }
+ printk(KERN_INFO "EEH: num_of_nodes=%d\n", info.num_of_nodes);
+ printk(KERN_INFO "EEH: num_phbs_found=%d\n", info.num_phbs_found);
+ printk(KERN_INFO "EEH: num_devices=%d\n", info.num_devices);
+ printk(KERN_INFO "EEH: num_devices_bad_status=%d\n", info.num_devices_bad_status);
+ printk(KERN_INFO "EEH: num_devices_graphics=%d\n", info.num_devices_graphics);
+ printk(KERN_INFO "EEH: num_devices_w_eeh_parent=%d\n", info.num_devices_w_eeh_parent);
+ printk(KERN_INFO "EEH: num_devices_w_eeh_disabled=%d\n", info.num_devices_w_eeh_disabled);
+ printk(KERN_INFO "EEH: num_adapters_enabled=%d\n", info.num_adapters_enabled);
+}
+
+/**
+ * eeh_add_device - perform EEH initialization for the indicated pci device
+ * @dev: pci device for which to set up EEH; NULL is ignored
+ *
+ * This routine can be used to perform EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ * Whether this actually enables EEH or not for this device depends
+ * on the type of the device, on earlier boot command-line
+ * arguments, etc. Does nothing if the EEH subsystem is disabled.
+ */
+void
+eeh_add_device (struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct pci_controller *phb;
+ struct eeh_early_enable_info info = { 0 }; /* early_enable_eeh() increments the stats counters */
+
+ if (!dev || !eeh_subsystem_enabled) return;
+
+ printk (KERN_DEBUG "EEH: adding device %s %s\n",
+ pci_name (dev), pci_pretty_name(dev));
+ dn = pci_device_to_OF_node(dev);
+ if (NULL == dn) return;
+
+ phb = PCI_GET_PHB_PTR(dev);
+ if (NULL == phb || 0 == phb->buid) {
+ printk (KERN_WARNING "EEH: Expected buid but found none\n");
+ return;
}
+
+ info.buid_hi = BUID_HI(phb->buid);
+ info.buid_lo = BUID_LO(phb->buid);
+
+ early_enable_eeh(dn, &info);
+ pci_addr_cache_insert_device (dev);
}
+/**
+ * eeh_remove_device - undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be called when a device is removed from a running
+ * system (e.g. by hotplug or dlpar).
+ */
+void
+eeh_remove_device (struct pci_dev *dev)
+{
+ if (!dev || !eeh_subsystem_enabled) return;
+
+ /* Unregister the device with the EEH/PCI address search system */
+ printk (KERN_DEBUG "EEH: remove device %s %s\n",
+ pci_name (dev), pci_pretty_name(dev));
+ pci_addr_cache_remove_device (dev);
+
+}
-int eeh_set_option(struct pci_dev *dev, int option)
+int
+eeh_set_option(struct pci_dev *dev, int option)
{
struct device_node *dn = pci_device_to_OF_node(dev);
struct pci_controller *phb = PCI_GET_PHB_PTR(dev);
- if (dn == NULL || phb == NULL || phb->buid == 0 || !eeh_implemented)
+ if (dn == NULL || phb == NULL || phb->buid == 0 || !eeh_subsystem_enabled)
return -2;
return rtas_call(ibm_set_eeh_option, 4, 1, NULL,
@@ -316,7 +446,7 @@
/* If EEH is implemented, find the PCI device using given phys addr
* and check to see if eeh failure checking is disabled.
- * Remap the addr (trivially) to the EEH region if not.
+ * Remap the addr (trivially) to the EEH region if EEH checking enabled.
* For addresses not known to PCI the vaddr is simply returned unchanged.
*/
void *eeh_ioremap(unsigned long addr, void *vaddr)
@@ -324,28 +454,72 @@
struct pci_dev *dev;
struct device_node *dn;
- if (!eeh_implemented)
+ if (!eeh_subsystem_enabled)
return vaddr;
- dev = pci_find_dev_by_addr(addr);
+ dev = pci_get_device_by_addr(addr);
if (!dev)
return vaddr;
- dn = pci_device_to_OF_node(dev);
- if (!dn)
+
+ dn = pci_device_to_OF_node(dev);
+ if (!dn) {
+ pci_dev_put (dev);
return vaddr;
- if (dn->eeh_mode & EEH_MODE_NOCHECK)
+ }
+ if (dn->eeh_mode & EEH_MODE_NOCHECK) {
+ pci_dev_put (dev);
return vaddr;
+ }
+ pci_dev_put (dev);
return (void *)IO_ADDR_TO_TOKEN(vaddr);
}
static int eeh_proc_falsepositive_read(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len;
- len = sprintf(page, "eeh_false_positives=%ld\n"
- "eeh_total_mmio_ffs=%ld\n",
- eeh_false_positives, eeh_total_mmio_ffs);
- return len;
+ char *p, *buffer;
+#define EEH_PROC_BUFSZ 250
+ int n=0, bs=EEH_PROC_BUFSZ;
+
+ if (count < 0) return -EINVAL;
+
+ buffer = kmalloc (EEH_PROC_BUFSZ,GFP_KERNEL);
+ if (!buffer) return -ENOMEM;
+
+ p = buffer;
+
+ if (0 == eeh_subsystem_enabled) {
+ n += snprintf (p+n, bs-n, "EEH Subsystem is globally disabled\n");
+ n += snprintf(p+n, bs-n, "eeh_total_mmio_ffs=%ld\n",
+ eeh_total_mmio_ffs);
+ } else {
+ n += snprintf (p+n, bs-n, "EEH Subsystem is enabled\n");
+ n += snprintf(p+n, bs-n,
+ "eeh_total_mmio_ffs=%ld\n"
+ "eeh_false_positives=%ld\n"
+ "eeh_ignored_failures=%ld\n",
+ eeh_total_mmio_ffs,
+ eeh_false_positives,
+ eeh_ignored_failures);
+ }
+
+ /* Misc machinations of the proc file system */
+ if (off >= strlen(buffer)) {
+ *eof = 1;
+ kfree(buffer);
+ return 0;
+ }
+ if (n > strlen(buffer) - off)
+ n = strlen(buffer) - off;
+ if (n > count)
+ n = count;
+ else
+ *eof = 1;
+
+ memcpy(page, buffer + off, n);
+ *start = page;
+ kfree(buffer);
+ return n;
}
/* Implementation of /proc/ppc64/eeh
@@ -362,6 +536,12 @@
return 0;
}
+static int __init eeh_init_late(void)
+{
+ eeh_init_proc ();
+ return 0;
+}
+
/*
* Test if "dev" should be configured on or off.
* This processes the options literally from left to right.
@@ -456,7 +636,7 @@
if (*cur) {
int curlen = curend-cur;
if (eeh_opts_last + curlen > EEH_MAX_OPTS-2) {
- printk(KERN_INFO "EEH: sorry...too many eeh cmd line options\n");
+ printk(KERN_WARNING "EEH: sorry...too many eeh cmd line options\n");
return 1;
}
eeh_opts[eeh_opts_last++] = state ? '+' : '-';
@@ -478,6 +658,6 @@
return eeh_parm(str, 1);
}
-__initcall(eeh_init_proc);
+__initcall(eeh_init_late);
__setup("eeh-off", eehoff_parm);
__setup("eeh-on", eehon_parm);
===== arch/ppc64/kernel/pSeries_pci.c 1.34 vs edited =====
--- 1.34/arch/ppc64/kernel/pSeries_pci.c Fri Jan 30 21:22:28 2004
+++ edited/arch/ppc64/kernel/pSeries_pci.c Tue Feb 3 16:35:49 2004
@@ -530,7 +530,7 @@
dev->resource[i].start += hose->pci_mem_offset;
dev->resource[i].end += hose->pci_mem_offset;
}
- }
+ }
}
EXPORT_SYMBOL(pcibios_fixup_device_resources);
===== arch/ppc64/kernel/pci.c 1.42 vs edited =====
--- 1.42/arch/ppc64/kernel/pci.c Mon Jan 19 20:07:05 2004
+++ edited/arch/ppc64/kernel/pci.c Tue Feb 3 16:39:53 2004
@@ -23,6 +23,8 @@
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -107,42 +109,264 @@
}
}
-/* Given an mmio phys address, find a pci device that implements
- * this address. This is of course expensive, but only used
- * for device initialization or error paths.
- * For io BARs it is assumed the pci_io_base has already been added
- * into addr.
+/**
+ * The pci address cache subsystem. This subsystem places
+ * PCI device address resources into a red-black tree, sorted
+ * according to the address range, so that given only an i/o
+ * address, the corresponding PCI device can be **quickly**
+ * found.
*
- * Bridges are ignored although they could be used to optimize the search.
+ * Currently, the only customer of this code is the EEH subsystem;
+ * thus, this code has been somewhat tailored to suit EEH better.
+ * In particular, the cache does *not* hold the addresses of devices
+ * for which EEH is not enabled.
+ *
+ * (Implementation Note: The RB tree seems to be better/faster
+ * than any hash algo I could think of for this problem, even
+ * with the penalty of slow pointer chases for d-cache misses).
*/
-struct pci_dev *pci_find_dev_by_addr(unsigned long addr)
+struct pci_io_addr_range
{
- struct pci_dev *dev = NULL;
+ struct rb_node rb_node;
+ unsigned long addr_lo;
+ unsigned long addr_hi;
+ struct pci_dev *pcidev;
+ unsigned int flags;
+};
+
+struct pci_io_addr_cache
+{
+ struct rb_root rb_root;
+ spinlock_t piar_lock;
+} pci_io_addr_cache_root;
+
+static inline struct pci_dev *
+__pci_get_device_by_addr (unsigned long addr)
+{
+ struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
+ while (n)
+ {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry (n, struct pci_io_addr_range, rb_node);
+ if (addr < piar->addr_lo) {
+ n = n->rb_left;
+ } else
+ if (addr > piar->addr_hi) {
+ n = n->rb_right;
+ } else {
+ pci_dev_get (piar->pcidev);
+ return piar->pcidev;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * pci_get_device_by_addr - Get device, given only address
+ * @addr: mmio (PIO) phys address or i/o port number
+ *
+ * Given an mmio phys address, or a port number, find a pci device
+ * that implements this address. Be sure to pci_dev_put the device
+ * when finished. I/O port numbers are assumed to be offset
+ * from zero (that is, they do *not* have pci_io_base added in).
+ * It is safe to call this function within an interrupt.
+ */
+struct pci_dev *
+pci_get_device_by_addr (unsigned long addr)
+{
+ struct pci_dev *dev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ dev = __pci_get_device_by_addr (addr);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+ return dev;
+}
+
+/* Handy-dandy debug print routine, does nothing more
+ * than print out the contents of our addr cache. */
+static void
+pci_addr_cache_print (struct pci_io_addr_cache *cache)
+{
+ struct rb_node *n;
+ n = rb_first (&cache->rb_root);
+ int cnt=0;
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry (n, struct pci_io_addr_range, rb_node);
+ printk (KERN_DEBUG "PCI: %s addr range %d [%lx -%lx]: %s %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
+ cnt,
+ piar->addr_lo, piar->addr_hi,
+ pci_name (piar->pcidev),
+ pci_pretty_name (piar->pcidev));
+ cnt ++;
+ n = rb_next (n);
+ }
+}
+
+/* Insert address range into the rb tree. */
+static inline struct pci_io_addr_range *
+pci_addr_cache_insert (struct pci_dev *dev,
+ unsigned long alo, unsigned long ahi, unsigned int flags)
+{
+ struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
+ struct rb_node * parent = NULL;
+ struct pci_io_addr_range *piar;
+
+ // Walk tree, find a place to insert into tree
+ while (*p) {
+ parent = *p;
+ piar = rb_entry (parent, struct pci_io_addr_range, rb_node);
+ if (alo < piar->addr_lo) {
+ p = &parent->rb_left;
+ } else if (ahi > piar->addr_hi) {
+ p = &parent->rb_right;
+ } else {
+ if (dev != piar->pcidev ||
+ alo != piar->addr_lo || ahi != piar->addr_hi) {
+ printk (KERN_WARNING "PIAR: overlapping address range\n");
+ }
+ return piar;
+ }
+ }
+ piar = (struct pci_io_addr_range *) kmalloc (
+ sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+
+ if (!piar) return NULL; // whoops
+
+ piar->addr_lo = alo;
+ piar->addr_hi = ahi;
+ piar->pcidev = dev;
+ piar->flags = flags;
+
+ rb_link_node (&piar->rb_node, parent, p);
+ rb_insert_color (&piar->rb_node, &pci_io_addr_cache_root.rb_root);
+ return piar;
+}
+
+/* Add each I/O and MMIO resource of an EEH-checked device to the address cache; the locked wrapper pci_addr_cache_insert_device() calls this with piar_lock held. */
+inline void
+__pci_addr_cache_insert_device (struct pci_dev *dev)
+{
+ struct device_node *dn = pci_device_to_OF_node(dev);
+ if (!dn) {
+ printk(KERN_WARNING "PCI: no pci dn found for dev=%s %s\n",
+ pci_name(dev), pci_pretty_name(dev));
+ // No pci_dev_put here: this routine never took a reference on dev.
+ return;
+ }
+
+ // Skip any devices for which EEH is not enabled.
+ if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) ||
+ dn->eeh_mode & EEH_MODE_NOCHECK) {
+ printk(KERN_INFO "PCI: skip building address cache for=%s %s\n",
+ pci_name(dev), pci_pretty_name(dev));
+ // No pci_dev_put here either; the caller still owns its reference.
+ return;
+ }
+
+ // Walk resources on this device, poke them into the tree
int i;
- unsigned long ioaddr;
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ unsigned long start = pci_resource_start(dev,i);
+ unsigned long end = pci_resource_end(dev,i);
+ unsigned int flags = pci_resource_flags(dev,i);
+
+ // We are interested only in bus addresses, not dma or other stuff
+ if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) continue;
+ if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
+ continue;
+ pci_addr_cache_insert (dev, start, end, flags);
+ }
+}
- ioaddr = (addr > isa_io_base) ? addr - isa_io_base : 0;
+/**
+ * pci_addr_cache_insert_device - Add a device to the address cache
+ * @dev: PCI device whose I/O addresses we are interested in.
+ *
+ * In order to support the fast lookup of devices based on addresses,
+ * we maintain a cache of devices that can be quickly searched.
+ * This routine adds a device to that cache.
+ */
+void
+pci_addr_cache_insert_device (struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __pci_addr_cache_insert_device (dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
- while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
+static inline void
+__pci_addr_cache_remove_device (struct pci_dev *dev)
+{
+ struct rb_node *n;
+
+restart:
+ n = rb_first (&pci_io_addr_cache_root.rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry (n, struct pci_io_addr_range, rb_node);
+
+ if (piar->pcidev == dev)
+ {
+ rb_erase (n, &pci_io_addr_cache_root.rb_root);
+ kfree (piar);
+ goto restart;
+ }
+ n = rb_next (n);
+ }
+ pci_dev_put (dev);
+}
+
+/**
+ * pci_addr_cache_remove_device - remove pci device from addr cache
+ * @dev: device to remove
+ *
+ * Remove a device from the addr-cache tree.
+ * This is potentially expensive, since it will walk
+ * the tree multiple times (once per resource).
+ * But so what; device removal doesn't need to be that fast.
+ */
+void
+pci_addr_cache_remove_device (struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __pci_addr_cache_remove_device (dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+/**
+ * pci_addr_cache_build - Build a cache of I/O addresses
+ *
+ * Build a cache of pci i/o addresses. This cache will be used to
+ * find the pci device that corresponds to a given address.
+ * This routine scans all pci busses to build the cache.
+ * Must be run late in boot process, after the pci controllers
+ * have been scanned for devices (after all device resources are known).
+ */
+static __init void
+pci_addr_cache_build (void)
+{
+ struct pci_dev *dev = NULL;
+
+ spin_lock_init (&pci_io_addr_cache_root.piar_lock);
+
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ // Ignore PCI bridges ( XXX why ??)
+ if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
+ // No pci_dev_put: pci_get_device() drops the ref on the dev passed back in.
continue;
-
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- unsigned long start = pci_resource_start(dev,i);
- unsigned long end = pci_resource_end(dev,i);
- unsigned int flags = pci_resource_flags(dev,i);
- if (start == 0 || ~start == 0 ||
- end == 0 || ~end == 0)
- continue;
- if ((flags & IORESOURCE_IO) &&
- (ioaddr >= start && ioaddr <= end))
- return dev;
- else if ((flags & IORESOURCE_MEM) &&
- (addr >= start && addr <= end))
- return dev;
}
+ pci_addr_cache_insert_device (dev);
}
- return NULL;
+
+ // Verify tree built up above, echo back the list of addrs.
+ pci_addr_cache_print (&pci_io_addr_cache_root);
}
void
@@ -343,6 +567,8 @@
printk("PCI: Probing PCI hardware done\n");
//ppc64_boot_msg(0x41, "PCI Done");
+
+ pci_addr_cache_build ();
return 0;
}
===== arch/ppc64/kernel/pci.h 1.10 vs edited =====
--- 1.10/arch/ppc64/kernel/pci.h Fri Sep 12 06:01:39 2003
+++ edited/arch/ppc64/kernel/pci.h Tue Feb 3 16:35:50 2004
@@ -37,11 +37,15 @@
void *traverse_pci_devices(struct device_node *start, traverse_func pre, traverse_func post, void *data);
void *traverse_all_pci_devices(traverse_func pre);
-struct pci_dev *pci_find_dev_by_addr(unsigned long addr);
void pci_devs_phb_init(void);
void pci_fix_bus_sysdata(void);
struct device_node *fetch_dev_dn(struct pci_dev *dev);
#define PCI_GET_PHB_PTR(dev) (((struct device_node *)(dev)->sysdata)->phb)
+
+/* PCI address cache management routines */
+struct pci_dev *pci_get_device_by_addr(unsigned long addr);
+void pci_addr_cache_insert_device (struct pci_dev *dev);
+void pci_addr_cache_remove_device (struct pci_dev *dev);
#endif /* __PPC_KERNEL_PCI_H__ */
===== drivers/pci/hotplug/rpaphp_core.c 1.2 vs edited =====
--- 1.2/drivers/pci/hotplug/rpaphp_core.c Tue Dec 9 11:03:38 2003
+++ edited/drivers/pci/hotplug/rpaphp_core.c Tue Feb 3 16:35:51 2004
@@ -30,6 +30,7 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
+#include <asm/eeh.h> /* for eeh_add_device() */
#include <asm/rtas.h> /* rtas_call */
#include <asm/pci-bridge.h> /* for pci_controller */
#include "../pci.h" /* for pci_add_new_bus*/
@@ -512,6 +513,7 @@
}
dev = rpaphp_find_pci_dev(slot->dn->child);
+ eeh_add_device(dev);
}
else {
/* slot is not enabled */
@@ -540,12 +542,12 @@
goto exit;
}
+ /* remove the device from the pci core */
+ eeh_remove_device(slot->dev);
+ pci_remove_bus_device(slot->dev);
- /* remove the device from the pci core */
- pci_remove_bus_device(slot->dev);
-
- pci_dev_put(slot->dev);
- slot->state = NOT_CONFIGURED;
+ pci_dev_put(slot->dev);
+ slot->state = NOT_CONFIGURED;
dbg("%s: adapter in slot[%s] unconfigured.\n", __FUNCTION__, slot->name);
===== include/asm-ppc64/eeh.h 1.6 vs edited =====
--- 1.6/include/asm-ppc64/eeh.h Fri Sep 12 06:06:51 2003
+++ edited/include/asm-ppc64/eeh.h Tue Feb 3 16:35:51 2004
@@ -45,22 +45,37 @@
/* This is for profiling only */
extern unsigned long eeh_total_mmio_ffs;
-void eeh_init(void);
-int eeh_get_state(unsigned long ea);
-unsigned long eeh_check_failure(void *token, unsigned long val);
+unsigned long eeh_check_failure(void *token, unsigned long val, int who);
void *eeh_ioremap(unsigned long addr, void *vaddr);
+/**
+ * eeh_add_device - perform EEH initialization for the indicated pci device
+ * @dev: pci device for which to set up EEH
+ *
+ * This routine can be used to perform EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ * Whether this actually enables EEH or not for this device depends
+ * on the type of the device, on earlier boot command-line
+ * arguments & etc.
+ */
+void eeh_add_device(struct pci_dev *);
+
+/**
+ * eeh_remove_device - undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be called when a device is removed from a running
+ * system (e.g. by hotplug or dlpar).
+ */
+void eeh_remove_device(struct pci_dev *);
+
+
#define EEH_DISABLE 0
#define EEH_ENABLE 1
#define EEH_RELEASE_LOADSTORE 2
#define EEH_RELEASE_DMA 3
int eeh_set_option(struct pci_dev *dev, int options);
-/* Given a PCI device check if eeh should be configured or not.
- * This may look at firmware properties and/or kernel cmdline options.
- */
-int is_eeh_configured(struct pci_dev *dev);
-
/* Translate a (possible) eeh token to a physical addr.
* If "token" is not an eeh token it is simply returned under
* the assumption that it is already a physical addr.
@@ -78,11 +93,16 @@
* If this macro yields TRUE, the caller relays to eeh_check_failure()
* which does further tests out of line.
*/
-/* #define EEH_POSSIBLE_IO_ERROR(val) (~(val) == 0) */
-/* #define EEH_POSSIBLE_ERROR(addr, vaddr, val) ((vaddr) != (addr) && EEH_POSSIBLE_IO_ERROR(val) */
/* This version is rearranged to collect some profiling data */
-#define EEH_POSSIBLE_IO_ERROR(val) (~(val) == 0 && ++eeh_total_mmio_ffs)
-#define EEH_POSSIBLE_ERROR(addr, vaddr, val) (EEH_POSSIBLE_IO_ERROR(val) && (vaddr) != (addr))
+#define EEH_POSSIBLE_IO_ERROR(val, type) \
+ ((val) == (type)~0 && ++eeh_total_mmio_ffs)
+
+/* The vaddr will equal the addr if EEH checking is disabled for
+ * this device. This is because eeh_ioremap() will not have
+ * remapped to 0xA0, and thus both vaddr and addr will be 0xE0...
+ */
+#define EEH_POSSIBLE_ERROR(addr, vaddr, val, type) \
+ (EEH_POSSIBLE_IO_ERROR(val, type) && (vaddr) != (addr))
/*
* MMIO read/write operations with EEH support.
@@ -101,8 +121,8 @@
static inline u8 eeh_readb(void *addr) {
volatile u8 *vaddr = (volatile u8 *)IO_TOKEN_TO_ADDR(addr);
u8 val = in_8(vaddr);
- if (EEH_POSSIBLE_ERROR(addr, vaddr, val))
- return eeh_check_failure(addr, val);
+ if (EEH_POSSIBLE_ERROR(addr, vaddr, val, u8))
+ return eeh_check_failure(addr, val, 8);
return val;
}
static inline void eeh_writeb(u8 val, void *addr) {
@@ -112,25 +132,47 @@
static inline u16 eeh_readw(void *addr) {
volatile u16 *vaddr = (volatile u16 *)IO_TOKEN_TO_ADDR(addr);
u16 val = in_le16(vaddr);
- if (EEH_POSSIBLE_ERROR(addr, vaddr, val))
- return eeh_check_failure(addr, val);
+ if (EEH_POSSIBLE_ERROR(addr, vaddr, val, u16))
+ return eeh_check_failure(addr, val, 16);
return val;
}
static inline void eeh_writew(u16 val, void *addr) {
volatile u16 *vaddr = (volatile u16 *)IO_TOKEN_TO_ADDR(addr);
out_le16(vaddr, val);
}
+static inline u16 eeh_raw_readw(void *addr) {
+ volatile u16 *vaddr = (volatile u16 *)IO_TOKEN_TO_ADDR(addr);
+ u16 val = in_be16(vaddr);
+ if (EEH_POSSIBLE_ERROR(addr, vaddr, val, u16))
+ return eeh_check_failure(addr, val, 17);
+ return val;
+}
+static inline void eeh_raw_writew(u16 val, void *addr) {
+ volatile u16 *vaddr = (volatile u16 *)IO_TOKEN_TO_ADDR(addr);
+ out_be16(vaddr, val);
+}
static inline u32 eeh_readl(void *addr) {
volatile u32 *vaddr = (volatile u32 *)IO_TOKEN_TO_ADDR(addr);
u32 val = in_le32(vaddr);
- if (EEH_POSSIBLE_ERROR(addr, vaddr, val))
- return eeh_check_failure(addr, val);
+ if (EEH_POSSIBLE_ERROR(addr, vaddr, val, u32))
+ return eeh_check_failure(addr, val, 32);
return val;
}
static inline void eeh_writel(u32 val, void *addr) {
volatile u32 *vaddr = (volatile u32 *)IO_TOKEN_TO_ADDR(addr);
out_le32(vaddr, val);
}
+static inline u32 eeh_raw_readl(void *addr) {
+ volatile u32 *vaddr = (volatile u32 *)IO_TOKEN_TO_ADDR(addr);
+ u32 val = in_be32(vaddr);
+ if (EEH_POSSIBLE_ERROR(addr, vaddr, val, u32))
+ return eeh_check_failure(addr, val, 33);
+ return val;
+}
+static inline void eeh_raw_writel(u32 val, void *addr) {
+ volatile u32 *vaddr = (volatile u32 *)IO_TOKEN_TO_ADDR(addr);
+ out_be32(vaddr, val);
+}
static inline void eeh_memset_io(void *addr, int c, unsigned long n) {
void *vaddr = (void *)IO_TOKEN_TO_ADDR(addr);
@@ -139,8 +181,14 @@
static inline void eeh_memcpy_fromio(void *dest, void *src, unsigned long n) {
void *vsrc = (void *)IO_TOKEN_TO_ADDR(src);
memcpy(dest, vsrc, n);
- /* look for ffff's here at dest[n] */
+ /* Look for ffff's in the last four bytes copied (dest+n-4 .. dest+n-1).
+ * The offset is applied in bytes before the cast to u32 *; skipped if n < 4.
+ */
+ if ((n>=4) && (EEH_POSSIBLE_ERROR(src, vsrc, (*((u32 *) ((char *) dest+n-4))), u32))) {
+ eeh_check_failure(src, (*((u32 *) ((char *) dest+n-4))), 88);
+ }
}
+
static inline void eeh_memcpy_toio(void *dest, void *src, unsigned long n) {
void *vdest = (void *)IO_TOKEN_TO_ADDR(dest);
memcpy(vdest, src, n);
@@ -158,8 +206,8 @@
if (_IO_IS_ISA(port) && !_IO_HAS_ISA_BUS)
return ~0;
val = in_8((u8 *)(port+pci_io_base));
- if (!_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR(val))
- return eeh_check_failure((void*)(port+pci_io_base), val);
+ if (!_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR(val, u8))
+ return eeh_check_failure((void*)(port), val, -8);
return val;
}
@@ -173,8 +221,8 @@
if (_IO_IS_ISA(port) && !_IO_HAS_ISA_BUS)
return ~0;
val = in_le16((u16 *)(port+pci_io_base));
- if (!_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR(val))
- return eeh_check_failure((void*)(port+pci_io_base), val);
+ if (!_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR(val, u16))
+ return eeh_check_failure((void*)(port), val, -16);
return val;
}
@@ -188,14 +236,33 @@
if (_IO_IS_ISA(port) && !_IO_HAS_ISA_BUS)
return ~0;
val = in_le32((u32 *)(port+pci_io_base));
- if (!_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR(val))
- return eeh_check_failure((void*)(port+pci_io_base), val);
+ if (!_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR(val, u32))
+ return eeh_check_failure((void*)(port), val, -32);
return val;
}
static inline void eeh_outl(u32 val, unsigned long port) {
if (!_IO_IS_ISA(port) || _IO_HAS_ISA_BUS)
return out_le32((u32 *)(port+pci_io_base), val);
+}
+
+/* in-string eeh macros: EEH is checked on the last element read; a count <= 0 has no last element, so no check */
+static inline void eeh_insb(unsigned long port, void * buf, int ns) {
+ _insb((u8 *)(port+pci_io_base), buf, ns);
+ if (ns > 0 && !_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR((*(((u8*)buf)+ns-1)), u8))
+ eeh_check_failure((void*)(port), *(((u8*)buf)+ns-1), -9);
+}
+
+static inline void eeh_insw_ns(unsigned long port, void * buf, int ns) {
+ _insw_ns((u16 *)(port+pci_io_base), buf, ns);
+ if (ns > 0 && !_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR((*(((u16*)buf)+ns-1)), u16))
+ eeh_check_failure((void*)(port), *(((u16*)buf)+ns-1), -17);
+}
+
+static inline void eeh_insl_ns(unsigned long port, void * buf, int nl) {
+ _insl_ns((u32 *)(port+pci_io_base), buf, nl);
+ if (nl > 0 && !_IO_IS_ISA(port) && EEH_POSSIBLE_IO_ERROR((*(((u32*)buf)+nl-1)), u32))
+ eeh_check_failure((void*)(port), *(((u32*)buf)+nl-1), -33);
}
#endif /* _EEH_H */
===== include/asm-ppc64/io.h 1.11 vs edited =====
--- 1.11/include/asm-ppc64/io.h Mon Jan 19 20:08:22 2004
+++ edited/include/asm-ppc64/io.h Tue Feb 3 16:35:52 2004
@@ -49,6 +49,13 @@
#define outb(data,addr) writeb(data,((unsigned long)(addr)))
#define outw(data,addr) writew(data,((unsigned long)(addr)))
#define outl(data,addr) writel(data,((unsigned long)(addr)))
+/*
+ * The *_ns versions below don't do byte-swapping.
+ * Neither do the standard versions now, these are just here
+ * for older code.
+ */
+#define insw_ns(port, buf, ns) _insw_ns((u16 *)((port)+pci_io_base), (buf), (ns))
+#define insl_ns(port, buf, nl) _insl_ns((u32 *)((port)+pci_io_base), (buf), (nl))
#else
#define readb(addr) eeh_readb((void*)(addr))
#define readw(addr) eeh_readw((void*)(addr))
@@ -71,12 +78,16 @@
* They are only used in practice for transferring buffers which
* are arrays of bytes, and byte-swapping is not appropriate in
* that case. - paulus */
-#define insb(port, buf, ns) _insb((u8 *)((port)+pci_io_base), (buf), (ns))
-#define outsb(port, buf, ns) _outsb((u8 *)((port)+pci_io_base), (buf), (ns))
-#define insw(port, buf, ns) _insw_ns((u16 *)((port)+pci_io_base), (buf), (ns))
-#define outsw(port, buf, ns) _outsw_ns((u16 *)((port)+pci_io_base), (buf), (ns))
-#define insl(port, buf, nl) _insl_ns((u32 *)((port)+pci_io_base), (buf), (nl))
-#define outsl(port, buf, nl) _outsl_ns((u32 *)((port)+pci_io_base), (buf), (nl))
+#define insb(port, buf, ns) eeh_insb((port), (buf), (ns))
+#define insw(port, buf, ns) eeh_insw_ns((port), (buf), (ns))
+#define insl(port, buf, nl) eeh_insl_ns((port), (buf), (nl))
+#define insw_ns(port, buf, ns) eeh_insw_ns((port), (buf), (ns))
+#define insl_ns(port, buf, nl) eeh_insl_ns((port), (buf), (nl))
+
+#define outsb(port, buf, ns) _outsb((u8 *)((port)+pci_io_base), (buf), (ns))
+#define outsw(port, buf, ns) _outsw_ns((u16 *)((port)+pci_io_base), (buf), (ns))
+#define outsl(port, buf, nl) _outsl_ns((u32 *)((port)+pci_io_base), (buf), (nl))
+
#endif
extern void _insb(volatile u8 *port, void *buf, int ns);
@@ -106,9 +117,7 @@
* Neither do the standard versions now, these are just here
* for older code.
*/
-#define insw_ns(port, buf, ns) _insw_ns((u16 *)((port)+pci_io_base), (buf), (ns))
#define outsw_ns(port, buf, ns) _outsw_ns((u16 *)((port)+pci_io_base), (buf), (ns))
-#define insl_ns(port, buf, nl) _insl_ns((u32 *)((port)+pci_io_base), (buf), (nl))
#define outsl_ns(port, buf, nl) _outsl_ns((u32 *)((port)+pci_io_base), (buf), (nl))
@@ -177,6 +186,9 @@
/*
* 8, 16 and 32 bit, big and little endian I/O operations, with barrier.
+ * These routines do not perform EEH-related I/O address translation,
+ * and should not be used directly by device drivers. Use inb/readb
+ * instead.
*/
static inline int in_8(volatile unsigned char *addr)
{
More information about the Linuxppc64-dev
mailing list