[PATCH] PPC64: EEH Recovery

Sun Nov 21 10:11:53 EST 2004

> Hi Paul,
> 
> The patch below implements hotplug style EEH error recovery. 
> Its split into two pieces: a part that needs to be applied to the
> PPC64 arch tree, and a part that needs to be applied to the 
> RPA PHP hotplug tree.  The PPC64 part needs to go in first.
> 
> Assuming this doesn't generate a round of discussion, please
> forward upstream to akpm/torvalds.

Here's some discussion :)

Just reading the diff, not the patched code.
Hopefully I undid all the html correctly.

1) why are you EXPORT_SYMBOL rtas_write_config?  eeh.c isn't a module

2) I object to grabing pci devices so they don't disappear and reappear.
   I worry about duplicate devices across register/unregister and sysfs
   kobject lifetimes getting confused and duplicate names.

  I'd prefer we just kept the pci config stuff we are going to restore
  off the of device node.

> @@ -635,7 +654,7 @@ unsigned long eeh_check_failure(const vo
>  	/* Finding the phys addr + pci device; this is pretty quick. */
>  	addr = eeh_token_to_phys((unsigned long __force) token);
>  	dev = pci_get_device_by_addr(addr);
> -	if (!dev)
> +	if (!dev) 
>  		return val;
>  
>  	dn = pci_device_to_OF_node(dev);

adding trailing white space.  tsk tsk
PS: there is more white space, this one caught my eye.

> +/* ------------------------------------------------------- */
> +/** Save and restore of PCI BARs
> + * 
> + * Although firmware will set up BARs during boot, it doesn't
> + * set up device BAR's after a device reset, although it will,
> + * if requested, set up bridge configuration. Thus, we need to 
> + * configure the PCI devices ourselves.  Config-space setup is 
> + * stored in the PCI structures which are normally deleted during
> + * device removal.  Thus, the "save" routine references the
> + * structures so that they aren't deleted. 
> + */
> +
> +
> +struct eeh_cfg_tree
> +{
> +	struct eeh_cfg_tree *sibling;
> +	struct eeh_cfg_tree *child;
> +	struct pci_dev *dev;
> +	struct device_node *dn;
> +};

Do we need a tree for this?

> +
> +static inline struct pci_dev * eeh_get_pci_dev(struct device_node *dn)
> +{
> +	struct pci_dev *dev = NULL;
> +	char bus_id[BUS_ID_SIZE];
> +
> +	sprintf(bus_id, "%04x:%02x:%02x.%d",dn->phb->global_number,
> +		dn->busno, PCI_SLOT(dn->devfn), PCI_FUNC(dn->devfn));
> +
> +	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
> +		if (!strcmp(pci_name(dev), bus_id)) 
> +			return dev;
> +	}
> +	return NULL;
> +}

IICK

1) matching on a stirng when we have the device_node?  really?
   please match on pci_device_to_OF_node(dev)
2) for_each_pcidev() please

> +
> +/** 
> + * eeh_save_bars - save the PCI config space info
> + */
> +struct eeh_cfg_tree * eeh_save_bars(struct device_node *dn)
> +{
> +	struct eeh_cfg_tree *cnode;
> +	struct pci_dev *dev;
> +	
> +	dev = eeh_get_pci_dev (dn);
> +	if (!dev) 
> +		return NULL;
> +
> +	cnode = kmalloc(sizeof(struct eeh_cfg_tree), GFP_KERNEL);
> +	if (!cnode) 
> +		return NULL;
> +	
> +	cnode->dev = dev;
> +	
> +	of_node_get(dn);
> +	cnode->dn = dn;
> +	
> +	cnode->sibling = NULL;
> +	cnode->child = NULL;
> +
> +	if (dn->child) {
> +		cnode->child = eeh_save_bars (dn->child);
> +	}
> +	if (dn->sibling) {
> +		cnode->sibling = eeh_save_bars (dn->sibling);
> +	}
> +
> +	return cnode;
> +}
> +EXPORT_SYMBOL(eeh_save_bars);
> +

> +/**
> + * __restore_bars - Restore the Base Address Registers
> + * Loads the PCI configuration space base address registers 
> + * and the expansion ROM base address from the array 
> + * passed as the second argument.
> + */
> +static inline void __restore_bars (struct device_node *dn, u32 *cfg_hdr)
> +{
> +	int i;
> +	for (i=4; i<10; i++) {
> +		rtas_write_config(dn, i*4, 4, cfg_hdr[i]);
> +	}
> +	rtas_write_config(dn, 12*4, 4, cfg_hdr[12]);
> +}
> +
> +/** 
> + * eeh_restore_bars - restore the PCI config space info
> + */
> +void eeh_restore_bars(struct eeh_cfg_tree *tree)
> +{
> +	if (tree->dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
> +		__restore_bars (tree->dn, tree->dev->saved_config_space);
> +	}
> +	
> +	if (tree->child) {
> +		eeh_restore_bars (tree->child);
> +	}
> +	if (tree->sibling) {
> +		eeh_restore_bars (tree->sibling);
> +	}
> +
> +	of_node_put (tree->dn);
> +	pci_dev_put (tree->dev);
> +	kfree (tree);
> +}
> +EXPORT_SYMBOL(eeh_restore_bars);
> +

How about a list of (dn *, pci config words to write)?
or an array of dn

>  
> -EXPORT_SYMBOL_GPL(rpaphp_find_hotplug_slot);

not analyzed ...

> +/* ------------------------------------------------------- */
> +/**
> + * handle_eeh_events -- reset a PCI device after hard lockup.
> + *
> + * pSeries systems will isolate a PCI slot if the PCI-Host
> + * bridge detects address or data parity errors, DMA's 
> + * occuring to wild addresses (which usually happen due to
> + * bugs in device drivers or in PCI adapter firmware).
> + * Slot isolations also occur if #SERR, #PERR or other misc
> + * PCI-related errors are detected.
> + * 
> + * Recovery process consists of unplugging the device driver
> + * (which generated hotplug events to userspace), then issuing
> + * a PCI #RST to the device, then reconfiguring the PCI config 
> + * space for all bridges & devices under this slot, and then 
> + * finally restarting the device drivers (which cause a second
> + * set of hotplug events to go out to userspace).
> + */
> +int handle_eeh_events (struct notifier_block *self, 
> +                       unsigned long reason, void *ev)
> +{
> +	struct eeh_event *event = ev;
> +	struct slot *frozen_slot;
> +	struct eeh_cfg_tree * saved_bars;
> +
> +	frozen_slot = rpaphp_find_slot(event->dev);
> +	if (!frozen_slot)
> +	{
> +		printk (KERN_ERR 
> +			"EEH: Cannot find PCI slot for EEH error! dev=%p dn=%p\n", 
> +			event->dev, event->dn);
> +		return 1;
> +	}
> +
> +	/* Keep a copy of the config space registers */
> +	saved_bars = eeh_save_bars(frozen_slot->dn);
> +	of_node_get(event->dn);
> +	pci_dev_get(event->dev);
> +
> +	rpaphp_unconfig_pci_adapter (frozen_slot);
> +
> +	event->dn->eeh_freeze_count ++;
> +	if (event->dn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) {
> +		/* 
> +		 * About 90% of all real-life EEH failures in the field
> +		 * are due to poorly seated PCI cards. Only 10% or so are
> +		 * due to actual, failed cards 
> +		 */
> +		printk (KERN_ERR
> +		   "EEH: device %s:%s has failed %d times \n"
> +			"and has been permanently disabled.  Please try reseating\n"
> +		   "this device or replacing it.\n",
> +			pci_name (event->dev),
> +			pci_pretty_name (event->dev),
> +			EEH_MAX_ALLOWED_FREEZES);
> +		goto rdone;
> +	}
> +
> +	/* Reset the pci controller. (Asserts RST#; resets config space). 
> +	 * Reconfigure bridges and devices */
> +	rtas_set_slot_reset (event->dn);
> +	rtas_configure_bridge(event->dn);
> +	eeh_restore_bars(saved_bars);
> +
> +	/* Give the system 5 seconds to finish running the user-space
> +	 * hotplug scripts, e.g. ifdown for ethernet.  Yes, this is a hack, 
> +	 * but if we don't do this, weird things happen.
> +	 */
> +	ssleep (5);
> +
> +	rpaphp_enable_pci_slot (frozen_slot);
> +
> +	/* The new device node is different than the old one; 
> +	 * copy over the freeze count, so that we don't loose track of it.
> +	 */
> +	frozen_slot->dn->eeh_freeze_count = event->dn->eeh_freeze_count;
> +rdone:
> +	of_node_put(event->dn);
> +	pci_dev_put(event->dev);
> +	return 0;
> +}

see comments with concerns about lifetimes.

> +
> +static struct notifier_block eeh_block;
> +
> +void __init init_eeh_handler (void)
> +{
> +	eeh_block.notifier_call = handle_eeh_events;
> +	eeh_register_notifier (&eeh_block);
> +}
> +
> +void __exit exit_eeh_handler (void)
> +{
> +	eeh_unregister_notifier (&eeh_block);
> +}
> +