[PATCH] powerpc/eeh: Fix race with driver un/bind

Benjamin Herrenschmidt benh at kernel.crashing.org
Fri Mar 23 17:33:11 AEDT 2018


On Fri, 2018-03-23 at 16:44 +1100, Michael Neuling wrote:

 .../...

> This fixes the problem in the same way the generic PCIe AER code (in
> drivers/pci/pcie/aer/aerdrv_core.c) does. It makes the EEH code hold
> the device_lock() before performing the driver EEH callbacks. This
> ensures either the callbacks are no longer register, or if they are
> registered the driver will not be removed from underneath us.
> 
> Signed-off-by: Michael Neuling <mikey at neuling.org>

Generally ok, minor nits though and do we want a CC stable ?

> ---
>  arch/powerpc/kernel/eeh_driver.c | 67 ++++++++++++++++++++++++----------------
>  1 file changed, 41 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
> index 0c0b66fc5b..7cf946ae9a 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata)
>  
>  	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
>  		return NULL;
> +
> +	device_lock(&dev->dev);
>  	dev->error_state = pci_channel_io_frozen;
>  
>  	driver = eeh_pcid_get(dev);
> -	if (!driver) return NULL;
> +	if (!driver) goto out2;

I don't like out1/out2, why not call them out_nodev and out_no_handler
? (same comment for the other ones).
>  
>  	eeh_disable_irq(dev);
>  
>  	if (!driver->err_handler ||
> -	    !driver->err_handler->error_detected) {
> -		eeh_pcid_put(dev);
> -		return NULL;
> -	}
> +	    !driver->err_handler->error_detected)
> +		goto out1;
>  
>  	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
>  
> @@ -227,8 +227,11 @@ static void *eeh_report_error(void *data, void *userdata)
>  	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>  
>  	edev->in_error = true;
> -	eeh_pcid_put(dev);
>  	pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
> +out1:
> +	eeh_pcid_put(dev);
> +out2:

This also changes doing the uevent while holding a reference and the
the device lock, is that ok ? (I guess a reference is a good thing, the
device lock, not sure... I hope so but you should at least document it
as a chance in the cset comment).

> +	device_unlock(&dev->dev);
>  	return NULL;
>  }
>  
> @@ -251,15 +254,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
>  	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
>  		return NULL;
>  
> +	device_lock(&dev->dev);
>  	driver = eeh_pcid_get(dev);
> -	if (!driver) return NULL;
> +	if (!driver) goto out2;
>  
>  	if (!driver->err_handler ||
>  	    !driver->err_handler->mmio_enabled ||
> -	    (edev->mode & EEH_DEV_NO_HANDLER)) {
> -		eeh_pcid_put(dev);
> -		return NULL;
> -	}
> +	    (edev->mode & EEH_DEV_NO_HANDLER))
> +		goto out1;
>  
>  	rc = driver->err_handler->mmio_enabled(dev);
>  
> @@ -267,7 +269,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
>  	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>  	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>  
> +out1:
>  	eeh_pcid_put(dev);
> +out2:
> +	device_unlock(&dev->dev);
>  	return NULL;
>  }
>  
> @@ -290,20 +295,20 @@ static void *eeh_report_reset(void *data, void *userdata)
>  
>  	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
>  		return NULL;
> +
> +	device_lock(&dev->dev);
>  	dev->error_state = pci_channel_io_normal;
>  
>  	driver = eeh_pcid_get(dev);
> -	if (!driver) return NULL;
> +	if (!driver) goto out2;
>  
>  	eeh_enable_irq(dev);
>  
>  	if (!driver->err_handler ||
>  	    !driver->err_handler->slot_reset ||
>  	    (edev->mode & EEH_DEV_NO_HANDLER) ||
> -	    (!edev->in_error)) {
> -		eeh_pcid_put(dev);
> -		return NULL;
> -	}
> +	    (!edev->in_error))
> +		goto out1;
>  
>  	rc = driver->err_handler->slot_reset(dev);
>  	if ((*res == PCI_ERS_RESULT_NONE) ||
> @@ -311,7 +316,10 @@ static void *eeh_report_reset(void *data, void *userdata)
>  	if (*res == PCI_ERS_RESULT_DISCONNECT &&
>  	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>  
> +out1:
>  	eeh_pcid_put(dev);
> +out2:
> +	device_unlock(&dev->dev);
>  	return NULL;
>  }
>  
> @@ -362,10 +370,12 @@ static void *eeh_report_resume(void *data, void *userdata)
>  
>  	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
>  		return NULL;
> +
> +	device_lock(&dev->dev);
>  	dev->error_state = pci_channel_io_normal;
>  
>  	driver = eeh_pcid_get(dev);
> -	if (!driver) return NULL;
> +	if (!driver) goto out2;
>  
>  	was_in_error = edev->in_error;
>  	edev->in_error = false;
> @@ -375,18 +385,20 @@ static void *eeh_report_resume(void *data, void *userdata)
>  	    !driver->err_handler->resume ||
>  	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
>  		edev->mode &= ~EEH_DEV_NO_HANDLER;
> -		eeh_pcid_put(dev);
> -		return NULL;
> +		goto out1;
>  	}
>  
>  	driver->err_handler->resume(dev);
>  
> -	eeh_pcid_put(dev);
>  	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
> +out1:
> +	eeh_pcid_put(dev);
>  #ifdef CONFIG_PCI_IOV
>  	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
>  		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
>  #endif
> +out2:
> +	device_unlock(&dev->dev);
>  	return NULL;
>  }
>  
> @@ -406,23 +418,26 @@ static void *eeh_report_failure(void *data, void *userdata)
>  
>  	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
>  		return NULL;
> +
> +	device_lock(&dev->dev);
>  	dev->error_state = pci_channel_io_perm_failure;
>  
>  	driver = eeh_pcid_get(dev);
> -	if (!driver) return NULL;
> +	if (!driver) goto out2;
>  
>  	eeh_disable_irq(dev);
>  
>  	if (!driver->err_handler ||
> -	    !driver->err_handler->error_detected) {
> -		eeh_pcid_put(dev);
> -		return NULL;
> -	}
> +	    !driver->err_handler->error_detected)
> +		goto out1;
>  
>  	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
>  
> -	eeh_pcid_put(dev);
>  	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
> +out1:
> +	eeh_pcid_put(dev);
> +out2:
> +	device_unlock(&dev->dev);
>  	return NULL;
>  }
>  


More information about the Linuxppc-dev mailing list