[PATCH/RFC] ppc64: EEH + SCSI recovery (IPR only)
Brian King
brking at us.ibm.com
Thu Feb 24 06:34:56 EST 2005
Linas Vepstas wrote:
> ===== drivers/scsi/ipr.c 1.31 vs edited =====
> --- 1.31/drivers/scsi/ipr.c 2004-12-14 17:06:35 -06:00
> +++ edited/drivers/scsi/ipr.c 2005-02-22 17:37:41 -06:00
> @@ -80,6 +80,8 @@
> #include <scsi/scsi_eh.h>
> #include <scsi/scsi_cmnd.h>
> #include <scsi/scsi_request.h>
> +
> +#define CONFIG_SCSI_IPR_EEH
This will obviously need to get cleaned up. This config option should
go away eventually.. I am assuming it is your way of flagging the
parts of code you have changed...
> #include "ipr.h"
>
> /*
> @@ -2917,7 +2919,6 @@ static int ipr_eh_host_reset(struct scsi
>
> if (WAIT_FOR_DUMP == ioa_cfg->sdt_state)
> ioa_cfg->sdt_state = GET_DUMP;
> -
> rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV);
Don't delete blank lines...
> LEAVE;
> @@ -5007,6 +5008,67 @@ static int ipr_reset_start_bist(struct i
> return rc;
> }
>
> +#ifdef CONFIG_SCSI_IPR_EEH
> +
> +static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd);
> +
> +#define IPR_WAIT_FOR_EEH_RESET (HZ)
This should go in ipr.h with all the other timeout literals
> +static int ipr_reset_poll_eeh_recovery(struct ipr_cmnd *ipr_cmd)
> +{
> + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
> + int rc;
> +
> + ENTER;
> + if (ioa_cfg->wait_on_eeh_reset) {
> + ipr_reset_start_timer(ipr_cmd, IPR_WAIT_FOR_EEH_RESET);
> + rc = IPR_RC_JOB_RETURN;
> + } else {
> + ipr_cmd->job_step = ipr_reset_start_bist;
> + rc = IPR_RC_JOB_CONTINUE;
> + }
> +
> + LEAVE;
> + return rc;
> +}
I really don't like this polling.
> +
> +static void ipr_eeh_frozen (struct pci_dev *pdev, void * data)
> +{
> + struct ipr_ioa_cfg *ioa_cfg = data;
> + ioa_cfg->wait_on_eeh_reset = 1;
> +}
Probably don't need the second arg - void * data. You can get the
ioa_cfg pointer with pci_get_drvdata(pdev)
Also, this function should start the ipr reset job. You need to prevent
ipr from talking to the device. Maybe something like:
static int ipr_reset_frozen(struct ipr_cmnd *ipr_cmd)
{
struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q);
ipr_cmd->done = ipr_reset_ioa_job;
return IPR_RC_JOB_RETURN;
}
static void ipr_eeh_frozen(struct pci_dev *pdev)
{
unsigned long host_lock_flags = 0;
struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
_ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_frozen, IPR_SHUTDOWN_NONE);
spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
}
static void ipr_eeh_post_reset(struct pci_dev *pdev)
{
unsigned long host_lock_flags = 0;
struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
_ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_restore_cfg_space, IPR_SHUTDOWN_NONE);
spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
}
That would let you get rid of all the polling code and should simplify
the code a bit.
> +static void ipr_eeh_post_reset (struct pci_dev *pdev, void * data)
> +{
> + struct ipr_ioa_cfg *ioa_cfg = data;
> + ioa_cfg->wait_on_eeh_reset = 0;
> +}
Same comment as above.
> +
> +static void ipr_eeh_perm_failure (struct pci_dev *pdev, void * data)
> +{
> + // struct ipr_ioa_cfg *ioa_cfg = data;
> +
> +#if 0 // XXXXXXXXXXXXXXXXXXXXXXX
> + ipr_cmd->job_step = ipr_reset_shutdown_ioa;
> + rc = IPR_RC_JOB_CONTINUE;
> +#endif
> +}
This needs to "bringdown" the adapter, but not actually touch it. Basically
stuff like unblocking requests so that we fail them instead of hanging, etc.
> +
> +static void ipr_register_eeh_handlers (struct ipr_ioa_cfg *ioa_cfg)
> +{
> + /* XXX borken memory management; this malloc not managed */
> + struct eeh_recovery_ops *eeh_ops;
> + eeh_ops = kmalloc (sizeof(struct eeh_recovery_ops), GFP_KERNEL);
> + memset (eeh_ops, 0, sizeof(struct eeh_recovery_ops));
> + eeh_ops->frozen = ipr_eeh_frozen;
> + eeh_ops->post_reset = ipr_eeh_post_reset;
> + eeh_ops->perm_failure = ipr_eeh_perm_failure;
> + eeh_ops->data = ioa_cfg;
> + eeh_register_recovery_ops (ioa_cfg->pdev, eeh_ops);
> +}
If this was generalized a bit more, you could add these function
pointers into the pci_driver struct so they could be statically
initialized in each driver.
> /**
> * ipr_reset_allowed - Query whether or not IOA can be reset
> * @ioa_cfg: ioa config struct
> @@ -5042,6 +5104,7 @@ static int ipr_reset_wait_to_start_bist(
> struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
> int rc = IPR_RC_JOB_RETURN;
>
> +#ifndef CONFIG_SCSI_IPR_EEH
> if (!ipr_reset_allowed(ioa_cfg) && ipr_cmd->u.time_left) {
> ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
> ipr_reset_start_timer(ipr_cmd, IPR_CHECK_FOR_RESET_TIMEOUT);
> @@ -5049,6 +5112,21 @@ static int ipr_reset_wait_to_start_bist(
> ipr_cmd->job_step = ipr_reset_start_bist;
> rc = IPR_RC_JOB_CONTINUE;
> }
> +#else
> + if (!ipr_reset_allowed(ioa_cfg) && ipr_cmd->u.time_left
> + && !eeh_slot_is_isolated (ioa_cfg->pdev)) {
> +
> + ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
> + ipr_reset_start_timer(ipr_cmd, IPR_CHECK_FOR_RESET_TIMEOUT);
> + } else {
> + if (eeh_slot_is_isolated (ioa_cfg->pdev)) {
> + ipr_cmd->job_step = ipr_reset_poll_eeh_recovery;
> + } else {
> + ipr_cmd->job_step = ipr_reset_start_bist;
> + }
> + rc = IPR_RC_JOB_CONTINUE;
> + }
> +#endif
Not sure what you are trying to accomplish with this bit of code. Does
not seem necessary to me.
> @@ -5079,7 +5157,16 @@ static int ipr_reset_alert(struct ipr_cm
> writel(IPR_UPROCI_RESET_ALERT, ioa_cfg->regs.set_uproc_interrupt_reg);
> ipr_cmd->job_step = ipr_reset_wait_to_start_bist;
> } else {
> +#ifndef CONFIG_SCSI_IPR_EEH
> ipr_cmd->job_step = ipr_reset_start_bist;
> +#else
> + if (eeh_slot_is_isolated (ioa_cfg->pdev)) {
> + ipr_cmd->job_step = ipr_reset_poll_eeh_recovery;
> + return IPR_RC_JOB_CONTINUE;
> + } else {
> + ipr_cmd->job_step = ipr_reset_start_bist;
> + }
> +#endif
> }
This is not needed.
>
> if (rc != PCIBIOS_SUCCESSFUL) {
> dev_err(&pdev->dev, "Failed to save PCI config space\n");
> ===== drivers/scsi/ipr.h 1.21 vs edited =====
> --- 1.21/drivers/scsi/ipr.h 2004-12-14 17:09:02 -06:00
> +++ edited/drivers/scsi/ipr.h 2005-02-22 15:52:36 -06:00
> @@ -833,6 +833,9 @@ struct ipr_ioa_cfg {
> u8 dump_taken:1;
> u8 allow_cmds:1;
> u8 allow_ml_add_del:1;
> +#ifdef CONFIG_SCSI_IPR_EEH
> + u8 wait_on_eeh_reset:1;
> +#endif
If you make the changes I suggest above, you should be able to remove this flag
altogether.
--
Brian King
eServer Storage I/O
IBM Linux Technology Center
More information about the Linuxppc64-dev
mailing list