[PATCH 3/3] ipr: Wait for aborted command responses

Brian King brking at linux.vnet.ibm.com
Sat Nov 1 00:45:56 AEDT 2014


Adding Wendy...

On 10/30/2014 05:27 PM, Brian King wrote:
> Fixes a race condition in abort handling that was injected
> when multiple interrupt support was added. When only a single
> interrupt is present, the adapter guarantees it will send
> responses for aborted commands prior to the response for the
> abort command itself. With multiple interrupts, these responses
> generally come back on different interrupts, so we need to
> ensure the abort thread waits until the aborted command is
> complete so we don't perform a double completion. This race
> condition was being hit frequently in environments which
> were triggering command timeouts, which was resulting in
> a double completion causing a kernel oops.
> 
> Cc: <stable at vger.kernel.org>
> Signed-off-by: Brian King <brking at linux.vnet.ibm.com>
> ---
> 
>  drivers/scsi/ipr.c |   92 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/scsi/ipr.h |    1 
>  2 files changed, 93 insertions(+)
> 
> diff -puN drivers/scsi/ipr.c~ipr_eh_wait drivers/scsi/ipr.c
> --- scsi-queue/drivers/scsi/ipr.c~ipr_eh_wait	2014-10-30 17:15:37.302753120 -0500
> +++ scsi-queue-bjking1/drivers/scsi/ipr.c	2014-10-30 17:15:37.311753039 -0500
> @@ -683,6 +683,7 @@ static void ipr_init_ipr_cmnd(struct ipr
>  	ipr_reinit_ipr_cmnd(ipr_cmd);
>  	ipr_cmd->u.scratch = 0;
>  	ipr_cmd->sibling = NULL;
> +	ipr_cmd->eh_comp = NULL;
>  	ipr_cmd->fast_done = fast_done;
>  	init_timer(&ipr_cmd->timer);
>  }
> @@ -848,6 +849,8 @@ static void ipr_scsi_eh_done(struct ipr_
> 
>  	scsi_dma_unmap(ipr_cmd->scsi_cmd);
>  	scsi_cmd->scsi_done(scsi_cmd);
> +	if (ipr_cmd->eh_comp)
> +		complete(ipr_cmd->eh_comp);
>  	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
>  }
> 
> @@ -4854,6 +4857,84 @@ static int ipr_slave_alloc(struct scsi_d
>  	return rc;
>  }
> 
> +/**
> + * ipr_match_lun - Match function for specified LUN
> + * @ipr_cmd:	ipr command struct
> + * @device:		device to match (sdev)
> + *
> + * Returns:
> + *	1 if command matches sdev / 0 if command does not match sdev
> + **/
> +static int ipr_match_lun(struct ipr_cmnd *ipr_cmd, void *device)
> +{
> +	if (ipr_cmd->scsi_cmd && ipr_cmd->scsi_cmd->device == device)
> +		return 1;
> +	return 0;
> +}
> +
> +/**
> + * ipr_wait_for_ops - Wait for matching commands to complete
> + * @ioa_cfg:	ioa config struct
> + * @device:		device to match (sdev)
> + * @match:		match function to use
> + *
> + * Returns:
> + *	SUCCESS / FAILED
> + **/
> +static int ipr_wait_for_ops(struct ipr_ioa_cfg *ioa_cfg, void *device,
> +			    int (*match)(struct ipr_cmnd *, void *))
> +{
> +	struct ipr_cmnd *ipr_cmd;
> +	int wait;
> +	unsigned long flags;
> +	struct ipr_hrr_queue *hrrq;
> +	signed long timeout = IPR_ABORT_TASK_TIMEOUT;
> +	DECLARE_COMPLETION_ONSTACK(comp);
> +
> +	ENTER;
> +	do {
> +		wait = 0;
> +
> +		for_each_hrrq(hrrq, ioa_cfg) {
> +			spin_lock_irqsave(hrrq->lock, flags);
> +			list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
> +				if (match(ipr_cmd, device)) {
> +					ipr_cmd->eh_comp = &comp;
> +					wait++;
> +				}
> +			}
> +			spin_unlock_irqrestore(hrrq->lock, flags);
> +		}
> +
> +		if (wait) {
> +			timeout = wait_for_completion_timeout(&comp, timeout);
> +
> +			if (!timeout) {
> +				wait = 0;
> +
> +				for_each_hrrq(hrrq, ioa_cfg) {
> +					spin_lock_irqsave(hrrq->lock, flags);
> +					list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
> +						if (match(ipr_cmd, device)) {
> +							ipr_cmd->eh_comp = NULL;
> +							wait++;
> +						}
> +					}
> +					spin_unlock_irqrestore(hrrq->lock, flags);
> +				}
> +
> +				if (wait)
> +					dev_err(&ioa_cfg->pdev->dev, "Timed out waiting for aborted commands\n");
> +				LEAVE;
> +				return wait ? FAILED : SUCCESS;
> +			}
> +		}
> +	} while (wait);
> +
> +	LEAVE;
> +	return SUCCESS;
> +}
> +
>  static int ipr_eh_host_reset(struct scsi_cmnd *cmd)
>  {
>  	struct ipr_ioa_cfg *ioa_cfg;
> @@ -5073,11 +5154,17 @@ static int __ipr_eh_dev_reset(struct scs
>  static int ipr_eh_dev_reset(struct scsi_cmnd *cmd)
>  {
>  	int rc;
> +	struct ipr_ioa_cfg *ioa_cfg;
> +
> +	ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata;
> 
>  	spin_lock_irq(cmd->device->host->host_lock);
>  	rc = __ipr_eh_dev_reset(cmd);
>  	spin_unlock_irq(cmd->device->host->host_lock);
> 
> +	if (rc == SUCCESS)
> +		rc = ipr_wait_for_ops(ioa_cfg, cmd->device, ipr_match_lun);
> +
>  	return rc;
>  }
> 
> @@ -5255,13 +5342,18 @@ static int ipr_eh_abort(struct scsi_cmnd
>  {
>  	unsigned long flags;
>  	int rc;
> +	struct ipr_ioa_cfg *ioa_cfg;
> 
>  	ENTER;
> 
> +	ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
> +
>  	spin_lock_irqsave(scsi_cmd->device->host->host_lock, flags);
>  	rc = ipr_cancel_op(scsi_cmd);
>  	spin_unlock_irqrestore(scsi_cmd->device->host->host_lock, flags);
> 
> +	if (rc == SUCCESS)
> +		rc = ipr_wait_for_ops(ioa_cfg, scsi_cmd->device, ipr_match_lun);
>  	LEAVE;
>  	return rc;
>  }
> diff -puN drivers/scsi/ipr.h~ipr_eh_wait drivers/scsi/ipr.h
> --- scsi-queue/drivers/scsi/ipr.h~ipr_eh_wait	2014-10-30 17:15:37.305753093 -0500
> +++ scsi-queue-bjking1/drivers/scsi/ipr.h	2014-10-30 17:15:37.315753003 -0500
> @@ -1608,6 +1608,7 @@ struct ipr_cmnd {
>  		struct scsi_device *sdev;
>  	} u;
> 
> +	struct completion *eh_comp;
>  	struct ipr_hrr_queue *hrrq;
>  	struct ipr_ioa_cfg *ioa_cfg;
>  };
> _
> 


-- 
Brian King
Power Linux I/O
IBM Linux Technology Center




More information about the Linuxppc-dev mailing list