[PATCH 3/3] ipr: Wait for aborted command responses
Brian King
brking at linux.vnet.ibm.com
Sat Nov 1 00:45:56 AEDT 2014
Adding Wendy...
On 10/30/2014 05:27 PM, Brian King wrote:
> Fixes a race condition in abort handling that was injected
> when multiple interrupt support was added. When only a single
> interrupt is present, the adapter guarantees it will send
> responses for aborted commands prior to the response for the
> abort command itself. With multiple interrupts, these responses
> generally come back on different interrupts, so we need to
> ensure the abort thread waits until the aborted command is
> complete so we don't perform a double completion. This race
> condition was being hit frequently in environments which
> were triggering command timeouts, which was resulting in
> a double completion causing a kernel oops.
>
> Cc: <stable at vger.kernel.org>
> Signed-off-by: Brian King <brking at linux.vnet.ibm.com>
> ---
>
> drivers/scsi/ipr.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> drivers/scsi/ipr.h | 1
> 2 files changed, 93 insertions(+)
>
> diff -puN drivers/scsi/ipr.c~ipr_eh_wait drivers/scsi/ipr.c
> --- scsi-queue/drivers/scsi/ipr.c~ipr_eh_wait 2014-10-30 17:15:37.302753120 -0500
> +++ scsi-queue-bjking1/drivers/scsi/ipr.c 2014-10-30 17:15:37.311753039 -0500
> @@ -683,6 +683,7 @@ static void ipr_init_ipr_cmnd(struct ipr
> ipr_reinit_ipr_cmnd(ipr_cmd);
> ipr_cmd->u.scratch = 0;
> ipr_cmd->sibling = NULL;
> + ipr_cmd->eh_comp = NULL;
> ipr_cmd->fast_done = fast_done;
> init_timer(&ipr_cmd->timer);
> }
> @@ -848,6 +849,8 @@ static void ipr_scsi_eh_done(struct ipr_
>
> scsi_dma_unmap(ipr_cmd->scsi_cmd);
> scsi_cmd->scsi_done(scsi_cmd);
> + if (ipr_cmd->eh_comp)
> + complete(ipr_cmd->eh_comp);
> list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
> }
>
> @@ -4854,6 +4857,84 @@ static int ipr_slave_alloc(struct scsi_d
> return rc;
> }
>
> +/**
> + * ipr_match_lun - Match function for specified LUN
> + * @ipr_cmd: ipr command struct
> + * @device: device to match (sdev)
> + *
> + * Returns:
> + * 1 if command matches sdev / 0 if command does not match sdev
> + **/
> +static int ipr_match_lun(struct ipr_cmnd *ipr_cmd, void *device)
> +{
> + if (ipr_cmd->scsi_cmd && ipr_cmd->scsi_cmd->device == device)
> + return 1;
> + return 0;
> +}
> +
> +/**
> + * ipr_wait_for_ops - Wait for matching commands to complete
> + * @ioa_cfg: ioa config struct
> + * @device: device to match (sdev)
> + * @match: match function to use
> + *
> + * Returns:
> + * SUCCESS / FAILED
> + **/
> +static int ipr_wait_for_ops(struct ipr_ioa_cfg *ioa_cfg, void *device,
> + int (*match)(struct ipr_cmnd *, void *))
> +{
> + struct ipr_cmnd *ipr_cmd;
> + int wait;
> + unsigned long flags;
> + struct ipr_hrr_queue *hrrq;
> + signed long timeout = IPR_ABORT_TASK_TIMEOUT;
> + DECLARE_COMPLETION_ONSTACK(comp);
> +
> + ENTER;
> + do {
> + wait = 0;
> +
> + for_each_hrrq(hrrq, ioa_cfg) {
> + spin_lock_irqsave(hrrq->lock, flags);
> + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
> + if (match(ipr_cmd, device)) {
> + ipr_cmd->eh_comp = &comp;
> + wait++;
> + }
> + }
> + spin_unlock_irqrestore(hrrq->lock, flags);
> + }
> +
> + if (wait) {
> + timeout = wait_for_completion_timeout(&comp, timeout);
> +
> + if (!timeout) {
> + wait = 0;
> +
> + for_each_hrrq(hrrq, ioa_cfg) {
> + spin_lock_irqsave(hrrq->lock, flags);
> + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
> + if (match(ipr_cmd, device)) {
> + ipr_cmd->eh_comp = NULL;
> + wait++;
> + }
> + }
> + spin_unlock_irqrestore(hrrq->lock, flags);
> + }
> +
> + if (wait)
> + dev_err(&ioa_cfg->pdev->dev, "Timed out waiting for aborted commands\n");
> + LEAVE;
> + return wait ? FAILED : SUCCESS;
> + }
> + }
> + } while (wait);
> +
> + LEAVE;
> + return SUCCESS;
> +}
> +
> static int ipr_eh_host_reset(struct scsi_cmnd *cmd)
> {
> struct ipr_ioa_cfg *ioa_cfg;
> @@ -5073,11 +5154,17 @@ static int __ipr_eh_dev_reset(struct scs
> static int ipr_eh_dev_reset(struct scsi_cmnd *cmd)
> {
> int rc;
> + struct ipr_ioa_cfg *ioa_cfg;
> +
> + ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata;
>
> spin_lock_irq(cmd->device->host->host_lock);
> rc = __ipr_eh_dev_reset(cmd);
> spin_unlock_irq(cmd->device->host->host_lock);
>
> + if (rc == SUCCESS)
> + rc = ipr_wait_for_ops(ioa_cfg, cmd->device, ipr_match_lun);
> +
> return rc;
> }
>
> @@ -5255,13 +5342,18 @@ static int ipr_eh_abort(struct scsi_cmnd
> {
> unsigned long flags;
> int rc;
> + struct ipr_ioa_cfg *ioa_cfg;
>
> ENTER;
>
> + ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
> +
> spin_lock_irqsave(scsi_cmd->device->host->host_lock, flags);
> rc = ipr_cancel_op(scsi_cmd);
> spin_unlock_irqrestore(scsi_cmd->device->host->host_lock, flags);
>
> + if (rc == SUCCESS)
> + rc = ipr_wait_for_ops(ioa_cfg, scsi_cmd->device, ipr_match_lun);
> LEAVE;
> return rc;
> }
> diff -puN drivers/scsi/ipr.h~ipr_eh_wait drivers/scsi/ipr.h
> --- scsi-queue/drivers/scsi/ipr.h~ipr_eh_wait 2014-10-30 17:15:37.305753093 -0500
> +++ scsi-queue-bjking1/drivers/scsi/ipr.h 2014-10-30 17:15:37.315753003 -0500
> @@ -1608,6 +1608,7 @@ struct ipr_cmnd {
> struct scsi_device *sdev;
> } u;
>
> + struct completion *eh_comp;
> struct ipr_hrr_queue *hrrq;
> struct ipr_ioa_cfg *ioa_cfg;
> };
> _
>
--
Brian King
Power Linux I/O
IBM Linux Technology Center
More information about the Linuxppc-dev
mailing list