[PATCH 3/3] ipr: Wait for aborted command responses

wenxiong at linux.vnet.ibm.com
Sat Nov 1 02:23:44 AEDT 2014


I have reviewed the patch and it fixed the issue we saw in our environment recently.

Thanks,
Wendy

Quoting Brian King <brking at linux.vnet.ibm.com>:

> Adding Wendy...
>
> On 10/30/2014 05:27 PM, Brian King wrote:
>> Fixes a race condition in abort handling that was injected
>> when multiple interrupt support was added. When only a single
>> interrupt is present, the adapter guarantees it will send
>> responses for aborted commands prior to the response for the
>> abort command itself. With multiple interrupts, these responses
>> generally come back on different interrupts, so we need to
>> ensure the abort thread waits until the aborted command is
>> complete so we don't perform a double completion. This race
>> condition was being hit frequently in environments that
>> trigger command timeouts, where the double completion caused
>> a kernel oops.
>>
>> Cc: <stable at vger.kernel.org>
>> Signed-off-by: Brian King <brking at linux.vnet.ibm.com>
>> ---
>>
>>  drivers/scsi/ipr.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  drivers/scsi/ipr.h |    1 +
>>  2 files changed, 93 insertions(+)
>>
>> diff -puN drivers/scsi/ipr.c~ipr_eh_wait drivers/scsi/ipr.c
>> --- scsi-queue/drivers/scsi/ipr.c~ipr_eh_wait	2014-10-30 17:15:37.302753120 -0500
>> +++ scsi-queue-bjking1/drivers/scsi/ipr.c	2014-10-30 17:15:37.311753039 -0500
>> @@ -683,6 +683,7 @@ static void ipr_init_ipr_cmnd(struct ipr
>>  	ipr_reinit_ipr_cmnd(ipr_cmd);
>>  	ipr_cmd->u.scratch = 0;
>>  	ipr_cmd->sibling = NULL;
>> +	ipr_cmd->eh_comp = NULL;
>>  	ipr_cmd->fast_done = fast_done;
>>  	init_timer(&ipr_cmd->timer);
>>  }
>> @@ -848,6 +849,8 @@ static void ipr_scsi_eh_done(struct ipr_
>>
>>  	scsi_dma_unmap(ipr_cmd->scsi_cmd);
>>  	scsi_cmd->scsi_done(scsi_cmd);
>> +	if (ipr_cmd->eh_comp)
>> +		complete(ipr_cmd->eh_comp);
>>  	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
>>  }
>>
>> @@ -4854,6 +4857,84 @@ static int ipr_slave_alloc(struct scsi_d
>>  	return rc;
>>  }
>>
>> +/**
>> + * ipr_match_lun - Match function for specified LUN
>> + * @ipr_cmd:	ipr command struct
>> + * @device:		device to match (sdev)
>> + *
>> + * Returns:
>> + *	1 if command matches sdev / 0 if command does not match sdev
>> + **/
>> +static int ipr_match_lun(struct ipr_cmnd *ipr_cmd, void *device)
>> +{
>> +	if (ipr_cmd->scsi_cmd && ipr_cmd->scsi_cmd->device == device)
>> +		return 1;
>> +	return 0;
>> +}
>> +
>> +/**
>> + * ipr_wait_for_ops - Wait for matching commands to complete
>> + * @ioa_cfg:	ioa config struct
>> + * @device:		device to match (sdev)
>> + * @match:		match function to use
>> + *
>> + * Returns:
>> + *	SUCCESS / FAILED
>> + **/
>> +static int ipr_wait_for_ops(struct ipr_ioa_cfg *ioa_cfg, void *device,
>> +			    int (*match)(struct ipr_cmnd *, void *))
>> +{
>> +	struct ipr_cmnd *ipr_cmd;
>> +	int wait;
>> +	unsigned long flags;
>> +	struct ipr_hrr_queue *hrrq;
>> +	signed long timeout = IPR_ABORT_TASK_TIMEOUT;
>> +	DECLARE_COMPLETION_ONSTACK(comp);
>> +
>> +	ENTER;
>> +	do {
>> +		wait = 0;
>> +
>> +		for_each_hrrq(hrrq, ioa_cfg) {
>> +			spin_lock_irqsave(hrrq->lock, flags);
>> +			list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
>> +				if (match(ipr_cmd, device)) {
>> +					ipr_cmd->eh_comp = &comp;
>> +					wait++;
>> +				}
>> +			}
>> +			spin_unlock_irqrestore(hrrq->lock, flags);
>> +		}
>> +
>> +		if (wait) {
>> +			timeout = wait_for_completion_timeout(&comp, timeout);
>> +
>> +			if (!timeout) {
>> +				wait = 0;
>> +
>> +				for_each_hrrq(hrrq, ioa_cfg) {
>> +					spin_lock_irqsave(hrrq->lock, flags);
>> +					list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
>> +						if (match(ipr_cmd, device)) {
>> +							ipr_cmd->eh_comp = NULL;
>> +							wait++;
>> +						}
>> +					}
>> +					spin_unlock_irqrestore(hrrq->lock, flags);
>> +				}
>> +
>> +				if (wait)
>> +					dev_err(&ioa_cfg->pdev->dev, "Timed out waiting for aborted commands\n");
>> +				LEAVE;
>> +				return wait ? FAILED : SUCCESS;
>> +			}
>> +		}
>> +	} while (wait);
>> +
>> +	LEAVE;
>> +	return SUCCESS;
>> +}
>> +
>>  static int ipr_eh_host_reset(struct scsi_cmnd *cmd)
>>  {
>>  	struct ipr_ioa_cfg *ioa_cfg;
>> @@ -5073,11 +5154,17 @@ static int __ipr_eh_dev_reset(struct scs
>>  static int ipr_eh_dev_reset(struct scsi_cmnd *cmd)
>>  {
>>  	int rc;
>> +	struct ipr_ioa_cfg *ioa_cfg;
>> +
>> +	ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata;
>>
>>  	spin_lock_irq(cmd->device->host->host_lock);
>>  	rc = __ipr_eh_dev_reset(cmd);
>>  	spin_unlock_irq(cmd->device->host->host_lock);
>>
>> +	if (rc == SUCCESS)
>> +		rc = ipr_wait_for_ops(ioa_cfg, cmd->device, ipr_match_lun);
>> +
>>  	return rc;
>>  }
>>
>> @@ -5255,13 +5342,18 @@ static int ipr_eh_abort(struct scsi_cmnd
>>  {
>>  	unsigned long flags;
>>  	int rc;
>> +	struct ipr_ioa_cfg *ioa_cfg;
>>
>>  	ENTER;
>>
>> +	ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
>> +
>>  	spin_lock_irqsave(scsi_cmd->device->host->host_lock, flags);
>>  	rc = ipr_cancel_op(scsi_cmd);
>>  	spin_unlock_irqrestore(scsi_cmd->device->host->host_lock, flags);
>>
>> +	if (rc == SUCCESS)
>> +		rc = ipr_wait_for_ops(ioa_cfg, scsi_cmd->device, ipr_match_lun);
>>  	LEAVE;
>>  	return rc;
>>  }
>> diff -puN drivers/scsi/ipr.h~ipr_eh_wait drivers/scsi/ipr.h
>> --- scsi-queue/drivers/scsi/ipr.h~ipr_eh_wait	2014-10-30 17:15:37.305753093 -0500
>> +++ scsi-queue-bjking1/drivers/scsi/ipr.h	2014-10-30 17:15:37.315753003 -0500
>> @@ -1608,6 +1608,7 @@ struct ipr_cmnd {
>>  		struct scsi_device *sdev;
>>  	} u;
>>
>> +	struct completion *eh_comp;
>>  	struct ipr_hrr_queue *hrrq;
>>  	struct ipr_ioa_cfg *ioa_cfg;
>>  };
>> _
>>
>
>
> --
> Brian King
> Power Linux I/O
> IBM Linux Technology Center
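
For anyone less familiar with the mechanism the patch relies on: the fix is built on the standard Linux completion pattern. The error-handling thread publishes an on-stack struct completion on every matching command still pending (under the hrrq lock), sleeps until the normal response path signals it, and rescans until nothing matches. The sketch below illustrates only that pattern; the demo_* names, the single pending queue, and the 30-second timeout are placeholders for illustration, not the actual ipr structures or IPR_ABORT_TASK_TIMEOUT.

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/spinlock.h>

/* Hypothetical command structure, reduced to what the pattern needs. */
struct demo_cmd {
	struct list_head queue;
	struct completion *eh_comp;	/* non-NULL only while EH is waiting */
};

/* Normal response path: after finishing the command, wake any EH waiter. */
static void demo_cmd_done(struct demo_cmd *cmd)
{
	if (cmd->eh_comp)
		complete(cmd->eh_comp);
}

/*
 * EH path: attach an on-stack completion to every command still on the
 * pending queue, sleep until woken, then rescan until nothing matches.
 * On timeout the pointers must be cleared again so that a late completer
 * never dereferences a completion that is about to go out of scope.
 */
static int demo_wait_for_cmds(struct list_head *pending_q, spinlock_t *lock)
{
	DECLARE_COMPLETION_ONSTACK(comp);
	struct demo_cmd *cmd;
	unsigned long flags;
	int wait;

	do {
		wait = 0;

		spin_lock_irqsave(lock, flags);
		list_for_each_entry(cmd, pending_q, queue) {
			cmd->eh_comp = &comp;
			wait++;
		}
		spin_unlock_irqrestore(lock, flags);

		if (wait && !wait_for_completion_timeout(&comp, 30 * HZ)) {
			spin_lock_irqsave(lock, flags);
			list_for_each_entry(cmd, pending_q, queue)
				cmd->eh_comp = NULL;
			spin_unlock_irqrestore(lock, flags);
			return -ETIMEDOUT;
		}
	} while (wait);

	return 0;
}

In the driver itself the same loop runs across every hardware response queue (for_each_hrrq) and is keyed by a match callback such as ipr_match_lun, which is why ipr_wait_for_ops takes a match function rather than a queue pointer.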



