[PATCH linux dev-4.10 v2 9/9] drivers: hwmon: occ: Cancel occ operations in remove()

Eddie James eajames at linux.vnet.ibm.com
Fri Oct 6 03:38:13 AEDT 2017



On 10/04/2017 08:20 PM, Andrew Jeffery wrote:
> On Fri, 2017-09-29 at 17:41 -0500, Eddie James wrote:
>> From: "Edward A. James" <eajames at us.ibm.com>
>>
>> Prevent hanging forever waiting for OCC ops to complete.
>>
>> Signed-off-by: Edward A. James <eajames at us.ibm.com>
>> ---
>>   drivers/hwmon/occ/p9_sbe.c | 33 ++++++++++++++++++++++++++-------
>>   1 file changed, 26 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c
>> index c7e0d9c..ffd6829 100644
>> --- a/drivers/hwmon/occ/p9_sbe.c
>> +++ b/drivers/hwmon/occ/p9_sbe.c
>> @@ -14,37 +14,53 @@
>>   #include <linux/platform_device.h>
>>   #include <linux/occ.h>
>>   #include <linux/sched.h>
>> +#include <linux/spinlock.h>
>>   #include <linux/workqueue.h>
>>   
>>   struct p9_sbe_occ {
>>   	struct occ occ;
>>   	struct device *sbe;
>> +	struct occ_client *client;
>> +	spinlock_t lock;
>>   };
>>   
>>   #define to_p9_sbe_occ(x)	container_of((x), struct p9_sbe_occ,
>> occ)
>>   
>> +static void p9_sbe_occ_close_client(struct p9_sbe_occ *occ)
>> +{
>> +	struct occ_client *tmp_client;
>> +
>> +	spin_lock_irq(&occ->lock);
>> +	tmp_client = occ->client;
>> +	occ->client = NULL;
>> +	occ_drv_release(tmp_client);
>> +	spin_unlock_irq(&occ->lock);
>> +}
>> +
>>   static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd)
>>   {
>>   	int rc, error;
>> -	struct occ_client *client;
>>   	struct occ_response *resp = &occ->resp;
>>   	struct p9_sbe_occ *p9_sbe_occ = to_p9_sbe_occ(occ);
>>   
>> -	client = occ_drv_open(p9_sbe_occ->sbe, 0);
>> -	if (!client) {
>> +	spin_lock_irq(&p9_sbe_occ->lock);
> I think we should be using spin_lock_irqsave(), otherwise
> spin_unlock_irq() will unconditionally re-enable interrupts.

This is all over the sbefifo and occ drivers too... do we need to change 
those? No one has mentioned anything about it.

>
>> +	p9_sbe_occ->client = occ_drv_open(p9_sbe_occ->sbe, 0);
>> +	if (!p9_sbe_occ->client) {
>>   		rc = -ENODEV;
>> +		spin_unlock_irq(&p9_sbe_occ->lock);
>>   		goto assign;
>>   	}
>> +	spin_unlock_irq(&p9_sbe_occ->lock);
> Rather than this dance with locking, you can do:
>
>      spin_lock_irqsave(&p9_sbe_occ->lock, flags);
>      p9_sbe_occ->client = occ_drv_open(p9_sbe_occ->sbe, 0);
> spin_unlock_irqrestore(&p9_sbe_occ->lock, flags);
> if (!p9_sbe_occ->client) {
>          ...
> }
>
> Please take a look at the rest of the code for this pattern as well.
>
>>   
>> -	rc = occ_drv_write(client, (const char *)&cmd[1], 7);
>> +	rc = occ_drv_write(p9_sbe_occ->client, (const char
>> *)&cmd[1], 7);
>>   	if (rc < 0)
>>   		goto err;
>>   
>> -	rc = occ_drv_read(client, (char *)resp, sizeof(*resp));
>> +	rc = occ_drv_read(p9_sbe_occ->client, (char *)resp,
>> sizeof(*resp));
>>   	if (rc < 0)
>>   		goto err;
>>   
>> -	occ_drv_release(client);
>> +	p9_sbe_occ_close_client(p9_sbe_occ);
>>   
>>   	switch (resp->return_status) {
>>   	case RESP_RETURN_CMD_IN_PRG:
>> @@ -72,7 +88,7 @@ static int p9_sbe_occ_send_cmd(struct occ *occ, u8
>> *cmd)
>>   	goto done;
>>   
>>   err:
>> -	occ_drv_release(client);
>> +	p9_sbe_occ_close_client(p9_sbe_occ);
>>   	dev_err(occ->bus_dev, "occ bus op failed rc:%d\n", rc);
>>   assign:
>>   	error = rc;
>> @@ -132,6 +148,7 @@ static int p9_sbe_occ_probe(struct
>> platform_device *pdev)
>>   	p9_sbe_occ->sbe = pdev->dev.parent;
>>   
>>   	occ = &p9_sbe_occ->occ;
>> +	spin_lock_init(&p9_sbe_occ->lock);
>>   	occ->bus_dev = &pdev->dev;
>>   	occ->groups[0] = &occ->group;
>>   	occ->poll_cmd_data = 0x20;
>> @@ -152,7 +169,9 @@ static int p9_sbe_occ_probe(struct
>> platform_device *pdev)
>>   static int p9_sbe_occ_remove(struct platform_device *pdev)
>>   {
>>   	struct occ *occ = platform_get_drvdata(pdev);
>> +	struct p9_sbe_occ *p9_sbe_occ = to_p9_sbe_occ(occ);
>>   
>> +	p9_sbe_occ_close_client(p9_sbe_occ);
>>   	occ_remove_status_attrs(occ);
> Shouldn't we be removing the status attributes before releasing the occ
> to prevent use-after-free or useless calls into read/write? This seems
> racy.

This is to prevent hanging. If we're stuck in a sysfs read(), I believe 
the remove will wait for the read to complete... can't happen if we 
don't close the client and cancel the op first. The 
p9_occ_sbe_send_command function is quite careful about not using the 
client after it's been released.

Besides, hwmon attributes are always removed after remove() is complete 
and the device is released.

It would be a good idea to set the p9_sbe_occ->sbe pointer to NULL so 
that we can't start any new operations during remove though.

Thanks,
Eddie

>
> Andrew
>
>>   
>>   	atomic_dec(&occ_num_occs);



More information about the openbmc mailing list