[PATCH] uacce: fix concurrency of fops_open and uacce_remove

Jean-Philippe Brucker jean-philippe at linaro.org
Thu Jun 16 01:16:02 AEST 2022


Hi,

On Fri, Jun 10, 2022 at 08:34:23PM +0800, Zhangfei Gao wrote:
> The uacce parent's module can be removed when uacce is working,
> which may cause troubles.
> 
> If rmmod/uacce_remove happens just after fops_open: bind_queue,
> the uacce_remove can not remove the bound queue since it is not
> added to the queue list yet, which blocks the uacce_disable_sva.
> 
> Change queues_lock area to make sure the bound queue is added to
> the list thereby can be searched in uacce_remove.
> 
> And uacce->parent->driver is checked immediately in case rmmod is
> just happening.
> 
> Also the parent driver must always stop DMA before calling
> uacce_remove.
> 
> Signed-off-by: Yang Shen <shenyang39 at huawei.com>
> Signed-off-by: Zhangfei Gao <zhangfei.gao at linaro.org>
> ---
>  drivers/misc/uacce/uacce.c | 19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> index 281c54003edc..b6219c6bfb48 100644
> --- a/drivers/misc/uacce/uacce.c
> +++ b/drivers/misc/uacce/uacce.c
> @@ -136,9 +136,16 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
>  	if (!q)
>  		return -ENOMEM;
>  
> +	mutex_lock(&uacce->queues_lock);
> +
> +	if (!uacce->parent->driver) {

I don't think this is useful, because the core clears parent->driver after
having run uacce_remove():

  rmmod hisi_zip		open()
   ...				 uacce_fops_open()
   __device_release_driver()	  ...
    pci_device_remove()
     hisi_zip_remove()
      hisi_qm_uninit()
       uacce_remove()
        ...			  ...
   				  mutex_lock(uacce->queues_lock)
    ...				  if (!uacce->parent->driver)
    device_unbind_cleanup()	  /* driver still valid, proceed */
     dev->driver = NULL

Since uacce_remove() disabled SVA, the following uacce_bind_queue() will
fail anyway. However, if uacce->flags does not have UACCE_DEV_SVA set,
we'll proceed further and call uacce->ops->get_queue(), which does not
exist anymore since the parent module is gone.

I think we need the global uacce_mutex to serialize uacce_remove() and
uacce_fops_open(). uacce_remove() would do everything, including
xa_erase(), while holding that mutex. And uacce_fops_open() would try to
obtain the uacce object from the xarray while holding the mutex, which
fails if the uacce object is being removed.

Thanks,
Jean

> +		ret = -ENODEV;
> +		goto out_with_lock;
> +	}
> +
>  	ret = uacce_bind_queue(uacce, q);
>  	if (ret)
> -		goto out_with_mem;
> +		goto out_with_lock;
>  
>  	q->uacce = uacce;
>  
> @@ -153,7 +160,6 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
>  	uacce->inode = inode;
>  	q->state = UACCE_Q_INIT;
>  
> -	mutex_lock(&uacce->queues_lock);
>  	list_add(&q->list, &uacce->queues);
>  	mutex_unlock(&uacce->queues_lock);
>  
> @@ -161,7 +167,8 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
>  
>  out_with_bond:
>  	uacce_unbind_queue(q);
> -out_with_mem:
> +out_with_lock:
> +	mutex_unlock(&uacce->queues_lock);
>  	kfree(q);
>  	return ret;
>  }
> @@ -171,10 +178,10 @@ static int uacce_fops_release(struct inode *inode, struct file *filep)
>  	struct uacce_queue *q = filep->private_data;
>  
>  	mutex_lock(&q->uacce->queues_lock);
> -	list_del(&q->list);
> -	mutex_unlock(&q->uacce->queues_lock);
>  	uacce_put_queue(q);
>  	uacce_unbind_queue(q);
> +	list_del(&q->list);
> +	mutex_unlock(&q->uacce->queues_lock);
>  	kfree(q);
>  
>  	return 0;
> @@ -513,10 +520,10 @@ void uacce_remove(struct uacce_device *uacce)
>  		uacce_put_queue(q);
>  		uacce_unbind_queue(q);
>  	}
> -	mutex_unlock(&uacce->queues_lock);
>  
>  	/* disable sva now since no opened queues */
>  	uacce_disable_sva(uacce);
> +	mutex_unlock(&uacce->queues_lock);
>  
>  	if (uacce->cdev)
>  		cdev_device_del(uacce->cdev, &uacce->dev);
> -- 
> 2.36.1
> 


More information about the Linux-accelerators mailing list