[PATCH] uacce: fix concurrency of fops_open and uacce_remove

Zhangfei Gao zhangfei.gao at linaro.org
Sat Jun 18 00:23:13 AEST 2022



On 2022/6/17 下午4:20, Zhangfei Gao wrote:
>
>
> On 2022/6/17 下午2:05, Zhangfei Gao wrote:
>>
>>
>> On 2022/6/16 下午4:14, Jean-Philippe Brucker wrote:
>>> On Thu, Jun 16, 2022 at 12:10:18PM +0800, Zhangfei Gao wrote:
>>>>>> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
>>>>>> index 281c54003edc..b6219c6bfb48 100644
>>>>>> --- a/drivers/misc/uacce/uacce.c
>>>>>> +++ b/drivers/misc/uacce/uacce.c
>>>>>> @@ -136,9 +136,16 @@ static int uacce_fops_open(struct inode 
>>>>>> *inode, struct file *filep)
>>>>>>        if (!q)
>>>>>>            return -ENOMEM;
>>>>>> +    mutex_lock(&uacce->queues_lock);
>>>>>> +
>>>>>> +    if (!uacce->parent->driver) {
>>>>> I don't think this is useful, because the core clears 
>>>>> parent->driver after
>>>>> having run uacce_remove():
>>>>>
>>>>>     rmmod hisi_zip        open()
>>>>>      ...                 uacce_fops_open()
>>>>>      __device_release_driver()      ...
>>>>>       pci_device_remove()
>>>>>        hisi_zip_remove()
>>>>>         hisi_qm_uninit()
>>>>>          uacce_remove()
>>>>>           ...              ...
>>>>>                        mutex_lock(uacce->queues_lock)
>>>>>       ...                  if (!uacce->parent->driver)
>>>>>       device_unbind_cleanup()      /* driver still valid, proceed */
>>>>>        dev->driver = NULL
>>>> The check if (!uacce->parent->driver) is required; otherwise a NULL
>>>> pointer dereference may happen.
>>> I agree we need something, what I mean is that this check is not
>>> sufficient.
>>>
>>>> iommu_sva_bind_device
>>>> const struct iommu_ops *ops = dev_iommu_ops(dev);  ->
>>>> dev->iommu->iommu_dev->ops
>>>>
>>>> rmmod has no issue, but removing the parent PCI device has the issue.
>>> Ah right, relying on the return value of bind() wouldn't be enough 
>>> even if
>>> we mandated SVA.
>>>
>>> [...]
>>>>> I think we need the global uacce_mutex to serialize uacce_remove() 
>>>>> and
>>>>> uacce_fops_open(). uacce_remove() would do everything, including
>>>>> xa_erase(), while holding that mutex. And uacce_fops_open() would 
>>>>> try to
>>>>> obtain the uacce object from the xarray while holding the mutex, 
>>>>> which
>>>>> fails if the uacce object is being removed.
>>>> Since fops_open gets the char device refcount, uacce_release will not
>>>> happen until open returns.
>>> The refcount only ensures that the uacce_device object is not freed as
>>> long as there are open fds. But uacce_remove() can run while there are
>>> open fds, or fds in the process of being opened. And after
>>> uacce_remove()
>>> runs, the uacce_device object still exists but is mostly unusable. For
>>> example once the module is freed, uacce->ops is not valid anymore. But
>>> currently uacce_fops_open() may dereference the ops in this case:
>>>
>>>     uacce_fops_open()
>>>      if (!uacce->parent->driver)
>>>      /* Still valid, keep going */
>>>      ...                    rmmod
>>>                          uacce_remove()
>>>      ...                     free_module()
>>>      uacce->ops->get_queue() /* BUG */
>>
>> uacce_remove should wait for uacce->queues_lock until fops_open
>> releases the lock.
>> If open happens just after uacce_remove unlocks, uacce_bind_queue
>> in open should fail.
>>
>>> Accessing uacce->ops after free_module() is a use-after-free. We 
>>> need all
>> You mean the parent releases the resources.
>>> the fops to synchronize with uacce_remove() to ensure they don't use 
>>> any
>>> resource of the parent after it's been freed.
>> After fops_open, we are currently counting on the parent driver to stop
>> all DMA first and then call uacce_remove, which is an assumption.
>> For example, drivers/crypto/hisilicon/zip/zip_main.c calls
>> hisi_qm_wait_task_finish, which waits for uacce_release.
>> If this is commented out, there may be other issues, such as:
>> Unable to handle kernel paging request at virtual address 
>> ffff80000b700204
>> pc : hisi_qm_cache_wb.part.0+0x2c/0xa0
>>
>>> I see uacce_fops_poll() may have the same problem, and should be inside
>>> uacce_mutex.
>> Do we need to consider the case where uacce_remove can happen at any
>> time, without waiting for DMA to stop?
>>
>> Not sure uacce_mutex can do this.
>> Currently the sequence is
>> mutex_lock(&uacce->queues_lock);
>> mutex_lock(&uacce_mutex);
>>
>> Or should we set all the callbacks of uacce_ops to NULL?
> How about doing the following in uacce_remove:
> mutex_lock(&uacce_mutex);
> uacce->ops = NULL;
> mutex_unlock(&uacce_mutex);
>
> And then check uacce->ops before using it.
>

The diff looks like this; I will merge it into the patch.

  drivers/misc/uacce/uacce.c | 65 ++++++++++++++++++++++++++++++++------
  1 file changed, 56 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index b6219c6bfb48..311192728132 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -23,6 +23,11 @@ static int uacce_start_queue(struct uacce_queue *q)
          goto out_with_lock;
      }

+    if (!q->uacce->ops) {
+        ret = -EINVAL;
+        goto out_with_lock;
+    }
+
      if (q->uacce->ops->start_queue) {
          ret = q->uacce->ops->start_queue(q);
          if (ret < 0)
@@ -46,6 +51,9 @@ static int uacce_put_queue(struct uacce_queue *q)
      if (q->state == UACCE_Q_ZOMBIE)
          goto out;

+    if (!uacce->ops)
+        goto out;
+
      if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue)
          uacce->ops->stop_queue(q);

@@ -65,6 +73,7 @@ static long uacce_fops_unl_ioctl(struct file *filep,
  {
      struct uacce_queue *q = filep->private_data;
      struct uacce_device *uacce = q->uacce;
+    long ret;

      switch (cmd) {
      case UACCE_CMD_START_Q:
@@ -74,10 +83,17 @@ static long uacce_fops_unl_ioctl(struct file *filep,
          return uacce_put_queue(q);

      default:
-        if (!uacce->ops->ioctl)
-            return -EINVAL;
+        mutex_lock(&uacce_mutex);
+
+        if (!uacce->ops || !uacce->ops->ioctl) {
+            ret = -EINVAL;
+            goto out_with_lock;
+        }

-        return uacce->ops->ioctl(q, cmd, arg);
+        ret = uacce->ops->ioctl(q, cmd, arg);
+out_with_lock:
+        mutex_unlock(&uacce_mutex);
+        return ret;
      }
  }

@@ -138,10 +154,13 @@ static int uacce_fops_open(struct inode *inode, 
struct file *filep)

      mutex_lock(&uacce->queues_lock);

-    if (!uacce->parent->driver) {
+    mutex_lock(&uacce_mutex);
+    if (!uacce->parent || !uacce->ops) {
+        mutex_unlock(&uacce_mutex);
          ret = -ENODEV;
          goto out_with_lock;
      }
+    mutex_unlock(&uacce_mutex);

      ret = uacce_bind_queue(uacce, q);
      if (ret)
@@ -226,6 +245,11 @@ static int uacce_fops_mmap(struct file *filep, 
struct vm_area_struct *vma)

      mutex_lock(&uacce_mutex);

+    if (!uacce->ops) {
+        ret = -EINVAL;
+        goto out_with_lock;
+    }
+
      if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) {
          ret = -EINVAL;
          goto out_with_lock;
@@ -271,9 +295,18 @@ static __poll_t uacce_fops_poll(struct file *file, 
poll_table *wait)
      struct uacce_device *uacce = q->uacce;

      poll_wait(file, &q->wait, wait);
-    if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q))
+
+    mutex_lock(&uacce_mutex);
+    if (!uacce->ops)
+        goto out_with_lock;
+
+    if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q)) {
+        mutex_unlock(&uacce_mutex);
          return EPOLLIN | EPOLLRDNORM;
+    }

+out_with_lock:
+    mutex_unlock(&uacce_mutex);
      return 0;
  }

@@ -312,12 +345,20 @@ static ssize_t available_instances_show(struct 
device *dev,
                      char *buf)
  {
      struct uacce_device *uacce = to_uacce_device(dev);
+    ssize_t ret;

-    if (!uacce->ops->get_available_instances)
-        return -ENODEV;
+    mutex_lock(&uacce_mutex);
+    if (!uacce->ops || !uacce->ops->get_available_instances) {
+        ret = -ENODEV;
+        goto out_with_lock;
+    }
+
+    ret = sysfs_emit(buf, "%d\n",
+             uacce->ops->get_available_instances(uacce));

-    return sysfs_emit(buf, "%d\n",
-               uacce->ops->get_available_instances(uacce));
+out_with_lock:
+    mutex_unlock(&uacce_mutex);
+    return ret;
  }

  static ssize_t algorithms_show(struct device *dev,
@@ -523,6 +564,12 @@ void uacce_remove(struct uacce_device *uacce)

      /* disable sva now since no opened queues */
      uacce_disable_sva(uacce);
+
+    mutex_lock(&uacce_mutex);
+    uacce->parent = NULL;
+    uacce->ops = NULL;
+    mutex_unlock(&uacce_mutex);
+
      mutex_unlock(&uacce->queues_lock);

      if (uacce->cdev)
-- 
2.25.1



More information about the Linux-accelerators mailing list