[SLOF] [PATCH slof v4 5/5] virtio: Enable IOMMU

Alexey Kardashevskiy aik at ozlabs.ru
Fri Dec 6 10:21:46 AEDT 2019



On 05/12/2019 08:16, Michael Roth wrote:
> Quoting Alexey Kardashevskiy (2019-12-03 21:21:38)
>> When QEMU is started with iommu_platform=on, the guest driver must accept
>> it or the device will fail.
>>
>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p
>> devices. -serial and -9p are only compile tested though.
>>
>> For virtio-net we map all RX buffers once and TX when xmit() is called
>> and unmap older pages when we are about to reuse the VQ descriptor.
>> As all other devices are synchronous, we unmap IOMMU pages right after
>> completion of a transaction.
>>
>> This depends on QEMU's:
>> https://patchwork.ozlabs.org/patch/1194067/
>>
>> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
>> Tested-by: Michael Roth <mdroth at linux.vnet.ibm.com>
>> ---
>> Changes:
>> v4:
>> * ditched vqs->id in virtio_queue_init_vq
>>
>> v2:
>> * added Mike's fs/dma-instance-function.fs
>> * total rework
>> ---
>>  lib/libvirtio/virtio.h        |  5 +++
>>  lib/libvirtio/virtio-9p.c     |  4 ++
>>  lib/libvirtio/virtio-blk.c    |  4 ++
>>  lib/libvirtio/virtio-net.c    |  5 ++-
>>  lib/libvirtio/virtio-scsi.c   |  5 +++
>>  lib/libvirtio/virtio-serial.c | 12 +++--
>>  lib/libvirtio/virtio.c        | 82 ++++++++++++++++++++++++++++++++++-
>>  7 files changed, 111 insertions(+), 6 deletions(-)
>>
>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h
>> index 7efc1e524d77..c4eafe40dd31 100644
>> --- a/lib/libvirtio/virtio.h
>> +++ b/lib/libvirtio/virtio.h
>> @@ -29,6 +29,7 @@
>>  #define VIRTIO_F_RING_INDIRECT_DESC    BIT(28)
>>  #define VIRTIO_F_RING_EVENT_IDX                BIT(29)
>>  #define VIRTIO_F_VERSION_1             BIT(32)
>> +#define VIRTIO_F_IOMMU_PLATFORM         BIT(33)
>>
>>  #define VIRTIO_TIMEOUT                 5000 /* 5 sec timeout */
>>
>> @@ -83,6 +84,8 @@ struct vqs {
>>         struct vring_desc *desc;
>>         struct vring_avail *avail;
>>         struct vring_used *used;
>> +       void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */
>> +       uint64_t bus_desc;
>>  };
>>
>>  struct virtio_device {
>> @@ -108,6 +111,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q
>>  extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>>                               uint64_t addr, uint32_t len,
>>                               uint16_t flags, uint16_t next);
>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features);
>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id);
>>  extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id);
>>  extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id);
>>
>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c
>> index 426069fe9509..76078612b06e 100644
>> --- a/lib/libvirtio/virtio-9p.c
>> +++ b/lib/libvirtio/virtio-9p.c
>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r
>>                 // do something better
>>                 mb();
>>         }
>> +
>> +       virtio_free_desc(vq, id, dev->features);
>> +       virtio_free_desc(vq, id + 1, dev->features);
>> +
>>         if (i == 0) {
>>                 return -1;
>>         }
>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c
>> index a0dadbb0d6a8..0363038e559d 100644
>> --- a/lib/libvirtio/virtio-blk.c
>> +++ b/lib/libvirtio/virtio-blk.c
>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum,
>>                         break;
>>         }
>>
>> +       virtio_free_desc(vq, id, dev->features);
>> +       virtio_free_desc(vq, id + 1, dev->features);
>> +       virtio_free_desc(vq, id + 2, dev->features);
>> +
>>         if (status == 0)
>>                 return cnt;
>>
>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c
>> index ae67883020ef..5a0d19088527 100644
>> --- a/lib/libvirtio/virtio-net.c
>> +++ b/lib/libvirtio/virtio-net.c
>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len)
>>         idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx);
>>         id = (idx * 2) % vq_tx->size;
>>
>> +       virtio_free_desc(vq_tx, id, vdev->features);
>> +       virtio_free_desc(vq_tx, id + 1, vdev->features);
>> +
>>         /* Set up virtqueue descriptor for header */
>>         virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr,
>>                          net_hdr_size, VRING_DESC_F_NEXT, id + 1);
>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen)
>>  #endif
>>
>>         /* Copy data to destination buffer */
>> -       memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len);
>> +       memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len);
>>
>>         /* Move indices to next entries */
>>         last_rx_idx = last_rx_idx + 1;
>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c
>> index ae87e97e7330..96285e3891af 100644
>> --- a/lib/libvirtio/virtio-scsi.c
>> +++ b/lib/libvirtio/virtio-scsi.c
>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev,
>>                         break;
>>         }
>>
>> +       virtio_free_desc(vq, id, dev->features);
>> +       virtio_free_desc(vq, id + 1, dev->features);
>> +       if (!(buf == NULL || buf_len == 0))
>> +               virtio_free_desc(vq, id + 2, dev->features);
>> +
>>         return 0;
>>  }
>>
>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c
>> index b8b898fc8bea..8826be96c24e 100644
>> --- a/lib/libvirtio/virtio-serial.c
>> +++ b/lib/libvirtio/virtio-serial.c
>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev)
>>
>>  int virtio_serial_putchar(struct virtio_device *dev, char c)
>>  {
>> -       int id;
>> +       int id, ret;
>>         uint32_t time;
>>         volatile uint16_t *current_used_idx;
>>         uint16_t last_used_idx, avail_idx;
>> @@ -133,17 +133,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c)
>>         virtio_queue_notify(dev, TX_Q);
>>
>>         /* Wait for host to consume the descriptor */
>> +       ret = 1;
>>         time = SLOF_GetTimer() + VIRTIO_TIMEOUT;
>>         while (*current_used_idx == last_used_idx) {
>>                 // do something better
>>                 mb();
>>                 if (time < SLOF_GetTimer()) {
>>                         printf("virtio_serial_putchar failed! \n");
>> -                       return 0;
>> +                       ret = 0;
>> +                       break;
>>                 }
>>         }
>>
>> -       return 1;
>> +       virtio_free_desc(vq, id, dev->features);
>> +
>> +       return ret;
>>  }
>>
>>  char virtio_serial_getchar(struct virtio_device *dev)
>> @@ -163,7 +167,7 @@ char virtio_serial_getchar(struct virtio_device *dev)
>>                 % vq_rx->size;
>>
>>         /* Copy data to destination buffer */
>> -       memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE);
>> +       memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE);
>>
>>         /* Move indices to next entries */
>>         last_rx_idx = last_rx_idx + 1;
>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c
>> index 3e615c65fc2c..9a0c3a96371a 100644
>> --- a/lib/libvirtio/virtio.c
>> +++ b/lib/libvirtio/virtio.c
>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>>         next %= vq->size;
>>
>>         if (features & VIRTIO_F_VERSION_1) {
>> +               if (features & VIRTIO_F_IOMMU_PLATFORM) {
>> +                       void *gpa = (void *) addr;
>> +
>> +                       if (!vq->desc_gpas) {
>> +                               fprintf(stderr, "IOMMU setup has not been done!\n");
>> +                               return;
>> +                       }
>> +
>> +                       addr = SLOF_dma_map_in(gpa, len, 0);
>> +                       vq->desc_gpas[id] = gpa;
>> +               }
>>                 desc->addr = cpu_to_le64(addr);
>>                 desc->len = cpu_to_le32(len);
>>                 desc->flags = cpu_to_le16(flags);
>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>>         }
>>  }
>>
>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features)
>> +{
>> +       struct vring_desc *desc;
>> +
>> +       id %= vq->size;
>> +       desc = &vq->desc[id];
>> +
>> +       if (features & VIRTIO_F_VERSION_1) {
>> +               if (features & VIRTIO_F_IOMMU_PLATFORM) {
>> +                       SLOF_dma_map_out(le64_to_cpu(desc->addr),
>> +                                        0, le32_to_cpu(desc->len));
>> +                       vq->desc_gpas[id] = NULL;
>> +               }
>> +       }
>> +}
>> +
>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id)
>> +{
>> +       struct vqs *vq = &vdev->vq[queue];
>> +
>> +       if (vq->desc_gpas)
>> +               return vq->desc_gpas[id];
>> +
>> +       return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr);
>> +}
>> +
>>  /**
>>   * Reset virtio device
>>   */
>> @@ -326,6 +363,19 @@ static void virtio_set_qaddr(struct virtio_device *dev, int queue, unsigned long
>>                 uint64_t q_used;
>>                 uint32_t q_size = virtio_get_qsize(dev, queue);
>>
>> +               if (dev->features & VIRTIO_F_IOMMU_PLATFORM) {
>> +                       unsigned long cb;
>> +
>> +                       cb = q_size * sizeof(struct vring_desc);
>> +                       cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size;
>> +                       cb = VQ_ALIGN(cb);
>> +                       cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size;
> 
> Shouldn't this be:
> 
>   cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size;


Yes it should, good find!


> 
> The subsequent VQ_ALIGN probably masks it in the current code.
> 
>> +                       cb = VQ_ALIGN(cb);
>> +                       q_desc = SLOF_dma_map_in((void *)q_desc, cb, 0);
>> +
>> +                       dev->vq[queue].bus_desc = q_desc;
>> +               }
>> +
>>                 virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_desc), q_desc);
>>                 q_avail = q_desc + q_size * sizeof(struct vring_desc);
>>                 virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_avail), q_avail);
>> @@ -372,14 +422,41 @@ struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id)
>>
>>         vq->avail->flags = virtio_cpu_to_modern16(dev, VRING_AVAIL_F_NO_INTERRUPT);
>>         vq->avail->idx = 0;
>> +       if (dev->features & VIRTIO_F_IOMMU_PLATFORM)
>> +               vq->desc_gpas = SLOF_alloc_mem_aligned(
>> +                       vq->size * sizeof(vq->desc_gpas[0]), 4096);
>>
>>         return vq;
>>  }
>>
>>  void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id)
>>  {
>> -       if (vq->desc)
>> +       if (vq->desc_gpas) {
>> +               int i;
>> +
>> +               for (i = 0; i < vq->size; ++i)
>> +                       virtio_free_desc(vq, i, dev->features);
> 
> Since virtio_free_desc() calls dma-map-out regardless of whether the
> IOBA was unmapped via a previous call by the driver, isn't there a chance
> we unmap a desc->addr IOBA that has since been re-used elsewhere? I
> guess it's unlikely for SLOF but seems like it could become an issue.


As far as I understand it, only one instance of any driver can be open
at the time so the DMA window is used exclusevely by this particular
device. So when a queue is being terminated, I expect no other user around.


> Also not sure what side-effects we might expect for unused/uninitialized
> descriptors. In init-dma-window-vars we reserve DMA addr 0 so other code
> doesn't have to distinguish from NULL; seems like there's a chance that
> could get unmapped here.
> 
> Maybe we should test for desc_gpas[i] before calling virtio_free_desc(),
> or maybe build that check into virtio_free_desc() itself?

It still makes sense and makes things cleaner, I'll do that.


> Also, is there a case where a driver would legitimately call term_vq()
> without calling virtio_free_desc() for a previously used descriptor?
> Even if they eventually get cleaned up by term_vq() it seems there's a
> chance the descriptor gets re-used prior to that and we still end up
> leaking the ioba. Maybe we should print a warning in this case to detect
> leaks in the driver code.


virtio-net does this - if it ever sent anything, a couple of buffers are
always mapped (see virtionet_xmit()); receive buffers are simply always
mapped until virtio_queue_term_vq() is called. Every other device
releases a descriptor straight away. So I am not sure how I add an easy
detection for all cases but virtio-net.


> 
>> +
>> +               memset(vq->desc_gpas, 0, vq->size * sizeof(vq->desc_gpas[0]));
> 
> Is the memset really needed if we're freeing it right after?

No, not really, I'll remove it.

> 
>> +               SLOF_free_mem(vq->desc_gpas,
>> +                       vq->size * sizeof(vq->desc_gpas[0]));
>> +       }
>> +       if (vq->desc) {
>> +               if (dev->features & VIRTIO_F_IOMMU_PLATFORM) {
>> +                       unsigned long cb;
>> +                       uint32_t q_size = virtio_get_qsize(dev, id);
>> +
>> +                       cb = q_size * sizeof(struct vring_desc);
>> +                       cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size;
>> +                       cb = VQ_ALIGN(cb);
>> +                       cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size;
> 
> As per prior comment:
> 
>   cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size;


Oh right, fixed. Thanks,

> 
> 
> Looks good otherwise.
> 
>> +                       cb = VQ_ALIGN(cb);
>> +
>> +                       SLOF_dma_map_out(vq->bus_desc, 0, cb);
>> +               }
>> +
>>                 SLOF_free_mem(vq->desc, virtio_vring_size(vq->size));
>> +       }
>>         memset(vq, 0, sizeof(*vq));
>>  }
>>
>> @@ -473,6 +550,9 @@ int virtio_negotiate_guest_features(struct virtio_device *dev, uint64_t features
>>                 return -1;
>>         }
>>
>> +       if (host_features & VIRTIO_F_IOMMU_PLATFORM)
>> +               features |= VIRTIO_F_IOMMU_PLATFORM;
>> +
>>         virtio_set_guest_features(dev,  features);
>>         host_features = virtio_get_host_features(dev);
>>         if ((host_features & features) != features) {
>> -- 
>> 2.17.1
>>

-- 
Alexey


More information about the SLOF mailing list