[SLOF] [PATCH slof v3 4/4] virtio: Enable IOMMU

David Gibson david at gibson.dropbear.id.au
Fri Nov 22 12:11:25 AEDT 2019


On Thu, Nov 21, 2019 at 06:09:56PM -0600, Michael Roth wrote:
> Quoting Alexey Kardashevskiy (2019-11-20 17:58:01)
> > When QEMU is started with iommu_platform=on, the guest driver must accept
> > it or the device will fail.
> > 
> > This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p
> > devices. -serial and -9p are only compile tested though.
> > 
> > For virtio-net we map all RX buffers once and TX when xmit() is called
> > and unmap older pages when we are about to reuse the VQ descriptor.
> > As all other devices are synchronous, we unmap IOMMU pages right after
> > completion of a transaction.
> > 
> > This depends on QEMU's:
> > https://patchwork.ozlabs.org/patch/1194067/
> > 
> > Copied-bits-and-pieces-from: Michael Roth <mdroth at linux.vnet.ibm.com>
> > Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> > ---
> > Changes:
> > v2:
> > * added Mike's fs/dma-instance-function.fs
> > * total rework
> > ---
> >  board-qemu/slof/Makefile         |  1 +
> >  lib/libvirtio/virtio.h           |  5 ++
> >  lib/libvirtio/virtio-9p.c        |  4 ++
> >  lib/libvirtio/virtio-blk.c       |  4 ++
> >  lib/libvirtio/virtio-net.c       |  5 +-
> >  lib/libvirtio/virtio-scsi.c      |  5 ++
> >  lib/libvirtio/virtio-serial.c    | 12 +++--
> >  lib/libvirtio/virtio.c           | 82 +++++++++++++++++++++++++++++++-
> >  board-qemu/slof/OF.fs            |  3 ++
> >  slof/fs/dma-instance-function.fs | 28 +++++++++++
> >  10 files changed, 143 insertions(+), 6 deletions(-)
> >  create mode 100644 slof/fs/dma-instance-function.fs
> > 
> > diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile
> > index 2263e751bde9..d7ed2d7a6f18 100644
> > --- a/board-qemu/slof/Makefile
> > +++ b/board-qemu/slof/Makefile
> > @@ -99,6 +99,7 @@ OF_FFS_FILES = \
> >         $(SLOFCMNDIR)/fs/graphics.fs \
> >         $(SLOFCMNDIR)/fs/generic-disk.fs \
> >         $(SLOFCMNDIR)/fs/dma-function.fs \
> > +       $(SLOFCMNDIR)/fs/dma-instance-function.fs \
> >         $(SLOFCMNDIR)/fs/pci-device.fs \
> >         $(SLOFCMNDIR)/fs/pci-bridge.fs \
> >         $(SLOFCMNDIR)/fs/pci-properties.fs \
> > diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h
> > index 4927a97f9f5f..250120b3112b 100644
> > --- a/lib/libvirtio/virtio.h
> > +++ b/lib/libvirtio/virtio.h
> > @@ -29,6 +29,7 @@
> >  #define VIRTIO_F_RING_INDIRECT_DESC    BIT(28)
> >  #define VIRTIO_F_RING_EVENT_IDX                BIT(29)
> >  #define VIRTIO_F_VERSION_1             BIT(32)
> > +#define VIRTIO_F_IOMMU_PLATFORM         BIT(33)
> > 
> >  #define VIRTIO_TIMEOUT                 5000 /* 5 sec timeout */
> > 
> > @@ -84,6 +85,8 @@ struct vqs {
> >         struct vring_desc *desc;
> >         struct vring_avail *avail;
> >         struct vring_used *used;
> > +       void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */
> > +       uint64_t bus_desc;
> >  };
> > 
> >  struct virtio_device {
> > @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q
> >  extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
> >                               uint64_t addr, uint32_t len,
> >                               uint16_t flags, uint16_t next);
> > +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features);
> > +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id);
> >  extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id);
> >  extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id);
> > 
> > diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c
> > index 426069fe9509..76078612b06e 100644
> > --- a/lib/libvirtio/virtio-9p.c
> > +++ b/lib/libvirtio/virtio-9p.c
> > @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r
> >                 // do something better
> >                 mb();
> >         }
> > +
> > +       virtio_free_desc(vq, id, dev->features);
> > +       virtio_free_desc(vq, id + 1, dev->features);
> > +
> >         if (i == 0) {
> >                 return -1;
> >         }
> > diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c
> > index a0dadbb0d6a8..0363038e559d 100644
> > --- a/lib/libvirtio/virtio-blk.c
> > +++ b/lib/libvirtio/virtio-blk.c
> > @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum,
> >                         break;
> >         }
> > 
> > +       virtio_free_desc(vq, id, dev->features);
> > +       virtio_free_desc(vq, id + 1, dev->features);
> > +       virtio_free_desc(vq, id + 2, dev->features);
> > +
> >         if (status == 0)
> >                 return cnt;
> > 
> > diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c
> > index ae67883020ef..5a0d19088527 100644
> > --- a/lib/libvirtio/virtio-net.c
> > +++ b/lib/libvirtio/virtio-net.c
> > @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len)
> >         idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx);
> >         id = (idx * 2) % vq_tx->size;
> > 
> > +       virtio_free_desc(vq_tx, id, vdev->features);
> > +       virtio_free_desc(vq_tx, id + 1, vdev->features);
> > +
> >         /* Set up virtqueue descriptor for header */
> >         virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr,
> >                          net_hdr_size, VRING_DESC_F_NEXT, id + 1);
> > @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen)
> >  #endif
> > 
> >         /* Copy data to destination buffer */
> > -       memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len);
> > +       memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len);
> > 
> >         /* Move indices to next entries */
> >         last_rx_idx = last_rx_idx + 1;
> > diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c
> > index 251661e8d466..44573d10e24c 100644
> > --- a/lib/libvirtio/virtio-scsi.c
> > +++ b/lib/libvirtio/virtio-scsi.c
> > @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev,
> >                         break;
> >         }
> > 
> > +       virtio_free_desc(vq, id, dev->features);
> > +       virtio_free_desc(vq, id + 1, dev->features);
> > +       if (!(buf == NULL || buf_len == 0))
> > +               virtio_free_desc(vq, id + 2, dev->features);
> > +
> >         return 0;
> >  }
> > 
> > diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c
> > index 78ab0e25d7db..66fbfe8b6cc6 100644
> > --- a/lib/libvirtio/virtio-serial.c
> > +++ b/lib/libvirtio/virtio-serial.c
> > @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev)
> > 
> >  int virtio_serial_putchar(struct virtio_device *dev, char c)
> >  {
> > -       int id;
> > +       int id, ret;
> >         uint32_t vq_size, time;
> >         volatile uint16_t *current_used_idx;
> >         uint16_t last_used_idx, avail_idx;
> > @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c)
> >         virtio_queue_notify(dev, TX_Q);
> > 
> >         /* Wait for host to consume the descriptor */
> > +       ret = 1;
> >         time = SLOF_GetTimer() + VIRTIO_TIMEOUT;
> >         while (*current_used_idx == last_used_idx) {
> >                 // do something better
> >                 mb();
> >                 if (time < SLOF_GetTimer()) {
> >                         printf("virtio_serial_putchar failed! \n");
> > -                       return 0;
> > +                       ret = 0;
> > +                       break;
> >                 }
> >         }
> > 
> > -       return 1;
> > +       virtio_free_desc(vq, id, dev->features);
> > +
> > +       return ret;
> >  }
> > 
> >  char virtio_serial_getchar(struct virtio_device *dev)
> > @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev)
> >                 % vq_rx->size;
> > 
> >         /* Copy data to destination buffer */
> > -       memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE);
> > +       memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE);
> > 
> >         /* Move indices to next entries */
> >         last_rx_idx = last_rx_idx + 1;
> > diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c
> > index fff0c7e54c19..d4db5f656888 100644
> > --- a/lib/libvirtio/virtio.c
> > +++ b/lib/libvirtio/virtio.c
> > @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
> >         next %= vq->size;
> > 
> >         if (features & VIRTIO_F_VERSION_1) {
> > +               if (features & VIRTIO_F_IOMMU_PLATFORM) {
> > +                       void *gpa = (void *) addr;
> > +
> > +                       if (!vq->desc_gpas) {
> > +                               fprintf(stderr, "IOMMU setup has not been done!\n");
> > +                               return;
> > +                       }
> > +
> > +                       addr = SLOF_dma_map_in(gpa, len, 0);
> > +                       vq->desc_gpas[id] = gpa;
> > +               }
> >                 desc->addr = cpu_to_le64(addr);
> >                 desc->len = cpu_to_le32(len);
> >                 desc->flags = cpu_to_le16(flags);
> > @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
> >         }
> >  }
> > 
> > +void virtio_free_desc(struct vqs *vq, int id, uint64_t features)
> > +{
> > +       struct vring_desc *desc;
> > +
> > +       id %= vq->size;
> > +       desc = &vq->desc[id];
> > +
> > +       if (features & VIRTIO_F_VERSION_1) {
> > +               if (features & VIRTIO_F_IOMMU_PLATFORM) {
> > +                       SLOF_dma_map_out(le64_to_cpu(desc->addr),
> > +                                        0, le32_to_cpu(desc->len));
> > +                       vq->desc_gpas[id] = NULL;
> > +               }
> > +       }
> > +}
> 
> On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the
> disk isn't bootable and the H_PUT_TCE traces show:
> 
>  qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350
>  qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
>  qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350
>  qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
>  qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490
>  qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
>  qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490
>  qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
>  qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490
>  qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
>  qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
>  qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488
>  qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER
> 
> If I disable in-kernel TCE acceleration, or use TCG, it seems to work
> fine. If I add the following hack it seems to fix things:
> 
>   @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features)
>    
>           if (features & VIRTIO_F_VERSION_1) {
>                   if (features & VIRTIO_F_IOMMU_PLATFORM) {
>   -                       SLOF_dma_map_out(le64_to_cpu(desc->addr),
>   +                       fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr));
>   +                       SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff),
>                                            0, le32_to_cpu(desc->len));
>                           vq->desc_gpas[id] = NULL;
>                   }
> 
> I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If
> so, maybe we should align automatically in dma-map-out?

Yes, I think we should.

It's probably also a good idea to match the qemu and kernel behaviour
to each other.  PAPR doesn't seem to specifically say what to do about
unaligned IOBAs, but I'm inclined to make qemu stricter rather than
making the kernel less strict.  Any counter opinions?

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/slof/attachments/20191122/91a31b0d/attachment.sig>


More information about the SLOF mailing list