[SLOF] [PATCH slof v3 4/4] virtio: Enable IOMMU
Alexey Kardashevskiy
aik at ozlabs.ru
Fri Nov 22 12:49:28 AEDT 2019
On 22/11/2019 11:09, Michael Roth wrote:
> Quoting Alexey Kardashevskiy (2019-11-20 17:58:01)
>> When QEMU is started with iommu_platform=on, the guest driver must accept
>> it or the device will fail.
>>
>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p
>> devices. -serial and -9p are only compile tested though.
>>
>> For virtio-net we map all RX buffers once and TX when xmit() is called
>> and unmap older pages when we are about to reuse the VQ descriptor.
>> As all other devices are synchronous, we unmap IOMMU pages right after
>> completion of a transaction.
>>
>> This depends on QEMU's:
>> https://patchwork.ozlabs.org/patch/1194067/
>>
>> Copied-bits-and-pieces-from: Michael Roth <mdroth at linux.vnet.ibm.com>
>> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
>> ---
>> Changes:
>> v2:
>> * added Mike's fs/dma-instance-function.fs
>> * total rework
>> ---
>> board-qemu/slof/Makefile | 1 +
>> lib/libvirtio/virtio.h | 5 ++
>> lib/libvirtio/virtio-9p.c | 4 ++
>> lib/libvirtio/virtio-blk.c | 4 ++
>> lib/libvirtio/virtio-net.c | 5 +-
>> lib/libvirtio/virtio-scsi.c | 5 ++
>> lib/libvirtio/virtio-serial.c | 12 +++--
>> lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++-
>> board-qemu/slof/OF.fs | 3 ++
>> slof/fs/dma-instance-function.fs | 28 +++++++++++
>> 10 files changed, 143 insertions(+), 6 deletions(-)
>> create mode 100644 slof/fs/dma-instance-function.fs
>>
>> diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile
>> index 2263e751bde9..d7ed2d7a6f18 100644
>> --- a/board-qemu/slof/Makefile
>> +++ b/board-qemu/slof/Makefile
>> @@ -99,6 +99,7 @@ OF_FFS_FILES = \
>> $(SLOFCMNDIR)/fs/graphics.fs \
>> $(SLOFCMNDIR)/fs/generic-disk.fs \
>> $(SLOFCMNDIR)/fs/dma-function.fs \
>> + $(SLOFCMNDIR)/fs/dma-instance-function.fs \
>> $(SLOFCMNDIR)/fs/pci-device.fs \
>> $(SLOFCMNDIR)/fs/pci-bridge.fs \
>> $(SLOFCMNDIR)/fs/pci-properties.fs \
>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h
>> index 4927a97f9f5f..250120b3112b 100644
>> --- a/lib/libvirtio/virtio.h
>> +++ b/lib/libvirtio/virtio.h
>> @@ -29,6 +29,7 @@
>> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28)
>> #define VIRTIO_F_RING_EVENT_IDX BIT(29)
>> #define VIRTIO_F_VERSION_1 BIT(32)
>> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33)
>>
>> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */
>>
>> @@ -84,6 +85,8 @@ struct vqs {
>> struct vring_desc *desc;
>> struct vring_avail *avail;
>> struct vring_used *used;
>> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */
>> + uint64_t bus_desc;
>> };
>>
>> struct virtio_device {
>> @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q
>> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>> uint64_t addr, uint32_t len,
>> uint16_t flags, uint16_t next);
>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features);
>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id);
>> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id);
>> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id);
>>
>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c
>> index 426069fe9509..76078612b06e 100644
>> --- a/lib/libvirtio/virtio-9p.c
>> +++ b/lib/libvirtio/virtio-9p.c
>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r
>> // do something better
>> mb();
>> }
>> +
>> + virtio_free_desc(vq, id, dev->features);
>> + virtio_free_desc(vq, id + 1, dev->features);
>> +
>> if (i == 0) {
>> return -1;
>> }
>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c
>> index a0dadbb0d6a8..0363038e559d 100644
>> --- a/lib/libvirtio/virtio-blk.c
>> +++ b/lib/libvirtio/virtio-blk.c
>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum,
>> break;
>> }
>>
>> + virtio_free_desc(vq, id, dev->features);
>> + virtio_free_desc(vq, id + 1, dev->features);
>> + virtio_free_desc(vq, id + 2, dev->features);
>> +
>> if (status == 0)
>> return cnt;
>>
>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c
>> index ae67883020ef..5a0d19088527 100644
>> --- a/lib/libvirtio/virtio-net.c
>> +++ b/lib/libvirtio/virtio-net.c
>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len)
>> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx);
>> id = (idx * 2) % vq_tx->size;
>>
>> + virtio_free_desc(vq_tx, id, vdev->features);
>> + virtio_free_desc(vq_tx, id + 1, vdev->features);
>> +
>> /* Set up virtqueue descriptor for header */
>> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr,
>> net_hdr_size, VRING_DESC_F_NEXT, id + 1);
>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen)
>> #endif
>>
>> /* Copy data to destination buffer */
>> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len);
>> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len);
>>
>> /* Move indices to next entries */
>> last_rx_idx = last_rx_idx + 1;
>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c
>> index 251661e8d466..44573d10e24c 100644
>> --- a/lib/libvirtio/virtio-scsi.c
>> +++ b/lib/libvirtio/virtio-scsi.c
>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev,
>> break;
>> }
>>
>> + virtio_free_desc(vq, id, dev->features);
>> + virtio_free_desc(vq, id + 1, dev->features);
>> + if (!(buf == NULL || buf_len == 0))
>> + virtio_free_desc(vq, id + 2, dev->features);
>> +
>> return 0;
>> }
>>
>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c
>> index 78ab0e25d7db..66fbfe8b6cc6 100644
>> --- a/lib/libvirtio/virtio-serial.c
>> +++ b/lib/libvirtio/virtio-serial.c
>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev)
>>
>> int virtio_serial_putchar(struct virtio_device *dev, char c)
>> {
>> - int id;
>> + int id, ret;
>> uint32_t vq_size, time;
>> volatile uint16_t *current_used_idx;
>> uint16_t last_used_idx, avail_idx;
>> @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c)
>> virtio_queue_notify(dev, TX_Q);
>>
>> /* Wait for host to consume the descriptor */
>> + ret = 1;
>> time = SLOF_GetTimer() + VIRTIO_TIMEOUT;
>> while (*current_used_idx == last_used_idx) {
>> // do something better
>> mb();
>> if (time < SLOF_GetTimer()) {
>> printf("virtio_serial_putchar failed! \n");
>> - return 0;
>> + ret = 0;
>> + break;
>> }
>> }
>>
>> - return 1;
>> + virtio_free_desc(vq, id, dev->features);
>> +
>> + return ret;
>> }
>>
>> char virtio_serial_getchar(struct virtio_device *dev)
>> @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev)
>> % vq_rx->size;
>>
>> /* Copy data to destination buffer */
>> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE);
>> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE);
>>
>> /* Move indices to next entries */
>> last_rx_idx = last_rx_idx + 1;
>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c
>> index fff0c7e54c19..d4db5f656888 100644
>> --- a/lib/libvirtio/virtio.c
>> +++ b/lib/libvirtio/virtio.c
>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>> next %= vq->size;
>>
>> if (features & VIRTIO_F_VERSION_1) {
>> + if (features & VIRTIO_F_IOMMU_PLATFORM) {
>> + void *gpa = (void *) addr;
>> +
>> + if (!vq->desc_gpas) {
>> + fprintf(stderr, "IOMMU setup has not been done!\n");
>> + return;
>> + }
>> +
>> + addr = SLOF_dma_map_in(gpa, len, 0);
>> + vq->desc_gpas[id] = gpa;
>> + }
>> desc->addr = cpu_to_le64(addr);
>> desc->len = cpu_to_le32(len);
>> desc->flags = cpu_to_le16(flags);
>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>> }
>> }
>>
>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features)
>> +{
>> + struct vring_desc *desc;
>> +
>> + id %= vq->size;
>> + desc = &vq->desc[id];
>> +
>> + if (features & VIRTIO_F_VERSION_1) {
>> + if (features & VIRTIO_F_IOMMU_PLATFORM) {
>> + SLOF_dma_map_out(le64_to_cpu(desc->addr),
>> + 0, le32_to_cpu(desc->len));
>> + vq->desc_gpas[id] = NULL;
>> + }
>> + }
>> +}
>
> On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the
> disk isn't bootable and the H_PUT_TCE traces show:
>
> qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350
> qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
> qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350
> qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
> qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490
> qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
> qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490
> qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
> qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490
> qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
> qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS
> qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488
> qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER
>
> If I disable in-kernel TCE acceleration, or use TCG, it seems to work
> fine. If I add the following hack it seems to fix things:
>
> @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features)
>
> if (features & VIRTIO_F_VERSION_1) {
> if (features & VIRTIO_F_IOMMU_PLATFORM) {
> - SLOF_dma_map_out(le64_to_cpu(desc->addr),
> + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr));
> + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff),
> 0, le32_to_cpu(desc->len));
> vq->desc_gpas[id] = NULL;
> }
>
> I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If
> so, maybe we should align automatically in dma-map-out?
I thought I did that as well in 1/4, I'll double check. Hmmm.
>
>> +
>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id)
>> +{
>> + struct vqs *vq = &vdev->vq[queue];
>> +
>> + if (vq->desc_gpas)
>> + return vq->desc_gpas[id];
>> +
>> + return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr);
>> +}
>> +
>> /**
>> * Reset virtio device
>> */
>
> <snip>
>
>> diff --git a/board-qemu/slof/OF.fs b/board-qemu/slof/OF.fs
>> index a85f6c558e67..3e117ad03e09 100644
>> --- a/board-qemu/slof/OF.fs
>> +++ b/board-qemu/slof/OF.fs
>> @@ -143,6 +143,9 @@ check-for-nvramrc
>>
>> 8a0 cp
>>
>> +\ For DMA functions used by client/package instances.
>> +#include "dma-instance-function.fs"
>> +
>> \ The client interface.
>> #include "client.fs"
>> \ ELF binary file format.
>> diff --git a/slof/fs/dma-instance-function.fs b/slof/fs/dma-instance-function.fs
>> new file mode 100644
>> index 000000000000..057181be0f44
>> --- /dev/null
>> +++ b/slof/fs/dma-instance-function.fs
>> @@ -0,0 +1,28 @@
>> +\ ****************************************************************************/
>> +\ * Copyright (c) 2011 IBM Corporation
>
> I copy/pasted the license header and forgot to update, should probably change
> to 2019.
>
> Also, if you think you need it:
>
> Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>
Yes I do, thanks!
>> +\ * All rights reserved.
>> +\ * This program and the accompanying materials
>> +\ * are made available under the terms of the BSD License
>> +\ * which accompanies this distribution, and is available at
>> +\ * http://www.opensource.org/licenses/bsd-license.php
>> +\ *
>> +\ * Contributors:
>> +\ * IBM Corporation - initial implementation
>> +\ ****************************************************************************/
>> +
>> +\ DMA memory allocation functions
>> +: dma-alloc ( size -- virt )
>> + s" dma-alloc" $call-parent
>> +;
>> +
>> +: dma-free ( virt size -- )
>> + s" dma-free" $call-parent
>> +;
>> +
>> +: dma-map-in ( virt size cacheable? -- devaddr )
>> + s" dma-map-in" $call-parent
>> +;
>> +
>> +: dma-map-out ( virt devaddr size -- )
>> + s" dma-map-out" $call-parent
>> +;
>> --
>> 2.17.1
>>
>> _______________________________________________
>> SLOF mailing list
>> SLOF at lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/slof
--
Alexey
More information about the SLOF
mailing list