[PATCH v4 11/16] powerpc/powernv: Release replaced TCE
Alexey Kardashevskiy
aik at ozlabs.ru
Wed Jul 30 19:31:30 EST 2014
At the moment, writing a new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However, the PAPR specification allows
the guest to write a new TCE value without clearing it first.
This adds a set_and_get() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE(s) so the caller can release
the pages afterwards.
This makes iommu_tce_build() put pages returned by set_and_get().
Since now we depend on permission bits in TCE entries, this preserves
those bits in TCE in iommu_put_tce_user_mode().
This removes use of pool locks as those locks serve for TCE allocations
rather than IOMMU table access and the new set_and_get() callback provides
a lockless way of safely releasing pages.
This disables external IOMMU use (i.e. VFIO) for IOMMUs which do not
implement set_and_get() callback. Therefore the "powernv" platform is
the only supported one.
Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
---
Changes:
v4:
* this is merge+rework of
powerpc/powernv: Return non-zero TCE from pnv_tce_build
powerpc/iommu: Implement put_page() if TCE had non-zero value
powerpc/iommu: Extend ppc_md.tce_build(_rm) to return old TCE values
---
arch/powerpc/include/asm/iommu.h | 6 ++++++
arch/powerpc/kernel/iommu.c | 28 +++++++++++++++-------------
arch/powerpc/platforms/powernv/pci.c | 29 +++++++++++++++++++++++------
3 files changed, 44 insertions(+), 19 deletions(-)
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index c725e4a..4b13e4e 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -49,6 +49,12 @@ struct iommu_table_ops {
unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs);
+ int (*set_and_get)(struct iommu_table *tbl,
+ long index, long npages,
+ unsigned long uaddr,
+ unsigned long *old_tces,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs);
void (*clear)(struct iommu_table *tbl,
long index, long npages);
unsigned long (*get)(struct iommu_table *tbl, long index);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 6a86788..ad52e00 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1007,9 +1007,6 @@ EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
{
unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
-
- spin_lock(&(pool->lock));
oldtce = tbl->it_ops->get(tbl, entry);
if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
@@ -1017,8 +1014,6 @@ unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
else
oldtce = 0;
- spin_unlock(&(pool->lock));
-
return oldtce;
}
EXPORT_SYMBOL_GPL(iommu_clear_tce);
@@ -1056,16 +1051,12 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
{
int ret = -EBUSY;
unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
- spin_lock(&(pool->lock));
+ ret = tbl->it_ops->set_and_get(tbl, entry, 1, hwaddr, &oldtce,
+ direction, NULL);
- oldtce = tbl->it_ops->get(tbl, entry);
- /* Add new entry if it is not busy */
- if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL);
-
- spin_unlock(&(pool->lock));
+ if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
+ put_page(pfn_to_page(__pa(oldtce) >> PAGE_SHIFT));
/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
@@ -1092,6 +1083,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
return -EFAULT;
}
hwaddr = (unsigned long) page_address(page) + offset;
+ hwaddr |= tce & (TCE_PCI_READ | TCE_PCI_WRITE);
ret = iommu_tce_build(tbl, entry, hwaddr, direction);
if (ret)
@@ -1110,6 +1102,16 @@ int iommu_take_ownership(struct iommu_table *tbl)
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
int ret = 0, bit0 = 0;
+ /*
+ * VFIO does not control TCE entries allocation and the guest
+ * can write new TCEs on top of existing ones so iommu_tce_build()
+ * must be able to release old pages. This functionality
+ * requires set_and_get() callback defined so if it is not
+ * implemented, we disallow taking ownership over the table.
+ */
+ if (!tbl->it_ops->set_and_get)
+ return -EINVAL;
+
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 1179c63..629d443 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -572,12 +572,14 @@ static void pnv_tce_invalidate(struct iommu_table *tbl, __be64 *startp,
}
static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr, enum dma_data_direction direction,
+ unsigned long uaddr, unsigned long *old_tces,
+ enum dma_data_direction direction,
struct dma_attrs *attrs, bool rm)
{
u64 proto_tce;
__be64 *tcep, *tces;
u64 rpn;
+ long i;
proto_tce = TCE_PCI_READ; // Read allowed
@@ -587,9 +589,13 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
rpn = __pa(uaddr) >> tbl->it_page_shift;
- while (npages--)
- *(tcep++) = cpu_to_be64(proto_tce |
- (rpn++ << tbl->it_page_shift));
+ for (i = 0; i < npages; i++) {
+ unsigned long oldtce = xchg(tcep, cpu_to_be64(proto_tce |
+ (rpn++ << tbl->it_page_shift)));
+ if (old_tces)
+ old_tces[i] = (unsigned long) __va(oldtce);
+ tcep++;
+ }
pnv_tce_invalidate(tbl, tces, tcep - 1, rm);
@@ -601,8 +607,18 @@ static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
- false);
+ return pnv_tce_build(tbl, index, npages, uaddr, NULL, direction,
+ attrs, false);
+}
+
+static int pnv_tce_set_and_get_vm(struct iommu_table *tbl, long index,
+ long npages,
+ unsigned long uaddr, unsigned long *old_tces,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return pnv_tce_build(tbl, index, npages, uaddr, old_tces, direction,
+ attrs, false);
}
static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
@@ -630,6 +646,7 @@ static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
struct iommu_table_ops pnv_iommu_ops = {
.set = pnv_tce_build_vm,
+ .set_and_get = pnv_tce_set_and_get_vm,
.clear = pnv_tce_free_vm,
.get = pnv_tce_get,
};
--
2.0.0
More information about the Linuxppc-dev
mailing list