[PATCH 11/19] powerpc: iommu enablement for CMO
Robert Jennings
rcj at linux.vnet.ibm.com
Fri Jun 13 08:19:36 EST 2008
From: Robert Jennings <rcj at linux.vnet.ibm.com>
To support Cooperative Memory Overcommitment (CMO), we need to check
for failure and busy responses from some of the tce hcalls.
These changes for the pseries platform affect the powerpc architecture;
changes for the other affected platforms are included in this patch as well.
pSeries platform IOMMU code changes:
* platform TCE functions must handle H_NOT_ENOUGH_RESOURCES errors.
* platform TCE functions must retry when H_LONG_BUSY_* is returned.
* platform TCE functions must return error when H_NOT_ENOUGH_RESOURCES
encountered.
Architecture IOMMU code changes:
* Calls to ppc_md.tce_build need to check return values and return
DMA_ERROR_CODE on failure
Architecture changes:
* the tce_build function pointer in struct machdep_calls changes to return
int so that the tce_build*_pSeriesLP implementations can indicate failure
* all other platforms will need updates to iommu functions to match the new
calling semantics; they will return 0 on success. The other platforms
default configs have been built, but no further testing was performed.
Signed-off-by: Robert Jennings <rcj at linux.vnet.ibm.com>
---
arch/powerpc/kernel/iommu.c | 71 +++++++++++++++++++++++++++++--
arch/powerpc/platforms/cell/iommu.c | 3 +
arch/powerpc/platforms/iseries/iommu.c | 3 +
arch/powerpc/platforms/pasemi/iommu.c | 3 +
arch/powerpc/platforms/pseries/iommu.c | 76 ++++++++++++++++++++++++++++-----
arch/powerpc/sysdev/dart_iommu.c | 3 +
include/asm-powerpc/machdep.h | 2
7 files changed, 139 insertions(+), 22 deletions(-)
Index: b/arch/powerpc/kernel/iommu.c
===================================================================
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -183,6 +183,49 @@ static unsigned long iommu_range_alloc(s
return n;
}
+/** iommu_undo - Clear iommu_table bits without calling platform tce_free.
+ *
+ * @tbl - struct iommu_table to alter
+ * @dma_addr - DMA address to free entries for
+ * @npages - number of pages to free entries for
+ *
+ * This is the same as __iommu_free without the call to ppc_md.tce_free();
+ *
+ * To clean up after ppc_md.tce_build() errors we need to clear bits
+ * in the table without calling the ppc_md.tce_free() method; calling
+ * ppc_md.tce_free() could alter entries that were not touched due to a
+ * premature failure in ppc_md.tce_build().
+ *
+ * The ppc_md.tce_build() needs to perform its own clean up prior to
+ * returning its error.
+ */
+static void iommu_undo(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
+
+ entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ free_entry = entry - tbl->it_offset;
+
+ if (((free_entry + npages) > tbl->it_size) ||
+ (entry < tbl->it_offset)) {
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "iommu_undo: invalid entry\n");
+ printk(KERN_INFO "\tentry = 0x%lx\n", entry);
+ printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
+ printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
+ printk(KERN_INFO "\tbus# = 0x%lx\n", tbl->it_busno);
+ printk(KERN_INFO "\tsize = 0x%lx\n", tbl->it_size);
+ printk(KERN_INFO "\tstartOff = 0x%lx\n", tbl->it_offset);
+ printk(KERN_INFO "\tindex = 0x%lx\n", tbl->it_index);
+ WARN_ON(1);
+ }
+ return;
+ }
+
+ iommu_area_free(tbl->it_map, free_entry, npages);
+}
+
static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
void *page, unsigned int npages,
enum dma_data_direction direction,
@@ -190,6 +233,7 @@ static dma_addr_t iommu_alloc(struct dev
{
unsigned long entry, flags;
dma_addr_t ret = DMA_ERROR_CODE;
+ int rc;
spin_lock_irqsave(&(tbl->it_lock), flags);
@@ -204,9 +248,20 @@ static dma_addr_t iommu_alloc(struct dev
ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
/* Put the TCEs in the HW table */
- ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK,
- direction);
+ rc = ppc_md.tce_build(tbl, entry, npages,
+ (unsigned long)page & IOMMU_PAGE_MASK, direction);
+ /* ppc_md.tce_build() only returns non-zero for transient errors.
+ * Clean up the table bitmap in this case and return
+ * DMA_ERROR_CODE. For all other errors the functionality is
+ * not altered.
+ */
+ if (unlikely(rc)) {
+ iommu_undo(tbl, ret, npages);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return DMA_ERROR_CODE;
+ }
/* Flush/invalidate TLB caches if necessary */
if (ppc_md.tce_flush)
@@ -275,7 +330,7 @@ int iommu_map_sg(struct device *dev, str
dma_addr_t dma_next = 0, dma_addr;
unsigned long flags;
struct scatterlist *s, *outs, *segstart;
- int outcount, incount, i;
+ int outcount, incount, i, rc = 0;
unsigned int align;
unsigned long handle;
unsigned int max_seg_size;
@@ -336,7 +391,10 @@ int iommu_map_sg(struct device *dev, str
npages, entry, dma_addr);
/* Insert into HW table */
- ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, direction);
+ rc = ppc_md.tce_build(tbl, entry, npages,
+ vaddr & IOMMU_PAGE_MASK, direction);
+ if(unlikely(rc))
+ goto failure;
/* If we are in an open segment, try merging */
if (segstart != s) {
@@ -399,7 +457,10 @@ int iommu_map_sg(struct device *dev, str
vaddr = s->dma_address & IOMMU_PAGE_MASK;
npages = iommu_num_pages(s->dma_address, s->dma_length);
- __iommu_free(tbl, vaddr, npages);
+ if (!rc)
+ __iommu_free(tbl, vaddr, npages);
+ else
+ iommu_undo(tbl, vaddr, npages);
s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0;
}
Index: b/arch/powerpc/platforms/cell/iommu.c
===================================================================
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct
}
}
-static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction)
{
int i;
@@ -210,6 +210,7 @@ static void tce_build_cell(struct iommu_
pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
index, npages, direction, base_pte);
+ return 0;
}
static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
Index: b/arch/powerpc/platforms/iseries/iommu.c
===================================================================
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -41,7 +41,7 @@
#include <asm/iseries/hv_call_event.h>
#include <asm/iseries/iommu.h>
-static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
+static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction)
{
u64 rc;
@@ -70,6 +70,7 @@ static void tce_build_iSeries(struct iom
index++;
uaddr += TCE_PAGE_SIZE;
}
+ return 0;
}
static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
Index: b/arch/powerpc/platforms/pasemi/iommu.c
===================================================================
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
static struct iommu_table iommu_table_iobmap;
static int iommu_table_iobmap_inited;
-static void iobmap_build(struct iommu_table *tbl, long index,
+static int iobmap_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction)
{
@@ -107,6 +107,7 @@ static void iobmap_build(struct iommu_ta
uaddr += IOBMAP_PAGE_SIZE;
bus_addr += IOBMAP_PAGE_SIZE;
}
+ return 0;
}
Index: b/arch/powerpc/platforms/pseries/iommu.c
===================================================================
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -25,6 +25,7 @@
*/
#include <linux/init.h>
+#include <linux/delay.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
@@ -48,7 +49,7 @@
#include "plpar_wrappers.h"
-static void tce_build_pSeries(struct iommu_table *tbl, long index,
+static int tce_build_pSeries(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction)
{
@@ -71,6 +72,7 @@ static void tce_build_pSeries(struct iom
uaddr += TCE_PAGE_SIZE;
tcep++;
}
+ return 0;
}
@@ -93,13 +95,18 @@ static unsigned long tce_get_pseries(str
return *tcep;
}
-static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
+
+static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
enum dma_data_direction direction)
{
- u64 rc;
+ u64 rc = 0;
u64 proto_tce, tce;
u64 rpn;
+ int sleep_msecs, ret = 0;
+ long tcenum_start = tcenum, npages_start = npages;
rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
proto_tce = TCE_PCI_READ;
@@ -108,7 +115,21 @@ static void tce_build_pSeriesLP(struct i
while (npages--) {
tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
- rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
+ do {
+ rc = plpar_tce_put((u64)tbl->it_index,
+ (u64)tcenum << 12, tce);
+ if (unlikely(H_IS_LONG_BUSY(rc))) {
+ sleep_msecs = plpar_get_longbusy_msecs(rc);
+ mdelay(sleep_msecs);
+ }
+ } while (unlikely(H_IS_LONG_BUSY(rc)));
+
+ if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+ ret = (int)rc;
+ tce_free_pSeriesLP(tbl, tcenum_start,
+ (npages_start - (npages + 1)));
+ break;
+ }
if (rc && printk_ratelimit()) {
printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
@@ -121,19 +142,22 @@ static void tce_build_pSeriesLP(struct i
tcenum++;
rpn++;
}
+ return ret;
}
static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
-static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
enum dma_data_direction direction)
{
- u64 rc;
+ u64 rc = 0;
u64 proto_tce;
u64 *tcep;
u64 rpn;
long l, limit;
+ long tcenum_start = tcenum, npages_start = npages;
+ int sleep_msecs, ret = 0;
if (npages == 1)
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
@@ -171,15 +195,26 @@ static void tce_buildmulti_pSeriesLP(str
rpn++;
}
- rc = plpar_tce_put_indirect((u64)tbl->it_index,
- (u64)tcenum << 12,
- (u64)virt_to_abs(tcep),
- limit);
+ do {
+ rc = plpar_tce_put_indirect(tbl->it_index, tcenum << 12,
+ virt_to_abs(tcep), limit);
+ if (unlikely(H_IS_LONG_BUSY(rc))) {
+ sleep_msecs = plpar_get_longbusy_msecs(rc);
+ mdelay(sleep_msecs);
+ }
+ } while (unlikely(H_IS_LONG_BUSY(rc)));
npages -= limit;
tcenum += limit;
} while (npages > 0 && !rc);
+ if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+ ret = (int)rc;
+ tce_freemulti_pSeriesLP(tbl, tcenum_start,
+ (npages_start - (npages + limit)));
+ return ret;
+ }
+
if (rc && printk_ratelimit()) {
printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -187,14 +222,23 @@ static void tce_buildmulti_pSeriesLP(str
printk("\ttce[0] val = 0x%lx\n", tcep[0]);
show_stack(current, (unsigned long *)__get_SP());
}
+ return ret;
}
static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
+ int sleep_msecs;
u64 rc;
while (npages--) {
- rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
+ do {
+ rc = plpar_tce_put((u64)tbl->it_index,
+ (u64)tcenum << 12, 0);
+ if (unlikely(H_IS_LONG_BUSY(rc))) {
+ sleep_msecs = plpar_get_longbusy_msecs(rc);
+ mdelay(sleep_msecs);
+ }
+ } while (unlikely(H_IS_LONG_BUSY(rc)));
if (rc && printk_ratelimit()) {
printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
@@ -210,9 +254,17 @@ static void tce_free_pSeriesLP(struct io
static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
+ int sleep_msecs;
u64 rc;
- rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
+ do {
+ rc = plpar_tce_stuff((u64)tbl->it_index,
+ (u64)tcenum << 12, 0, npages);
+ if (unlikely(H_IS_LONG_BUSY(rc))) {
+ sleep_msecs = plpar_get_longbusy_msecs(rc);
+ mdelay(sleep_msecs);
+ }
+ } while (unlikely(H_IS_LONG_BUSY(rc)));
if (rc && printk_ratelimit()) {
printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
Index: b/arch/powerpc/sysdev/dart_iommu.c
===================================================================
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -147,7 +147,7 @@ static void dart_flush(struct iommu_tabl
}
}
-static void dart_build(struct iommu_table *tbl, long index,
+static int dart_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction)
{
@@ -183,6 +183,7 @@ static void dart_build(struct iommu_tabl
} else {
dart_dirty = 1;
}
+ return 0;
}
Index: b/include/asm-powerpc/machdep.h
===================================================================
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -76,7 +76,7 @@ struct machdep_calls {
* destroyed as well */
void (*hpte_clear_all)(void);
- void (*tce_build)(struct iommu_table * tbl,
+ int (*tce_build)(struct iommu_table * tbl,
long index,
long npages,
unsigned long uaddr,
More information about the Linuxppc-dev
mailing list