[RFC]: map 4K iommu pages even on 64K largepage systems.
Benjamin Herrenschmidt
benh at kernel.crashing.org
Tue Oct 24 14:43:27 EST 2006
> Haven't yet thought if this is a good long-term solution or not,
> whether this kind of thing is desirable or not. That's why its
> an RFC. Comments?
Ok, after some time reading the details of the patch, it looks fairly
good, just a few nits. Also, we need to fix the DART code still :)
Ben
> +static inline unsigned long
> +iommu_num_pages(unsigned long vaddr, unsigned long slen)
Coding style in that file is to do:
static inline unsigned long iommu_num_pages(unsigned long vaddr,
unsigned long slen)
> @@ -557,8 +562,8 @@ void iommu_unmap_single(struct iommu_tab
> BUG_ON(direction == DMA_NONE);
>
> if (tbl)
> - iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
> - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
> + iommu_free(tbl, dma_handle, (IOMMU_PAGE_ALIGN(dma_handle + size) -
> + (dma_handle & IOMMU_PAGE_MASK)) >> IOMMU_PAGE_SHIFT);
> }
Use iommu_num_pages ?
> /* Allocates a contiguous real buffer and creates mappings over it.
> @@ -571,6 +576,7 @@ void *iommu_alloc_coherent(struct iommu_
> void *ret = NULL;
> dma_addr_t mapping;
> unsigned int npages, order;
> + unsigned int nio_pages, io_order;
> struct page *page;
>
> size = PAGE_ALIGN(size);
> @@ -598,8 +604,10 @@ void *iommu_alloc_coherent(struct iommu_
> memset(ret, 0, size);
>
> /* Set up tces to cover the allocated range */
> - mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL,
> - mask >> PAGE_SHIFT, order);
> + nio_pages = size >> IOMMU_PAGE_SHIFT;
> + io_order = get_iommu_order(size);
Not directly related to your patch, but do we really have this
requirement of allocating with an alignement of the order of the
alloation size here ?
Also, your code assumes that PAGE_SHIFT >= IOMMU_PAGE_SHIFT... it might
be useful to not have this restriction in fact. Just maybe use
iommu_num_pages(size) instead of size >> IOMMU_PAGE_SHIFT here ...
> + mapping = iommu_alloc(tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
> + mask >> IOMMU_PAGE_SHIFT, io_order);
> if (mapping == DMA_ERROR_CODE) {
> free_pages((unsigned long)ret, order);
> return NULL;
> @@ -614,9 +622,10 @@ void iommu_free_coherent(struct iommu_ta
> unsigned int npages;
>
> if (tbl) {
> - size = PAGE_ALIGN(size);
> - npages = size >> PAGE_SHIFT;
> + size = IOMMU_PAGE_ALIGN(size);
> + npages = size >> IOMMU_PAGE_SHIFT;
>
> iommu_free(tbl, dma_handle, npages);
> + size = PAGE_ALIGN(size);
> free_pages((unsigned long)vaddr, get_order(size));
> }
All those repeated alignments on size and one loses track... Are you
sure you'll always end up with the proper values ? I'd rather use
iommu_num_pages() for the iommu_free and only align size once with
PAGE_ALIGN for free_pages().
> }
> Index: linux-2.6.19-rc1-git11/include/asm-powerpc/iommu.h
> ===================================================================
> --- linux-2.6.19-rc1-git11.orig/include/asm-powerpc/iommu.h 2006-09-19 22:42:06.000000000 -0500
> +++ linux-2.6.19-rc1-git11/include/asm-powerpc/iommu.h 2006-10-23 18:28:21.000000000 -0500
> @@ -23,16 +23,41 @@
> #ifdef __KERNEL__
>
> #include <asm/types.h>
> +#include <linux/compiler.h>
> #include <linux/spinlock.h>
> #include <linux/device.h>
> #include <linux/dma-mapping.h>
>
> +#define IOMMU_PAGE_SHIFT 12
> +#define IOMMU_PAGE_SIZE (ASM_CONST(1) << IOMMU_PAGE_SHIFT)
> +#define IOMMU_PAGE_MASK (~((1 << IOMMU_PAGE_SHIFT) - 1))
> +#define IOMMU_PAGE_ALIGN(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE)
> +
> +#ifndef __ASSEMBLY__
> +
> +/* Pure 2^n version of get_order */
> +static __inline__ __attribute_const__ int get_iommu_order(unsigned long size)
> +{
> + int order;
> +
> + size = (size - 1) >> (IOMMU_PAGE_SHIFT - 1);
> + order = -1;
> + do {
> + size >>= 1;
> + order++;
> + } while (size);
> + return order;
> +}
> +
> +#endif /* __ASSEMBLY__ */
Use cntlzw... or ilog2, which would give something like :
return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT) + 1;
I think Dave Howells has patches fixing get_order generically to be
implemented in terms of ilog2. (I just discovered that on ppc64, we
didn't use cntlzw/ilog2 for it, sucks, but let's wait for Dave patches
rather than fixing it ourselves now).
> +
> /*
> * IOMAP_MAX_ORDER defines the largest contiguous block
> * of dma space we can get. IOMAP_MAX_ORDER = 13
> * allows up to 2**12 pages (4096 * 4096) = 16 MB
> */
> -#define IOMAP_MAX_ORDER 13
> +#define IOMAP_MAX_ORDER (25 - PAGE_SHIFT)
I'm not completely sure we want the above to be defined as a function of
the page shift...
> struct iommu_table {
> unsigned long it_busno; /* Bus number this table belongs to */
> Index: linux-2.6.19-rc1-git11/include/asm-powerpc/tce.h
> ===================================================================
> --- linux-2.6.19-rc1-git11.orig/include/asm-powerpc/tce.h 2006-09-19 22:42:06.000000000 -0500
> +++ linux-2.6.19-rc1-git11/include/asm-powerpc/tce.h 2006-10-23 15:46:57.000000000 -0500
> @@ -22,6 +22,8 @@
> #define _ASM_POWERPC_TCE_H
> #ifdef __KERNEL__
>
> +#include <asm/iommu.h>
> +
> /*
> * Tces come in two formats, one for the virtual bus and a different
> * format for PCI
> @@ -33,7 +35,7 @@
>
> #define TCE_SHIFT 12
> #define TCE_PAGE_SIZE (1 << TCE_SHIFT)
> -#define TCE_PAGE_FACTOR (PAGE_SHIFT - TCE_SHIFT)
> +#define TCE_PAGE_FACTOR (IOMMU_PAGE_SHIFT - TCE_SHIFT)
>
> #define TCE_ENTRY_SIZE 8 /* each TCE is 64 bits */
We can actually remove TCE_PAGE_FACTOR and code using it completely.
With your patch, TCE_SHIFT == IOMMU_PAGE_SHIFT, and in a second step, we
can start having the iommu_page_shift be a member of iommu_table. Thus
there will no longer be any need for a correction factor in the backend.
> Index: linux-2.6.19-rc1-git11/arch/powerpc/kernel/vio.c
> ===================================================================
> --- linux-2.6.19-rc1-git11.orig/arch/powerpc/kernel/vio.c 2006-10-13 11:52:51.000000000 -0500
> +++ linux-2.6.19-rc1-git11/arch/powerpc/kernel/vio.c 2006-10-23 16:06:43.000000000 -0500
> @@ -92,9 +92,9 @@ static struct iommu_table *vio_build_iom
> &tbl->it_index, &offset, &size);
>
> /* TCE table size - measured in tce entries */
> - tbl->it_size = size >> PAGE_SHIFT;
> + tbl->it_size = size >> IOMMU_PAGE_SHIFT;
> /* offset for VIO should always be 0 */
> - tbl->it_offset = offset >> PAGE_SHIFT;
> + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
> tbl->it_busno = 0;
> tbl->it_type = TCE_VB;
>
> Index: linux-2.6.19-rc1-git11/arch/powerpc/platforms/pseries/iommu.c
> ===================================================================
> --- linux-2.6.19-rc1-git11.orig/arch/powerpc/platforms/pseries/iommu.c 2006-10-13 11:54:00.000000000 -0500
> +++ linux-2.6.19-rc1-git11/arch/powerpc/platforms/pseries/iommu.c 2006-10-23 18:42:03.000000000 -0500
> @@ -289,7 +289,7 @@ static void iommu_table_setparms(struct
> tbl->it_busno = phb->bus->number;
>
> /* Units of tce entries */
> - tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT;
> + tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT;
>
> /* Test if we are going over 2GB of DMA space */
> if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
> @@ -300,7 +300,7 @@ static void iommu_table_setparms(struct
> phb->dma_window_base_cur += phb->dma_window_size;
>
> /* Set the tce table size - measured in entries */
> - tbl->it_size = phb->dma_window_size >> PAGE_SHIFT;
> + tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT;
>
> tbl->it_index = 0;
> tbl->it_blocksize = 16;
> @@ -325,8 +325,8 @@ static void iommu_table_setparms_lpar(st
> tbl->it_base = 0;
> tbl->it_blocksize = 16;
> tbl->it_type = TCE_PCI;
> - tbl->it_offset = offset >> PAGE_SHIFT;
> - tbl->it_size = size >> PAGE_SHIFT;
> + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
> + tbl->it_size = size >> IOMMU_PAGE_SHIFT;
> }
>
> static void iommu_bus_setup_pSeries(struct pci_bus *bus)
> @@ -522,8 +522,6 @@ static void iommu_dev_setup_pSeriesLP(st
> const void *dma_window = NULL;
> struct pci_dn *pci;
>
> - DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev));
> -
> /* dev setup for LPAR is a little tricky, since the device tree might
> * contain the dma-window properties per-device and not neccesarily
> * for the bus. So we need to search upwards in the tree until we
> @@ -532,6 +530,9 @@ static void iommu_dev_setup_pSeriesLP(st
> */
> dn = pci_device_to_OF_node(dev);
>
> + DBG("iommu_dev_setup_pSeriesLP, dev %p (%s) %s\n",
> + dev, pci_name(dev), dn->full_name);
> +
> for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
> pdn = pdn->parent) {
> dma_window = get_property(pdn, "ibm,dma-window", NULL);
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev at ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
More information about the Linuxppc-dev
mailing list