[PATCH] (RFC) slaughter imalloc

Benjamin Herrenschmidt benh at kernel.crashing.org
Tue May 17 17:31:33 EST 2005


Hi John!

Can you give me your opinion on this patch? It will probably not apply
"as-is" since I did it on top of another pile of not-yet-merged patches
that touch the same files, but at this point I'm more interested in
what you think of it than in actual testing.

The idea is to get rid of imalloc. I did two things here:

 - Normal ioremaps go to the vmalloc space, like on most other archs.
Immediate benefit: a module that ioremaps its chip registers when
loaded will usually end up with the module code, data _and_ the virtual
region of the registers in the same segment, thus fewer SLB misses (see
the sketch after this list).

 - Explicit ioremap just loses all references to the imalloc stuff; it
wasn't necessary as far as I can tell. ioremap_explicit() will just
establish PTEs directly, and iounmap_explicit() will remove the PTEs
and flush the hash for the area concerned. Those PTEs currently go
above the vmalloc region, at basically the same place where the imalloc
area used to be after David's patch to kill ioremap_mm. Note that there
is lots of room left, more than we need; we could eventually extend the
vmalloc space into it, but I doubt we'll ever run out of space anyway.
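
As a quick illustration of the first point, here is a minimal sketch of
the module case (hypothetical driver, made-up register address, error
handling trimmed):

	static void __iomem *regs;

	static int __init mydrv_init(void)
	{
		/* 0xf2000000 is a made-up register base. With this
		 * patch the mapping comes from the vmalloc space and
		 * thus usually shares a segment with the module's own
		 * code and data.
		 */
		regs = ioremap(0xf2000000ul, 0x1000);
		if (regs == NULL)
			return -ENOMEM;
		writel(1, regs + 0x10);		/* poke a register */
		return 0;
	}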
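
And here is the resulting layout of the 0xD region as I see it
(addresses from the pgtable.h hunk below; the 2G PHB window matches the
old imalloc.h reservation):

	0xD000000000000000  VMALLOC_BASE: boot-time (pre mem_init_done)
	                    ioremaps are mapped bottom-up from here,
	                    bumping ioremap_bot as they go
	ioremap_bot         VMALLOC_START: normal vmalloc + ioremap space
	0xD000080000000000  VMALLOC_END = PHBS_IO_BASE: explicit PHB IO
	                    mappings, carved out by reserve_phb_iospace()
	PHBS_IO_BASE + 2G   PHBS_IO_END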

Of course, the _explicit() variants are slightly less robust if you
call them with crappy arguments, since the old code did some tracking
and the new one doesn't, but does that ever happen in practice? (I do
guard a bit anyway, by making sure you can't hit outside of the PHB IO
range, since that's the only legitimate use of the _explicit() calls;
see the example below.)
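
For reference, the only legitimate caller pattern for the _explicit()
stuff looks something like this (hypothetical PHB setup code;
phb_io_phys stands in for the physical base of the bridge IO window):

	/* Carve a virtual window out of the PHB IO range... */
	void __iomem *io = reserve_phb_iospace(0x100000);

	/* ... and map the (hypothetical) bridge IO space into it.
	 * Anything outside of PHBS_IO_BASE..PHBS_IO_END is now
	 * rejected with a WARN_ON by __ioremap_explicit().
	 */
	if (__ioremap_explicit(phb_io_phys, (unsigned long)io,
			       0x100000, _PAGE_NO_CACHE | _PAGE_GUARDED))
		printk(KERN_ERR "PHB IO mapping failed\n");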

Finally, for clarity, I moved all the ioremap-related code out of
init.c and into a new file, ioremap.c.

Index: linux-work/include/asm-ppc64/pgtable.h
===================================================================
--- linux-work.orig/include/asm-ppc64/pgtable.h	2005-05-16 15:42:58.000000000 +1000
+++ linux-work/include/asm-ppc64/pgtable.h	2005-05-17 17:02:02.000000000 +1000
@@ -65,10 +65,15 @@
 /*
  * Define the address range of the vmalloc VM area.
  */
-#define VMALLOC_START (0xD000000000000000ul)
-#define VMALLOC_SIZE  (0x80000000000UL)
-#define VMALLOC_END   (VMALLOC_START + VMALLOC_SIZE)
-
+#ifndef __ASSEMBLY__
+extern unsigned long ioremap_bot;
+#define VMALLOC_BASE	(0xD000000000000000ul)
+#define VMALLOC_START	(ioremap_bot)
+#define VMALLOC_END	(VMALLOC_BASE + 0x80000000000ul)
+#define VMALLOC_SIZE	(VMALLOC_END - VMALLOC_START)
+#define PHBS_IO_BASE  	(VMALLOC_END)
+#define PHBS_IO_END	(PHBS_IO_BASE + 0x80000000ul)	/* 2G for PHBs */
+#endif /* __ASSEMBLY__ */
 /*
  * Bits in a linux-style PTE.  These match the bits in the
  * (hardware-defined) PowerPC PTE as closely as possible.
Index: linux-work/arch/ppc64/mm/imalloc.c
===================================================================
--- linux-work.orig/arch/ppc64/mm/imalloc.c	2005-05-16 15:42:57.000000000 +1000
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,317 +0,0 @@
-/*
- * c 2001 PPC 64 Team, IBM Corp
- * 
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/semaphore.h>
-#include <asm/imalloc.h>
-#include <asm/cacheflush.h>
-
-static DECLARE_MUTEX(imlist_sem);
-struct vm_struct * imlist = NULL;
-
-static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
-{
-	unsigned long addr;
-	struct vm_struct **p, *tmp;
-
-	addr = ioremap_bot;
-	for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
-		if (size + addr < (unsigned long) tmp->addr)
-			break;
-		if ((unsigned long)tmp->addr >= ioremap_bot)
-			addr = tmp->size + (unsigned long) tmp->addr;
-		if (addr >= IMALLOC_END-size)
-			return 1;
-	}
-	*im_addr = addr;
-
-	return 0;
-}
-
-/* Return whether the region described by v_addr and size is a subset
- * of the region described by parent
- */
-static inline int im_region_is_subset(unsigned long v_addr, unsigned long size,
-			struct vm_struct *parent)
-{
-	return (int) (v_addr >= (unsigned long) parent->addr &&
-	              v_addr < (unsigned long) parent->addr + parent->size &&
-	    	      size < parent->size);
-}
-
-/* Return whether the region described by v_addr and size is a superset
- * of the region described by child
- */
-static int im_region_is_superset(unsigned long v_addr, unsigned long size,
-		struct vm_struct *child)
-{
-	struct vm_struct parent;
-
-	parent.addr = (void *) v_addr;
-	parent.size = size;
-
-	return im_region_is_subset((unsigned long) child->addr, child->size,
-			&parent);
-}
-
-/* Return whether the region described by v_addr and size overlaps
- * the region described by vm.  Overlapping regions meet the
- * following conditions:
- * 1) The regions share some part of the address space
- * 2) The regions aren't identical
- * 3) Neither region is a subset of the other
- */
-static int im_region_overlaps(unsigned long v_addr, unsigned long size,
-		     struct vm_struct *vm)
-{
-	if (im_region_is_superset(v_addr, size, vm))
-		return 0;
-
-	return (v_addr + size > (unsigned long) vm->addr + vm->size &&
-		v_addr < (unsigned long) vm->addr + vm->size) ||
-	       (v_addr < (unsigned long) vm->addr &&
-		v_addr + size > (unsigned long) vm->addr);
-}
-
-/* Determine imalloc status of region described by v_addr and size.
- * Can return one of the following:
- * IM_REGION_UNUSED   -  Entire region is unallocated in imalloc space.
- * IM_REGION_SUBSET -    Region is a subset of a region that is already
- * 			 allocated in imalloc space.
- * 		         vm will be assigned to a ptr to the parent region.
- * IM_REGION_EXISTS -    Exact region already allocated in imalloc space.
- *                       vm will be assigned to a ptr to the existing imlist
- *                       member.
- * IM_REGION_OVERLAPS -  Region overlaps an allocated region in imalloc space.
- * IM_REGION_SUPERSET -  Region is a superset of a region that is already
- *                       allocated in imalloc space.
- */
-static int im_region_status(unsigned long v_addr, unsigned long size,
-		    struct vm_struct **vm)
-{
-	struct vm_struct *tmp;
-
-	for (tmp = imlist; tmp; tmp = tmp->next)
-		if (v_addr < (unsigned long) tmp->addr + tmp->size)
-			break;
-
-	if (tmp) {
-		if (im_region_overlaps(v_addr, size, tmp))
-			return IM_REGION_OVERLAP;
-
-		*vm = tmp;
-		if (im_region_is_subset(v_addr, size, tmp)) {
-			/* Return with tmp pointing to superset */
-			return IM_REGION_SUBSET;
-		}
-		if (im_region_is_superset(v_addr, size, tmp)) {
-			/* Return with tmp pointing to first subset */
-			return IM_REGION_SUPERSET;
-		}
-		else if (v_addr == (unsigned long) tmp->addr &&
-		 	 size == tmp->size) {
-			/* Return with tmp pointing to exact region */
-			return IM_REGION_EXISTS;
-		}
-	}
-
-	*vm = NULL;
-	return IM_REGION_UNUSED;
-}
-
-static struct vm_struct * split_im_region(unsigned long v_addr, 
-		unsigned long size, struct vm_struct *parent)
-{
-	struct vm_struct *vm1 = NULL;
-	struct vm_struct *vm2 = NULL;
-	struct vm_struct *new_vm = NULL;
-	
-	vm1 = (struct vm_struct *) kmalloc(sizeof(*vm1), GFP_KERNEL);
-	if (vm1	== NULL) {
-		printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
-		return NULL;
-	}
-
-	if (v_addr == (unsigned long) parent->addr) {
-	        /* Use existing parent vm_struct to represent child, allocate
-		 * new one for the remainder of parent range
-		 */
-		vm1->size = parent->size - size;
-		vm1->addr = (void *) (v_addr + size);
-		vm1->next = parent->next;
-
-		parent->size = size;
-		parent->next = vm1;
-		new_vm = parent;
-	} else if (v_addr + size == (unsigned long) parent->addr + 
-			parent->size) {
-		/* Allocate new vm_struct to represent child, use existing
-		 * parent one for remainder of parent range
-		 */
-		vm1->size = size;
-		vm1->addr = (void *) v_addr;
-		vm1->next = parent->next;
-		new_vm = vm1;
-
-		parent->size -= size;
-		parent->next = vm1;
-	} else {
-	        /* Allocate two new vm_structs for the new child and 
-		 * uppermost remainder, and use existing parent one for the
-		 * lower remainder of parent range
-		 */
-		vm2 = (struct vm_struct *) kmalloc(sizeof(*vm2), GFP_KERNEL);
-		if (vm2 == NULL) {
-			printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
-			kfree(vm1);
-			return NULL;
-		}
-
-		vm1->size = size;
-		vm1->addr = (void *) v_addr;
-		vm1->next = vm2;
-		new_vm = vm1;
-
-		vm2->size = ((unsigned long) parent->addr + parent->size) - 
-				(v_addr + size);
-		vm2->addr = (void *) v_addr + size;
-		vm2->next = parent->next;
-
-		parent->size = v_addr - (unsigned long) parent->addr;
-		parent->next = vm1;
-	}
-
-	return new_vm;
-}
-
-static struct vm_struct * __add_new_im_area(unsigned long req_addr, 
-					    unsigned long size)
-{
-	struct vm_struct **p, *tmp, *area;
-		
-	for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
-		if (req_addr + size <= (unsigned long)tmp->addr)
-			break;
-	}
-	
-	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
-	if (!area)
-		return NULL;
-	area->flags = 0;
-	area->addr = (void *)req_addr;
-	area->size = size;
-	area->next = *p;
-	*p = area;
-
-	return area;
-}
-
-static struct vm_struct * __im_get_area(unsigned long req_addr, 
-					unsigned long size,
-					int criteria)
-{
-	struct vm_struct *tmp;
-	int status;
-
-	status = im_region_status(req_addr, size, &tmp);
-	if ((criteria & status) == 0) {
-		return NULL;
-	}
-	
-	switch (status) {
-	case IM_REGION_UNUSED:
-		tmp = __add_new_im_area(req_addr, size);
-		break;
-	case IM_REGION_SUBSET:
-		tmp = split_im_region(req_addr, size, tmp);
-		break;
-	case IM_REGION_EXISTS:
-		/* Return requested region */
-		break;
-	case IM_REGION_SUPERSET:
-		/* Return first existing subset of requested region */
-		break;
-	default:
-		printk(KERN_ERR "%s() unexpected imalloc region status\n",
-				__FUNCTION__);
-		tmp = NULL;
-	}
-
-	return tmp;
-}
-
-struct vm_struct * im_get_free_area(unsigned long size)
-{
-	struct vm_struct *area;
-	unsigned long addr;
-	
-	down(&imlist_sem);
-	if (get_free_im_addr(size, &addr)) {
-		printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n",
-				__FUNCTION__, size);
-		area = NULL;
-		goto next_im_done;
-	}
-
-	area = __im_get_area(addr, size, IM_REGION_UNUSED);
-	if (area == NULL) {
-		printk(KERN_ERR 
-		       "%s() cannot obtain area for addr 0x%lx size 0x%lx\n",
-			__FUNCTION__, addr, size);
-	}
-next_im_done:
-	up(&imlist_sem);
-	return area;
-}
-
-struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
-		int criteria)
-{
-	struct vm_struct *area;
-
-	down(&imlist_sem);
-	area = __im_get_area(v_addr, size, criteria);
-	up(&imlist_sem);
-	return area;
-}
-
-void im_free(void * addr)
-{
-	struct vm_struct **p, *tmp;
-  
-	if (!addr)
-		return;
-	if ((unsigned long) addr & ~PAGE_MASK) {
-		printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__,			addr);
-		return;
-	}
-	down(&imlist_sem);
-	for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
-		if (tmp->addr == addr) {
-			*p = tmp->next;
-
-			/* XXX: do we need the lock? */
-			spin_lock(&init_mm.page_table_lock);
-			unmap_vm_area(tmp);
-			spin_unlock(&init_mm.page_table_lock);
-
-			kfree(tmp);
-			up(&imlist_sem);
-			return;
-		}
-	}
-	up(&imlist_sem);
-	printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__,
-			addr);
-}
Index: linux-work/arch/ppc64/mm/ioremap.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/ppc64/mm/ioremap.c	2005-05-17 17:15:51.000000000 +1000
@@ -0,0 +1,276 @@
+/*
+ *  ioremap & friends implementation
+ *
+ *  extracted from arch/ppc64/mm/init.c (see (c) notice in there)
+ *
+ *  Benjamin Herrenschmidt <benh at kernel.crashing.org>, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/abs_addr.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+#include <asm/vsid.h>
+#include <asm/machdep.h>
+#include <asm/io.h>
+
+unsigned long ioremap_bot = VMALLOC_BASE;
+static unsigned long phbs_io_bot = PHBS_IO_BASE;
+
+#ifdef CONFIG_PPC_ISERIES
+
+void __iomem *ioremap(unsigned long addr, unsigned long size)
+{
+	return (void __iomem *)addr;
+}
+
+extern void __iomem *__ioremap(unsigned long addr, unsigned long size,
+		       unsigned long flags)
+{
+	return (void __iomem *)addr;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+	return;
+}
+
+#else /* CONFIG_PPC_ISERIES */
+
+/*
+ * map_io_page currently only called by __ioremap
+ * map_io_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+static int map_io_page(unsigned long ea, unsigned long pa, int flags)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	unsigned long vsid;
+
+	if (mem_init_done) {
+		spin_lock(&init_mm.page_table_lock);
+		pgdp = pgd_offset_k(ea);
+		pudp = pud_alloc(&init_mm, pgdp, ea);
+		if (!pudp)
+			return -ENOMEM;
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
+		if (!pmdp)
+			return -ENOMEM;
+		ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
+		if (!ptep)
+			return -ENOMEM;
+		pa = abs_to_phys(pa);
+		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+							  __pgprot(flags)));
+		spin_unlock(&init_mm.page_table_lock);
+	} else {
+		unsigned long va, vpn, hash, hpteg;
+
+		/*
+		 * If the mm subsystem is not fully up, we cannot create a
+		 * linux page table entry for this mapping.  Simply bolt an
+		 * entry in the hardware page table.
+		 */
+		vsid = get_kernel_vsid(ea);
+		va = (vsid << 28) | (ea & 0xFFFFFFF);
+		vpn = va >> PAGE_SHIFT;
+
+		hash = hpt_hash(vpn, 0);
+
+		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+		/* Panic if a pte group is full */
+		if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
+				       _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX,
+				       1, 0) == -1) {
+			panic("map_io_page: could not insert mapping");
+		}
+	}
+	return 0;
+}
+
+
+static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
+			    unsigned long ea, unsigned long size,
+			    unsigned long flags)
+{
+	unsigned long i;
+
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= pgprot_val(PAGE_KERNEL);
+
+	for (i = 0; i < size; i += PAGE_SIZE)
+		if (map_io_page(ea+i, pa+i, flags))
+			return NULL;
+
+	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
+}
+
+
+void __iomem *
+ioremap(unsigned long addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+}
+
+void __iomem * __ioremap(unsigned long addr, unsigned long size,
+			 unsigned long flags)
+{
+	unsigned long pa, ea;
+	void __iomem *ret;
+
+	/*
+	 * Choose an address to map it to.
+	 * Once the mm subsystem is up (mem_init_done), we get the
+	 * virtual space from the regular vmalloc allocator via
+	 * get_vm_area().  Before that, we map using addresses going
+	 * up from ioremap_bot; the vmalloc space then begins right
+	 * above the last such boot-time mapping (VMALLOC_START is
+	 * ioremap_bot).
+	 */
+	pa = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - pa;
+
+	if (size == 0)
+		return NULL;
+
+	if (mem_init_done) {
+		struct vm_struct *area;
+		area = get_vm_area(size, VM_IOREMAP);
+		if (area == NULL)
+			return NULL;
+		ea = (unsigned long)(area->addr);
+		ret = __ioremap_com(addr, pa, ea, size, flags);
+		if (!ret)
+			vfree(area->addr);
+	} else {
+		ea = ioremap_bot;
+		ret = __ioremap_com(addr, pa, ea, size, flags);
+		if (ret)
+			ioremap_bot += size;
+	}
+	return ret;
+}
+
+#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
+
+int __ioremap_explicit(unsigned long pa, unsigned long ea,
+		       unsigned long size, unsigned long flags)
+{
+	void __iomem *ret;
+	
+	/* For now, require page-aligned values for pa, ea, and size */
+	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
+	    !IS_PAGE_ALIGNED(size)) {
+		printk(KERN_ERR	"unaligned value in %s\n", __FUNCTION__);
+		WARN_ON(1);
+		return 1;
+	}
+	if ((ea < PHBS_IO_BASE) || ((ea + size) > PHBS_IO_END)) {
+		printk(KERN_ERR	"out of bounds value in %s\n", __FUNCTION__);
+		WARN_ON(1);
+		return 1;
+	}
+	
+	/* No record is kept of explicit maps for now */
+	ret = __ioremap_com(pa, pa, ea, size, flags);
+	if (ret == NULL) {
+		printk(KERN_ERR "__ioremap_explicit() allocation failure!\n");
+		return 1;
+	}
+	if (ret != (void *) ea) {
+		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Unmap an IO region.
+ * Mappings created before mem_init_done (e.g. by python_countermeasures())
+ * cannot be unmapped for now.
+ */
+void iounmap(volatile void __iomem *token)
+{
+	unsigned long addr;
+
+	if (!mem_init_done)
+		return;
+	
+	addr = (unsigned long __force) token & PAGE_MASK;
+	if (addr < ioremap_bot)
+		return;
+
+	vfree((void *)addr);
+}
+
+int iounmap_explicit(volatile void __iomem *start, unsigned long size)
+{
+	struct vm_struct area;
+	unsigned long addr = (unsigned long __force)start;
+	
+	if (!IS_PAGE_ALIGNED(addr) || !IS_PAGE_ALIGNED(size)) {
+		printk(KERN_ERR	"unaligned value in %s\n",
+		       __FUNCTION__);
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	if ((addr < PHBS_IO_BASE) || ((addr + size) > PHBS_IO_END)) {
+		printk(KERN_ERR	"out of bounds value in %s\n", __FUNCTION__);
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	/* We create a fake vm_struct here to please unmap_vm_area()
+	 * instead of re-implementing the 4-level page table iteration.
+	 * Everything is page aligned here so we are fine. The bit to be
+	 * careful about is that unmap_vm_area() must never end up wanting
+	 * more of the vm_struct than addr and size.
+	 */
+	memset(&area, 0, sizeof(area));
+	area.addr = (void *)((unsigned long __force) start & PAGE_MASK);
+	area.size = size;
+	unmap_vm_area(&area);
+
+	return 0;
+}
+
+#endif /* CONFIG_PPC_ISERIES */
+
+
+void __iomem * reserve_phb_iospace(unsigned long size)
+{
+	void __iomem *virt_addr;
+		
+	if (phbs_io_bot >= PHBS_IO_END) 
+		panic("reserve_phb_iospace(): phb io space overflow\n");
+			
+	virt_addr = (void __iomem *) phbs_io_bot;
+	phbs_io_bot += size;
+
+	return virt_addr;
+}
+
+
+EXPORT_SYMBOL(ioremap);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(ioremap_bot); /* poor XFS ... */
Index: linux-work/arch/ppc64/mm/Makefile
===================================================================
--- linux-work.orig/arch/ppc64/mm/Makefile	2005-05-02 10:48:08.000000000 +1000
+++ linux-work/arch/ppc64/mm/Makefile	2005-05-17 16:55:07.000000000 +1000
@@ -4,7 +4,7 @@
 
 EXTRA_CFLAGS += -mno-minimal-toc
 
-obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \
+obj-y := fault.o init.o ioremap.o hash_utils.o hash_low.o tlb.o \
 	slb_low.o slb.o stab.o mmap.o
 obj-$(CONFIG_DISCONTIGMEM) += numa.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
Index: linux-work/include/asm-ppc64/imalloc.h
===================================================================
--- linux-work.orig/include/asm-ppc64/imalloc.h	2005-05-16 15:42:57.000000000 +1000
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,26 +0,0 @@
-#ifndef _PPC64_IMALLOC_H
-#define _PPC64_IMALLOC_H
-
-/*
- * Define the address range of the imalloc VM area.
- */
-#define PHBS_IO_BASE  	  VMALLOC_END
-#define IMALLOC_BASE      (PHBS_IO_BASE + 0x80000000ul)	/* Reserve 2 gigs for PHBs */
-#define IMALLOC_END       (VMALLOC_START + PGTABLE_RANGE)
-
-
-/* imalloc region types */
-#define IM_REGION_UNUSED	0x1
-#define IM_REGION_SUBSET	0x2
-#define IM_REGION_EXISTS	0x4
-#define IM_REGION_OVERLAP	0x8
-#define IM_REGION_SUPERSET	0x10
-
-extern struct vm_struct * im_get_free_area(unsigned long size);
-extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
-				      int region_type);
-extern void im_free(void *addr);
-
-extern unsigned long ioremap_bot;
-
-#endif /* _PPC64_IMALLOC_H */
Index: linux-work/arch/ppc64/mm/init.c
===================================================================
--- linux-work.orig/arch/ppc64/mm/init.c	2005-05-16 15:42:58.000000000 +1000
+++ linux-work/arch/ppc64/mm/init.c	2005-05-17 17:03:29.000000000 +1000
@@ -31,7 +31,6 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/stddef.h>
-#include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/bootmem.h>
@@ -40,7 +39,6 @@
 #include <linux/nodemask.h>
 #include <linux/module.h>
 
-#include <asm/pgalloc.h>
 #include <asm/page.h>
 #include <asm/abs_addr.h>
 #include <asm/prom.h>
@@ -65,11 +63,8 @@
 #include <asm/iommu.h>
 #include <asm/abs_addr.h>
 #include <asm/vdso.h>
-#include <asm/imalloc.h>
 
 int mem_init_done;
-unsigned long ioremap_bot = IMALLOC_BASE;
-static unsigned long phbs_io_bot = PHBS_IO_BASE;
 
 extern pgd_t swapper_pg_dir[];
 extern struct task_struct *current_set[NR_CPUS];
@@ -115,271 +110,6 @@
 	printk("%ld pages swap cached\n", cached);
 }
 
-#ifdef CONFIG_PPC_ISERIES
-
-void __iomem *ioremap(unsigned long addr, unsigned long size)
-{
-	return (void __iomem *)addr;
-}
-
-extern void __iomem *__ioremap(unsigned long addr, unsigned long size,
-		       unsigned long flags)
-{
-	return (void __iomem *)addr;
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-	return;
-}
-
-#else
-
-/*
- * map_io_page currently only called by __ioremap
- * map_io_page adds an entry to the ioremap page table
- * and adds an entry to the HPT, possibly bolting it
- */
-static int map_io_page(unsigned long ea, unsigned long pa, int flags)
-{
-	pgd_t *pgdp;
-	pud_t *pudp;
-	pmd_t *pmdp;
-	pte_t *ptep;
-	unsigned long vsid;
-
-	if (mem_init_done) {
-		spin_lock(&init_mm.page_table_lock);
-		pgdp = pgd_offset_k(ea);
-		pudp = pud_alloc(&init_mm, pgdp, ea);
-		if (!pudp)
-			return -ENOMEM;
-		pmdp = pmd_alloc(&init_mm, pudp, ea);
-		if (!pmdp)
-			return -ENOMEM;
-		ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
-		if (!ptep)
-			return -ENOMEM;
-		pa = abs_to_phys(pa);
-		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
-							  __pgprot(flags)));
-		spin_unlock(&init_mm.page_table_lock);
-	} else {
-		unsigned long va, vpn, hash, hpteg;
-
-		/*
-		 * If the mm subsystem is not fully up, we cannot create a
-		 * linux page table entry for this mapping.  Simply bolt an
-		 * entry in the hardware page table.
-		 */
-		vsid = get_kernel_vsid(ea);
-		va = (vsid << 28) | (ea & 0xFFFFFFF);
-		vpn = va >> PAGE_SHIFT;
-
-		hash = hpt_hash(vpn, 0);
-
-		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
-
-		/* Panic if a pte grpup is full */
-		if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
-				       _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX,
-				       1, 0) == -1) {
-			panic("map_io_page: could not insert mapping");
-		}
-	}
-	return 0;
-}
-
-
-static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
-			    unsigned long ea, unsigned long size,
-			    unsigned long flags)
-{
-	unsigned long i;
-
-	if ((flags & _PAGE_PRESENT) == 0)
-		flags |= pgprot_val(PAGE_KERNEL);
-
-	for (i = 0; i < size; i += PAGE_SIZE)
-		if (map_io_page(ea+i, pa+i, flags))
-			return NULL;
-
-	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
-}
-
-
-void __iomem *
-ioremap(unsigned long addr, unsigned long size)
-{
-	return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
-}
-
-void __iomem * __ioremap(unsigned long addr, unsigned long size,
-			 unsigned long flags)
-{
-	unsigned long pa, ea;
-	void __iomem *ret;
-
-	/*
-	 * Choose an address to map it to.
-	 * Once the imalloc system is running, we use it.
-	 * Before that, we map using addresses going
-	 * up from ioremap_bot.  imalloc will use
-	 * the addresses from ioremap_bot through
-	 * IMALLOC_END
-	 * 
-	 */
-	pa = addr & PAGE_MASK;
-	size = PAGE_ALIGN(addr + size) - pa;
-
-	if (size == 0)
-		return NULL;
-
-	if (mem_init_done) {
-		struct vm_struct *area;
-		area = im_get_free_area(size);
-		if (area == NULL)
-			return NULL;
-		ea = (unsigned long)(area->addr);
-		ret = __ioremap_com(addr, pa, ea, size, flags);
-		if (!ret)
-			im_free(area->addr);
-	} else {
-		ea = ioremap_bot;
-		ret = __ioremap_com(addr, pa, ea, size, flags);
-		if (ret)
-			ioremap_bot += size;
-	}
-	return ret;
-}
-
-#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
-
-int __ioremap_explicit(unsigned long pa, unsigned long ea,
-		       unsigned long size, unsigned long flags)
-{
-	struct vm_struct *area;
-	void __iomem *ret;
-	
-	/* For now, require page-aligned values for pa, ea, and size */
-	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
-	    !IS_PAGE_ALIGNED(size)) {
-		printk(KERN_ERR	"unaligned value in %s\n", __FUNCTION__);
-		return 1;
-	}
-	
-	if (!mem_init_done) {
-		/* Two things to consider in this case:
-		 * 1) No records will be kept (imalloc, etc) that the region
-		 *    has been remapped
-		 * 2) It won't be easy to iounmap() the region later (because
-		 *    of 1)
-		 */
-		;
-	} else {
-		area = im_get_area(ea, size,
-			IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
-		if (area == NULL) {
-			/* Expected when PHB-dlpar is in play */
-			return 1;
-		}
-		if (ea != (unsigned long) area->addr) {
-			printk(KERN_ERR "unexpected addr return from "
-			       "im_get_area\n");
-			return 1;
-		}
-	}
-	
-	ret = __ioremap_com(pa, pa, ea, size, flags);
-	if (ret == NULL) {
-		printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
-		return 1;
-	}
-	if (ret != (void *) ea) {
-		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
-		return 1;
-	}
-
-	return 0;
-}
-
-/*  
- * Unmap an IO region and remove it from imalloc'd list.
- * Access to IO memory should be serialized by driver.
- * This code is modeled after vmalloc code - unmap_vm_area()
- *
- * XXX	what about calls before mem_init_done (ie python_countermeasures())
- */
-void iounmap(volatile void __iomem *token)
-{
-	void *addr;
-
-	if (!mem_init_done)
-		return;
-	
-	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
-
-	im_free(addr);
-}
-
-static int iounmap_subset_regions(unsigned long addr, unsigned long size)
-{
-	struct vm_struct *area;
-
-	/* Check whether subsets of this region exist */
-	area = im_get_area(addr, size, IM_REGION_SUPERSET);
-	if (area == NULL)
-		return 1;
-
-	while (area) {
-		iounmap((void __iomem *) area->addr);
-		area = im_get_area(addr, size,
-				IM_REGION_SUPERSET);
-	}
-
-	return 0;
-}
-
-int iounmap_explicit(volatile void __iomem *start, unsigned long size)
-{
-	struct vm_struct *area;
-	unsigned long addr;
-	int rc;
-	
-	addr = (unsigned long __force) start & PAGE_MASK;
-
-	/* Verify that the region either exists or is a subset of an existing
-	 * region.  In the latter case, split the parent region to create 
-	 * the exact region 
-	 */
-	area = im_get_area(addr, size, 
-			    IM_REGION_EXISTS | IM_REGION_SUBSET);
-	if (area == NULL) {
-		/* Determine whether subset regions exist.  If so, unmap */
-		rc = iounmap_subset_regions(addr, size);
-		if (rc) {
-			printk(KERN_ERR
-			       "%s() cannot unmap nonexistent range 0x%lx\n",
- 				__FUNCTION__, addr);
-			return 1;
-		}
-	} else {
-		iounmap((void __iomem *) area->addr);
-	}
-	/*
-	 * FIXME! This can't be right:
-	iounmap(area->addr);
-	 * Maybe it should be "iounmap(area);"
-	 */
-	return 0;
-}
-
-#endif
-
-EXPORT_SYMBOL(ioremap);
-EXPORT_SYMBOL(__ioremap);
-EXPORT_SYMBOL(iounmap);
-
 void free_initmem(void)
 {
 	unsigned long addr;
@@ -795,19 +525,6 @@
 	local_irq_restore(flags);
 }
 
-void __iomem * reserve_phb_iospace(unsigned long size)
-{
-	void __iomem *virt_addr;
-		
-	if (phbs_io_bot >= IMALLOC_BASE) 
-		panic("reserve_phb_iospace(): phb io space overflow\n");
-			
-	virt_addr = (void __iomem *) phbs_io_bot;
-	phbs_io_bot += size;
-
-	return virt_addr;
-}
-
 kmem_cache_t *pmd_cache;
 
 void pgtable_cache_init(void)