TASK_UNMAPPED_BASE

Anton Blanchard <anton@samba.org>
Fri Feb 6 04:06:57 EST 2004


> The problem with a TASK_UNMAPPED_BASE that was "too low" was referring to
> the bug where we always loaded ld.so at TASK_UNMAPPED_BASE even though
> that address was in the middle of the bss.  Now that has been fixed, "too low"
> isn't a concern anymore.
>
> However, I'm not sure moving the TASK_UNMAPPED_BASE below the text section
> will work.  It's used for more than just loading shared libs.  Anonymous mmap
> areas and the heap are all located relative to it.

That reminded me of a patch wli did to rearrange how the user address
space is laid out. I wonder whether something as radical as this would
work on ppc32.
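
For reference, the bottom-up behaviour being replaced is easy to observe
from userspace. A minimal sketch (illustrative only, not part of the
patch; the printed addresses depend on the kernel's policy):

/* layout.c: show where the heap and an anonymous mapping land
 * relative to the program text under the bottom-up policy.
 * Build: gcc -o layout layout.c
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	void *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	printf("text (main)    : %p\n", (void *)main);
	printf("heap (sbrk(0)) : %p\n", sbrk(0));
	printf("anonymous mmap : %p\n", map);
	/* A bottom-up i386 kernel places the anonymous mapping at or
	 * above TASK_UNMAPPED_BASE (1GB, 0x40000000), well above the
	 * heap; a top-down kernel would place it near TASK_SIZE. */
	return 0;
}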

Anton

diff -purN -X /home/mbligh/.diff.exclude 400-less_bouncy/arch/i386/Kconfig 410-topdown/arch/i386/Kconfig
--- 400-less_bouncy/arch/i386/Kconfig	2003-11-24 16:36:16.000000000 -0800
+++ 410-topdown/arch/i386/Kconfig	2003-11-25 14:24:37.000000000 -0800
@@ -1542,6 +1542,15 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.

+config MMAP_TOPDOWN
+	bool "Top-down vma allocation"
+	help
+	  Say Y here to have the kernel change its vma allocation policy
+	  to allocate vma's from the top of the address space down, and
+	  to shove the stack low so as to conserve virtualspace. This is
+	  risky because various apps, including a number of versions of
+	  ld.so, depend on the kernel's bottom-up behavior.
+
 config X86_EXTRA_IRQS
 	bool
 	depends on X86_LOCAL_APIC || X86_VOYAGER
diff -purN -X /home/mbligh/.diff.exclude 400-less_bouncy/arch/i386/mm/pgtable.c 410-topdown/arch/i386/mm/pgtable.c
--- 400-less_bouncy/arch/i386/mm/pgtable.c	2003-10-01 11:34:29.000000000 -0700
+++ 410-topdown/arch/i386/mm/pgtable.c	2003-11-25 14:24:37.000000000 -0800
@@ -237,3 +237,60 @@ void pgd_free(pgd_t *pgd)
 	/* in the non-PAE case, clear_page_tables() clears user pgd entries */
 	kmem_cache_free(pgd_cache, pgd);
 }
+
+#define GLIBC_BUFFER	(32*1024*1024)
+
+/*
+ * This is total crap; it needs to use the free area cache to mitigate
+ * catastrophic O(n) search with many vmas.
+ */
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+					unsigned long len, unsigned long pgoff,
+					unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev;
+
+	len = PAGE_ALIGN(len);
+	addr = PAGE_ALIGN(addr);
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (addr) {
+		struct vm_area_struct *vma;
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+				(!vma || addr + len <= vma->vm_start))
+			goto out;
+	}
+
+	if (!mm->mmap) {
+		if (len > TASK_SIZE - GLIBC_BUFFER)
+			addr = TASK_SIZE - len;
+		else
+			addr = TASK_SIZE - GLIBC_BUFFER - len;
+		goto out;
+	}
+
+	addr = -ENOMEM;
+	for (prev = NULL, vma = mm->mmap; vma; prev = vma, vma = vma->vm_next) {
+		unsigned long lo, hi;
+		lo = prev ? prev->vm_end : 0;
+		hi = vma->vm_start;
+		if (hi - lo >= len && (addr == -ENOMEM || addr < hi - len))
+			addr = hi - len;
+	}
+	/*
+	 * We're at the last one; let's try the top, but only if nothing
+	 * else can be found (to respect GLIBC_BUFFER).
+	 */
+	if (prev && TASK_SIZE - prev->vm_end >= len) {
+		if (TASK_SIZE - GLIBC_BUFFER - prev->vm_end >= len)
+			addr = TASK_SIZE - GLIBC_BUFFER - len;
+		else if (addr == -ENOMEM)
+			addr = TASK_SIZE - len;
+	}
+out:
+	return addr;
+}
diff -purN -X /home/mbligh/.diff.exclude 400-less_bouncy/fs/binfmt_elf.c 410-topdown/fs/binfmt_elf.c
--- 400-less_bouncy/fs/binfmt_elf.c	2003-11-24 16:34:48.000000000 -0800
+++ 410-topdown/fs/binfmt_elf.c	2003-11-25 14:24:37.000000000 -0800
@@ -7,6 +7,7 @@
  * Tools".
  *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
+ * Top-down vma allocation support, William Irwin, IBM, 2003
  */

 #include <linux/module.h>
@@ -329,8 +330,13 @@ static unsigned long load_elf_interp(str
 	if (retval < 0)
 		goto out_close;

+#ifndef CONFIG_MMAP_TOPDOWN
 	eppnt = elf_phdata;
 	for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
+#else
+	eppnt = &elf_phdata[interp_elf_ex->e_phnum - 1];
+	for (i = interp_elf_ex->e_phnum - 1; i >= 0; --i, --eppnt) {
+#endif
 	  if (eppnt->p_type == PT_LOAD) {
 	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
 	    int elf_prot = 0;
@@ -344,7 +350,8 @@ static unsigned long load_elf_interp(str
 	    if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 	    	elf_type |= MAP_FIXED;

-	    map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
+	    map_addr = load_addr_set ? load_addr + vaddr : 0;
+	    map_addr = elf_map(interpreter, map_addr, eppnt, elf_prot, elf_type);
 	    if (BAD_ADDR(map_addr))
 	    	goto out_close;

diff -purN -X /home/mbligh/.diff.exclude 400-less_bouncy/include/asm-i386/a.out.h 410-topdown/include/asm-i386/a.out.h
--- 400-less_bouncy/include/asm-i386/a.out.h	2002-12-09 18:45:54.000000000 -0800
+++ 410-topdown/include/asm-i386/a.out.h	2003-11-25 14:24:37.000000000 -0800
@@ -19,7 +19,16 @@ struct exec

 #ifdef __KERNEL__

+/*
+ * Typical ELF load address is 0x8048000, which is 128MB + 288KB.
+ * Shoving the stack very close to it lets smaller programs fit in
+ * a single pagetable page's worth of virtualspace.
+ */
+#ifdef CONFIG_MMAP_TOPDOWN
+#define STACK_TOP	((128 << 20) + (256 << 10))
+#else
 #define STACK_TOP	TASK_SIZE
+#endif

 #endif

diff -purN -X /home/mbligh/.diff.exclude 400-less_bouncy/include/asm-i386/pgtable.h 410-topdown/include/asm-i386/pgtable.h
--- 400-less_bouncy/include/asm-i386/pgtable.h	2003-10-14 15:50:32.000000000 -0700
+++ 410-topdown/include/asm-i386/pgtable.h	2003-11-25 14:24:37.000000000 -0800
@@ -25,6 +25,10 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>

+#ifdef CONFIG_MMAP_TOPDOWN
+#define HAVE_ARCH_UNMAPPED_AREA
+#endif
+
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..

