vDSO preliminary implementation

Benjamin Herrenschmidt benh at kernel.crashing.org
Thu Aug 26 15:03:14 EST 2004


Hi !

Here's a first shot at implementing a vDSO for ppc32/ppc64. This is definitely
not final as you can see, the enclosed implementation doesn't provide anything
useful for userland to link against, so I didn't pass down any ELF AT_* entry
yet telling ld.so about the vDSO at this point.

What this implementation contains however is the signal trampoline being moved
to the vDSO area, thus no longer on the stack, so people working on
non-executable stacks can toy with it.

The kernel side should be complete +/- bugs (for example, I'm pretty sure the
vDSO Makefiles are broken for split src/obj directories, help fixing that
welcome, see comments in there). The vDSO has copy-on-write semantics so
you should be able to put breakpoints in there (untested, well, I tested that
COW worked but didn't try putting breakpoints). I also only tested signals
with 32 bits processes, though both are implemented.

What remains is implementing the various functions for use by userland and
the actual symbol table patching, for which I already have some code, it's
just not in there yet.

The patch is against a slightly old Linus bk snapshot.

Comments are welcome.

Ben.

diff -Nru a/arch/ppc64/Makefile b/arch/ppc64/Makefile
--- a/arch/ppc64/Makefile	2004-08-26 14:55:32 +10:00
+++ b/arch/ppc64/Makefile	2004-08-26 14:55:32 +10:00
@@ -43,6 +43,8 @@

 libs-y				+= arch/ppc64/lib/
 core-y				+= arch/ppc64/kernel/
+core-y				+= arch/ppc64/kernel/vdso32/
+core-y				+= arch/ppc64/kernel/vdso64/
 core-y				+= arch/ppc64/mm/
 core-$(CONFIG_XMON)		+= arch/ppc64/xmon/
 drivers-$(CONFIG_OPROFILE)	+= arch/ppc64/oprofile/
diff -Nru a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
--- a/arch/ppc64/kernel/Makefile	2004-08-26 14:55:32 +10:00
+++ b/arch/ppc64/kernel/Makefile	2004-08-26 14:55:32 +10:00
@@ -11,7 +11,7 @@
 			udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
 			ptrace32.o signal32.o rtc.o init_task.o \
 			lmb.o cputable.o cpu_setup_power4.o idle_power4.o \
-			iommu.o sysfs.o vio.o
+			iommu.o sysfs.o vio.o vdso.o

 obj-$(CONFIG_PPC_OF) +=	of_device.o

diff -Nru a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c
--- a/arch/ppc64/kernel/setup.c	2004-08-26 14:55:32 +10:00
+++ b/arch/ppc64/kernel/setup.c	2004-08-26 14:55:32 +10:00
@@ -47,6 +47,7 @@
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/rtas.h>
+#include <asm/vdso.h>

 extern unsigned long klimit;
 /* extern void *stab; */
@@ -646,6 +647,7 @@
 	ppc_md.setup_arch();

 	paging_init();
+
 	ppc64_boot_msg(0x15, "Setup Done");
 }

diff -Nru a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c
--- a/arch/ppc64/kernel/signal.c	2004-08-26 14:55:32 +10:00
+++ b/arch/ppc64/kernel/signal.c	2004-08-26 14:55:32 +10:00
@@ -34,6 +34,7 @@
 #include <asm/ppcdebug.h>
 #include <asm/unistd.h>
 #include <asm/cacheflush.h>
+#include <asm/vdso.h>

 #define DEBUG_SIG 0

@@ -412,10 +413,14 @@
 		goto badframe;

 	/* Set up to return from userspace. */
-	err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
-	if (err)
-		goto badframe;
-
+	if (vdso64_rt_sigtramp) {
+		regs->link = vdso64_rt_sigtramp;
+	} else {
+		err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
+		if (err)
+			goto badframe;
+		regs->link = (unsigned long) &frame->tramp[0];
+	}
 	funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;

 	/* Allocate a dummy caller frame for the signal handler. */
@@ -424,7 +429,6 @@

 	/* Set up "regs" so we "return" to the signal handler. */
 	err |= get_user(regs->nip, &funct_desc_ptr->entry);
-	regs->link = (unsigned long) &frame->tramp[0];
 	regs->gpr[1] = newsp;
 	err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
 	regs->gpr[3] = signr;
diff -Nru a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c
--- a/arch/ppc64/kernel/signal32.c	2004-08-26 14:55:32 +10:00
+++ b/arch/ppc64/kernel/signal32.c	2004-08-26 14:55:32 +10:00
@@ -30,6 +30,7 @@
 #include <asm/ppcdebug.h>
 #include <asm/unistd.h>
 #include <asm/cacheflush.h>
+#include <asm/vdso.h>

 #define DEBUG_SIG 0

@@ -677,18 +678,24 @@

 	/* Save user registers on the stack */
 	frame = &rt_sf->uc.uc_mcontext;
-	if (save_user_regs(regs, frame, __NR_rt_sigreturn))
-		goto badframe;
-
 	if (put_user(regs->gpr[1], (unsigned long __user *)newsp))
 		goto badframe;
+
+	if (vdso32_rt_sigtramp) {
+		if (save_user_regs(regs, frame, 0))
+			goto badframe;
+		regs->link = vdso32_rt_sigtramp;
+	} else {
+		if (save_user_regs(regs, frame, __NR_rt_sigreturn))
+			goto badframe;
+		regs->link = (unsigned long) frame->tramp;
+	}
 	regs->gpr[1] = (unsigned long) newsp;
 	regs->gpr[3] = sig;
 	regs->gpr[4] = (unsigned long) &rt_sf->info;
 	regs->gpr[5] = (unsigned long) &rt_sf->uc;
 	regs->gpr[6] = (unsigned long) rt_sf;
 	regs->nip = (unsigned long) ka->sa.sa_handler;
-	regs->link = (unsigned long) frame->tramp;
 	regs->trap = 0;
 	regs->result = 0;

@@ -844,8 +851,15 @@
 	    || __put_user(sig, &sc->signal))
 		goto badframe;

-	if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
-		goto badframe;
+	if (vdso32_sigtramp) {
+		if (save_user_regs(regs, &frame->mctx, 0))
+			goto badframe;
+		regs->link = vdso32_sigtramp;
+	} else {
+		if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
+			goto badframe;
+		regs->link = (unsigned long) frame->mctx.tramp;
+	}

 	if (put_user(regs->gpr[1], (unsigned long __user *)newsp))
 		goto badframe;
@@ -853,7 +867,6 @@
 	regs->gpr[3] = sig;
 	regs->gpr[4] = (unsigned long) sc;
 	regs->nip = (unsigned long) ka->sa.sa_handler;
-	regs->link = (unsigned long) frame->mctx.tramp;
 	regs->trap = 0;
 	regs->result = 0;

diff -Nru a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso.c	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,405 @@
+
+/*
+ *  linux/arch/ppc64/kernel/vdso.c
+ *
+ *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *			 <benh at kernel.crashing.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+
+/*
+ * The vDSOs themselves are here
+ */
+extern char vdso64_start, vdso64_end;
+extern char vdso32_start, vdso32_end;
+
+static void *vdso64_kbase = &vdso64_start;
+static void *vdso32_kbase = &vdso32_start;
+unsigned long vdso64_ubase;
+unsigned long vdso32_ubase;
+
+unsigned int vdso64_pages;
+unsigned int vdso32_pages;
+
+/* Signal trampolines user addresses */
+
+unsigned long vdso64_sigtramp;
+unsigned long vdso64_rt_sigtramp;
+unsigned long vdso32_sigtramp;
+unsigned long vdso32_rt_sigtramp;
+
+/*
+ * Some infos carried around for each of them during parsing at
+ * boot time.
+ */
+struct lib32_elfinfo
+{
+	Elf32_Ehdr	*hdr;		/* ptr to ELF header (start of the image) */
+	Elf32_Sym	*dynsym;	/* ptr to .dynsym section */
+	unsigned long	dynsymsize;	/* size of .dynsym section, in bytes */
+	char		*dynstr;	/* ptr to .dynstr section */
+	unsigned long	text;		/* offset of .text section in .so */
+};
+
+struct lib64_elfinfo
+{
+	Elf64_Ehdr	*hdr;		/* ptr to ELF header (start of the image) */
+	Elf64_Sym	*dynsym;	/* ptr to .dynsym section */
+	unsigned long	dynsymsize;	/* size of .dynsym section, in bytes */
+	char		*dynstr;	/* ptr to .dynstr section */
+	unsigned long	text;		/* offset of .text section in .so */
+};
+
+
+#ifdef __DEBUG	/* not the DEBUG switch above, that one only controls DBG() */
+static void dump_one_vdso_page(struct page *pg, struct page *upg)
+{
+	printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
+	       page_count(pg),
+	       pg->flags);	/* c: page count, f: page flags */
+	if (upg/* && pg != upg*/) {
+		printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT),
+		       page_count(upg),
+		       upg->flags);
+	}
+	printk("\n");
+}
+
+static void dump_vdso_pages(struct vm_area_struct * vma)
+{
+	int i;
+
+	if (!vma || test_thread_flag(TIF_32BIT)) {	/* no vma: dump both vDSOs */
+		printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
+		for (i=0; i<vdso32_pages; i++) {
+			struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+			struct page *upg = (vma && vma->vm_mm) ?
+				follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
+				: NULL;
+			dump_one_vdso_page(pg, upg);
+		}
+	}
+	if (!vma || !test_thread_flag(TIF_32BIT)) {
+		printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
+		for (i=0; i<vdso64_pages; i++) {
+			struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+			struct page *upg = (vma && vma->vm_mm) ?
+				follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
+				: NULL;
+			dump_one_vdso_page(pg, upg);
+		}
+	}
+}
+#endif /* __DEBUG */
+
+/*
+ * Keep a dummy vma_close for now, it will prevent VMA merging, though
+ * I wouldn't expect the stack being mergeable with our VMA due to flag
+ * differences, better be safe than sorry
+ */
+static void vdso_vma_close(struct vm_area_struct * vma)
+{
+}
+
+/*
+ * Our nopage() function, maps in the actual vDSO kernel pages (returned with
+ * an extra reference), they will be mapped read-only by do_no_page(), and
+ * eventually COW'ed, right away for a write access or by do_wp_page().
+ */
+static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
+				     unsigned long address, int *type)
+{
+	unsigned long offset = address - vma->vm_start;
+	struct page *pg;
+	void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase;
+
+	DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
+	    current->comm, address, offset);
+
+	if (address < vma->vm_start || address >= vma->vm_end)
+		return NOPAGE_SIGBUS;	/* vm_end is exclusive, so >= is outside */
+
+	pg = virt_to_page(vbase + offset);
+	get_page(pg);
+	DBG(" ->page count: %d\n", page_count(pg));
+
+	return pg;
+}
+
+static struct vm_operations_struct vdso_vmops = {
+	.close	= vdso_vma_close,	/* empty hook, see vdso_vma_close() */
+	.nopage	= vdso_vma_nopage,	/* supplies the vDSO kernel pages on fault */
+};
+
+/*
+ * Called from binfmt_elf: create the special vma for the vDSO (STACK_TOP up to
+ * TASK_SIZE) and insert it into the mm. Returns 0 or -ENOMEM; args are unused.
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long vdso_base = STACK_TOP;
+	unsigned long vdso_pages = test_thread_flag(TIF_32BIT) ?
+		vdso32_pages : vdso64_pages;
+
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (vma == NULL)
+		return -ENOMEM;
+	if (security_vm_enough_memory(vdso_pages)) {
+		kmem_cache_free(vm_area_cachep, vma);
+		return -ENOMEM;
+	}
+	memset(vma, 0, sizeof(*vma));
+
+	vma->vm_mm = mm;
+	vma->vm_start = vdso_base;
+	vma->vm_end = TASK_SIZE;
+	/*
+	 * our vma flags don't have VM_WRITE so by default, the process isn't allowed
+	 * to write those pages.
+	 * gdb can break that with ptrace interface, and thus trigger COW on those
+	 * pages but it's then your responsibility to never do that on the "data" page
+	 * of the vDSO or you'll stop getting kernel updates and your nice userland
+	 * gettimeofday will be totally dead. It's fine to use that for setting
+	 * breakpoints in the vDSO code pages though
+	 */
+	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+	vma->vm_flags |= mm->def_flags;
+	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+	vma->vm_ops = &vdso_vmops;
+
+	down_write(&mm->mmap_sem);
+	insert_vm_struct(mm, vma);
+	mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	up_write(&mm->mmap_sem);
+
+	return 0;
+}
+
+/* Find section @secname in a 32-bit ELF image; its size goes to *size if set */
+static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
+				  unsigned long *size)
+{
+	Elf32_Shdr *sechdrs = (void *)ehdr + ehdr->e_shoff;
+	char *secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+	unsigned int i;
+
+	/* Find the section they want */
+	for (i = 1; i < ehdr->e_shnum; i++) {
+		if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+			if (size)
+				*size = sechdrs[i].sh_size;
+			return (void *)ehdr + sechdrs[i].sh_offset;
+		}
+	}
+
+	/* Not found. size is optional (callers pass NULL), don't oops on it */
+	if (size)
+		*size = 0;
+	return NULL;
+}
+
+static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
+				  unsigned long *size)
+{
+	Elf64_Shdr *sechdrs;
+	unsigned int i;
+	char *secnames;
+
+	/* Grab section headers and strings so we can tell who is who */
+	sechdrs = (void *)ehdr + ehdr->e_shoff;
+	secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+	/* Find the section they want, *size is only written if size != NULL */
+	for (i = 1; i < ehdr->e_shnum; i++) {
+		if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+			if (size)
+				*size = sechdrs[i].sh_size;
+			return (void *)ehdr + sechdrs[i].sh_offset;
+		}
+	}
+	if (size)
+		*size = 0;
+	return NULL;
+}
+
+static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname)
+{
+	unsigned int i;
+
+	for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+		if (lib->dynsym[i].st_name == 0)
+			continue;	/* entry has no name to compare */
+		if (strcmp(symname, lib->dynstr + lib->dynsym[i].st_name) == 0)
+			return &lib->dynsym[i];
+	}
+	return NULL;	/* symname is not in .dynsym */
+}
+
+static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname)
+{
+	unsigned int i;
+
+	for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
+		if (lib->dynsym[i].st_name == 0)
+			continue;	/* entry has no name to compare */
+		if (strcmp(symname, lib->dynstr + lib->dynsym[i].st_name) == 0)
+			return &lib->dynsym[i];
+	}
+	return NULL;	/* symname is not in .dynsym */
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base. Returns offset + st_value, or 0 if the symbol is missing
+ */
+static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname,
+					    unsigned long offset)
+{
+	Elf32_Sym *sym = find_symbol32(lib, symname);
+
+	if (sym == NULL) {
+		printk(KERN_WARNING "vDSO32: function %s not found !\n", symname);
+		return 0;
+	}
+	return offset /*+ (unsigned long)lib->text*/ + sym->st_value;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base. Returns offset + st_value, or 0 if the symbol is missing
+ */
+static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname,
+					    unsigned long offset)
+{
+	Elf64_Sym *sym = find_symbol64(lib, symname);
+
+	if (sym == NULL) {
+		printk(KERN_WARNING "vDSO64: function %s not found !\n", symname);
+		return 0;
+	}
+	return offset /*+ (unsigned long)lib->text*/ + sym->st_value;
+}
+
+
+static __init int vdso_do_fixups(void)
+{
+	struct lib32_elfinfo	v32;
+	struct lib64_elfinfo	v64;
+	void *sect;
+
+	v32.hdr = vdso32_kbase;
+	v64.hdr = vdso64_kbase;
+
+	/*
+	 * Locate symbol tables & text section, return -1 if any one is missing
+	 */
+
+	v32.dynsym = find_section32(v32.hdr, ".dynsym", &v32.dynsymsize);
+	v32.dynstr = find_section32(v32.hdr, ".dynstr", NULL);
+	if (v32.dynsym == NULL || v32.dynstr == NULL) {
+		printk(KERN_ERR "vDSO32: a required symbol section was not found\n");
+		return -1;
+	}
+	sect = find_section32(v32.hdr, ".text", NULL);
+	if (sect == NULL) {
+		printk(KERN_ERR "vDSO32: the .text section was not found\n");
+		return -1;
+	}
+	v32.text = sect - vdso32_kbase;
+
+	v64.dynsym = find_section64(v64.hdr, ".dynsym", &v64.dynsymsize);
+	v64.dynstr = find_section64(v64.hdr, ".dynstr", NULL);
+	if (v64.dynsym == NULL || v64.dynstr == NULL) {
+		printk(KERN_ERR "vDSO64: a required symbol section was not found\n");
+		return -1;
+	}
+	sect = find_section64(v64.hdr, ".text", NULL);
+	if (sect == NULL) {
+		printk(KERN_ERR "vDSO64: the .text section was not found\n");
+		return -1;
+	}
+	v64.text = sect - vdso64_kbase;
+
+	/*
+	 * Find signal trampolines (user addresses, left at 0 when not found)
+	 */
+
+	vdso64_sigtramp		= find_function64(&v64, "_v_sigtramp64", vdso64_ubase);
+	vdso64_rt_sigtramp	= find_function64(&v64, "_v_sigtramp_rt64", vdso64_ubase);
+	vdso32_sigtramp		= find_function32(&v32, "_v_sigtramp32", vdso32_ubase);
+	vdso32_rt_sigtramp	= find_function32(&v32, "_v_sigtramp_rt32", vdso32_ubase);
+
+	return 0;
+}
+
+void __init vdso_init(void)
+{
+	int i;
+
+	vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
+	vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
+	vdso64_ubase = TASK_SIZE_USER64 - (vdso64_pages << PAGE_SHIFT);
+	vdso32_ubase = TASK_SIZE_USER32 - (vdso32_pages << PAGE_SHIFT);
+
+	DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n",
+	       vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages);
+
+	/* Do necessary fixups of vDSO symbols */
+	if (vdso_do_fixups()) {
+		printk(KERN_ERR "vDSO setup failure, not enabled !\n");
+		/* XXX should free pages here ? (the ubase values stay as computed) */
+		vdso64_pages = vdso32_pages = 0;
+		return;
+	}
+
+	/* Make sure pages are in the correct state: not reserved, one extra ref */
+	for (i = 0; i < vdso64_pages; i++) {
+		struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		get_page(pg);
+	}
+	for (i = 0; i < vdso32_pages; i++) {
+		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		get_page(pg);
+	}
+}
diff -Nru a/arch/ppc64/kernel/vdso32/Makefile b/arch/ppc64/kernel/vdso32/Makefile
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso32/Makefile	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,43 @@
+# Choose compiler
+
+CROSS32_COMPILE ?=
+
+CROSS32CC		:= $(CROSS32_COMPILE)gcc
+CROSS32AS		:= $(CROSS32_COMPILE)as
+
+# List of files in the vdso, has to be asm only for now
+
+src-vdso32 = sigtramp.S testfunc.S
+
+# Build rules
+
+obj-vdso32 := $(addsuffix .o, $(basename $(src-vdso32)))
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+src-vdso32 := $(addprefix $(src)/, $(src-vdso32))
+
+VDSO32_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib
+VDSO32_CFLAGS += -Wl,-soname=linux-vdso32.so.1
+VDSO32_AFLAGS := -D__ASSEMBLY__ -s
+
+obj-y += vdso32_wrapper.o
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32)
+	$(call if_changed,vdso32ld)
+
+# assembly rules for the .S files
+# This is probably wrong with split src & obj trees
+$(obj-vdso32): %.o: %.S
+	$(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+      cmd_vdso32ld = $(CROSS32CC) -Wp,-MD,$(depfile) $(VDSO32_CFLAGS) \
+	-Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+      cmd_vdso32as = $(CROSS32CC) -Wp,-MD,$(depfile) $(VDSO32_AFLAGS) -c -o $@ $^
+
+targets += vdso32.so
diff -Nru a/arch/ppc64/kernel/vdso32/sigtramp.S b/arch/ppc64/kernel/vdso32/sigtramp.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso32/sigtramp.S	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,15 @@
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+
+	.globl	_v_sigtramp32
+_v_sigtramp32:
+	li	r0,__NR_sigreturn
+	sc
+
+	.globl	_v_sigtramp_rt32
+_v_sigtramp_rt32:
+	li	r0,__NR_rt_sigreturn
+	sc
+
diff -Nru a/arch/ppc64/kernel/vdso32/testfunc.S b/arch/ppc64/kernel/vdso32/testfunc.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso32/testfunc.S	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,15 @@
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+
+	.globl	__v_myfunc_1
+__v_myfunc_1:
+	blr
+
+	.globl	__v_myfunc_2
+__v_myfunc_2:
+	blr
+
+	.globl	_v_func
+_v_func:
diff -Nru a/arch/ppc64/kernel/vdso32/vdso32.lds b/arch/ppc64/kernel/vdso32/vdso32.lds
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso32/vdso32.lds	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,98 @@
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc",
+	      "elf32-powerpc")
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_start)
+
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+
+  . = 0 + SIZEOF_HEADERS;
+  .hash           : { *(.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+  .text           :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+  } =0
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+
+  /* Other stuff is appended to the text segment: */
+
+  .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1        : { *(.rodata1) }
+  .eh_frame_hdr : { *(.eh_frame_hdr) }
+  .eh_frame       : { KEEP (*(.eh_frame)) }
+  .gcc_except_table   : { *(.gcc_except_table) }
+  .fixup          : { *(.fixup) }
+  .dynamic        : { *(.dynamic) }
+
+  /* Stabs debugging sections are here too, away from the
+   * data page. Not much in there at the moment
+   */
+  .stab          0 : { *(.stab) }
+  .stabstr       0 : { *(.stabstr) }
+  .stab.excl     0 : { *(.stab.excl) }
+  .stab.exclstr  0 : { *(.stab.exclstr) }
+  .stab.index    0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment       0 : { *(.comment) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging sections are relative to the beginning
+     of the section so we begin them at 0.  */
+  /* DWARF 1 */
+  .debug          0 : { *(.debug) }
+  .line           0 : { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : { *(.debug_srcinfo) }
+  .debug_sfnames  0 : { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev   0 : { *(.debug_abbrev) }
+  .debug_line     0 : { *(.debug_line) }
+  .debug_frame    0 : { *(.debug_frame) }
+  .debug_str      0 : { *(.debug_str) }
+  .debug_loc      0 : { *(.debug_loc) }
+  .debug_macinfo  0 : { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames  0 : { *(.debug_varnames) }
+
+  /* Adjust the address for the data segment.  On the vdso, we need it to
+   * be page aligned after the text. The data segment contains ONLY the
+   * .data section here, which is special in the case of the vdso as it's
+   * really read only and is kernel updated. The got stays there too
+   */
+  . = ALIGN (0x1000);
+
+  .data           :
+  {
+    *(.data .data.* .gnu.linkonce.d.*)
+  }
+  .got            : { *(.got.plt) *(.got) }
+  _edata = .;
+  PROVIDE (edata = .);
+  _end = .;
+  __end = .;
+  PROVIDE (end = .);
+
+  /DISCARD/ : { *(.note.GNU-stack) }
+  /* gas insist on generating these, bin them in here, they should be
+   * empty anyways
+   */
+  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
diff -Nru a/arch/ppc64/kernel/vdso32/vdso32_wrapper.S b/arch/ppc64/kernel/vdso32/vdso32_wrapper.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso32/vdso32_wrapper.S	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,12 @@
+#include <linux/init.h>
+
+	.section ".data"
+
+	.globl vdso32_start, vdso32_end
+	.balign 4096
+vdso32_start:
+	.incbin "arch/ppc64/kernel/vdso32/vdso32.so"
+	.balign 4096
+vdso32_end:
+
+	.previous
diff -Nru a/arch/ppc64/kernel/vdso64/Makefile b/arch/ppc64/kernel/vdso64/Makefile
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso64/Makefile	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,36 @@
+# List of files in the vdso, has to be asm only for now
+
+src-vdso64 = sigtramp.S testfunc.S
+
+# Build rules
+
+obj-vdso64 := $(addsuffix .o, $(basename $(src-vdso64)))
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+src-vdso64 := $(addprefix $(src)/, $(src-vdso64))
+
+VDSO64_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib
+VDSO64_CFLAGS += -Wl,-soname=linux-vdso64.so.1
+VDSO64_AFLAGS := -D__ASSEMBLY__ -s
+
+obj-y += vdso64_wrapper.o
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64)
+	$(call if_changed,vdso64ld)
+
+# assembly rules for the .S files
+# This is probably wrong with split src & obj trees
+$(obj-vdso64): %.o: %.S
+	$(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+      cmd_vdso64ld = $(CC) -Wp,-MD,$(depfile) $(VDSO64_CFLAGS) \
+	-Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+      cmd_vdso64as = $(CC) -Wp,-MD,$(depfile) $(VDSO64_AFLAGS) -c -o $@ $^
+
+targets += vdso64.so
diff -Nru a/arch/ppc64/kernel/vdso64/sigtramp.S b/arch/ppc64/kernel/vdso64/sigtramp.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso64/sigtramp.S	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,17 @@
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+
+	.globl	_v_sigtramp64
+_v_sigtramp64:
+	addi	r1, r1, __SIGNAL_FRAMESIZE
+	li	r0,__NR_sigreturn
+	sc
+
+	.globl	_v_sigtramp_rt64
+_v_sigtramp_rt64:
+	addi	r1, r1, __SIGNAL_FRAMESIZE
+	li	r0,__NR_rt_sigreturn
+	sc
+
diff -Nru a/arch/ppc64/kernel/vdso64/testfunc.S b/arch/ppc64/kernel/vdso64/testfunc.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso64/testfunc.S	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,15 @@
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+
+	.globl	__v_myfunc_1
+__v_myfunc_1:
+	blr
+
+	.globl	__v_myfunc_2
+__v_myfunc_2:
+	blr
+
+	.globl	_v_func
+_v_func:
diff -Nru a/arch/ppc64/kernel/vdso64/vdso64.lds b/arch/ppc64/kernel/vdso64/vdso64.lds
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso64/vdso64.lds	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,92 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc",
+	      "elf64-powerpc")
+OUTPUT_ARCH(powerpc:common64)
+ENTRY(_start)
+
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  . = 0 + SIZEOF_HEADERS;
+  .hash           : { *(.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+  .text           :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+    *(.sfpr .glink)
+  } =0x60000000
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+
+  /* Other stuff is appended to the text segment: */
+  .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1        : { *(.rodata1) }
+  .eh_frame_hdr : { *(.eh_frame_hdr) }
+  .eh_frame       : { KEEP (*(.eh_frame)) }
+  .gcc_except_table   : { *(.gcc_except_table) }
+  .dynamic        : { *(.dynamic) }
+
+  /* Stabs debugging sections are here too, away from the
+   * data page. Not much in there at the moment
+   */
+  .stab          0 : { *(.stab) }
+  .stabstr       0 : { *(.stabstr) }
+  .stab.excl     0 : { *(.stab.excl) }
+  .stab.exclstr  0 : { *(.stab.exclstr) }
+  .stab.index    0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment       0 : { *(.comment) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging sections are relative to the beginning
+     of the section so we begin them at 0.  */
+  /* DWARF 1 */
+  .debug          0 : { *(.debug) }
+  .line           0 : { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : { *(.debug_srcinfo) }
+  .debug_sfnames  0 : { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev   0 : { *(.debug_abbrev) }
+  .debug_line     0 : { *(.debug_line) }
+  .debug_frame    0 : { *(.debug_frame) }
+  .debug_str      0 : { *(.debug_str) }
+  .debug_loc      0 : { *(.debug_loc) }
+  .debug_macinfo  0 : { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames  0 : { *(.debug_varnames) }
+
+  /* Adjust the address for the data segment.  On the vdso, we need it to
+   * be page aligned after the text. The data segment contains ONLY the
+   * .data section here, which is special in the case of the vdso as it's
+   * really read only and is kernel updated. The got stays there too
+   */
+  . = ALIGN (0x1000);
+
+  .data           :
+  {
+    *(.data .data.* .gnu.linkonce.d.*)
+  }
+  .got		ALIGN(8) : { *(.got .toc) }
+  _edata = .;
+  PROVIDE (edata = .);
+  _end = .;
+  PROVIDE (end = .);
+
+  /DISCARD/ : { *(.note.GNU-stack) }
+  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
diff -Nru a/arch/ppc64/kernel/vdso64/vdso64_wrapper.S b/arch/ppc64/kernel/vdso64/vdso64_wrapper.S
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ppc64/kernel/vdso64/vdso64_wrapper.S	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,12 @@
+#include <linux/init.h>
+
+	.section ".data"
+
+	.globl vdso64_start, vdso64_end
+	.balign 4096
+vdso64_start:
+	.incbin "arch/ppc64/kernel/vdso64/vdso64.so"
+	.balign 4096
+vdso64_end:
+
+	.previous
diff -Nru a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
--- a/arch/ppc64/mm/init.c	2004-08-26 14:55:32 +10:00
+++ b/arch/ppc64/mm/init.c	2004-08-26 14:55:32 +10:00
@@ -61,6 +61,7 @@
 #include <asm/system.h>
 #include <asm/iommu.h>
 #include <asm/abs_addr.h>
+#include <asm/vdso.h>


 struct mmu_context_queue_t mmu_context_queue;
@@ -680,6 +681,8 @@
 #ifdef CONFIG_PPC_ISERIES
 	iommu_vio_init();
 #endif
+	/* Initialize the vDSO */
+	vdso_init();
 }

 /*
diff -Nru a/fs/binfmt_elf.c b/fs/binfmt_elf.c
--- a/fs/binfmt_elf.c	2004-08-26 14:55:32 +10:00
+++ b/fs/binfmt_elf.c	2004-08-26 14:55:32 +10:00
@@ -713,6 +713,14 @@
 		goto out_free_dentry;
 	}

+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	retval = arch_setup_additional_pages(bprm, executable_stack);
+	if (retval < 0) {
+		send_sig(SIGKILL, current, 0);
+		goto out_free_dentry;
+	}
+#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
+
 	current->mm->start_stack = bprm->p;

 	/* Now we do a little grungy work by mmaping the ELF image into
diff -Nru a/include/asm-ppc64/a.out.h b/include/asm-ppc64/a.out.h
--- a/include/asm-ppc64/a.out.h	2004-08-26 14:55:32 +10:00
+++ b/include/asm-ppc64/a.out.h	2004-08-26 14:55:32 +10:00
@@ -2,6 +2,7 @@
 #define __PPC64_A_OUT_H__

 #include <asm/ppcdebug.h>
+#include <asm/vdso.h>

 /*
  * c 2001 PPC 64 Team, IBM Corp
@@ -30,14 +31,11 @@

 #ifdef __KERNEL__

-#define STACK_TOP_USER64 (TASK_SIZE_USER64)
+#define STACK_TOP_USER64 (vdso64_ubase)
+#define STACK_TOP_USER32 (vdso32_ubase)

-/* Give 32-bit user space a full 4G address space to live in. */
-#define STACK_TOP_USER32 (TASK_SIZE_USER32)
-
-#define STACK_TOP ((test_thread_flag(TIF_32BIT) || \
-		(ppcdebugset(PPCDBG_BINFMT_32ADDR))) ? \
-		STACK_TOP_USER32 : STACK_TOP_USER64)
+#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \
+		   STACK_TOP_USER32 : STACK_TOP_USER64)

 #endif /* __KERNEL__ */

diff -Nru a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h
--- a/include/asm-ppc64/processor.h	2004-08-26 14:55:32 +10:00
+++ b/include/asm-ppc64/processor.h	2004-08-26 14:55:32 +10:00
@@ -526,8 +526,8 @@
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(STACK_TOP_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(STACK_TOP_USER64 / 4))
+#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4))

 #define TASK_UNMAPPED_BASE ((test_thread_flag(TIF_32BIT)||(ppcdebugset(PPCDBG_BINFMT_32ADDR))) ? \
 		TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
diff -Nru a/include/asm-ppc64/vdso.h b/include/asm-ppc64/vdso.h
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/include/asm-ppc64/vdso.h	2004-08-26 14:55:32 +10:00
@@ -0,0 +1,25 @@
+#ifndef __PPC64_VDSO_H__
+#define __PPC64_VDSO_H__
+
+#ifdef __KERNEL__
+
+extern unsigned int vdso64_pages;
+extern unsigned int vdso32_pages;
+
+extern unsigned long vdso64_ubase;
+extern unsigned long vdso32_ubase;
+
+extern unsigned long vdso64_sigtramp;
+extern unsigned long vdso64_rt_sigtramp;
+extern unsigned long vdso32_sigtramp;
+extern unsigned long vdso32_rt_sigtramp;
+
+extern void vdso_init(void);
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack);
+
+#endif /* __KERNEL__ */
+
+#endif /* __PPC64_VDSO_H__ */


** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/





More information about the Linuxppc64-dev mailing list