vDSO : Second shot & ready for glibc help

Benjamin Herrenschmidt benh at kernel.crashing.org
Mon Aug 30 14:10:14 EST 2004


Hi !

Here's a patch against Linus bk from last friday that implements the
vDSO basic mechanism for ppc64 (for both 32 and 64 bits apps). Currently,
there is no useful function exposed to userland, that will come next, only
a pair of bogus ones are exposed (along with the signal trampoline,
so at least you get the immediate benefit of getting that one out of the
stack). The address of the vDSO is passed in an elf aux table entry I
defined in include/asm-ppc64/elf.h

At this point, I'm a bit lost in glibc code, I would appreciate your help
getting a glibc patch (32 bits only at first would be fine for me
to move forward) that allows glibc to detect that vDSO and link it in
with applications. From that point, I'll start working on the fully
userland implementation of gettimeofday and start working on the various
cpu-optimized routines we want to put in there.

This version, unlike the previously posted one, does not put the vDSO
above the stack anymore, but rather down at +1Mb + random offset. The
random offset thing is more a proof-of-concept thing at this point than
anything else, and the +1Mb address was chosen so that the linker can
later be tweaked to use "ba" instruction to get there if we want to,
but none of this is burned in stone. We have been thinking about defining
an optional program header for apps to tell where they want the vDSO to be
(in case apps like emulators need that space at 1Mb to be available for
something else) or that they don't want one at all.

Any comment of course is welcome,
Regards,
Ben.

diff -urN linux-2.5/arch/ppc64/Makefile linux-vdso/arch/ppc64/Makefile
--- linux-2.5/arch/ppc64/Makefile	2004-08-26 15:46:30.000000000 +1000
+++ linux-vdso/arch/ppc64/Makefile	2004-08-27 13:07:59.000000000 +1000
@@ -43,6 +43,8 @@

 libs-y				+= arch/ppc64/lib/
 core-y				+= arch/ppc64/kernel/
+core-y				+= arch/ppc64/kernel/vdso32/
+core-y				+= arch/ppc64/kernel/vdso64/
 core-y				+= arch/ppc64/mm/
 core-$(CONFIG_XMON)		+= arch/ppc64/xmon/
 drivers-$(CONFIG_OPROFILE)	+= arch/ppc64/oprofile/
diff -urN linux-2.5/arch/ppc64/kernel/Makefile linux-vdso/arch/ppc64/kernel/Makefile
--- linux-2.5/arch/ppc64/kernel/Makefile	2004-08-26 15:46:30.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/Makefile	2004-08-27 13:07:59.000000000 +1000
@@ -11,7 +11,7 @@
 			udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
 			ptrace32.o signal32.o rtc.o init_task.o \
 			lmb.o cputable.o cpu_setup_power4.o idle_power4.o \
-			iommu.o sysfs.o vio.o
+			iommu.o sysfs.o vio.o vdso.o

 obj-$(CONFIG_PPC_OF) +=	of_device.o

diff -urN linux-2.5/arch/ppc64/kernel/signal.c linux-vdso/arch/ppc64/kernel/signal.c
--- linux-2.5/arch/ppc64/kernel/signal.c	2004-08-26 15:46:30.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/signal.c	2004-08-27 15:46:07.000000000 +1000
@@ -34,6 +34,7 @@
 #include <asm/ppcdebug.h>
 #include <asm/unistd.h>
 #include <asm/cacheflush.h>
+#include <asm/vdso.h>

 #define DEBUG_SIG 0

@@ -412,10 +413,14 @@
 		goto badframe;

 	/* Set up to return from userspace. */
-	err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
-	if (err)
-		goto badframe;
-
+	if (vdso64_rt_sigtramp && current->thread.vdso_base) {
+		regs->link = current->thread.vdso_base + vdso64_rt_sigtramp;
+	} else {
+		err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
+		if (err)
+			goto badframe;
+		regs->link = (unsigned long) &frame->tramp[0];
+	}
 	funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;

 	/* Allocate a dummy caller frame for the signal handler. */
@@ -424,7 +429,6 @@

 	/* Set up "regs" so we "return" to the signal handler. */
 	err |= get_user(regs->nip, &funct_desc_ptr->entry);
-	regs->link = (unsigned long) &frame->tramp[0];
 	regs->gpr[1] = newsp;
 	err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
 	regs->gpr[3] = signr;
diff -urN linux-2.5/arch/ppc64/kernel/signal32.c linux-vdso/arch/ppc64/kernel/signal32.c
--- linux-2.5/arch/ppc64/kernel/signal32.c	2004-08-26 15:46:30.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/signal32.c	2004-08-27 15:46:07.000000000 +1000
@@ -30,6 +30,7 @@
 #include <asm/ppcdebug.h>
 #include <asm/unistd.h>
 #include <asm/cacheflush.h>
+#include <asm/vdso.h>

 #define DEBUG_SIG 0

@@ -677,18 +678,24 @@

 	/* Save user registers on the stack */
 	frame = &rt_sf->uc.uc_mcontext;
-	if (save_user_regs(regs, frame, __NR_rt_sigreturn))
-		goto badframe;
-
 	if (put_user(regs->gpr[1], (unsigned long __user *)newsp))
 		goto badframe;
+
+	if (vdso32_rt_sigtramp && current->thread.vdso_base) {
+		if (save_user_regs(regs, frame, 0))
+			goto badframe;
+		regs->link = current->thread.vdso_base + vdso32_rt_sigtramp;
+	} else {
+		if (save_user_regs(regs, frame, __NR_rt_sigreturn))
+			goto badframe;
+		regs->link = (unsigned long) frame->tramp;
+	}
 	regs->gpr[1] = (unsigned long) newsp;
 	regs->gpr[3] = sig;
 	regs->gpr[4] = (unsigned long) &rt_sf->info;
 	regs->gpr[5] = (unsigned long) &rt_sf->uc;
 	regs->gpr[6] = (unsigned long) rt_sf;
 	regs->nip = (unsigned long) ka->sa.sa_handler;
-	regs->link = (unsigned long) frame->tramp;
 	regs->trap = 0;
 	regs->result = 0;

@@ -842,8 +849,15 @@
 	    || __put_user(sig, &sc->signal))
 		goto badframe;

-	if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
-		goto badframe;
+	if (vdso32_sigtramp && current->thread.vdso_base) {
+		if (save_user_regs(regs, &frame->mctx, 0))
+			goto badframe;
+		regs->link = current->thread.vdso_base + vdso32_sigtramp;
+	} else {
+		if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
+			goto badframe;
+		regs->link = (unsigned long) frame->mctx.tramp;
+	}

 	if (put_user(regs->gpr[1], (unsigned long __user *)newsp))
 		goto badframe;
@@ -851,7 +865,6 @@
 	regs->gpr[3] = sig;
 	regs->gpr[4] = (unsigned long) sc;
 	regs->nip = (unsigned long) ka->sa.sa_handler;
-	regs->link = (unsigned long) frame->mctx.tramp;
 	regs->trap = 0;
 	regs->result = 0;

diff -urN linux-2.5/arch/ppc64/kernel/vdso.c linux-vdso/arch/ppc64/kernel/vdso.c
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso.c	2004-08-27 15:46:07.000000000 +1000
@@ -0,0 +1,412 @@
+
+/*
+ *  linux/arch/ppc64/kernel/vdso.c
+ *
+ *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *			 <benh at kernel.crashing.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+
+/*
+ * The vDSOs themselves are here
+ */
+extern char vdso64_start, vdso64_end;
+extern char vdso32_start, vdso32_end;
+
+static void *vdso64_kbase = &vdso64_start;
+static void *vdso32_kbase = &vdso32_start;
+
+unsigned int vdso64_pages;
+unsigned int vdso32_pages;
+
+/* Signal trampolines user addresses */
+
+unsigned long vdso64_sigtramp;
+unsigned long vdso64_rt_sigtramp;
+unsigned long vdso32_sigtramp;
+unsigned long vdso32_rt_sigtramp;
+
+/*
+ * Some infos carried around for each of them during parsing at
+ * boot time.
+ */
+struct lib32_elfinfo
+{
+	Elf32_Ehdr	*hdr;		/* ptr to ELF */
+	Elf32_Sym	*dynsym;	/* ptr to .dynsym section */
+	unsigned long	dynsymsize;	/* size of .dynsym section */
+	char		*dynstr;	/* ptr to .dynstr section */
+	unsigned long	text;		/* offset of .text section in .so */
+};
+
+struct lib64_elfinfo
+{
+	Elf64_Ehdr	*hdr;
+	Elf64_Sym	*dynsym;
+	unsigned long	dynsymsize;
+	char		*dynstr;
+	unsigned long	text;
+};
+
+
+#ifdef __DEBUG
+static void dump_one_vdso_page(struct page *pg, struct page *upg)
+{
+	printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
+	       page_count(pg),
+	       pg->flags);
+	if (upg/* && pg != upg*/) {
+		printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT),
+		       page_count(upg),
+		       upg->flags);
+	}
+	printk("\n");
+}
+
+static void dump_vdso_pages(struct vm_area_struct * vma)
+{
+	int i;
+
+	if (!vma || test_thread_flag(TIF_32BIT)) {
+		printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
+		for (i=0; i<vdso32_pages; i++) {
+			struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+			struct page *upg = (vma && vma->vm_mm) ?
+				follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
+				: NULL;
+			dump_one_vdso_page(pg, upg);
+		}
+	}
+	if (!vma || !test_thread_flag(TIF_32BIT)) {
+		printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
+		for (i=0; i<vdso64_pages; i++) {
+			struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+			struct page *upg = (vma && vma->vm_mm) ?
+				follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
+				: NULL;
+			dump_one_vdso_page(pg, upg);
+		}
+	}
+}
+#endif /* DEBUG */
+
+/*
+ * Keep a dummy vma_close for now, it will prevent VMA merging.
+ */
+static void vdso_vma_close(struct vm_area_struct * vma)
+{
+}
+
+/*
+ * Our nopage() function, maps in the actual vDSO kernel pages, they will
+ * be mapped read-only by do_no_page(), and eventually COW'ed, either
+ * right away for an initial write access, or by do_wp_page().
+ */
+static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
+				     unsigned long address, int *type)
+{
+	unsigned long offset = address - vma->vm_start;
+	struct page *pg;
+	void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase;
+
+	DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
+	    current->comm, address, offset);
+
+	if (address < vma->vm_start || address > vma->vm_end)
+		return NOPAGE_SIGBUS;
+
+	pg = virt_to_page(vbase + offset);
+	get_page(pg);
+	DBG(" ->page count: %d\n", page_count(pg));
+
+	return pg;
+}
+
+static struct vm_operations_struct vdso_vmops = {
+	.close	= vdso_vma_close,
+	.nopage	= vdso_vma_nopage,
+};
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long vdso_pages = test_thread_flag(TIF_32BIT) ?
+		vdso32_pages : vdso64_pages;
+
+	/* vDSO has a problem and was disabled, just don't "enable" it for the
+	 * process
+	 */
+	if (vdso_pages == 0) {
+		current->thread.vdso_base = 0;
+		return 0;
+	}
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (vma == NULL)
+		return -ENOMEM;
+	if (security_vm_enough_memory(vdso_pages)) {
+		kmem_cache_free(vm_area_cachep, vma);
+		return -ENOMEM;
+	}
+	memset(vma, 0, sizeof(*vma));
+
+	/*
+	 * pick a base address for the vDSO in process space. We have a default
+	 * base of 1Mb on which we had a random offset up to 1Mb.
+	 * XXX: Add possibility for a program header to specify that location
+	 */
+	current->thread.vdso_base = 0x00100000 +
+		((unsigned long)vma & 0x000ff000);
+
+	vma->vm_mm = mm;
+	vma->vm_start = current->thread.vdso_base;
+	vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT);
+
+	/*
+	 * our vma flags don't have VM_WRITE so by default, the process isn't allowed
+	 * to write those pages.
+	 * gdb can break that with ptrace interface, and thus trigger COW on those
+	 * pages but it's then your responsibility to never do that on the "data" page
+	 * of the vDSO or you'll stop getting kernel updates and your nice userland
+	 * gettimeofday will be totally dead. It's fine to use that for setting
+	 * breakpoints in the vDSO code pages though
+	 */
+	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+	vma->vm_flags |= mm->def_flags;
+	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+	vma->vm_ops = &vdso_vmops;
+
+	down_write(&mm->mmap_sem);
+	insert_vm_struct(mm, vma);
+	mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	up_write(&mm->mmap_sem);
+
+	return 0;
+}
+
+static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
+				  unsigned long *size)
+{
+	Elf32_Shdr *sechdrs;
+	unsigned int i;
+	char *secnames;
+
+	/* Grab section headers and strings so we can tell who is who */
+	sechdrs = (void *)ehdr + ehdr->e_shoff;
+	secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+	/* Find the section they want */
+	for (i = 1; i < ehdr->e_shnum; i++) {
+		if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+			if (size)
+				*size = sechdrs[i].sh_size;
+			return (void *)ehdr + sechdrs[i].sh_offset;
+		}
+	}
+	*size = 0;
+	return NULL;
+}
+
+static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
+				  unsigned long *size)
+{
+	Elf64_Shdr *sechdrs;
+	unsigned int i;
+	char *secnames;
+
+	/* Grab section headers and strings so we can tell who is who */
+	sechdrs = (void *)ehdr + ehdr->e_shoff;
+	secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+	/* Find the section they want */
+	for (i = 1; i < ehdr->e_shnum; i++) {
+		if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+			if (size)
+				*size = sechdrs[i].sh_size;
+			return (void *)ehdr + sechdrs[i].sh_offset;
+		}
+	}
+	if (size)
+		*size = 0;
+	return NULL;
+}
+
+static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname)
+{
+	unsigned int i;
+
+	for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+		if (lib->dynsym[i].st_name == 0)
+			continue;
+		if (strcmp(symname, lib->dynstr + lib->dynsym[i].st_name) == 0)
+			return &lib->dynsym[i];
+	}
+	return NULL;
+}
+
+static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname)
+{
+	unsigned int i;
+
+	for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
+		if (lib->dynsym[i].st_name == 0)
+			continue;
+		if (strcmp(symname, lib->dynstr + lib->dynsym[i].st_name) == 0)
+			return &lib->dynsym[i];
+	}
+	return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname)
+{
+	Elf32_Sym *sym = find_symbol32(lib, symname);
+
+	if (sym == NULL) {
+		printk(KERN_WARNING "vDSO32: function %s not found !\n", symname);
+		return 0;
+	}
+	return sym->st_value;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname)
+{
+	Elf64_Sym *sym = find_symbol64(lib, symname);
+
+	if (sym == NULL) {
+		printk(KERN_WARNING "vDSO64: function %s not found !\n", symname);
+		return 0;
+	}
+	return sym->st_value;
+}
+
+
+static __init int vdso_do_fixups(void)
+{
+	struct lib32_elfinfo	v32;
+	struct lib64_elfinfo	v64;
+	void *sect;
+
+	v32.hdr = vdso32_kbase;
+	v64.hdr = vdso64_kbase;
+
+	/*
+	 * Locate symbol tables & text section
+	 */
+
+	v32.dynsym = find_section32(v32.hdr, ".dynsym", &v32.dynsymsize);
+	v32.dynstr = find_section32(v32.hdr, ".dynstr", NULL);
+	if (v32.dynsym == NULL || v32.dynstr == NULL) {
+		printk(KERN_ERR "vDSO32: a required symbol section was not found\n");
+		return -1;
+	}
+	sect = find_section32(v32.hdr, ".text", NULL);
+	if (sect == NULL) {
+		printk(KERN_ERR "vDSO32: the .text section was not found\n");
+		return -1;
+	}
+	v32.text = sect - vdso32_kbase;
+
+	v64.dynsym = find_section64(v64.hdr, ".dynsym", &v64.dynsymsize);
+	v64.dynstr = find_section64(v64.hdr, ".dynstr", NULL);
+	if (v64.dynsym == NULL || v64.dynstr == NULL) {
+		printk(KERN_ERR "vDSO64: a required symbol section was not found\n");
+		return -1;
+	}
+	sect = find_section64(v64.hdr, ".text", NULL);
+	if (sect == NULL) {
+		printk(KERN_ERR "vDSO64: the .text section was not found\n");
+		return -1;
+	}
+	v64.text = sect - vdso64_kbase;
+
+	/*
+	 * Find signal trampolines
+	 */
+
+	vdso64_sigtramp		= find_function64(&v64, "_v_sigtramp64");
+	vdso64_rt_sigtramp	= find_function64(&v64, "_v_sigtramp_rt64");
+	vdso32_sigtramp		= find_function32(&v32, "_v_sigtramp32");
+	vdso32_rt_sigtramp	= find_function32(&v32, "_v_sigtramp_rt32");
+
+	return 0;
+}
+
+void __init vdso_init(void)
+{
+	int i;
+
+	vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
+	vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
+
+	DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n",
+	       vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages);
+
+	/* Do necessary fixups of vDSO symbols */
+	if (vdso_do_fixups()) {
+		printk(KERN_ERR "vDSO setup failure, not enabled !\n");
+		/* XXX should free pages here ? */
+		vdso64_pages = vdso32_pages = 0;
+		return;
+	}
+
+	/* Make sure pages are in the correct state */
+	for (i = 0; i < vdso64_pages; i++) {
+		struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		get_page(pg);
+	}
+	for (i = 0; i < vdso32_pages; i++) {
+		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		get_page(pg);
+	}
+}
diff -urN linux-2.5/arch/ppc64/kernel/vdso32/Makefile linux-vdso/arch/ppc64/kernel/vdso32/Makefile
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso32/Makefile	2004-08-27 13:07:59.000000000 +1000
@@ -0,0 +1,43 @@
+# Choose compiler
+
+CROSS32_COMPILE ?=
+
+CROSS32CC		:= $(CROSS32_COMPILE)gcc
+CROSS32AS		:= $(CROSS32_COMPILE)as
+
+# List of files in the vdso, has to be asm only for now
+
+src-vdso32 = sigtramp.S testfunc.S
+
+# Build rules
+
+obj-vdso32 := $(addsuffix .o, $(basename $(src-vdso32)))
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+src-vdso32 := $(addprefix $(src)/, $(src-vdso32))
+
+VDSO32_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib
+VDSO32_CFLAGS += -Wl,-soname=linux-vdso32.so.1
+VDSO32_AFLAGS := -D__ASSEMBLY__ -s
+
+obj-y += vdso32_wrapper.o
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32)
+	$(call if_changed,vdso32ld)
+
+# assembly rules for the .S files
+# This is probably wrong with split src & obj trees
+$(obj-vdso32): %.o: %.S
+	$(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+      cmd_vdso32ld = $(CROSS32CC) -Wp,-MD,$(depfile) $(VDSO32_CFLAGS) \
+	-Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+      cmd_vdso32as = $(CROSS32CC) -Wp,-MD,$(depfile) $(VDSO32_AFLAGS) -c -o $@ $^
+
+targets += vdso32.so
diff -urN linux-2.5/arch/ppc64/kernel/vdso32/sigtramp.S linux-vdso/arch/ppc64/kernel/vdso32/sigtramp.S
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso32/sigtramp.S	2004-08-27 13:07:59.000000000 +1000
@@ -0,0 +1,15 @@
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+
+	.globl	_v_sigtramp32
+_v_sigtramp32:
+	li	r0,__NR_sigreturn
+	sc
+
+	.globl	_v_sigtramp_rt32
+_v_sigtramp_rt32:
+	li	r0,__NR_rt_sigreturn
+	sc
+
diff -urN linux-2.5/arch/ppc64/kernel/vdso32/testfunc.S linux-vdso/arch/ppc64/kernel/vdso32/testfunc.S
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso32/testfunc.S	2004-08-27 15:46:07.000000000 +1000
@@ -0,0 +1,12 @@
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+
+	.globl	__v_myfunc_1
+__v_myfunc_1:
+	blr
+
+	.globl	__v_myfunc_2
+__v_myfunc_2:
+	blr
diff -urN linux-2.5/arch/ppc64/kernel/vdso32/vdso32.lds linux-vdso/arch/ppc64/kernel/vdso32/vdso32.lds
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso32/vdso32.lds	2004-08-27 13:07:59.000000000 +1000
@@ -0,0 +1,98 @@
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc",
+	      "elf32-powerpc")
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_start)
+
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+
+  . = 0 + SIZEOF_HEADERS;
+  .hash           : { *(.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+  .text           :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+  } =0
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+
+  /* Other stuff is appended to the text segment: */
+
+  .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1        : { *(.rodata1) }
+  .eh_frame_hdr : { *(.eh_frame_hdr) }
+  .eh_frame       : { KEEP (*(.eh_frame)) }
+  .gcc_except_table   : { *(.gcc_except_table) }
+  .fixup          : { *(.fixup) }
+  .dynamic        : { *(.dynamic) }
+
+  /* Stabs debugging sections are here too, away from the
+   * data page. Not much in there at the moment
+   */
+  .stab          0 : { *(.stab) }
+  .stabstr       0 : { *(.stabstr) }
+  .stab.excl     0 : { *(.stab.excl) }
+  .stab.exclstr  0 : { *(.stab.exclstr) }
+  .stab.index    0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment       0 : { *(.comment) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging sections are relative to the beginning
+     of the section so we begin them at 0.  */
+  /* DWARF 1 */
+  .debug          0 : { *(.debug) }
+  .line           0 : { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : { *(.debug_srcinfo) }
+  .debug_sfnames  0 : { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev   0 : { *(.debug_abbrev) }
+  .debug_line     0 : { *(.debug_line) }
+  .debug_frame    0 : { *(.debug_frame) }
+  .debug_str      0 : { *(.debug_str) }
+  .debug_loc      0 : { *(.debug_loc) }
+  .debug_macinfo  0 : { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames  0 : { *(.debug_varnames) }
+
+  /* Adjust the address for the data segment.  On the vdso, we need it to
+   * be page aligned after the text. The data segment contains ONLY the
+   * .data section here, which is special in the case of the vdso as it's
+   * really read only and is kernel updated. The got stays there too
+   */
+  . = ALIGN (0x1000);
+
+  .data           :
+  {
+    *(.data .data.* .gnu.linkonce.d.*)
+  }
+  .got            : { *(.got.plt) *(.got) }
+  _edata = .;
+  PROVIDE (edata = .);
+  _end = .;
+  __end = .;
+  PROVIDE (end = .);
+
+  /DISCARD/ : { *(.note.GNU-stack) }
+  /* gas insist on generating these, bin them in here, they should be
+   * empty anyways
+   */
+  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
diff -urN linux-2.5/arch/ppc64/kernel/vdso32/vdso32_wrapper.S linux-vdso/arch/ppc64/kernel/vdso32/vdso32_wrapper.S
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso32/vdso32_wrapper.S	2004-08-27 13:07:59.000000000 +1000
@@ -0,0 +1,12 @@
+#include <linux/init.h>
+
+	.section ".data"
+
+	.globl vdso32_start, vdso32_end
+	.balign 4096
+vdso32_start:
+	.incbin "arch/ppc64/kernel/vdso32/vdso32.so"
+	.balign 4096
+vdso32_end:
+
+	.previous
diff -urN linux-2.5/arch/ppc64/kernel/vdso64/Makefile linux-vdso/arch/ppc64/kernel/vdso64/Makefile
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso64/Makefile	2004-08-27 13:07:59.000000000 +1000
@@ -0,0 +1,36 @@
+# List of files in the vdso, has to be asm only for now
+
+src-vdso64 = sigtramp.S testfunc.S
+
+# Build rules
+
+obj-vdso64 := $(addsuffix .o, $(basename $(src-vdso64)))
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+src-vdso64 := $(addprefix $(src)/, $(src-vdso64))
+
+VDSO64_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib
+VDSO64_CFLAGS += -Wl,-soname=linux-vdso64.so.1
+VDSO64_AFLAGS := -D__ASSEMBLY__ -s
+
+obj-y += vdso64_wrapper.o
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64)
+	$(call if_changed,vdso64ld)
+
+# assembly rules for the .S files
+# This is probably wrong with split src & obj trees
+$(obj-vdso64): %.o: %.S
+	$(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+      cmd_vdso64ld = $(CC) -Wp,-MD,$(depfile) $(VDSO64_CFLAGS) \
+	-Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+      cmd_vdso64as = $(CC) -Wp,-MD,$(depfile) $(VDSO64_AFLAGS) -c -o $@ $^
+
+targets += vdso64.so
diff -urN linux-2.5/arch/ppc64/kernel/vdso64/sigtramp.S linux-vdso/arch/ppc64/kernel/vdso64/sigtramp.S
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso64/sigtramp.S	2004-08-27 13:07:59.000000000 +1000
@@ -0,0 +1,17 @@
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+
+	.globl	_v_sigtramp64
+_v_sigtramp64:
+	addi	r1, r1, __SIGNAL_FRAMESIZE
+	li	r0,__NR_sigreturn
+	sc
+
+	.globl	_v_sigtramp_rt64
+_v_sigtramp_rt64:
+	addi	r1, r1, __SIGNAL_FRAMESIZE
+	li	r0,__NR_rt_sigreturn
+	sc
+
diff -urN linux-2.5/arch/ppc64/kernel/vdso64/testfunc.S linux-vdso/arch/ppc64/kernel/vdso64/testfunc.S
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso64/testfunc.S	2004-08-27 15:46:07.000000000 +1000
@@ -0,0 +1,12 @@
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+
+	.globl	__v_myfunc_1
+__v_myfunc_1:
+	blr
+
+	.globl	__v_myfunc_2
+__v_myfunc_2:
+	blr
diff -urN linux-2.5/arch/ppc64/kernel/vdso64/vdso64.lds linux-vdso/arch/ppc64/kernel/vdso64/vdso64.lds
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso64/vdso64.lds	2004-08-27 13:07:59.000000000 +1000
@@ -0,0 +1,92 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc",
+	      "elf64-powerpc")
+OUTPUT_ARCH(powerpc:common64)
+ENTRY(_start)
+
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  . = 0 + SIZEOF_HEADERS;
+  .hash           : { *(.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+  .text           :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+    *(.sfpr .glink)
+  } =0x60000000
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+
+  /* Other stuff is appended to the text segment: */
+  .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1        : { *(.rodata1) }
+  .eh_frame_hdr : { *(.eh_frame_hdr) }
+  .eh_frame       : { KEEP (*(.eh_frame)) }
+  .gcc_except_table   : { *(.gcc_except_table) }
+  .dynamic        : { *(.dynamic) }
+
+  /* Stabs debugging sections are here too, away from the
+   * data page. Not much in there at the moment
+   */
+  .stab          0 : { *(.stab) }
+  .stabstr       0 : { *(.stabstr) }
+  .stab.excl     0 : { *(.stab.excl) }
+  .stab.exclstr  0 : { *(.stab.exclstr) }
+  .stab.index    0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment       0 : { *(.comment) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging sections are relative to the beginning
+     of the section so we begin them at 0.  */
+  /* DWARF 1 */
+  .debug          0 : { *(.debug) }
+  .line           0 : { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : { *(.debug_srcinfo) }
+  .debug_sfnames  0 : { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev   0 : { *(.debug_abbrev) }
+  .debug_line     0 : { *(.debug_line) }
+  .debug_frame    0 : { *(.debug_frame) }
+  .debug_str      0 : { *(.debug_str) }
+  .debug_loc      0 : { *(.debug_loc) }
+  .debug_macinfo  0 : { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames  0 : { *(.debug_varnames) }
+
+  /* Adjust the address for the data segment.  On the vdso, we need it to
+   * be page aligned after the text. The data segment contains ONLY the
+   * .data section here, which is special in the case of the vdso as it's
+   * really read only and is kernel updated. The got stays there too
+   */
+  . = ALIGN (0x1000);
+
+  .data           :
+  {
+    *(.data .data.* .gnu.linkonce.d.*)
+  }
+  .got		ALIGN(8) : { *(.got .toc) }
+  _edata = .;
+  PROVIDE (edata = .);
+  _end = .;
+  PROVIDE (end = .);
+
+  /DISCARD/ : { *(.note.GNU-stack) }
+  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
diff -urN linux-2.5/arch/ppc64/kernel/vdso64/vdso64_wrapper.S linux-vdso/arch/ppc64/kernel/vdso64/vdso64_wrapper.S
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/arch/ppc64/kernel/vdso64/vdso64_wrapper.S	2004-08-27 13:07:59.000000000 +1000
@@ -0,0 +1,12 @@
+#include <linux/init.h>
+
+	.section ".data"
+
+	.globl vdso64_start, vdso64_end
+	.balign 4096
+vdso64_start:
+	.incbin "arch/ppc64/kernel/vdso64/vdso64.so"
+	.balign 4096
+vdso64_end:
+
+	.previous
diff -urN linux-2.5/arch/ppc64/mm/init.c linux-vdso/arch/ppc64/mm/init.c
--- linux-2.5/arch/ppc64/mm/init.c	2004-08-26 15:46:30.000000000 +1000
+++ linux-vdso/arch/ppc64/mm/init.c	2004-08-27 13:07:59.000000000 +1000
@@ -61,6 +61,7 @@
 #include <asm/system.h>
 #include <asm/iommu.h>
 #include <asm/abs_addr.h>
+#include <asm/vdso.h>


 struct mmu_context_queue_t mmu_context_queue;
@@ -706,6 +707,8 @@
 #ifdef CONFIG_PPC_ISERIES
 	iommu_vio_init();
 #endif
+	/* Initialize the vDSO */
+	vdso_init();
 }

 /*
diff -urN linux-2.5/fs/binfmt_elf.c linux-vdso/fs/binfmt_elf.c
--- linux-2.5/fs/binfmt_elf.c	2004-08-26 15:46:35.000000000 +1000
+++ linux-vdso/fs/binfmt_elf.c	2004-08-27 13:08:01.000000000 +1000
@@ -715,6 +715,14 @@
 		goto out_free_dentry;
 	}

+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	retval = arch_setup_additional_pages(bprm, executable_stack);
+	if (retval < 0) {
+		send_sig(SIGKILL, current, 0);
+		goto out_free_dentry;
+	}
+#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
+
 	current->mm->start_stack = bprm->p;

 	/* Now we do a little grungy work by mmaping the ELF image into
diff -urN linux-2.5/include/asm-ppc64/a.out.h linux-vdso/include/asm-ppc64/a.out.h
--- linux-2.5/include/asm-ppc64/a.out.h	2004-08-10 10:22:36.000000000 +1000
+++ linux-vdso/include/asm-ppc64/a.out.h	2004-08-27 15:46:34.000000000 +1000
@@ -2,6 +2,7 @@
 #define __PPC64_A_OUT_H__

 #include <asm/ppcdebug.h>
+#include <asm/vdso.h>

 /*
  * c 2001 PPC 64 Team, IBM Corp
@@ -30,14 +31,11 @@

 #ifdef __KERNEL__

-#define STACK_TOP_USER64 (TASK_SIZE_USER64)
+#define STACK_TOP_USER64 TASK_SIZE_USER64
+#define STACK_TOP_USER32 TASK_SIZE_USER32

-/* Give 32-bit user space a full 4G address space to live in. */
-#define STACK_TOP_USER32 (TASK_SIZE_USER32)
-
-#define STACK_TOP ((test_thread_flag(TIF_32BIT) || \
-		(ppcdebugset(PPCDBG_BINFMT_32ADDR))) ? \
-		STACK_TOP_USER32 : STACK_TOP_USER64)
+#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \
+		   STACK_TOP_USER32 : STACK_TOP_USER64)

 #endif /* __KERNEL__ */

diff -urN linux-2.5/include/asm-ppc64/elf.h linux-vdso/include/asm-ppc64/elf.h
--- linux-2.5/include/asm-ppc64/elf.h	2004-08-10 10:22:37.000000000 +1000
+++ linux-vdso/include/asm-ppc64/elf.h	2004-08-27 15:46:34.000000000 +1000
@@ -237,6 +237,8 @@
 #define AT_UCACHEBSIZE		21
 /* A special ignored type value for PPC, for glibc compatibility.  */
 #define AT_IGNOREPPC		22
+/* The vDSO location */
+#define AT_VDSO_BASE		23

 extern int dcache_bsize;
 extern int icache_bsize;
@@ -260,6 +262,8 @@
 	NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize);			\
 	NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize);			\
 	NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize);			\
+	/* vDSO base */							\
+	NEW_AUX_ENT(AT_VDSO_BASE, current->thread.vdso_base);		\
  } while (0)

 /* PowerPC64 relocations defined by the ABIs */
diff -urN linux-2.5/include/asm-ppc64/processor.h linux-vdso/include/asm-ppc64/processor.h
--- linux-2.5/include/asm-ppc64/processor.h	2004-08-26 15:46:40.000000000 +1000
+++ linux-vdso/include/asm-ppc64/processor.h	2004-08-27 15:46:34.000000000 +1000
@@ -526,8 +526,8 @@
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(STACK_TOP_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(STACK_TOP_USER64 / 4))
+#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4))

 #define TASK_UNMAPPED_BASE ((test_thread_flag(TIF_32BIT)||(ppcdebugset(PPCDBG_BINFMT_32ADDR))) ? \
 		TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
@@ -543,7 +543,8 @@
 	double		fpr[32];	/* Complete floating point set */
 	unsigned long	fpscr;		/* Floating point status (plus pad) */
 	unsigned long	fpexc_mode;	/* Floating-point exception mode */
-	unsigned long	pad[3];		/* was saved_msr, saved_softe */
+	unsigned long	pad[2];		/* was saved_msr, saved_softe */
+	unsigned long	vdso_base;	/* base of the vDSO library */
 #ifdef CONFIG_ALTIVEC
 	/* Complete AltiVec register set */
 	vector128	vr[32] __attribute((aligned(16)));
diff -urN linux-2.5/include/asm-ppc64/vdso.h linux-vdso/include/asm-ppc64/vdso.h
--- /dev/null	2004-07-28 14:31:22.000000000 +1000
+++ linux-vdso/include/asm-ppc64/vdso.h	2004-08-27 15:46:34.000000000 +1000
@@ -0,0 +1,23 @@
+#ifndef __PPC64_VDSO_H__
+#define __PPC64_VDSO_H__
+
+#ifdef __KERNEL__
+
+extern unsigned int vdso64_pages;
+extern unsigned int vdso32_pages;
+
+/* Offsets relative to thread->vdso_base */
+extern unsigned long vdso64_sigtramp;
+extern unsigned long vdso64_rt_sigtramp;
+extern unsigned long vdso32_sigtramp;
+extern unsigned long vdso32_rt_sigtramp;
+
+extern void vdso_init(void);
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack);
+
+#endif /* __KERNEL__ */
+
+#endif /* __PPC64_VDSO_H__ */


** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/





More information about the Linuxppc64-dev mailing list