per page execute
Anton Blanchard
anton at samba.org
Sat Dec 27 23:15:25 EST 2003
Hi,
We need to move towards enforcing exec permission on all mappings.
Here's a start:
- Switch _PAGE_EXEC and _PAGE_RW so _PAGE_EXEC matches up with the
hardware noexec bit
- Add _PAGE_EXEC to hash_page permission check
- Check for exec permission in do_page_fault
- Remove redundant set of _PAGE_PRESENT in do_hash_page_DSI, since we set
it again in __hash_page
- Invert linux _PAGE_EXEC bit and enter it in the ppc64 hpte
- Awful bss hack to force program BSS sections to be marked exec
- Split 32- and 64-bit data and stack flags; only enforce exec permission
on mmap/brk for the moment (i.e. always mark the stack executable)
- Make pte_modify more rigid, avoiding turning off the no-cache bits when
doing mprotect. (Not related to the rest of this patch; I took the
opportunity to fix it while I was in the area.)
- Our PXXX and SXXX were backwards :) They are in XWR order due to the
way our mmap flags are laid out (see the sketch after this list). Wow,
that bug must date back a few years.
- Kill unused PAGE_KERNEL_CI, pte_cache, pte_uncache
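
For reference, here is a minimal userspace sketch of how a mapping's
flags index the __PXXX/__SXXX tables and why the old ordering handed
out the wrong protections. The VM_* constants mirror the kernel's
values; the real lookup also folds in VM_SHARED to pick the shared
table:

/* Sketch: vm flags index the protection tables in XWR order. */
#include <stdio.h>

#define VM_READ  0x1	/* bit 0: R */
#define VM_WRITE 0x2	/* bit 1: W */
#define VM_EXEC  0x4	/* bit 2: X */

int main(void)
{
	/* A plain PROT_READ private mapping. */
	unsigned int idx = VM_READ;	/* binary 001 */

	/* With the corrected XWR tables, index 001 selects __P001,
	 * i.e. PAGE_READONLY. The old backwards tables returned
	 * PAGE_READONLY_X here, silently granting exec on read-only
	 * mappings, while an exec-only mapping (index 100) got no
	 * exec at all. */
	printf("protection table index = %u\n", idx);
	return 0;
}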
When mapping in an executable, it seems the kernel doesn't look at the
permissions of non-load segments. We explode pretty early because our
plt on ppc32 is in such an area. The awful hack was an attempt to fix
this problem quickly without marking the entire bss as exec by default.
It's crying out for a proper fix :)
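
To illustrate, here is a simplified sketch, loosely following the
program header loop in fs/binfmt_elf.c (variable names abbreviated):
only PT_LOAD headers contribute mapping permissions, so the flags on
anything living outside a load segment are never consulted.

/* Sketch: only PT_LOAD program headers drive mmap permissions. */
int i, prot;
struct elf_phdr *phdr;

for (i = 0; i < elf_ex.e_phnum; i++) {
	phdr = &elf_phdata[i];

	if (phdr->p_type != PT_LOAD)
		continue;	/* other segment types: flags ignored */

	prot = 0;
	if (phdr->p_flags & PF_R)
		prot |= PROT_READ;
	if (phdr->p_flags & PF_W)
		prot |= PROT_WRITE;
	if (phdr->p_flags & PF_X)
		prot |= PROT_EXEC;

	/* elf_map(bprm->file, addr, phdr, prot, type); */
}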
Another thing that worries me:
[Nr] Name  Type    Addr      Off     Size    ES  Flg  Lk  Inf  Al
[25] .plt  NOBITS  10010c08  000c00  0000c0  00  WAX   0    0   4
[26] .bss  NOBITS  10010cc8  000c00  000004  00  WA    0    0   1
Look how the non-executable bss butts right up against the executable
plt. Even with the patch below, we fail some security tests that try
to exec stuff out of the bss. That's because the stuff ends up in the
same page as the plt. Alan, could this be considered a toolchain bug?
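
You can see the overlap directly from the addresses above; a quick
userspace check (assuming 4K pages):

#include <stdio.h>

int main(void)
{
	unsigned long plt = 0x10010c08, bss = 0x10010cc8;
	unsigned long mask = ~0xfffUL;	/* 4K page mask */

	/* Both print 0x10010000: the sections share a page, so if
	 * the plt's page must be executable, the start of the bss
	 * is executable too. */
	printf("plt page %#lx, bss page %#lx\n", plt & mask, bss & mask);
	return 0;
}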
We also need to fix the kernel signal trampoline code before turning off
exec permission on the stack. If we did the fixmap trick that x86 does and
the trampoline always ended up in that page, that would work well.
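
For context, a simplified sketch of the current arrangement (the
helper name is hypothetical, but the two instruction encodings are
the usual li r0,NR / sc pair): the trampoline is written into the
user's stack frame, which is why signal delivery breaks as soon as
the stack loses exec permission.

/* Sketch: sigreturn trampoline written onto the user stack. */
static int setup_trampoline(unsigned int syscall, unsigned int *tramp)
{
	int err = 0;

	err |= __put_user(0x38000000UL | syscall, &tramp[0]); /* li r0,syscall */
	err |= __put_user(0x44000002UL, &tramp[1]);	      /* sc */

	/* The CPU fetches these two instructions from the stack page,
	 * so that page must be executable (and icache-coherent). */
	return err;
}

With the fixmap approach the kernel would instead map a single
read-only executable page at a fixed address containing this pair of
instructions, and point the signal return address there.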
Does glibc rely on the stack being executable? We may need a boot option
for people on old toolchains/glibcs (e.g. the bug where the toolchain forgot
to mark sections executable, or the other one where our GOT was not marked
executable).
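
One classic userspace dependency worth keeping in mind (independent
of whatever glibc itself does): GCC nested functions, when their
address is taken, are called through a small trampoline the compiler
writes into the enclosing stack frame, for example:

/* GNU C: taking bump's address forces a trampoline on the stack;
 * with a non-exec stack the indirect call faults. */
#include <stdio.h>

static void call_twice(void (*fn)(void))
{
	fn();
	fn();
}

int main(void)
{
	int count = 0;
	void bump(void) { count++; }	/* nested function */

	call_twice(bump);
	printf("count = %d\n", count);
	return 0;
}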
Anton
===== arch/ppc64/kernel/head.S 1.42 vs edited =====
--- 1.42/arch/ppc64/kernel/head.S Wed Dec 17 15:27:52 2003
+++ edited/arch/ppc64/kernel/head.S Sat Dec 27 11:23:37 2003
@@ -35,6 +35,7 @@
#include <asm/offsets.h>
#include <asm/bug.h>
#include <asm/cputable.h>
+#include <asm/pgtable.h>
#ifdef CONFIG_PPC_ISERIES
#define DO_SOFT_DISABLE
@@ -658,7 +659,7 @@
andis. r0,r3,0xa450 /* weird error? */
bne 1f /* if not, try to put a PTE */
andis. r0,r3,0x0020 /* Is it a page table fault? */
- rlwinm r4,r3,32-23,29,29 /* DSISR_STORE -> _PAGE_RW */
+ rlwinm r4,r3,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
ld r3,_DAR(r1) /* into the hash table */
beq+ 2f /* If so handle it */
@@ -818,10 +819,9 @@
b .ret_from_except
_GLOBAL(do_hash_page_ISI)
- li r4,0
+ li r4,_PAGE_EXEC
_GLOBAL(do_hash_page_DSI)
rlwimi r4,r23,32-13,30,30 /* Insert MSR_PR as _PAGE_USER */
- ori r4,r4,1 /* add _PAGE_PRESENT */
mflr r21 /* Save LR in r21 */
===== arch/ppc64/mm/fault.c 1.14 vs edited =====
--- 1.14/arch/ppc64/mm/fault.c Fri Sep 12 21:01:40 2003
+++ edited/arch/ppc64/mm/fault.c Sat Dec 27 16:32:23 2003
@@ -59,6 +59,7 @@
siginfo_t info;
unsigned long code = SEGV_MAPERR;
unsigned long is_write = error_code & 0x02000000;
+ unsigned long is_exec = regs->trap == 0x400;
#ifdef CONFIG_DEBUG_KERNEL
if (debugger_fault_handler && (regs->trap == 0x300 ||
@@ -102,16 +103,20 @@
good_area:
code = SEGV_ACCERR;
+ if (is_exec) {
+ /* XXX huh? */
+ /* protection fault */
+ if (error_code & 0x08000000)
+ goto bad_area;
+ if (!(vma->vm_flags & VM_EXEC))
+ goto bad_area;
/* a write */
- if (is_write) {
+ } else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
/* a read */
} else {
- /* protection fault */
- if (error_code & 0x08000000)
- goto bad_area;
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ if (!(vma->vm_flags & VM_READ))
goto bad_area;
}
===== arch/ppc64/mm/hash_low.S 1.1 vs edited =====
--- 1.1/arch/ppc64/mm/hash_low.S Wed Dec 17 15:55:14 2003
+++ edited/arch/ppc64/mm/hash_low.S Sat Dec 27 11:25:59 2003
@@ -90,7 +90,7 @@
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
- rlwinm r30,r4,5,24,24 /* _PAGE_RW -> _PAGE_DIRTY */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
or r30,r30,r31
ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
/* Write the linux PTE atomically (setting busy) */
@@ -113,11 +113,11 @@
rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
xor r28,r5,r0
-
- /* Convert linux PTE bits into HW equivalents
- */
- andi. r3,r30,0x1fa /* Get basic set of flags */
- rlwinm r0,r30,32-2+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,_PAGE_EXEC /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
andc r0,r30,r0 /* r0 = pte & ~r0 */
===== fs/binfmt_elf.c 1.54 vs edited =====
--- 1.54/fs/binfmt_elf.c Thu Oct 23 08:29:22 2003
+++ edited/fs/binfmt_elf.c Sat Dec 27 22:00:22 2003
@@ -86,8 +86,10 @@
{
start = ELF_PAGEALIGN(start);
end = ELF_PAGEALIGN(end);
- if (end > start)
+ if (end > start) {
do_brk(start, end - start);
+ sys_mprotect(start, end-start, PROT_READ|PROT_WRITE|PROT_EXEC);
+ }
current->mm->start_brk = current->mm->brk = end;
}
===== include/asm-ppc64/page.h 1.22 vs edited =====
--- 1.22/include/asm-ppc64/page.h Fri Sep 12 21:06:51 2003
+++ edited/include/asm-ppc64/page.h Sat Dec 27 17:43:57 2003
@@ -234,8 +234,25 @@
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+
+#define VM_STACK_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
#endif /* __KERNEL__ */
#endif /* _PPC64_PAGE_H */
===== include/asm-ppc64/pgtable.h 1.30 vs edited =====
--- 1.30/include/asm-ppc64/pgtable.h Wed Dec 17 16:08:23 2003
+++ edited/include/asm-ppc64/pgtable.h Sat Dec 27 15:07:05 2003
@@ -78,24 +78,25 @@
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
-#define _PAGE_RW 0x0004 /* software: user write access allowed */
+#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x0008
#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
#define _PAGE_DIRTY 0x0080 /* C: page changed */
#define _PAGE_ACCESSED 0x0100 /* R: page referenced */
-#define _PAGE_EXEC 0x0200 /* software: i-cache coherence required */
+#define _PAGE_RW 0x0200 /* software: user write access allowed */
#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
/* Bits 0x7000 identify the index within an HPT Group */
#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
+
/* PAGE_MASK gives the right answer below, but only by accident */
/* It should be preserving the high 48 bits and then specifically */
/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS)
+#define _PAGE_CHG_MASK (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | _PAGE_WRITETHRU | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_HPTEFLAGS | PAGE_MASK)
#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT)
@@ -111,31 +112,32 @@
#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
-#define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
- _PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED)
/*
- * The PowerPC can only do execute protection on a segment (256MB) basis,
- * not on a page basis. So we consider execute permission the same as read.
+ * POWER4 and newer have per page execute protection, older chips can only
+ * do this on a segment (256MB) basis.
+ *
* Also, write permissions imply read permissions.
* This is the closest we can get..
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
*/
#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY_X
+#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY_X
-#define __P100 PAGE_READONLY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY
+#define __P110 PAGE_COPY_X
#define __P111 PAGE_COPY_X
#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY_X
+#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED_X
-#define __S100 PAGE_READONLY
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED
+#define __S110 PAGE_SHARED_X
#define __S111 PAGE_SHARED_X
#ifndef __ASSEMBLY__
@@ -191,7 +193,8 @@
})
#define pte_modify(_pte, newprot) \
- (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)))
+ (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | \
+ (pgprot_val(newprot) & ~_PAGE_CHG_MASK)))
#define pte_none(pte) ((pte_val(pte) & ~_PAGE_HPTEFLAGS) == 0)
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
@@ -260,9 +263,6 @@
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;}
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;}
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
-
-static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
-static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
static inline pte_t pte_rdprotect(pte_t pte) {
pte_val(pte) &= ~_PAGE_USER; return pte; }