Subject: [PATCH 1/2] No-exec support for ppc64
From: Paul Mackerras <paulus at samba.org>
Date: Wed Mar 16 17:10:57 EST 2005
Jake Moilanen writes:
> It does not work w/o the sys_mprotect. It will hang in one of the first
> few binaries.
Hmmm, what distro is this with? I just tried a kernel with the patch
below on a SLES9 install and a Debian install and it came up and ran
just fine in both cases.
Paul.
diff -urN linux-2.5/arch/ppc64/kernel/head.S test/arch/ppc64/kernel/head.S
--- linux-2.5/arch/ppc64/kernel/head.S 2005-03-07 10:46:38.000000000 +1100
+++ test/arch/ppc64/kernel/head.S 2005-03-15 17:14:44.000000000 +1100
@@ -950,11 +950,12 @@
* accessing a userspace segment (even from the kernel). We assume
* kernel addresses always have the high bit set.
*/
- rlwinm r4,r4,32-23,29,29 /* DSISR_STORE -> _PAGE_RW */
+ rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
orc r0,r12,r0 /* MSR_PR | ~high_bit */
rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
ori r4,r4,1 /* add _PAGE_PRESENT */
+ rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
/*
* On iSeries, we soft-disable interrupts here, then
diff -urN linux-2.5/arch/ppc64/kernel/iSeries_htab.c test/arch/ppc64/kernel/iSeries_htab.c
--- linux-2.5/arch/ppc64/kernel/iSeries_htab.c 2004-09-21 17:22:33.000000000 +1000
+++ test/arch/ppc64/kernel/iSeries_htab.c 2005-03-15 17:15:36.000000000 +1100
@@ -144,6 +144,10 @@
HvCallHpt_get(&hpte, slot);
if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) {
+ /*
+ * Hypervisor expects bits as NPPP, which is
+ * different from how they are mapped in our PP.
+ */
HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1));
iSeries_hunlock(slot);
return 0;
diff -urN linux-2.5/arch/ppc64/kernel/iSeries_setup.c test/arch/ppc64/kernel/iSeries_setup.c
--- linux-2.5/arch/ppc64/kernel/iSeries_setup.c 2005-03-07 10:46:38.000000000 +1100
+++ test/arch/ppc64/kernel/iSeries_setup.c 2005-03-15 16:55:05.000000000 +1100
@@ -633,6 +633,10 @@
unsigned long vpn = va >> PAGE_SHIFT;
unsigned long slot = HvCallHpt_findValid(&hpte, vpn);
+ /* Make non-kernel text non-executable */
+ if (!in_kernel_text(ea))
+ mode_rw |= HW_NO_EXEC;
+
if (hpte.dw0.dw0.v) {
/* HPTE exists, so just bolt it */
HvCallHpt_setSwBits(slot, 0x10, 0);
diff -urN linux-2.5/arch/ppc64/kernel/module.c test/arch/ppc64/kernel/module.c
--- linux-2.5/arch/ppc64/kernel/module.c 2004-05-10 21:25:58.000000000 +1000
+++ test/arch/ppc64/kernel/module.c 2005-03-15 16:55:05.000000000 +1100
@@ -102,7 +102,8 @@
{
if (size == 0)
return NULL;
- return vmalloc(size);
+
+ return vmalloc_exec(size);
}
/* Free memory returned from module_alloc */
diff -urN linux-2.5/arch/ppc64/kernel/pSeries_lpar.c test/arch/ppc64/kernel/pSeries_lpar.c
--- linux-2.5/arch/ppc64/kernel/pSeries_lpar.c 2005-03-07 10:46:38.000000000 +1100
+++ test/arch/ppc64/kernel/pSeries_lpar.c 2005-03-15 16:55:02.000000000 +1100
@@ -470,7 +470,7 @@
slot = pSeries_lpar_hpte_find(vpn);
BUG_ON(slot == -1);
- flags = newpp & 3;
+ flags = newpp & 7;
lpar_rc = plpar_pte_protect(flags, slot, 0);
BUG_ON(lpar_rc != H_Success);
diff -urN linux-2.5/arch/ppc64/mm/fault.c test/arch/ppc64/mm/fault.c
--- linux-2.5/arch/ppc64/mm/fault.c 2005-01-04 10:49:20.000000000 +1100
+++ test/arch/ppc64/mm/fault.c 2005-03-15 17:13:05.000000000 +1100
@@ -91,8 +91,9 @@
struct mm_struct *mm = current->mm;
siginfo_t info;
unsigned long code = SEGV_MAPERR;
- unsigned long is_write = error_code & 0x02000000;
+ unsigned long is_write = error_code & DSISR_ISSTORE;
unsigned long trap = TRAP(regs);
+ unsigned long is_exec = trap == 0x400;
BUG_ON((trap == 0x380) || (trap == 0x480));
@@ -109,7 +110,7 @@
if (!user_mode(regs) && (address >= TASK_SIZE))
return SIGSEGV;
- if (error_code & 0x00400000) {
+ if (error_code & DSISR_DABRMATCH) {
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
return 0;
@@ -199,16 +200,19 @@
good_area:
code = SEGV_ACCERR;
+ if (is_exec) {
+ /* protection fault */
+ if (error_code & DSISR_PROTFAULT)
+ goto bad_area;
+ if (!(vma->vm_flags & VM_EXEC))
+ goto bad_area;
/* a write */
- if (is_write) {
+ } else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
/* a read */
} else {
- /* protection fault */
- if (error_code & 0x08000000)
- goto bad_area;
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ if (!(vma->vm_flags & VM_READ))
goto bad_area;
}
@@ -251,6 +255,12 @@
return 0;
}
+ if (trap == 0x400 && (error_code & DSISR_PROTFAULT)
+ && printk_ratelimit())
+ printk(KERN_CRIT "kernel tried to execute NX-protected"
+ " page (%lx) - exploit attempt? (uid: %d)\n",
+ address, current->uid);
+
return SIGSEGV;
/*
diff -urN linux-2.5/arch/ppc64/mm/hash_low.S test/arch/ppc64/mm/hash_low.S
--- linux-2.5/arch/ppc64/mm/hash_low.S 2005-01-05 13:48:02.000000000 +1100
+++ test/arch/ppc64/mm/hash_low.S 2005-03-15 16:55:02.000000000 +1100
@@ -89,7 +89,7 @@
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
- rlwinm r30,r4,5,24,24 /* _PAGE_RW -> _PAGE_DIRTY */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
or r30,r30,r31
ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
/* Write the linux PTE atomically (setting busy) */
@@ -112,11 +112,11 @@
rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
xor r28,r5,r0
-
- /* Convert linux PTE bits into HW equivalents
- */
- andi. r3,r30,0x1fa /* Get basic set of flags */
- rlwinm r0,r30,32-2+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
andc r0,r30,r0 /* r0 = pte & ~r0 */
diff -urN linux-2.5/arch/ppc64/mm/hash_utils.c test/arch/ppc64/mm/hash_utils.c
--- linux-2.5/arch/ppc64/mm/hash_utils.c 2005-03-07 10:46:38.000000000 +1100
+++ test/arch/ppc64/mm/hash_utils.c 2005-03-15 17:20:35.000000000 +1100
@@ -51,6 +51,7 @@
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/abs_addr.h>
+#include <asm/sections.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -95,6 +96,7 @@
{
unsigned long addr;
unsigned int step;
+ unsigned long tmp_mode;
if (large)
step = 16*MB;
@@ -112,6 +114,13 @@
else
vpn = va >> PAGE_SHIFT;
+
+ tmp_mode = mode;
+
+ /* Make non-kernel text non-executable */
+ if (!in_kernel_text(addr))
+ tmp_mode = mode | HW_NO_EXEC;
+
hash = hpt_hash(vpn, large);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -120,12 +129,12 @@
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);
else
#endif /* CONFIG_PPC_PSERIES */
ret = native_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);
if (ret == -1) {
ppc64_terminate_msg(0x20, "create_pte_mapping");
@@ -238,8 +247,6 @@
{
struct page *page;
-#define PPC64_HWNOEXEC (1 << 2)
-
if (!pfn_valid(pte_pfn(pte)))
return pp;
@@ -251,7 +258,7 @@
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
} else
- pp |= PPC64_HWNOEXEC;
+ pp |= HW_NO_EXEC;
}
return pp;
}
diff -urN linux-2.5/arch/ppc64/mm/hugetlbpage.c test/arch/ppc64/mm/hugetlbpage.c
--- linux-2.5/arch/ppc64/mm/hugetlbpage.c 2005-03-07 14:01:43.000000000 +1100
+++ test/arch/ppc64/mm/hugetlbpage.c 2005-03-15 17:27:33.000000000 +1100
@@ -782,7 +782,6 @@
{
pte_t *ptep;
unsigned long va, vpn;
- int is_write;
pte_t old_pte, new_pte;
unsigned long hpteflags, prpn;
long slot;
@@ -809,8 +808,7 @@
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
*/
- is_write = access & _PAGE_RW;
- if (unlikely(is_write && !(pte_val(*ptep) & _PAGE_RW)))
+ if (unlikely(access & ~pte_val(*ptep)))
goto out;
/*
* At this point, we have a pte (old_pte) which can be used to build
@@ -829,6 +827,8 @@
new_pte = old_pte;
hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
+ hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
/* Check if pte already has an hpte (case 2) */
if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
diff -urN linux-2.5/include/asm-ppc64/elf.h test/include/asm-ppc64/elf.h
--- linux-2.5/include/asm-ppc64/elf.h 2005-03-07 10:46:39.000000000 +1100
+++ test/include/asm-ppc64/elf.h 2005-03-15 16:55:02.000000000 +1100
@@ -226,6 +226,13 @@
else if (current->personality != PER_LINUX32) \
set_personality(PER_LINUX); \
} while (0)
+
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically.
+ */
+#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))
+
#endif
/*
diff -urN linux-2.5/include/asm-ppc64/page.h test/include/asm-ppc64/page.h
--- linux-2.5/include/asm-ppc64/page.h 2005-03-07 10:46:39.000000000 +1100
+++ test/include/asm-ppc64/page.h 2005-03-15 16:55:02.000000000 +1100
@@ -235,8 +235,25 @@
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+
+#define VM_STACK_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
+
#endif /* __KERNEL__ */
#endif /* _PPC64_PAGE_H */
diff -urN linux-2.5/include/asm-ppc64/pgtable.h test/include/asm-ppc64/pgtable.h
--- linux-2.5/include/asm-ppc64/pgtable.h 2005-03-07 14:01:44.000000000 +1100
+++ test/include/asm-ppc64/pgtable.h 2005-03-15 17:41:14.000000000 +1100
@@ -82,14 +82,14 @@
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
-#define _PAGE_RW 0x0004 /* software: user write access allowed */
+#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x0008
#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
#define _PAGE_DIRTY 0x0080 /* C: page changed */
#define _PAGE_ACCESSED 0x0100 /* R: page referenced */
-#define _PAGE_EXEC 0x0200 /* software: i-cache coherence required */
+#define _PAGE_RW 0x0200 /* software: user write access allowed */
#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
@@ -118,29 +118,38 @@
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
#define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
_PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_EXEC)
/*
- * The PowerPC can only do execute protection on a segment (256MB) basis,
- * not on a page basis. So we consider execute permission the same as read.
+ * This bit in a hardware PTE indicates that the page is *not* executable.
+ */
+#define HW_NO_EXEC _PAGE_EXEC
+
+/*
+ * POWER4 and newer have per page execute protection, older chips can only
+ * do this on a segment (256MB) basis.
+ *
* Also, write permissions imply read permissions.
* This is the closest we can get..
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
*/
#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY_X
+#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
#define __P011 PAGE_COPY_X
#define __P100 PAGE_READONLY
#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY
+#define __P110 PAGE_COPY_X
#define __P111 PAGE_COPY_X
#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY_X
+#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED_X
-#define __S100 PAGE_READONLY
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED
+#define __S110 PAGE_SHARED_X
#define __S111 PAGE_SHARED_X
#ifndef __ASSEMBLY__
@@ -438,7 +447,7 @@
static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
{
unsigned long bits = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
+ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
unsigned long old, tmp;
__asm__ __volatile__(
diff -urN linux-2.5/include/asm-ppc64/processor.h test/include/asm-ppc64/processor.h
--- linux-2.5/include/asm-ppc64/processor.h 2005-03-07 10:46:39.000000000 +1100
+++ test/include/asm-ppc64/processor.h 2005-03-15 17:08:21.000000000 +1100
@@ -173,6 +173,11 @@
#define SPRN_DEC 0x016 /* Decrement Register */
#define SPRN_DMISS 0x3D0 /* Data TLB Miss Register */
#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */
+#define DSISR_NOHPTE 0x40000000 /* no translation found */
+#define DSISR_PROTFAULT 0x08000000 /* protection fault */
+#define DSISR_ISSTORE 0x02000000 /* access was a store */
+#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
+#define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */
#define SPRN_EAR 0x11A /* External Address Register */
#define SPRN_ESR 0x3D4 /* Exception Syndrome Register */
#define ESR_IMCP 0x80000000 /* Instr. Machine Check - Protection */
diff -urN linux-2.5/include/asm-ppc64/sections.h test/include/asm-ppc64/sections.h
--- linux-2.5/include/asm-ppc64/sections.h 2004-02-12 14:57:14.000000000 +1100
+++ test/include/asm-ppc64/sections.h 2005-03-15 16:55:05.000000000 +1100
@@ -17,4 +17,13 @@
#define __openfirmware
#define __openfirmwaredata
+
+static inline int in_kernel_text(unsigned long addr)
+{
+ if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
+ return 1;
+
+ return 0;
+}
+
#endif
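
For reference, below is a small user-space sketch (not part of the patch) that exercises the new behaviour: it maps an anonymous page read/write, copies a single blr instruction into it, calls it, and then retries after mprotect() adds PROT_EXEC. The file name and the hand-built function descriptor are illustrative only; the descriptor is needed because a 64-bit ELFv1 function pointer points at a descriptor rather than at code. On a POWER4 or later machine running a kernel with this patch, the first call should take a SIGSEGV through the 0x400 path in do_page_fault() above, and the second should return normally, with the copied instruction made visible by the lazy icache flush in the hash_utils.c hunk.

/*
 * nx-test.c -- user-space sketch of per-page no-exec behaviour (illustrative,
 * not part of the patch).  Assumes a 64-bit big-endian ELFv1 binary, hence
 * the hand-built function descriptor.  Build with: gcc -o nx-test nx-test.c
 */
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <setjmp.h>
#include <unistd.h>
#include <sys/mman.h>

static sigjmp_buf env;

static void segv(int sig)
{
	siglongjmp(env, 1);
}

/* ELFv1 function descriptor: entry point, TOC pointer, environment pointer */
struct func_desc {
	void *entry;
	void *toc;
	void *envp;
};

int main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	unsigned int blr = 0x4e800020;	/* "blr": return immediately */
	struct func_desc desc = { NULL, NULL, NULL };
	void (*func)(void);
	void *page;

	signal(SIGSEGV, segv);

	/* Map a page read/write but *not* executable, and put code in it */
	page = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (page == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memcpy(page, &blr, sizeof(blr));
	desc.entry = page;
	func = (void (*)(void))&desc;

	if (sigsetjmp(env, 1) == 0) {
		func();		/* should fault with the patch applied */
		printf("executed from a PROT_READ|PROT_WRITE page (no NX)\n");
	} else {
		printf("SIGSEGV executing from a non-exec page, as expected\n");
	}

	/* Now grant execute permission and try again */
	if (mprotect(page, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
		perror("mprotect");
		return 1;
	}
	if (sigsetjmp(env, 1) == 0) {
		func();
		printf("executed fine after mprotect(..., PROT_EXEC)\n");
	} else {
		printf("still faulting after mprotect(..., PROT_EXEC)\n");
	}
	return 0;
}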