[Skiboot] [RFC PATCH] virtual memory for OPAL boot

Mon Aug 27 12:15:58 AEST 2018

I tried hacking on this a bit more. This turns on HPT virtual memory
quite early in boot. There is a global EA=RA map for "global" mappings
which are things that are always mapped and shared, like text and heap.
Then there are transient per-CPU mappings that use their own private
addresses for temporary mappings of things that are accessed carefully
(e.g., the 0 page interrupt vectors).

VM gets shut down right before the kernel is booted.

This rearranges skiboot.lds.S a bit to group similar regions together
as much as possible, which makes it easier to map things with specific
protections. Everything but text is no-execute, rodata is read only, etc.

Not too sure where I'm going with this. I think it's good to minimise
the amount of time spent in real mode in general to catch bugs. Maybe
this is unintrusive enough to be worthwhile. But this is only boot, I
would like to get to a point where OPAL services run mostly in virtual
mode too, but that would look much different and probably require VM
provided by the OS.

Anyway this "works" (in mambo), it's fairly unintrusive, most code
changes are just juggling link locations around.
---
 asm/head.S           | 106 +--------
 asm/misc.S           | 110 +++++++++
 core/Makefile.inc    |   2 +-
 core/cpu.c           |   4 +
 core/init.c          |  60 +++--
 core/mem_region.c    |  64 +++--
 core/opal.c          |   5 +-
 core/stack.c         |   2 -
 core/vm.c            | 538 +++++++++++++++++++++++++++++++++++++++++++
 hw/fake-nvram.c      |  12 +-
 hw/slw.c             |   4 +-
 include/cpu.h        |   8 +
 include/mem_region.h |   1 +
 include/processor.h  |   7 +-
 include/skiboot.h    |  19 ++
 libstb/container.c   |  12 +-
 skiboot.lds.S        |  94 ++++----
 17 files changed, 853 insertions(+), 195 deletions(-)
 create mode 100644 core/vm.c

diff --git a/asm/head.S b/asm/head.S
index 803fbf1a..729cfe87 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -23,13 +23,6 @@
 
 #define EPAPR_MAGIC	0x65504150
 
-/* Power management instructions */
-#define PPC_INST_NAP		.long 0x4c000364
-#define PPC_INST_SLEEP		.long 0x4c0003a4
-#define PPC_INST_RVWINKLE	.long 0x4c0003e4
-
-#define PPC_INST_STOP		.long 0x4c0002e4
-
 #define GET_STACK(stack_reg,pir_reg)					\
 	sldi	stack_reg,pir_reg,STACK_SHIFT;				\
 	addis	stack_reg,stack_reg,CPU_STACKS_OFFSET at ha;		\
@@ -520,104 +513,6 @@ disable_machine_check:
 1:	mtlr	%r0
 	blr
 
-pm_save_regs:
-	SAVE_GPR(2,%r1)
-	SAVE_GPR(14,%r1)
-	SAVE_GPR(15,%r1)
-	SAVE_GPR(16,%r1)
-	SAVE_GPR(17,%r1)
-	SAVE_GPR(18,%r1)
-	SAVE_GPR(19,%r1)
-	SAVE_GPR(20,%r1)
-	SAVE_GPR(21,%r1)
-	SAVE_GPR(22,%r1)
-	SAVE_GPR(23,%r1)
-	SAVE_GPR(24,%r1)
-	SAVE_GPR(25,%r1)
-	SAVE_GPR(26,%r1)
-	SAVE_GPR(27,%r1)
-	SAVE_GPR(28,%r1)
-	SAVE_GPR(29,%r1)
-	SAVE_GPR(30,%r1)
-	SAVE_GPR(31,%r1)
-	mfcr	%r4
-	mfxer	%r5
-	mfspr	%r6,SPR_HSPRG0
-	mfspr	%r7,SPR_HSPRG1
-	stw	%r4,STACK_CR(%r1)
-	stw	%r5,STACK_XER(%r1)
-	std	%r6,STACK_GPR0(%r1)
-	std	%r7,STACK_GPR1(%r1)
-	blr
-
-.global enter_p8_pm_state
-enter_p8_pm_state:
-	/* Before entering map or rvwinkle, we create a stack frame
-	 * and save our non-volatile registers.
-	 *
-	 * We also save these SPRs:
-	 *
-	 *  - HSPRG0	in GPR0 slot
-	 *  - HSPRG1	in GPR1 slot
-	 *
-	 *  - xxx TODO: HIDs
-	 *  - TODO: Mask MSR:ME during the process
-	 *
-	 * On entry, r3 indicates:
-	 *
-	 *    0 = nap
-	 *    1 = rvwinkle
-	 */
-	mflr	%r0
-	std	%r0,16(%r1)
-	stdu	%r1,-STACK_FRAMESIZE(%r1)
-
-	bl	pm_save_regs
-
-	/* Save stack pointer in struct cpu_thread */
-	std	%r1,CPUTHREAD_SAVE_R1(%r13)
-
-	/* Winkle or nap ? */
-	cmpli	%cr0,0,%r3,0
-	bne	1f
-
-	/* nap sequence */
-	ptesync
-0:	ld	%r0,CPUTHREAD_SAVE_R1(%r13)
-	cmpd	cr0,%r0,%r0
-	bne	0b
-	PPC_INST_NAP
-	b	.
-
-	/* rvwinkle sequence */
-1:	ptesync
-0:	ld	%r0,CPUTHREAD_SAVE_R1(%r13)
-	cmpd	cr0,%r0,%r0
-	bne	0b
-	PPC_INST_RVWINKLE
-	b	.
-
-.global enter_p9_pm_lite_state
-enter_p9_pm_lite_state:
-	mtspr	SPR_PSSCR,%r3
-	PPC_INST_STOP
-	blr
-
-.global enter_p9_pm_state
-enter_p9_pm_state:
-	mflr	%r0
-	std	%r0,16(%r1)
-	stdu	%r1,-STACK_FRAMESIZE(%r1)
-
-	bl	pm_save_regs
-
-	/* Save stack pointer in struct cpu_thread */
-	std	%r1,CPUTHREAD_SAVE_R1(%r13)
-
-	mtspr	SPR_PSSCR,%r3
-	PPC_INST_STOP
-	b	.
-
 /* This is a little piece of code that is copied down to
  * 0x100 for handling power management wakeups
  */
@@ -633,6 +528,7 @@ reset_patch_start:
 .global reset_patch_end
 reset_patch_end:
 
+.global reset_wakeup
 reset_wakeup:
 	/* Get PIR */
 	mfspr	%r31,SPR_PIR
diff --git a/asm/misc.S b/asm/misc.S
index 381590b9..916acf9c 100644
--- a/asm/misc.S
+++ b/asm/misc.S
@@ -123,3 +123,113 @@ cleanup_global_tlb:
 	ptesync
 
 	blr
+
+
+/* Power management instructions */
+#define PPC_INST_NAP		.long 0x4c000364
+#define PPC_INST_SLEEP		.long 0x4c0003a4
+#define PPC_INST_RVWINKLE	.long 0x4c0003e4
+
+#define PPC_INST_STOP		.long 0x4c0002e4
+
+#define SAVE_GPR(reg,sp)	std %r##reg,STACK_GPR##reg(sp)
+#define REST_GPR(reg,sp)	ld %r##reg,STACK_GPR##reg(sp)
+
+pm_save_regs:
+	SAVE_GPR(2,%r1)
+	SAVE_GPR(14,%r1)
+	SAVE_GPR(15,%r1)
+	SAVE_GPR(16,%r1)
+	SAVE_GPR(17,%r1)
+	SAVE_GPR(18,%r1)
+	SAVE_GPR(19,%r1)
+	SAVE_GPR(20,%r1)
+	SAVE_GPR(21,%r1)
+	SAVE_GPR(22,%r1)
+	SAVE_GPR(23,%r1)
+	SAVE_GPR(24,%r1)
+	SAVE_GPR(25,%r1)
+	SAVE_GPR(26,%r1)
+	SAVE_GPR(27,%r1)
+	SAVE_GPR(28,%r1)
+	SAVE_GPR(29,%r1)
+	SAVE_GPR(30,%r1)
+	SAVE_GPR(31,%r1)
+	mfcr	%r4
+	mfxer	%r5
+	mfspr	%r6,SPR_HSPRG0
+	mfspr	%r7,SPR_HSPRG1
+	stw	%r4,STACK_CR(%r1)
+	stw	%r5,STACK_XER(%r1)
+	std	%r6,STACK_GPR0(%r1)
+	std	%r7,STACK_GPR1(%r1)
+	blr
+
+.global enter_p8_pm_state
+enter_p8_pm_state:
+	/* Before entering nap or rvwinkle, we create a stack frame
+	 * and save our non-volatile registers.
+	 *
+	 * We also save these SPRs:
+	 *
+	 *  - HSPRG0	in GPR0 slot
+	 *  - HSPRG1	in GPR1 slot
+	 *
+	 *  - xxx TODO: HIDs
+	 *  - TODO: Mask MSR:ME during the process
+	 *
+	 * On entry, r3 indicates:
+	 *
+	 *    0 = nap
+	 *    1 = rvwinkle
+	 */
+	mflr	%r0
+	std	%r0,16(%r1)
+	stdu	%r1,-STACK_FRAMESIZE(%r1)
+
+	bl	pm_save_regs
+
+	/* Save stack pointer in struct cpu_thread */
+	std	%r1,CPUTHREAD_SAVE_R1(%r13)
+
+	/* Winkle or nap ? */
+	cmpli	%cr0,0,%r3,0
+	bne	1f
+
+	/* nap sequence */
+	ptesync
+0:	ld	%r0,CPUTHREAD_SAVE_R1(%r13)
+	cmpd	cr0,%r0,%r0
+	bne	0b
+	PPC_INST_NAP
+	b	.
+
+	/* rvwinkle sequence */
+1:	ptesync
+0:	ld	%r0,CPUTHREAD_SAVE_R1(%r13)
+	cmpd	cr0,%r0,%r0
+	bne	0b
+	PPC_INST_RVWINKLE
+	b	.
+
+.global enter_p9_pm_lite_state
+enter_p9_pm_lite_state:
+	mtspr	SPR_PSSCR,%r3
+	PPC_INST_STOP
+	blr
+
+.global enter_p9_pm_state
+enter_p9_pm_state:
+	mflr	%r0
+	std	%r0,16(%r1)
+	stdu	%r1,-STACK_FRAMESIZE(%r1)
+
+	bl	pm_save_regs
+
+	/* Save stack pointer in struct cpu_thread */
+	std	%r1,CPUTHREAD_SAVE_R1(%r13)
+
+	mtspr	SPR_PSSCR,%r3
+	PPC_INST_STOP
+	b	.
+
diff --git a/core/Makefile.inc b/core/Makefile.inc
index d3635059..e057f479 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -1,7 +1,7 @@
 # -*-Makefile-*-
 
 SUBDIRS += core
-CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
+CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o vm.o
 CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
 CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o
 CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
diff --git a/core/cpu.c b/core/cpu.c
index 88477f82..15829ede 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -469,12 +469,16 @@ static void cpu_idle_p9(enum cpu_wake_cause wake_on)
 		/* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=1 */
 		psscr = PPC_BIT(42) | PPC_BIT(43) |
 			PPC_BITMASK(54, 55) | PPC_BIT(63);
+		vm_exit();
 		enter_p9_pm_state(psscr);
+		vm_enter();
 	} else {
 		/* stop with EC=0 (resumes) which does not require sreset. */
 		/* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=1 */
 		psscr = PPC_BITMASK(54, 55) | PPC_BIT(63);
+		// vm_exit();
 		enter_p9_pm_lite_state(psscr);
+		// vm_enter();
 	}
 
 	/* Clear doorbell */
diff --git a/core/init.c b/core/init.c
index ca6c468c..8e99ab42 100644
--- a/core/init.c
+++ b/core/init.c
@@ -347,7 +347,7 @@ bool start_preload_kernel(void)
 static bool load_kernel(void)
 {
 	void *stb_container = NULL;
-	struct elf_hdr *kh;
+	struct elf_hdr *kh, *t;
 	int loaded;
 
 	prlog(PR_NOTICE, "INIT: Waiting for kernel...\n");
@@ -386,7 +386,7 @@ static bool load_kernel(void)
 		if (kernel_entry < EXCEPTION_VECTORS_END) {
 			cpu_set_sreset_enable(false);
 			memcpy(NULL, old_vectors, EXCEPTION_VECTORS_END);
-			sync_icache();
+			sync_icache(0);
 		}
 	} else {
 		if (!kernel_size) {
@@ -407,21 +407,25 @@ static bool load_kernel(void)
 	      "INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
 	      kernel_size);
 
-	if (kh->ei_ident != ELF_IDENT) {
+//	t = vm_map((unsigned long)kh, sizeof(*kh));
+	vm_map_global((unsigned long)kh, sizeof(*kh));
+	t = kh;
+	if (t->ei_ident != ELF_IDENT) {
 		prerror("INIT: ELF header not found. Assuming raw binary.\n");
 		return true;
 	}
 
-	if (kh->ei_class == ELF_CLASS_64) {
-		if (!try_load_elf64(kh))
+	if (t->ei_class == ELF_CLASS_64) {
+		if (!try_load_elf64(t))
 			return false;
-	} else if (kh->ei_class == ELF_CLASS_32) {
-		if (!try_load_elf32(kh))
+	} else if (t->ei_class == ELF_CLASS_32) {
+		if (!try_load_elf32(t))
 			return false;
 	} else {
 		prerror("INIT: Neither ELF32 not ELF64 ?\n");
 		return false;
 	}
+//	vm_unmap((unsigned long)kh, sizeof(*kh));
 
 	if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
 		secureboot_verify(RESOURCE_ID_KERNEL,
@@ -481,6 +485,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
 	const struct dt_property *memprop;
 	const char *cmdline, *stdoutp;
 	uint64_t mem_top;
+	uint32_t *t;
 
 	memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
 	if (memprop)
@@ -580,13 +585,23 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
 
 	debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;
 
+	printf("%s:%d\n", __FILE__, __LINE__);
 	fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
+	printf("%s:%d\n", __FILE__, __LINE__);
 
+	t = vm_map(kernel_entry, 4);
+	printf("%s:%d\n", __FILE__, __LINE__);
 	/* Check there is something there before we branch to it */
-	if (*(uint32_t *)kernel_entry == 0) {
+	if (*t == 0) {
 		prlog(PR_EMERG, "FATAL: Kernel is zeros, can't execute!\n");
 		assert(0);
 	}
+	printf("%s:%d\n", __FILE__, __LINE__);
+	vm_unmap(kernel_entry, 4);
+	printf("%s:%d\n", __FILE__, __LINE__);
+
+	/* Go back to realmode and tear down our VM before booting kernel */
+	vm_destroy();
 
 	if (kernel_32bit)
 		start_kernel32(kernel_entry, fdt, mem_top);
@@ -747,23 +762,35 @@ static void setup_branch_null_catcher(void)
 
 void setup_reset_vector(void)
 {
+	static char patch[0x100];
 	uint32_t *src, *dst;
+	uint32_t *t;
+	uint32_t len = (void *)&reset_patch_end - (void *)&reset_patch_start;
 
 	/* Copy the reset code over the entry point. */
 	src = &reset_patch_start;
+	t = vm_map((unsigned long)src, len);
+	memcpy(patch, t, len);
+	vm_unmap((unsigned long)src, len);
+
 	dst = (uint32_t *)0x100;
-	while(src < &reset_patch_end)
-		*(dst++) = *(src++);
-	sync_icache();
+	t = vm_map((unsigned long)dst, len);
+	memcpy(t, patch, len);
+	sync_icache((unsigned long)t);
+	vm_unmap((unsigned long)dst, len);
 	cpu_set_sreset_enable(true);
 }
 
 void copy_exception_vectors(void)
 {
+	void *t;
+
+	t = vm_map(0x0, 0x2000);
+
 	/* Backup previous vectors as this could contain a kernel
 	 * image.
 	 */
-	memcpy(old_vectors, NULL, EXCEPTION_VECTORS_END);
+	memcpy(old_vectors, t, EXCEPTION_VECTORS_END);
 
 	/* Copy from 0x100 to EXCEPTION_VECTORS_END, avoid below 0x100 as
 	 * this is the boot flag used by CPUs still potentially entering
@@ -771,9 +798,10 @@ void copy_exception_vectors(void)
 	 */
 	BUILD_ASSERT((&reset_patch_end - &reset_patch_start) <
 			EXCEPTION_VECTORS_END - 0x100);
-	memcpy((void *)0x100, (void *)(SKIBOOT_BASE + 0x100),
+	memcpy(t + 0x100, (void *)(SKIBOOT_BASE + 0x100),
 			EXCEPTION_VECTORS_END - 0x100);
-	sync_icache();
+	sync_icache((unsigned long)t);
+	vm_unmap(0x0, 0x2000);
 }
 
 static void per_thread_sanity_checks(void)
@@ -971,6 +999,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	 */
 	mem_region_init();
 
+	vm_init();
+
 	/* Reserve HOMER and OCC area */
 	homer_init();
 
@@ -1184,6 +1214,8 @@ void __noreturn __secondary_cpu_entry(void)
 {
 	struct cpu_thread *cpu = this_cpu();
 
+	vm_init_secondary();
+
 	/* Secondary CPU called in */
 	cpu_callin(cpu);
 
diff --git a/core/mem_region.c b/core/mem_region.c
index bd387f3c..5c427523 100644
--- a/core/mem_region.c
+++ b/core/mem_region.c
@@ -65,24 +65,27 @@ static struct mem_region skiboot_os_reserve = {
 	.type		= REGION_OS,
 };
 
-struct mem_region skiboot_heap = {
-	.name		= "ibm,firmware-heap",
-	.start		= HEAP_BASE,
-	.len		= HEAP_SIZE,
-	.type		= REGION_SKIBOOT_HEAP,
-};
-
 static struct mem_region skiboot_code_and_text = {
 	.name		= "ibm,firmware-code",
 	.start		= SKIBOOT_BASE,
 	.len		= HEAP_BASE - SKIBOOT_BASE,
+	.vm_mapped_len	= HEAP_BASE - SKIBOOT_BASE,
 	.type		= REGION_SKIBOOT_FIRMWARE,
 };
 
+struct mem_region skiboot_heap = {
+	.name		= "ibm,firmware-heap",
+	.start		= HEAP_BASE,
+	.len		= HEAP_SIZE,
+	.vm_mapped_len	= HEAP_SIZE,
+	.type		= REGION_SKIBOOT_HEAP,
+};
+
 static struct mem_region skiboot_after_heap = {
 	.name		= "ibm,firmware-data",
 	.start		= HEAP_BASE + HEAP_SIZE,
 	.len		= SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),
+	.vm_mapped_len	= SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),
 	.type		= REGION_SKIBOOT_FIRMWARE,
 };
 
@@ -137,14 +140,6 @@ static struct alloc_hdr *next_hdr(const struct mem_region *region,
 #if POISON_MEM_REGION == 1
 static void mem_poison(struct free_hdr *f)
 {
-	size_t poison_size = (void*)tailer(f) - (void*)(f+1);
-
-	/* We only poison up to a limit, as otherwise boot is
-	 * kinda slow */
-	if (poison_size > POISON_MEM_REGION_LIMIT)
-		poison_size = POISON_MEM_REGION_LIMIT;
-
-	memset(f+1, POISON_MEM_REGION_WITH, poison_size);
 }
 #else
 static inline void mem_poison(struct free_hdr *f __unused) { }
@@ -154,21 +149,40 @@ static inline void mem_poison(struct free_hdr *f __unused) { }
 static void init_allocatable_region(struct mem_region *region)
 {
 	struct free_hdr *f = region_start(region);
+	unsigned int num_longs;
+	unsigned long *t;
+
 	assert(region->type == REGION_SKIBOOT_HEAP ||
 	       region->type == REGION_MEMORY);
-	f->hdr.num_longs = region->len / sizeof(long);
+
+	num_longs = region->len / sizeof(long);
+
+	if (!region->vm_mapped_len) {
+		/* SKIBOOT_BASE-SIZE regions already come mapped */
+		region->vm_mapped_len = PAGE_SIZE;
+		vm_map_global((unsigned long)f, PAGE_SIZE);
+	}
+
+	assert(PAGE_SIZE >= sizeof(*f));
+	assert(region->len >= PAGE_SIZE*2);
+
+	f->hdr.num_longs = num_longs;
 	f->hdr.free = true;
 	f->hdr.prev_free = false;
-	*tailer(f) = f->hdr.num_longs;
 	list_head_init(&region->free_list);
 	list_add(&region->free_list, &f->list);
-	mem_poison(f);
+
+	t = vm_map((unsigned long)tailer(f), sizeof(long));
+//	*tailer(f) = num_longs;
+	*t = num_longs;
+	vm_unmap((unsigned long)tailer(f), sizeof(long));
 }
 
 static void make_free(struct mem_region *region, struct free_hdr *f,
 		      const char *location, bool skip_poison)
 {
 	struct alloc_hdr *next;
+	unsigned long *t;
 
 	if (!skip_poison)
 		mem_poison(f);
@@ -192,7 +206,10 @@ static void make_free(struct mem_region *region, struct free_hdr *f,
 	}
 
 	/* Fix up tailer. */
-	*tailer(f) = f->hdr.num_longs;
+	t = vm_map((unsigned long)tailer(f), sizeof(long));
+//	*tailer(f) = f->hdr.num_longs;
+	*t = f->hdr.num_longs;
+	vm_unmap((unsigned long)tailer(f), sizeof(long));
 
 	/* If next is free, coalesce it */
 	next = next_hdr(region, &f->hdr);
@@ -381,6 +398,7 @@ static void *__mem_alloc(struct mem_region *region, size_t size, size_t align,
 	size_t alloc_longs, offset;
 	struct free_hdr *f;
 	struct alloc_hdr *next;
+	unsigned long newsz;
 
 	/* Align must be power of 2. */
 	assert(!((align - 1) & align));
@@ -455,6 +473,14 @@ found:
 		/* This coalesces as required. */
 		make_free(region, pre, location, true);
 	}
+	
+	newsz = ((void *)((unsigned long *)f + alloc_longs) - region_start(region) + sizeof(struct free_hdr));
+	if (newsz > region->vm_mapped_len) {
+		newsz += PAGE_SIZE-1;
+		newsz &= ~(PAGE_SIZE-1);
+		vm_map_global((unsigned long)region_start(region) + region->vm_mapped_len, newsz - region->vm_mapped_len);
+		region->vm_mapped_len = newsz;
+	}
 
 	/* We might be too long; put the rest back. */
 	discard_excess(region, &f->hdr, alloc_longs, location, true);
diff --git a/core/opal.c b/core/opal.c
index 7ffca9c1..14469062 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -320,9 +320,12 @@ opal_call(OPAL_QUIESCE, opal_quiesce, 2);
 
 void __opal_register(uint64_t token, void *func, unsigned int nargs)
 {
+	uint64_t *t;
 	assert(token <= OPAL_LAST);
 
-	opal_branch_table[token] = function_entry_address(func);
+	t = vm_map((unsigned long)&opal_branch_table[token], sizeof(uint64_t));
+	*t = function_entry_address(func);
+	vm_unmap((unsigned long)&opal_branch_table[token], sizeof(uint64_t));
 	opal_num_args[token] = nargs;
 }
 
diff --git a/core/stack.c b/core/stack.c
index 73700ce5..3a86a376 100644
--- a/core/stack.c
+++ b/core/stack.c
@@ -26,8 +26,6 @@
 #define STACK_BUF_ENTRIES	60
 static struct bt_entry bt_buf[STACK_BUF_ENTRIES];
 
-extern uint32_t _stext, _etext;
-
 /* Dumps backtrace to buffer */
 void __nomcount ___backtrace(struct bt_entry *entries, unsigned int *count,
 				unsigned long r1,
diff --git a/core/vm.c b/core/vm.c
new file mode 100644
index 00000000..f97f6f2d
--- /dev/null
+++ b/core/vm.c
@@ -0,0 +1,538 @@
+/* Copyright 2018 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <stack.h>
+#include <cpu.h>
+#include <trace.h>
+#include <ccan/str/str.h>
+#include <ccan/container_of/container_of.h>
+
+static bool vm_setup = false;
+
+#define SLB_SZ		(256UL*1024*1024)
+#define SLB_NR		32
+#define LOCAL_SLB_NR	2
+#define GLOBAL_SLB_NR	(SLB_NR - LOCAL_SLB_NR)
+#define LOCAL_SLB_BASE	GLOBAL_SLB_NR
+
+struct slbe {
+	int valid;
+	unsigned long esid;
+	unsigned long vsid;
+};
+
+/* Entries LOCAL_SLB_BASE..SLB_NR-1 (30 and 31) are reserved for local SLB maps */
+static struct slbe global_slb[GLOBAL_SLB_NR];
+static int global_slb_used = 0;
+
+static void slb_install(unsigned long esid, unsigned long vsid, unsigned int index)
+{
+	unsigned long rs;
+	unsigned long rb;
+
+	rs = vsid << (63-51);		/* 256MB VSID */
+	rs |= 1UL << (63-53);		/* Kp = 1 */
+
+	rb = esid << (63-35);		/* 256MB ESID */
+	rb |= 1UL << (63-36);		/* V = 1 */
+	rb |= index;
+
+	asm volatile("slbmte %0,%1" : : "r"(rs), "r"(rb) : "memory");
+}
+
+#if 0
+static void slb_remove(unsigned long esid)
+{
+	asm volatile("isync ; slbie %0 ; isync" : : "r"(esid << 28) : "memory");
+}
+#endif
+
+static void slb_remove_all(void)
+{
+	asm volatile("isync ; slbmte %0,%0 ; slbia ; isync" : : "r"(0) : "memory");
+}
+
+static bool global_slb_hit(unsigned long esid)
+{
+	int i;
+
+	for (i = 0; i < global_slb_used; i++) {
+		struct slbe *s = &global_slb[i];
+
+		if (!s->valid)
+			continue;
+
+//		printf("global slb hit esid:%lx s->esid:%lx\n", esid, s->esid);
+		if (s->esid == esid)
+			return true;
+	}
+	return false;
+}
+
+static void global_slb_add(unsigned long esid, unsigned long vsid)
+{
+	struct slbe *s = &global_slb[global_slb_used];
+
+	assert(!global_slb_hit(esid));
+	assert(global_slb_used < GLOBAL_SLB_NR);
+	global_slb_used++;
+
+	s->valid = 1;
+	s->esid = esid;
+	s->vsid = vsid;
+}
+
+static void sync_global_slb(void)
+{
+	int i;
+
+//	slb_remove_all();
+	for (i = 0; i < GLOBAL_SLB_NR; i++) {
+		struct slbe *s = &global_slb[i];
+
+		if (!s->valid)
+			continue;
+		slb_install(s->esid, s->vsid, i);
+	}
+}
+
+static void cpu_sync_global_slb(void *arg __unused)
+{
+	// printf("CPU PIR 0x%04x cpu_sync_global_slb\n", this_cpu()->pir);
+	sync_global_slb();
+}
+
+static void cpu_all_sync_global_slb(void)
+{
+	struct cpu_thread *cpu;
+
+	cpu_sync_global_slb(NULL);
+	return;
+
+	/* XXX: deadlocks allocating memory */
+	for_each_available_cpu(cpu) {
+		if (cpu == this_cpu()) {
+			cpu_sync_global_slb(NULL);
+			continue;
+		}
+		cpu_wait_job(cpu_queue_job(cpu, "cpu_sync_global_slb",
+			cpu_sync_global_slb, NULL), true);
+	}
+}
+
+struct hpte {
+	unsigned long dword[2];
+};
+
+struct hpteg {
+	struct hpte hpte[8];
+};
+
+static struct hpteg *htab;
+static unsigned long htab_nr_bytes;
+static unsigned long htab_nr_ptegs;
+static unsigned long htab_pteg_mask;
+
+static void htab_install(unsigned long va, unsigned long pa, int rw, int ex, int global)
+{
+	unsigned long hash;
+	struct hpteg *hpteg;
+	unsigned int i;
+
+	hash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);
+	hpteg = &htab[hash & htab_pteg_mask];
+
+	for (i = 0; i < 8; i++) {
+		struct hpte *hpte = &hpteg->hpte[i];
+		unsigned long ava = va >> 23;
+		unsigned long arpn = pa >> 12;
+		unsigned long dw0, dw1;
+
+		dw0 = be64_to_cpu(hpte->dword[0]);
+		if (dw0 & 1) {
+			if (dw0 >> 7 == ava) {
+				printf("HTAB collision va:%lx pa:%lx rw:%d ex:%d global:%d\n", va, pa, rw, ex, global);
+				assert(0);
+				return;
+			}
+			continue;
+		}
+
+		assert(!hpte->dword[0]);
+		assert(!hpte->dword[1]);
+
+		dw0 = (ava << (63-56)) | 0x1;
+		if (!global)
+			dw0 |= 0x8;
+
+		dw1 = (arpn << (63-43 - 8));
+		if (!rw)
+			dw1 |= (1UL << (63 - 0)) | (1UL << (63 - 63 + 1));
+		if (!ex)
+			dw1 |= (1UL << (63 - 61));
+		dw1 |= (1UL << (63 - 60 + 1)); /* WIMG = 0010 */
+
+		hpte->dword[1] = cpu_to_be64(dw1);
+		eieio();
+		hpte->dword[0] = cpu_to_be64(dw0);
+
+		return;
+	}
+	assert(0);
+}
+
+static void htab_remove(unsigned long va, int global)
+{
+	unsigned long hash;
+	struct hpteg *hpteg;
+	unsigned int i;
+
+	hash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);
+	hpteg = &htab[hash & htab_pteg_mask];
+
+	for (i = 0; i < 8; i++) {
+		struct hpte *hpte = &hpteg->hpte[i];
+		unsigned long ava = va >> 23;
+		unsigned long dw0;
+
+		dw0 = be64_to_cpu(hpte->dword[0]);
+
+		if (!(dw0 & 1)) {
+			assert(!hpte->dword[0]);
+			assert(!hpte->dword[1]);
+			continue;
+		}
+
+		if (dw0 >> 7 != ava)
+			continue;
+
+		if (global)
+			assert(!(dw0 & 0x8));
+		else
+			assert(dw0 & 0x8);
+
+		hpte->dword[0] = 0;
+		eieio();
+		hpte->dword[1] = 0;
+		eieio();
+
+		if (global) {
+			asm volatile("tlbie %0,%1" : : "r"(ava<<12), "r"(0));
+			asm volatile("eieio ; tlbsync ; ptesync" ::: "memory");
+		} else {
+			asm volatile("tlbiel %0" : : "r"(ava<<12));
+			asm volatile("ptesync" ::: "memory");
+		}
+		return;
+	}
+	assert(0);
+}
+
+void vm_map_global(unsigned long addr, unsigned long len) /* map [addr, addr+len) EA=RA in the global HPT */
+{
+	unsigned long va;
+	unsigned long esid = addr >> 28;	/* 256MB segment number of the first page */
+	unsigned long end = addr + len;
+	bool need_sync = false;	/* becomes true only when a new global SLB entry is added */
+
+	assert(vm_setup);
+	assert((addr >= SKIBOOT_BASE + SKIBOOT_SIZE) || (addr + len) <= SKIBOOT_BASE); /* range must lie outside the skiboot image */
+
+	end = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);	/* round end up to a page */
+	addr &= ~(PAGE_SIZE - 1);				/* round start down to a page */
+	len = end - addr;
+
+	// printf("vm_map_global: %lx-%lx\n", addr, addr + len);
+
+	if (!global_slb_hit(esid)) {
+		global_slb_add(esid, esid);	/* EA=RA, so VSID == ESID */
+		need_sync = true;
+	}
+
+	for (va = addr; va < end; va += PAGE_SIZE) {
+		if (va >> 28 != esid) {		/* crossed into the next segment */
+			esid = va >> 28;
+			if (!global_slb_hit(esid)) {
+				global_slb_add(esid, esid);
+				need_sync = true;
+			}
+		}
+
+		htab_install(va, va, 1, 0, 1);	/* rw=1, ex=0, global=1 */
+	}
+
+	if (need_sync)	/* only reload the SLB when the global table actually changed */
+		cpu_all_sync_global_slb();
+}
+
+void *vm_map(unsigned long addr, unsigned long len) /* temporary per-CPU mapping; returns the EA to use for addr */
+{
+	struct cpu_thread *c = this_cpu();
+	unsigned long va;
+	unsigned long esid = (0x0800000000000000ULL + (c->pir << 28)) >> 28;
+	unsigned long vsid = (unsigned long)c->pir << 30; /* per-cpu VA */
+	unsigned long end = addr + len;
+	unsigned long offset = addr & (PAGE_SIZE - 1);	/* offset of addr within its first page */
+
+	/* Can't do nested mappings */
+	assert(!c->vm_local_map);
+	c->vm_local_map = true;	/* cleared again by vm_unmap() */
+
+	if (!c->vm_setup)	/* this CPU has not entered VM: hand back the address unchanged */
+		return (void *)addr;
+
+	end = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	len = end - addr;
+
+	// printf("vm_map: %lx-%lx esid:%lx\n", addr, addr + len, esid);
+
+	assert(len < (1 << 28)); /* same segment */
+
+	va = vsid << 28;
+	while (addr < end) {
+		htab_install(va, addr, 1, 0, 0);	/* rw=1, ex=0, global=0 (local) */
+		va += PAGE_SIZE;
+		addr += PAGE_SIZE;
+	}
+	/* addr == end here, so report the mapped range as end - len .. end */
+	printf("vm_map: %lx-%lx esid:%lx vsid:%lx addr=%lx\n", end - len, end, esid, vsid, (esid<<28)+offset);
+
+	return (void *)(esid << 28) + offset;
+}
+
+void vm_unmap(unsigned long addr, unsigned long len)
+{
+	struct cpu_thread *c = this_cpu();
+	unsigned long va;
+	unsigned long vsid = (unsigned long)c->pir << 30; /* per-cpu VA */
+	unsigned long end = addr + len;
+
+	assert(c->vm_local_map);
+	c->vm_local_map = false;
+
+	if (!c->vm_setup)
+		return;
+
+	end = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	len = end - addr;
+
+	// printf("vm_unmap: %lx-%lx esid:%lx\n", addr, addr + len, esid);
+
+	assert(len < (1 << 28)); /* same segment */
+
+	va = vsid << 28;
+	while (addr < end) {
+		htab_remove(va, 0);
+		va += PAGE_SIZE;
+		addr += PAGE_SIZE;
+	}
+}
+
+struct prte {
+	unsigned long dword[2];
+};
+
+static struct prte *prtab;
+
+static unsigned long stack_end = SKIBOOT_BASE + SKIBOOT_SIZE;
+
+void vm_map_stacks(void)
+{
+	unsigned long start = stack_end;
+	unsigned long end = start + (cpu_max_pir + 1)*STACK_SIZE;
+	unsigned long va;
+
+	if (start == end)
+		return;
+
+	for (va = start; va < end; va += PAGE_SIZE)
+		htab_install(va, va, 1, 0, 1);
+
+	stack_end = end;
+}
+
+static void vm_init_cpu(void)
+{
+	struct cpu_thread *c = this_cpu();
+	unsigned long esid = (0x0800000000000000ULL + (c->pir << 28)) >> 28;
+	unsigned long vsid = (unsigned long)c->pir << 30; /* per-cpu VA */
+
+	mtspr(SPR_LPCR, mfspr(SPR_LPCR) &
+		~(PPC_BITMASK(0,3) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(54)));
+	mtspr(SPR_LPID, 0);
+	mtspr(SPR_PID, 0);
+	mtspr(SPR_HRMOR, 0);
+	mtspr(SPR_PTCR, (unsigned long)prtab);
+
+	sync_global_slb();
+
+	slb_install(esid, vsid, LOCAL_SLB_BASE);
+}
+
+static void vm_cleanup_cpu(void)
+{
+	slb_remove_all();
+	// XXX: have the last thread per core clear this reg
+	// mtspr(SPR_PTCR, 0);
+}
+
+void vm_init_secondary(void)
+{
+	vm_init_cpu();
+	vm_enter();
+}
+
+void vm_enter(void)
+{
+	struct cpu_thread *c = this_cpu();
+
+	assert(vm_setup);
+	assert(!c->vm_setup);
+	c->vm_setup = true;
+	mtmsr(mfmsr() | (MSR_IR|MSR_DR));
+}
+
+void vm_exit(void)
+{
+	struct cpu_thread *c = this_cpu();
+
+	assert(vm_setup);
+	assert(c->vm_setup);
+	c->vm_setup = false;
+	mtmsr(mfmsr() & ~(MSR_IR|MSR_DR));
+}
+
+static void cpu_stop_vm(void *arg __unused)
+{
+	printf("CPU PIR 0x%04x cpu_stop_vm\n", this_cpu()->pir);
+	vm_exit();
+	vm_cleanup_cpu();
+}
+
+static void cpu_all_stop_vm(void) /* run cpu_stop_vm() on every available CPU, including this one */
+{
+	struct cpu_thread *cpu;
+	struct cpu_job **jobs;	/* indexed by PIR; NULL where no job was queued */
+
+	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1)); /* one slot per PIR 0..cpu_max_pir */
+	assert(jobs);
+
+	for_each_available_cpu(cpu) {
+		if (cpu == this_cpu())
+			continue;
+		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_stop_vm",
+						cpu_stop_vm, NULL);
+	}
+
+	/* this cpu */
+	cpu_stop_vm(NULL);
+
+	for_each_available_cpu(cpu) {
+		if (jobs[cpu->pir])	/* this CPU's slot stays NULL (zalloc) */
+			cpu_wait_job(jobs[cpu->pir], true);
+	}
+
+	free(jobs);
+}
+
+void vm_init(void)
+{
+	unsigned long va;
+
+//	prtab = local_alloc(0, 64*1024, 64*1024);
+	prtab = memalign(64*1024, 64*1024);
+	assert(prtab);
+	memset(prtab, 0, 64*1024);
+
+	global_slb_add(SKIBOOT_BASE >> 28, SKIBOOT_BASE >> 28);
+
+	htab_nr_bytes = 1UL<<18;
+	htab_nr_ptegs = htab_nr_bytes / sizeof(struct hpteg);
+	htab_pteg_mask = htab_nr_ptegs - 1;
+//	htab = local_alloc(0, htab_nr_bytes, 1UL<<18);
+	htab = memalign(1UL<<18, htab_nr_bytes);
+	assert(htab);
+	memset(htab, 0, htab_nr_bytes);
+
+	prtab[0].dword[0] = cpu_to_be64((unsigned long)htab);
+	prtab[0].dword[1] = 0;
+
+	eieio();
+
+	vm_init_cpu();
+
+//	for (va = (unsigned long)_stext; va < HEAP_BASE; va += PAGE_SIZE) {
+	for (va = (unsigned long)_stext; va < (unsigned long)_end; va += PAGE_SIZE) {
+		if (va >= (unsigned long)_stext && va <= (unsigned long)_etext)
+			htab_install(va, va, 0, 1, 1); /* text */
+		else if (va >= (unsigned long)__rodata_start &&
+				va <= (unsigned long)__rodata_end)
+			htab_install(va, va, 0, 0, 1);
+		else if (va >= (unsigned long)_sdata &&
+				va <= (unsigned long)_edata)
+			htab_install(va, va, 1, 0, 1);
+		else if (va >= (unsigned long)__sym_map_start &&
+				va <= (unsigned long)__sym_map_end)
+			htab_install(va, va, 0, 0, 1);
+		else if (va >= (unsigned long)_sbss &&
+				va <= (unsigned long)_ebss)
+			htab_install(va, va, 1, 0, 1);
+	}
+	for (; va < SKIBOOT_BASE + SKIBOOT_SIZE; va += PAGE_SIZE)
+		htab_install(va, va, 1, 0, 1);
+
+	vm_map_stacks();
+
+	printf("VMM: SETUP\n");
+	printf(" PRTAB:%p\n", prtab);
+	printf(" HTAB: %p\n", htab);
+	printf(" Global mappings\n");
+	printf("  text   %lx-%lx\n", (unsigned long)_stext, (unsigned long)_etext);
+	printf("  rodata %lx-%lx\n", (unsigned long)__rodata_start, (unsigned long)__rodata_end);
+	printf("  data   %lx-%lx\n", (unsigned long)_sdata, (unsigned long)_edata);
+	printf("  sym    %lx-%lx\n", (unsigned long)__sym_map_start, (unsigned long)__sym_map_end);
+	printf("  bss    %lx-%lx\n", (unsigned long)_sbss, (unsigned long)_ebss);
+	printf("  heap   %lx-%lx\n", (unsigned long)HEAP_BASE, (unsigned long)SKIBOOT_BASE + SKIBOOT_SIZE);
+	printf("  stacks %lx-%lx\n", (unsigned long)SKIBOOT_BASE + SKIBOOT_SIZE, stack_end);
+
+	eieio();
+
+	vm_setup = true;
+
+	vm_enter();
+}
+
+void vm_destroy(void)
+{
+	assert(vm_setup);
+
+	printf("VMM: TEARDOWN\n");
+
+	cpu_all_stop_vm();
+
+	vm_setup = false;
+
+	/* XXX: don't need to remove */
+//	for (va = SKIBOOT_BASE; va < SKIBOOT_BASE + SKIBOOT_SIZE + (cpu_max_pir + 1) * STACK_SIZE; va += PAGE_SIZE)
+//		htab_remove(va, 1);
+
+	free(htab);
+	free(prtab);
+}
diff --git a/hw/fake-nvram.c b/hw/fake-nvram.c
index 236ad5b9..50b76eb5 100644
--- a/hw/fake-nvram.c
+++ b/hw/fake-nvram.c
@@ -36,12 +36,16 @@ int fake_nvram_info(uint32_t *total_size)
 
 int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len)
 {
+	void *t;
+
 	if (!nvram_region)
 		return -ENODEV;
 
+	t = vm_map(nvram_region->start + src, len);
 	lock(&fake_nvram_lock);
-	memcpy(dst, (void *) (nvram_region->start + src), len);
+	memcpy(dst, t, len);
 	unlock(&fake_nvram_lock);
+	vm_unmap(nvram_region->start + src, len);
 
 	nvram_read_complete(true);
 
@@ -50,12 +54,16 @@ int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len)
 
 int fake_nvram_write(uint32_t offset, void *src, uint32_t size)
 {
+	void *t;
+
 	if (!nvram_region)
 		return OPAL_HARDWARE;
 
+	t = vm_map(nvram_region->start + offset, size);
 	lock(&fake_nvram_lock);
-	memcpy((void *) (nvram_region->start + offset), src, size);
+	memcpy(t, src, size);
 	unlock(&fake_nvram_lock);
+	vm_unmap(nvram_region->start + offset, size);
 
 	return 0;
 }
diff --git a/hw/slw.c b/hw/slw.c
index dfa9189b..32d6628d 100644
--- a/hw/slw.c
+++ b/hw/slw.c
@@ -164,7 +164,7 @@ static void slw_patch_reset(void)
 		*(sav++) = *(dst);
 		*(dst++) = *(src++);
 	}
-	sync_icache();
+	sync_icache(0);
 }
 
 static void slw_unpatch_reset(void)
@@ -180,7 +180,7 @@ static void slw_unpatch_reset(void)
 		*(dst++) = *(sav++);
 		src++;
 	}
-	sync_icache();
+	sync_icache(0);
 }
 
 static bool slw_general_init(struct proc_chip *chip, struct cpu_thread *c)
diff --git a/include/cpu.h b/include/cpu.h
index 2fe47982..64e35a51 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -82,10 +82,18 @@ struct cpu_thread {
 	struct bt_entry			stack_bot_bt[CPU_BACKTRACE_SIZE];
 	unsigned int			stack_bot_bt_count;
 #endif
+	/*
+	 * Per-thread VM parameters
+	 */
+	bool				vm_setup; /* virtual memory is up */
+	bool				vm_local_map; /* local mapping */
+	bool				vm_local_slb; /* local SLB used */
+
 	struct lock			job_lock;
 	struct list_head		job_queue;
 	uint32_t			job_count;
 	bool				job_has_no_return;
+
 	/*
 	 * Per-core mask tracking for threads in HMI handler and
 	 * a cleanup done bit.
diff --git a/include/mem_region.h b/include/mem_region.h
index 018dfa0e..415cbf4d 100644
--- a/include/mem_region.h
+++ b/include/mem_region.h
@@ -46,6 +46,7 @@ struct mem_region {
 	struct list_node list;
 	const char *name;
 	uint64_t start, len;
+	uint64_t vm_mapped_len;
 	struct dt_node *node;
 	enum mem_region_type type;
 	struct list_head free_list;
diff --git a/include/processor.h b/include/processor.h
index 6b262b45..6f815bb4 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -53,6 +53,7 @@
 #define SPR_SRR1	0x01b	/* RW: Exception save/restore reg 1 */
 #define SPR_CFAR	0x01c	/* RW: Come From Address Register */
 #define SPR_AMR		0x01d	/* RW: Authority Mask Register */
+#define SPR_PID		0x030	/* RW: PID register */
 #define SPR_IAMR	0x03d	/* RW: Instruction Authority Mask Register */
 #define SPR_RPR		0x0ba   /* RW: Relative Priority Register */
 #define SPR_TBRL	0x10c	/* RO: Timebase low */
@@ -75,10 +76,12 @@
 #define SPR_HSRR1	0x13b	/* RW: HV Exception save/restore reg 1 */
 #define SPR_TFMR	0x13d
 #define SPR_LPCR	0x13e
+#define SPR_LPID	0x13f	/* RW: LPID register */
 #define SPR_HMER	0x150	/* Hypervisor Maintenance Exception */
 #define SPR_HMEER	0x151	/* HMER interrupt enable mask */
 #define SPR_PCR		0x152
 #define SPR_AMOR	0x15d
+#define SPR_PTCR	0x1d0	/* RW: Partition table control register */
 #define SPR_PSSCR	0x357   /* RW: Stop status and control (ISA 3) */
 #define SPR_TSCR	0x399
 #define SPR_HID0	0x3f0
@@ -324,9 +327,9 @@ static inline void isync(void)
 /*
  * Cache sync
  */
-static inline void sync_icache(void)
+static inline void sync_icache(unsigned long ptr)
 {
-	asm volatile("sync; icbi 0,%0; sync; isync" : : "r" (0) : "memory");
+	asm volatile("sync; icbi 0,%0; sync; isync" : : "r" (ptr) : "memory"); /* icbi is issued on address ptr */
 }
 
 /*
diff --git a/include/skiboot.h b/include/skiboot.h
index bba76c12..246a7344 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -49,8 +49,13 @@ struct mem_region;
 extern struct mem_region *mem_region_next(struct mem_region *region);
 
 #ifndef __TESTING__
+extern char _stext[], _etext[];
 /* Readonly section start and end. */
 extern char __rodata_start[], __rodata_end[];
+extern char _sdata[], _edata[];
+extern char __sym_map_start[], __sym_map_end[];
+extern char _sbss[], _ebss[];
+extern char _end[];
 
 static inline bool is_rodata(const void *p)
 {
@@ -291,4 +296,18 @@ extern int fake_nvram_info(uint32_t *total_size);
 extern int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len);
 extern int fake_nvram_write(uint32_t offset, void *src, uint32_t size);
 
+/* core/vm.c: boot-time virtual memory (global and transient mappings) */
+#define PAGE_SIZE 4096
+
+void vm_map_global(unsigned long addr, unsigned long len);
+void *vm_map(unsigned long addr, unsigned long len);
+void vm_unmap(unsigned long addr, unsigned long len);
+void vm_init(void);
+void vm_destroy(void);
+void vm_init_secondary(void);
+void vm_enter(void);
+void vm_exit(void);
+void vm_exit_cleanup(void);
+void vm_map_stacks(void);
+
 #endif /* __SKIBOOT_H */
diff --git a/libstb/container.c b/libstb/container.c
index a720fbbf..68111796 100644
--- a/libstb/container.c
+++ b/libstb/container.c
@@ -19,14 +19,20 @@
 
 bool stb_is_container(const void *buf, size_t size)
 {
+	uint32_t *t;	/* mapped view of c->magic_number */
 	ROM_container_raw *c;
+	bool ret = true;
 
 	c = (ROM_container_raw*) buf;
 	if (!buf || size < SECURE_BOOT_HEADERS_SIZE)
 		return false;
-	if (be32_to_cpu(c->magic_number) != ROM_MAGIC_NUMBER )
-		return false;
-	return true;
+
+	t = vm_map((unsigned long)&c->magic_number, sizeof(*t));
+	if (be32_to_cpu(*t) != ROM_MAGIC_NUMBER)
+		ret = false;
+	vm_unmap((unsigned long)&c->magic_number, sizeof(*t));	/* unmap before returning either result */
+
+	return ret;
 }
 
 uint32_t stb_payload_magic(const void *buf, size_t size)
diff --git a/skiboot.lds.S b/skiboot.lds.S
index a6e71077..a21e9af9 100644
--- a/skiboot.lds.S
+++ b/skiboot.lds.S
@@ -51,35 +51,41 @@ SECTIONS
 		KEEP(*(.cpuctrl.data))
 	}
 
+	/* Do I need to keep these ? */
+	.dynsym : { *(.dynsym)	}
+	.dynstr : { *(.dynstr)	}
+
+	/* Relocations */
 	. = ALIGN(0x10);
+	.dynamic : {
+		__dynamic_start = .;
+		*(.dynamic)
+		__dynamic_end = .;
+	}
+
+	. = ALIGN(0x10);
+	.rela.dyn : {
+		__rela_dyn_start = .;
+		*(.rela*)
+		__rela_dyn_end = .;
+	}
+
+	.hash   : { *(.hash)   }
+	.dynsym : { *(.dynsym) }
+	.dynstr : { *(.dynstr) }
+
+	. = ALIGN(0x1000);
 	_stext = .;
  	.text : {
 		*(.text*)
 		*(.sfpr)
 	}
 	_etext = .;
+	. = ALIGN(0x1000);
 
+	__rodata_start = .;
 	.rodata : {
-		__rodata_start = .;
 		*(.rodata .rodata.*)
-		__rodata_end = .;
-	}
-
-	.data : {
-		/*
-		 * A couple of things that need to be 4K aligned and
-		 * to reside in their own pages for the sake of TCE
-		 * mappings
-		 */
-		. = ALIGN(0x1000);
-		*(.data.memcons);
-		. = ALIGN(0x1000);
-		*(.data.boot_trace);
-		. = ALIGN(0x1000);
-		*(.data*)
-		*(.force.data)
-		*(.toc1)
-		*(.branch_lt)
 	}
 
 	. = ALIGN(0x10);
@@ -103,43 +109,43 @@ SECTIONS
 		*(.toc)
 	}
 
-	. = ALIGN(0x10);
-	.opal_table : {
-		__opal_table_start = .;
-		KEEP(*(.opal_table))
-		__opal_table_end = .;
-	}
-
 	.platforms : {
 		__platforms_start = .;
 		KEEP(*(.platforms))
 		__platforms_end = .;
 	}
 
-	/* Do I need to keep these ? */
-	.dynsym : { *(.dynsym)	}
-	.dynstr : { *(.dynstr)	}
-
-	/* Relocations */
 	. = ALIGN(0x10);
-	.dynamic : {
-		__dynamic_start = .;
-		*(.dynamic)
-		__dynamic_end = .;
+	.opal_table : {
+		__opal_table_start = .;
+		KEEP(*(.opal_table))
+		__opal_table_end = .;
 	}
+	__rodata_end = .;
 
-	. = ALIGN(0x10);
-	.rela.dyn : {
-		__rela_dyn_start = .;
-		*(.rela*)
-		__rela_dyn_end = .;
+	. = ALIGN(0x1000);
+
+	_sdata = .;
+	.data : {
+		/*
+		 * A couple of things that need to be 4K aligned and
+		 * to reside in their own pages for the sake of TCE
+		 * mappings
+		 */
+		. = ALIGN(0x1000);
+		*(.data.memcons);
+		. = ALIGN(0x1000);
+		*(.data.boot_trace);
+		. = ALIGN(0x1000);
+		*(.data*)
+		*(.force.data)
+		*(.toc1)
+		*(.branch_lt)
 	}
+	_edata = .;
 
-	.hash   : { *(.hash)   }
-	.dynsym : { *(.dynsym) }
-	.dynstr : { *(.dynstr) }
+	. = ALIGN(0x1000);
 
-	. = ALIGN(0x10);
 	.sym_map : {
 		__sym_map_start = . ;
 		KEEP(*(.sym_map))
-- 
2.18.0