[Skiboot] [RFC PATCH] lol: virtual memory for OPAL boot
Nicholas Piggin
npiggin at gmail.com
Fri Feb 16 17:59:29 AEDT 2018
This is a really quick hack to make skiboot boot with virtual
memory, at least in mambo (real hardware would require much more
work with MMIO etc).
Basically each CPU gets SLB and page tables for code, heap, and
stack bolted, but then has to map and unmap any other regions it
wants to touch.
All addresses are mapped 1:1 and there is no real protection between
different CPUs (except for what's in each CPU's SLB). Even so, it can
still detect some wayward accesses.
Virtual memory gets shut down before booting the kernel.
---
core/Makefile.inc | 2 +-
core/cpu.c | 4 +
core/init.c | 53 +++++++--
core/stack.c | 2 +-
core/vm.c | 335 ++++++++++++++++++++++++++++++++++++++++++++++++++++
hw/fake-nvram.c | 5 +
include/processor.h | 3 +
include/skiboot.h | 12 ++
libstb/container.c | 14 ++-
skiboot.lds.S | 4 +-
10 files changed, 421 insertions(+), 13 deletions(-)
create mode 100644 core/vm.c
diff --git a/core/Makefile.inc b/core/Makefile.inc
index 5c120564..90acdd56 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -1,7 +1,7 @@
# -*-Makefile-*-
SUBDIRS += core
-CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
+CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o vm.o
CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o
CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
diff --git a/core/cpu.c b/core/cpu.c
index 213f80ee..71dd2f9b 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -407,12 +407,16 @@ static void cpu_idle_p9(enum cpu_wake_cause wake_on)
/* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=3 */
psscr = PPC_BIT(42) | PPC_BIT(43) |
PPC_BITMASK(54, 55) | PPC_BITMASK(62,63);
+ vm_exit();
enter_p9_pm_state(psscr);
+ vm_enter();
} else {
/* stop with EC=0 (resumes) which does not require sreset. */
/* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=3 */
psscr = PPC_BITMASK(54, 55) | PPC_BITMASK(62,63);
+ vm_exit();
enter_p9_pm_lite_state(psscr);
+ vm_enter();
}
skip_sleep:
diff --git a/core/init.c b/core/init.c
index d167e4b9..ec48ede3 100644
--- a/core/init.c
+++ b/core/init.c
@@ -403,6 +403,7 @@ static bool load_kernel(void)
"INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
kernel_size);
+ vm_map((unsigned long)kh, sizeof(*kh));
if (kh->ei_ident != ELF_IDENT) {
prerror("INIT: ELF header not found. Assuming raw binary.\n");
return true;
@@ -418,6 +419,7 @@ static bool load_kernel(void)
prerror("INIT: Neither ELF32 not ELF64 ?\n");
return false;
}
+ vm_unmap((unsigned long)kh, sizeof(*kh));
if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
secureboot_verify(RESOURCE_ID_KERNEL,
@@ -458,6 +460,27 @@ int64_t mem_dump_free(void);
void *fdt;
+static void cpu_stop_vm(void *arg __unused) /* CPU job body: log this CPU's PIR, then vm_exit() */
+{
+ printf("CPU PIR 0x%04x cpu_stop_vm\n", this_cpu()->pir);
+ vm_exit();
+}
+
+static void cpu_all_stop_vm(void)
+{
+	struct cpu_thread *t;
+
+	/* Run cpu_stop_vm on every available CPU, one at a time. */
+	for_each_available_cpu(t) {
+		if (t != this_cpu())
+			cpu_wait_job(cpu_queue_job(t, "cpu_stop_vm",
+						cpu_stop_vm, NULL), true);
+		else
+			cpu_stop_vm(NULL);	/* called directly on the current CPU */
+	}
+}
+
+
void __noreturn load_and_boot_kernel(bool is_reboot)
{
const struct dt_property *memprop;
@@ -542,12 +565,6 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
cpu_give_self_os();
- mem_dump_free();
-
- /* Take processours out of nap */
- cpu_set_sreset_enable(false);
- cpu_set_ipi_enable(false);
-
/* Dump the selected console */
stdoutp = dt_prop_get_def(dt_chosen, "linux,stdout-path", NULL);
prlog(PR_DEBUG, "INIT: stdout-path: %s\n", stdoutp ? stdoutp : "");
@@ -559,6 +576,18 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;
fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
+
+ /* Take processors out of nap */
+ cpu_set_sreset_enable(false);
+ cpu_set_ipi_enable(false);
+
+ mem_dump_free();
+
+ /* Go back to realmode and tear down our VM before booting kernel */
+ printf("VMM: TEARDOWN\n");
+ cpu_all_stop_vm();
+ vm_destroy();
+
if (kernel_32bit)
start_kernel32(kernel_entry, fdt, mem_top);
start_kernel(kernel_entry, fdt, mem_top);
@@ -720,17 +749,21 @@ void setup_reset_vector(void)
{
uint32_t *src, *dst;
+ vm_map(0x100, 0x100);
/* Copy the reset code over the entry point. */
src = &reset_patch_start;
dst = (uint32_t *)0x100;
while(src < &reset_patch_end)
*(dst++) = *(src++);
sync_icache();
+ vm_unmap(0x100, 0x100);
cpu_set_sreset_enable(true);
}
void copy_exception_vectors(void)
{
+ vm_map(0x0, 0x2000);
+
/* Backup previous vectors as this could contain a kernel
* image.
*/
@@ -743,6 +776,7 @@ void copy_exception_vectors(void)
BUILD_ASSERT((&reset_patch_end - &reset_patch_start) < 0x1f00);
memcpy((void *)0x100, (void *)(SKIBOOT_BASE + 0x100), 0x1f00);
sync_icache();
+ vm_unmap(0x0, 0x2000);
}
static void per_thread_sanity_checks(void)
@@ -930,6 +964,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
*/
mem_region_init();
+ vm_init();
+
/* Reserve HOMER and OCC area */
homer_init();
@@ -950,7 +986,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
cpu_set_ipi_enable(true);
/* Allocate our split trace buffers now. Depends add_opal_node() */
- init_trace_buffers();
+ // XXX: this blows up due to NUMA allocation
+ // init_trace_buffers();
/* On P7/P8, get the ICPs and make sure they are in a sane state */
init_interrupts();
@@ -1111,6 +1148,8 @@ void __noreturn __secondary_cpu_entry(void)
{
struct cpu_thread *cpu = this_cpu();
+ vm_init_secondary();
+
/* Secondary CPU called in */
cpu_callin(cpu);
diff --git a/core/stack.c b/core/stack.c
index e04a4ddb..1bb763d1 100644
--- a/core/stack.c
+++ b/core/stack.c
@@ -26,7 +26,7 @@
#define STACK_BUF_ENTRIES 60
static struct bt_entry bt_buf[STACK_BUF_ENTRIES];
-extern uint32_t _stext, _etext;
+/* _stext and _etext are now declared in skiboot.h (as char arrays). */
/* Dumps backtrace to buffer */
void __nomcount __backtrace(struct bt_entry *entries, unsigned int *count)
diff --git a/core/vm.c b/core/vm.c
new file mode 100644
index 00000000..2ecb4efd
--- /dev/null
+++ b/core/vm.c
@@ -0,0 +1,335 @@
+/* Copyright 2018 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <stack.h>
+#include <cpu.h>
+#include <trace.h>
+#include <ccan/str/str.h>
+#include <ccan/container_of/container_of.h>
+
+static bool vm_setup = false; /* set by vm_init(), cleared by vm_destroy(); vm_map/vm_unmap/vm_enter/vm_exit do nothing while false */
+
+static void slb_install(unsigned long ea, unsigned long va, unsigned int index)
+{
+	unsigned long vsid_dw;	/* first (RS) operand of slbmte */
+	unsigned long esid_dw;	/* second (RB) operand of slbmte */
+
+	/* 256MB VSID, Kp = 1 */
+	vsid_dw = ((va >> 28) << (63-51)) | (1UL << (63-53));
+
+	/* 256MB ESID, V = 1, plus the SLB slot to write */
+	esid_dw = ((ea >> 28) << (63-35)) | (1UL << (63-36));
+	esid_dw |= index;
+
+	asm volatile("slbmte %0,%1" : : "r"(vsid_dw), "r"(esid_dw) : "memory");
+}
+
+static void slb_remove(unsigned long ea)
+{
+	asm volatile("slbie %0" : : "r"(ea & ~((1UL << 28) - 1)) : "memory"); /* slbie on the 256MB-aligned base of ea */
+}
+
+static void slb_remove_all(void)
+{
+ asm volatile("slbmte %0,%0 ; slbia" : : "r"(0) : "memory"); /* write an all-zero entry, then slbia; NOTE(review): presumably the slbmte covers a slot slbia leaves alone - confirm against the ISA */
+}
+
+struct hpte { /* one hash page table entry: two doublewords, stored big-endian */
+ unsigned long dword[2];
+};
+
+struct hpteg { /* one PTE group: the 8 slots searched for a given hash */
+ struct hpte hpte[8];
+};
+
+struct hpteg *htab; /* hash table, allocated in vm_init() */
+unsigned long htab_nr_bytes; /* size of htab in bytes */
+unsigned long htab_nr_ptegs; /* htab_nr_bytes / sizeof(struct hpteg) */
+unsigned long htab_pteg_mask; /* htab_nr_ptegs - 1, applied to the hash to pick a PTEG */
+
+static void htab_install(unsigned long va, unsigned long pa, int rw, int ex)
+{
+	/* Enter a 4kB va->pa translation; rw=0 maps read-only, ex=0 no-execute. */
+	unsigned long hash;
+	struct hpteg *hpteg;
+	unsigned int i;
+
+	hash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);
+	hpteg = &htab[hash & htab_pteg_mask];
+
+	for (i = 0; i < 8; i++) {
+		struct hpte *hpte = &hpteg->hpte[i];
+		unsigned long ava = va >> 23;
+		unsigned long arpn = pa >> 12;
+		unsigned long dw0, dw1;
+
+		if (be64_to_cpu(hpte->dword[0]) & 1) {	/* slot already valid */
+			assert(be64_to_cpu(hpte->dword[0]) >> 7 != ava);
+			continue;
+		}
+
+		assert(!hpte->dword[0]);
+		assert(!hpte->dword[1]);
+
+		dw0 = (ava << (63-56)) | 0x1;
+
+		dw1 = (arpn << (63-43 - 8));
+		if (!rw)
+			dw1 |= (1UL << (63 - 0)) | (1UL << (63 - 63 + 1));
+		if (!ex)
+			dw1 |= (1UL << (63 - 61));
+		dw1 |= (1UL << (63 - 60 + 1)); /* WIMG = 0010 */
+
+		hpte->dword[1] = cpu_to_be64(dw1);	/* fill dword 1 before setting valid */
+		eieio();
+		hpte->dword[0] = cpu_to_be64(dw0);
+		/* Order the new entry before any later table search or access. */
+		asm volatile("ptesync" ::: "memory");
+		return;
+	}
+	assert(0);	/* no free slot left in this PTEG */
+}
+
+static void htab_remove(unsigned long va) /* clear the HPTE for va and issue tlbie; asserts if no entry matches */
+{
+ unsigned long hash;
+ struct hpteg *hpteg;
+ unsigned int i;
+
+ hash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL); /* same hash as htab_install() */
+ hpteg = &htab[hash & htab_pteg_mask];
+
+ for (i = 0; i < 8; i++) {
+ struct hpte *hpte = &hpteg->hpte[i];
+ unsigned long ava = va >> 23;
+
+ if (!(be64_to_cpu(hpte->dword[0]) & 1)) { /* slot not valid: must be all-zero */
+ assert(!hpte->dword[0]);
+ assert(!hpte->dword[1]);
+ continue;
+ }
+
+ if (be64_to_cpu(hpte->dword[0]) >> 7 != ava) /* valid, but for a different address */
+ continue;
+
+ hpte->dword[0] = 0; /* the valid bit lives in dword 0, so clear that first */
+ eieio();
+ hpte->dword[1] = 0;
+ eieio();
+ asm volatile("tlbie %0,%1" : : "r"(ava<<12), "r"(0)); /* NOTE(review): operand encoding not checked here - confirm against the ISA */
+ asm volatile("eieio ; tlbsync ; ptesync" ::: "memory");
+ return;
+ }
+ assert(0); /* nothing in the PTEG matched va */
+}
+
+#define PAGE_SIZE 4096 /* page granule used for every mapping made in this file */
+
+void vm_map(unsigned long addr, unsigned long len)
+{
+	/* Map [addr, addr + len) 1:1, read-write and non-executable. */
+	unsigned long va, vseg;
+	unsigned long end = addr + len;
+	end = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);	/* widen to whole pages */
+	addr &= ~(PAGE_SIZE - 1);
+	len = end - addr;
+
+	if (!vm_setup)
+		return;
+
+	printf("vm_map: %lx-%lx\n", addr, addr + len);
+	vseg = addr >> 28;
+	assert(end == addr || vseg == ((end - 1) >> 28)); /* same segment; end is exclusive */
+
+	if (vseg != (SKIBOOT_BASE >> 28))
+		slb_install(addr, addr, 1);	/* SLB slot 1 is the one used for temporary mappings */
+
+	for (va = addr; va < end; va += PAGE_SIZE) {
+		if (va >= SKIBOOT_BASE && va < SKIBOOT_BASE + SKIBOOT_SIZE)
+			continue;	/* already bolted by vm_init() */
+		htab_install(va, va, 1, 0);
+	}
+}
+
+void vm_unmap(unsigned long addr, unsigned long len)
+{
+	/* Tear down what vm_map() set up for the same [addr, addr + len). */
+	unsigned long va, vseg;
+	unsigned long end = addr + len;
+	end = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);	/* widen to whole pages */
+	addr &= ~(PAGE_SIZE - 1);
+	len = end - addr;
+
+	if (!vm_setup)
+		return;
+
+	vseg = addr >> 28;
+	assert(end == addr || vseg == ((end - 1) >> 28)); /* same segment; end is exclusive */
+
+	printf("vm_unmap: %lx-%lx vseg:%lx\n", addr, addr + len, vseg);
+
+	if (vseg != (SKIBOOT_BASE >> 28))
+		slb_remove(addr);
+
+	for (va = addr; va < end; va += PAGE_SIZE) {
+		if (va >= SKIBOOT_BASE && va < SKIBOOT_BASE + SKIBOOT_SIZE)
+			continue;	/* bolted by vm_init(); vm_map() never installed it */
+		htab_remove(va);
+	}
+}
+
+
+struct prte { /* one partition table entry: two doublewords */
+ unsigned long dword[2];
+};
+
+static struct prte *prtab; /* partition table; its address is written to SPR_PTCR */
+
+static unsigned long stack_end = SKIBOOT_BASE + SKIBOOT_SIZE; /* end of the area mapped so far by vm_map_stacks() */
+
+void vm_map_stacks(void)
+{
+	/* Bolt the not-yet-mapped part of the CPU stack area after the image. */
+	unsigned long start = stack_end, va;
+	unsigned long end = (unsigned long)SKIBOOT_BASE + SKIBOOT_SIZE +
+				(cpu_max_pir + 1)*STACK_SIZE;
+
+	if (start >= end)	/* stacks up to cpu_max_pir are already mapped */
+		return;
+
+	printf("VMM: map stacks for %u\n", cpu_max_pir);
+
+	for (va = start; va < end; va += PAGE_SIZE)
+		htab_install(va, va, 1, 0);
+	printf("Installed TLB:%lx-%lx\n", start, end);
+
+	stack_end = end;	/* a later call continues from here */
+}
+
+static void vm_init_cpu(void) /* per-CPU setup: program the SPRs and install the skiboot segment in SLB slot 0 */
+{
+ mtspr(SPR_LPCR, mfspr(SPR_LPCR) & /* NOTE(review): meaning of the cleared LPCR bits is not visible here - confirm */
+ ~(PPC_BITMASK(0,3) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(54)));
+ mtspr(SPR_LPID, 0);
+ mtspr(SPR_PID, 0);
+ mtspr(SPR_HRMOR, 0);
+ mtspr(SPR_PTCR, (unsigned long)prtab); /* partition table built by vm_init() */
+
+ slb_install(SKIBOOT_BASE, SKIBOOT_BASE, 0); /* 1:1 segment containing SKIBOOT_BASE */
+}
+
+static void vm_cleanup_cpu(void) /* drop this CPU's SLB entries and clear SPR_PTCR */
+{
+ slb_remove_all();
+ mtspr(SPR_PTCR, 0);
+}
+
+void vm_init_secondary(void) /* secondary CPU: per-CPU setup, then vm_enter() */
+{
+ vm_init_cpu();
+ vm_enter();
+}
+
+void vm_exit_cleanup(void) /* vm_exit(), then drop this CPU's SLB entries and PTCR */
+{
+ vm_exit();
+ vm_cleanup_cpu();
+}
+
+void vm_enter(void) /* set MSR_IR and MSR_DR; does nothing unless vm_setup is true */
+{
+ if (vm_setup)
+ mtmsr(mfmsr() | (MSR_IR|MSR_DR));
+}
+
+void vm_exit(void) /* clear MSR_IR and MSR_DR; does nothing unless vm_setup is true */
+{
+ if (vm_setup)
+ mtmsr(mfmsr() & ~(MSR_IR|MSR_DR));
+}
+
+void vm_init(void)
+{
+	/* Boot CPU: build the partition and hash tables, bolt skiboot, turn VM on. */
+	unsigned long va;
+
+	/* Partition table: 64kB, 64kB aligned, zeroed. */
+	prtab = memalign(64*1024, 64*1024);
+	assert(prtab);
+	memset(prtab, 0, 64*1024);
+
+	htab_nr_bytes = 1UL<<18;
+	htab_nr_ptegs = htab_nr_bytes / sizeof(struct hpteg);
+	htab_pteg_mask = htab_nr_ptegs - 1;
+	/* Hash table: aligned to its own size, zeroed so every slot is invalid. */
+	htab = memalign(1UL<<18, htab_nr_bytes);
+	assert(htab);
+	memset(htab, 0, htab_nr_bytes);
+
+	prtab[0].dword[0] = cpu_to_be64((unsigned long)htab);
+	prtab[0].dword[1] = 0;
+
+	eieio();
+	vm_init_cpu();
+
+	printf("mapping skiboot base %x-%x\n", SKIBOOT_BASE, HEAP_BASE);
+	printf(" text %lx-%lx\n", (unsigned long)_stext, (unsigned long)_etext);
+	printf("Installed SLB:%x-%x\n", SKIBOOT_BASE, SKIBOOT_BASE + (256*1024*1024));
+	for (va = SKIBOOT_BASE; va < HEAP_BASE; va += PAGE_SIZE) {
+		if (va >= (unsigned long)_stext && va < (unsigned long)_etext)
+			htab_install(va, va, 0, 1);	/* text: read-only, executable */
+		else if (va >= (unsigned long)__rodata_start &&
+			 va < (unsigned long)__rodata_end)	/* end symbols are exclusive */
+			htab_install(va, va, 0, 0);	/* rodata: read-only */
+		else
+			htab_install(va, va, 1, 0);	/* everything else: read-write */
+	}
+	for (; va < SKIBOOT_BASE + SKIBOOT_SIZE; va += PAGE_SIZE)
+		htab_install(va, va, 1, 0);
+
+	printf("Installed TLB:%x-%lx\n", SKIBOOT_BASE, va);
+
+	vm_map_stacks();
+	eieio();
+
+	printf("PRTAB:%p\n", prtab);
+	printf("HTAB:%p\n", htab);
+
+	vm_setup = true;	/* vm_enter() does nothing until this is set */
+
+	vm_enter();
+}
+
+void vm_destroy(void)
+{
+	unsigned long va;
+
+	if (!vm_setup)
+		return;
+
+	/* vm_exit() only acts while vm_setup is true, so leave VM first. */
+	vm_exit_cleanup();
+	vm_setup = false;
+
+	/* Remove exactly what was bolted: skiboot plus stacks up to stack_end. */
+	for (va = SKIBOOT_BASE; va < stack_end; va += PAGE_SIZE)
+		htab_remove(va);
+	free(htab);
+	free(prtab);
+}
diff --git a/hw/fake-nvram.c b/hw/fake-nvram.c
index 236ad5b9..1629728a 100644
--- a/hw/fake-nvram.c
+++ b/hw/fake-nvram.c
@@ -22,6 +22,9 @@
static struct mem_region *nvram_region;
static struct lock fake_nvram_lock = LOCK_UNLOCKED;
+void vm_map(unsigned long addr, unsigned long len); /* NOTE(review): same prototype this patch adds to include/skiboot.h */
+void vm_unmap(unsigned long addr, unsigned long len); /* NOTE(review): likewise; redundant if this file includes skiboot.h - confirm */
+
int fake_nvram_info(uint32_t *total_size)
{
nvram_region = find_mem_region("ibm,fake-nvram");
@@ -39,11 +42,13 @@ int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len)
if (!nvram_region)
return -ENODEV;
+ vm_map(nvram_region->start + src, len);
lock(&fake_nvram_lock);
memcpy(dst, (void *) (nvram_region->start + src), len);
unlock(&fake_nvram_lock);
nvram_read_complete(true);
+ vm_unmap(nvram_region->start + src, len);
return 0;
}
diff --git a/include/processor.h b/include/processor.h
index 925cc7cd..e2a5577e 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -53,6 +53,7 @@
#define SPR_SRR1 0x01b /* RW: Exception save/restore reg 1 */
#define SPR_CFAR 0x01c /* RW: Come From Address Register */
#define SPR_AMR 0x01d /* RW: Authority Mask Register */
+#define SPR_PID 0x030 /* RW: PID register */
#define SPR_IAMR 0x03d /* RW: Instruction Authority Mask Register */
#define SPR_RPR 0x0ba /* RW: Relative Priority Register */
#define SPR_TBRL 0x10c /* RO: Timebase low */
@@ -75,9 +76,11 @@
#define SPR_HSRR1 0x13b /* RW: HV Exception save/restore reg 1 */
#define SPR_TFMR 0x13d
#define SPR_LPCR 0x13e
+#define SPR_LPID 0x13f /* RW: LPID register */
#define SPR_HMER 0x150 /* Hypervisor Maintenance Exception */
#define SPR_HMEER 0x151 /* HMER interrupt enable mask */
#define SPR_AMOR 0x15d
+#define SPR_PTCR 0x1d0 /* RW: Partition table control register */
#define SPR_PSSCR 0x357 /* RW: Stop status and control (ISA 3) */
#define SPR_TSCR 0x399
#define SPR_HID0 0x3f0
diff --git a/include/skiboot.h b/include/skiboot.h
index 2f81a058..f6f7e8c8 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -49,6 +49,7 @@ struct mem_region;
extern struct mem_region *mem_region_next(struct mem_region *region);
#ifndef __TESTING__
+extern char _stext[], _etext[];
/* Readonly section start and end. */
extern char __rodata_start[], __rodata_end[];
@@ -357,4 +358,15 @@ extern int occ_sensor_group_clear(u32 group_hndl, int token);
extern void occ_add_sensor_groups(struct dt_node *sg, u32 *phandles,
int nr_phandles, int chipid);
+/* core/vm.c */
+void vm_map(unsigned long addr, unsigned long len); /* map [addr, addr+len) 1:1; no-op before vm_init() */
+void vm_unmap(unsigned long addr, unsigned long len); /* undo a vm_map() of the same range */
+void vm_init(void); /* boot CPU: build the tables, bolt skiboot, enable translation */
+void vm_destroy(void); /* leave VM on the calling CPU and free the tables */
+void vm_init_secondary(void); /* per-CPU setup for a secondary, then vm_enter() */
+void vm_enter(void); /* set MSR_IR|MSR_DR if VM has been set up */
+void vm_exit(void); /* clear MSR_IR|MSR_DR if VM has been set up */
+void vm_exit_cleanup(void); /* vm_exit() plus SLB/PTCR teardown on this CPU */
+void vm_map_stacks(void); /* bolt the CPU stack area that follows the skiboot image */
+
#endif /* __SKIBOOT_H */
diff --git a/libstb/container.c b/libstb/container.c
index a720fbbf..0e4a3cc9 100644
--- a/libstb/container.c
+++ b/libstb/container.c
@@ -17,16 +17,24 @@
#include <skiboot.h>
#include "container.h"
+void vm_map(unsigned long addr, unsigned long len); /* NOTE(review): duplicates the prototype this patch adds to skiboot.h, which this file includes */
+void vm_unmap(unsigned long addr, unsigned long len); /* NOTE(review): likewise */
+
bool stb_is_container(const void *buf, size_t size)
{
ROM_container_raw *c;
+ bool ret = true;
c = (ROM_container_raw*) buf;
if (!buf || size < SECURE_BOOT_HEADERS_SIZE)
return false;
- if (be32_to_cpu(c->magic_number) != ROM_MAGIC_NUMBER )
- return false;
- return true;
+
+ vm_map((unsigned long)&c->magic_number, 4); /* presumably buf lies outside the bolted mappings; map the word before reading it */
+ if (be32_to_cpu(c->magic_number) != ROM_MAGIC_NUMBER)
+ ret = false;
+ vm_unmap((unsigned long)&c->magic_number, 4); /* unmap on both outcomes, hence the single return below */
+
+ return ret;
}
uint32_t stb_payload_magic(const void *buf, size_t size)
diff --git a/skiboot.lds.S b/skiboot.lds.S
index 7f71d5cd..31304a16 100644
--- a/skiboot.lds.S
+++ b/skiboot.lds.S
@@ -51,19 +51,21 @@ SECTIONS
KEEP(*(.cpuctrl.data))
}
- . = ALIGN(0x10);
+ . = ALIGN(0x1000);
_stext = .;
.text : {
*(.text*)
*(.sfpr)
}
_etext = .;
+ . = ALIGN(0x1000);
.rodata : {
__rodata_start = .;
*(.rodata .rodata.*)
__rodata_end = .;
}
+ . = ALIGN(0x1000);
.data : {
/*
--
2.16.1
More information about the Skiboot
mailing list