[Skiboot] [PATCH 2/2] virtual memory for OPAL boot
Nicholas Piggin
npiggin at gmail.com
Sat Sep 7 00:19:58 AEST 2019
vm_map / vm_unmap provide a per-cpu mapping which cannot nest. vm_map returns
an EA which is different from the PA of the memory when in vmm mode.
vm_map_global / vm_unmap_global set up and tear down globally visible 1:1 mappings.
A list of global extents + a local extent per cpu is kept to describe
active mappings. Fault handlers look these up to install SLB/HPTE entries.
This should move toward having fewer global mappings for things kept
around, unmapping them when finished, or better yet moving to local
mappings.
---
core/Makefile.inc | 2 +-
core/cpu.c | 16 +-
core/exceptions.c | 40 ++-
core/fast-reboot.c | 30 +-
core/flash.c | 1 +
core/init.c | 169 +++++++--
core/mem_region.c | 89 +++--
core/opal.c | 20 +-
core/vm.c | 829 +++++++++++++++++++++++++++++++++++++++++++
hdata/spira.c | 34 +-
hw/fake-nvram.c | 12 +-
hw/homer.c | 5 +
hw/lpc-uart.c | 31 +-
hw/lpc.c | 2 +
hw/phb4.c | 9 +-
hw/psi.c | 2 +
hw/slw.c | 4 +-
hw/xive.c | 5 +
hw/xscom.c | 4 +-
include/cmpxchg.h | 3 +
include/cpu.h | 22 ++
include/elf-abi.h | 20 +-
include/io.h | 57 ++-
include/mem_region.h | 1 +
include/processor.h | 13 +-
include/skiboot.h | 28 ++
libstb/container.c | 12 +-
libstb/cvc.c | 3 +
libstb/secureboot.c | 5 +-
libstb/trustedboot.c | 6 +-
skiboot.lds.S | 56 +--
31 files changed, 1399 insertions(+), 131 deletions(-)
create mode 100644 core/vm.c
diff --git a/core/Makefile.inc b/core/Makefile.inc
index fddff50e9..c2b5251d7 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -3,7 +3,7 @@
# -*-Makefile-*-
SUBDIRS += core
-CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
+CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o vm.o
CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
diff --git a/core/cpu.c b/core/cpu.c
index 232664b41..c05b3b5ec 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -416,6 +416,10 @@ static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
}
isync();
+ /* P8 must enter nap with VM disabled */
+ if (cpu->vm_setup)
+ vm_exit();
+
/* Enter nap */
vec = enter_p8_pm_state(false);
@@ -476,11 +480,13 @@ static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
/* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=1 */
psscr = PPC_BIT(42) | PPC_BIT(43) |
PPC_BITMASK(54, 55) | PPC_BIT(63);
+ cpu->vm_setup = false; /* stop with EC=1 wakes with vm off */
vec = enter_p9_pm_state(psscr);
} else {
/* stop with EC=0 (resumes) which does not require sreset. */
/* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=1 */
psscr = PPC_BITMASK(54, 55) | PPC_BIT(63);
+ /* Can run with VM enabled */
enter_p9_pm_lite_state(psscr);
}
@@ -499,6 +505,7 @@ static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
static void cpu_idle_pm(enum cpu_wake_cause wake_on)
{
unsigned int vec;
+ bool was_vm_setup = this_cpu()->vm_setup;
switch(proc_gen) {
case proc_gen_p8:
@@ -523,12 +530,17 @@ static void cpu_idle_pm(enum cpu_wake_cause wake_on)
default:
break;
}
- mtmsrd(MSR_RI, 1);
} else if (vec == 0x200) {
exception_entry_pm_mce();
enable_machine_check();
+ }
+
+ if (vec != 0) {
+ /* 0x100 or 0x200 */
mtmsrd(MSR_RI, 1);
+ if (was_vm_setup)
+ vm_enter();
}
}
@@ -1361,7 +1373,7 @@ static int64_t opal_return_cpu(void)
printf("OPAL in_opal_call=%u\n", this_cpu()->in_opal_call);
}
- __secondary_cpu_entry();
+ __return_cpu_entry();
return OPAL_HARDWARE; /* Should not happen */
}
diff --git a/core/exceptions.c b/core/exceptions.c
index f85327873..feac484db 100644
--- a/core/exceptions.c
+++ b/core/exceptions.c
@@ -89,6 +89,41 @@ void exception_entry(struct stack_frame *stack)
"Fatal MCE at "REG" ", nip);
break;
+ case 0x300:
+ if (vm_dsi(nip, stack->dar, !!(stack->dsisr & DSISR_ISSTORE)))
+ goto out;
+ fatal = true;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Fatal %s address "REG" at "REG" ",
+ (stack->dsisr & DSISR_ISSTORE) ? "store" : "load",
+ stack->dar, nip);
+ break;
+
+ case 0x380:
+ if (vm_dslb(nip, stack->dar))
+ goto out;
+ fatal = true;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Fatal load/store address "REG" at "REG" ",
+ stack->dar, nip);
+ break;
+
+ case 0x400:
+ if (vm_isi(nip))
+ goto out;
+ fatal = true;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Fatal ifetch at "REG" ", nip);
+ break;
+
+ case 0x480:
+ if (vm_islb(nip))
+ goto out;
+ fatal = true;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Fatal ifetch at "REG" ", nip);
+ break;
+
default:
fatal = true;
prerror("***********************************************\n");
@@ -101,10 +136,11 @@ void exception_entry(struct stack_frame *stack)
prerror("%s\n", buf);
dump_regs(stack);
+ if (!fatal)
+ backtrace();
+out:
if (fatal)
abort();
- else
- backtrace();
if (hv) {
/* Set up for SRR return */
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 9631eb96d..e9739c81e 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -346,6 +346,9 @@ void __noreturn fast_reboot_entry(void)
* up and go processing jobs.
*/
if (this_cpu() != boot_cpu) {
+ cleanup_cpu_state();
+
+ sync();
if (!fast_boot_release) {
smt_lowest();
while (!fast_boot_release)
@@ -353,9 +356,6 @@ void __noreturn fast_reboot_entry(void)
smt_medium();
}
sync();
- cleanup_cpu_state();
- enable_machine_check();
- mtmsrd(MSR_RI, 1);
__secondary_cpu_entry();
}
@@ -370,15 +370,22 @@ void __noreturn fast_reboot_entry(void)
if (proc_gen == proc_gen_p9)
xive_reset();
+ /* Cleanup ourselves */
+ cleanup_cpu_state();
+
+ /* XXX: need this? */
+ enable_machine_check();
+ mtmsrd(MSR_RI, 1);
+
+ /* Enter virtual memory mode */
+ vm_init(true);
+
prlog(PR_INFO, "RESET: Releasing secondaries...\n");
/* Release everybody */
sync();
fast_boot_release = true;
- /* Cleanup ourselves */
- cleanup_cpu_state();
-
/* Set our state to active */
sync();
this_cpu()->state = cpu_state_active;
@@ -405,6 +412,7 @@ void __noreturn fast_reboot_entry(void)
cpu_set_ipi_enable(true);
if (!chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ void *t;
/*
* mem_region_clear_unused avoids these preload regions
* so it can run along side image preloading. Clear these
@@ -414,8 +422,14 @@ void __noreturn fast_reboot_entry(void)
* Mambo may have embedded payload here, so don't clear
* it at all.
*/
- memset(KERNEL_LOAD_BASE, 0, KERNEL_LOAD_SIZE);
- memset(INITRAMFS_LOAD_BASE, 0, INITRAMFS_LOAD_SIZE);
+
+ t = vm_map((unsigned long)KERNEL_LOAD_BASE, KERNEL_LOAD_SIZE, true);
+ memset(t, 0, KERNEL_LOAD_SIZE);
+ vm_unmap((unsigned long)t, KERNEL_LOAD_SIZE);
+
+ t = vm_map((unsigned long)INITRAMFS_LOAD_BASE, INITRAMFS_LOAD_SIZE, true);
+ memset(t, 0, INITRAMFS_LOAD_SIZE);
+ vm_unmap((unsigned long)t, INITRAMFS_LOAD_SIZE);
}
/* Start preloading kernel and ramdisk */
diff --git a/core/flash.c b/core/flash.c
index 7fbfca22b..bfe124fe3 100644
--- a/core/flash.c
+++ b/core/flash.c
@@ -778,6 +778,7 @@ done_reading:
* Verify and measure the retrieved PNOR partition as part of the
* secure boot and trusted boot requirements
*/
+
secureboot_verify(id, buf, *len);
trustedboot_measure(id, buf, *len);
diff --git a/core/init.c b/core/init.c
index cd333dcbd..1eb52c2b4 100644
--- a/core/init.c
+++ b/core/init.c
@@ -90,6 +90,7 @@ static bool try_load_elf64_le(struct elf_hdr *header)
uint64_t load_base = (uint64_t)kh;
struct elf64_phdr *ph;
unsigned int i;
+ bool ret = false;
printf("INIT: 64-bit LE kernel discovered\n");
@@ -101,6 +102,9 @@ static bool try_load_elf64_le(struct elf_hdr *header)
* but it will not work for any ELF binary.
*/
ph = (struct elf64_phdr *)(load_base + le64_to_cpu(kh->e_phoff));
+ vm_map_global("KERNEL ELF Program Headers", (unsigned long)ph,
+ le16_to_cpu(kh->e_phnum)*sizeof(struct elf64_phdr),
+ false, false);
for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
continue;
@@ -117,7 +121,7 @@ static bool try_load_elf64_le(struct elf_hdr *header)
if (!kernel_entry) {
prerror("INIT: Failed to find kernel entry !\n");
- return false;
+ goto out_unmap;
}
kernel_entry += load_base;
kernel_32bit = false;
@@ -129,7 +133,12 @@ static bool try_load_elf64_le(struct elf_hdr *header)
prlog(PR_DEBUG, "INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
kernel_entry, kernel_size);
- return true;
+ ret = true;
+
+out_unmap:
+ vm_unmap_global((unsigned long)ph, le16_to_cpu(kh->e_phnum)*sizeof(struct elf64_phdr));
+
+ return ret;
}
static bool try_load_elf64(struct elf_hdr *header)
@@ -139,12 +148,17 @@ static bool try_load_elf64(struct elf_hdr *header)
struct elf64_phdr *ph;
struct elf64_shdr *sh;
unsigned int i;
+ bool ret = false;
+
+ vm_map_global("KERNEL ELF64 Header", (unsigned long)header,
+ sizeof(struct elf64_hdr), false, false);
/* Check it's a ppc64 LE ELF */
if (kh->ei_ident == ELF_IDENT &&
kh->ei_data == ELF_DATA_LSB &&
kh->e_machine == le16_to_cpu(ELF_MACH_PPC64)) {
- return try_load_elf64_le(header);
+ ret = try_load_elf64_le(header);
+ goto out_unmap1;
}
/* Check it's a ppc64 ELF */
@@ -152,7 +166,7 @@ static bool try_load_elf64(struct elf_hdr *header)
kh->ei_data != ELF_DATA_MSB ||
kh->e_machine != ELF_MACH_PPC64) {
prerror("INIT: Kernel doesn't look like an ppc64 ELF\n");
- return false;
+ goto out_unmap1;
}
/* Look for a loadable program header that has our entry in it
@@ -163,6 +177,8 @@ static bool try_load_elf64(struct elf_hdr *header)
* but it will not work for any ELF binary.
*/
ph = (struct elf64_phdr *)(load_base + kh->e_phoff);
+ vm_map_global("KERNEL ELF Program Headers", (unsigned long)ph,
+ kh->e_phnum*sizeof(struct elf64_phdr), false, false);
for (i = 0; i < kh->e_phnum; i++, ph++) {
if (ph->p_type != ELF_PTYPE_LOAD)
continue;
@@ -177,7 +193,7 @@ static bool try_load_elf64(struct elf_hdr *header)
if (!kernel_entry) {
prerror("INIT: Failed to find kernel entry !\n");
- return false;
+ goto out_unmap2;
}
/* For the normal big-endian ELF ABI, the kernel entry points
@@ -187,6 +203,8 @@ static bool try_load_elf64(struct elf_hdr *header)
* to assuming it obeys the ABI.
*/
sh = (struct elf64_shdr *)(load_base + kh->e_shoff);
+ vm_map_global("KERNEL ELF Section Headers", (unsigned long)sh,
+ kh->e_shnum*sizeof(struct elf64_shdr), false, false);
for (i = 0; i < kh->e_shnum; i++, sh++) {
if (sh->sh_addr <= kh->e_entry &&
(sh->sh_addr + sh->sh_size) > kh->e_entry)
@@ -207,7 +225,15 @@ static bool try_load_elf64(struct elf_hdr *header)
printf("INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
kernel_entry, kernel_size);
- return true;
+ ret = true;
+
+ vm_unmap_global((unsigned long)sh, kh->e_shnum*sizeof(struct elf64_shdr));
+out_unmap2:
+ vm_unmap_global((unsigned long)ph, kh->e_phnum*sizeof(struct elf64_phdr));
+out_unmap1:
+ vm_unmap_global((unsigned long)header, sizeof(struct elf64_hdr));
+
+ return ret;
}
static bool try_load_elf32_le(struct elf_hdr *header)
@@ -320,6 +346,7 @@ bool start_preload_kernel(void)
int loaded;
/* Try to load an external kernel payload through the platform hooks */
+ vm_map_global("KERNEL", (unsigned long)KERNEL_LOAD_BASE, KERNEL_LOAD_SIZE, true, false);
kernel_size = KERNEL_LOAD_SIZE;
loaded = start_preload_resource(RESOURCE_ID_KERNEL,
RESOURCE_SUBID_NONE,
@@ -328,9 +355,11 @@ bool start_preload_kernel(void)
if (loaded != OPAL_SUCCESS) {
printf("INIT: platform start load kernel failed\n");
kernel_size = 0;
+ vm_unmap_global((unsigned long)KERNEL_LOAD_BASE, KERNEL_LOAD_SIZE);
return false;
}
+ vm_map_global("INITRAMFS", (unsigned long)INITRAMFS_LOAD_BASE, INITRAMFS_LOAD_SIZE, true, false);
initramfs_size = INITRAMFS_LOAD_SIZE;
loaded = start_preload_resource(RESOURCE_ID_INITRAMFS,
RESOURCE_SUBID_NONE,
@@ -338,6 +367,7 @@ bool start_preload_kernel(void)
if (loaded != OPAL_SUCCESS) {
printf("INIT: platform start load initramfs failed\n");
initramfs_size = 0;
+ vm_unmap_global((unsigned long)INITRAMFS_LOAD_BASE, INITRAMFS_LOAD_SIZE);
return false;
}
@@ -347,13 +377,16 @@ bool start_preload_kernel(void)
static bool load_kernel(void)
{
void *stb_container = NULL;
- struct elf_hdr *kh;
+ struct elf_hdr *kh, *t;
+ uint32_t ei_ident;
+ uint8_t ei_class;
int loaded;
prlog(PR_NOTICE, "INIT: Waiting for kernel...\n");
loaded = wait_for_resource_loaded(RESOURCE_ID_KERNEL,
RESOURCE_SUBID_NONE);
+ vm_unmap_global((unsigned long)KERNEL_LOAD_BASE, KERNEL_LOAD_SIZE);
if (loaded != OPAL_SUCCESS) {
printf("INIT: platform wait for kernel load failed\n");
@@ -369,8 +402,10 @@ static bool load_kernel(void)
((uint64_t)__builtin_kernel_start) -
SKIBOOT_BASE + boot_offset;
printf("Using built-in kernel\n");
+ vm_map_global("KERNEL", (unsigned long)KERNEL_LOAD_BASE, kernel_size, true, false);
memmove(KERNEL_LOAD_BASE, (void*)builtin_base,
kernel_size);
+ vm_unmap_global((unsigned long)KERNEL_LOAD_BASE, kernel_size);
}
}
@@ -386,7 +421,7 @@ static bool load_kernel(void)
if (kernel_entry < EXCEPTION_VECTORS_END) {
cpu_set_sreset_enable(false);
memcpy_null(NULL, old_vectors, EXCEPTION_VECTORS_END);
- sync_icache();
+ sync_icache(0);
} else {
/* Hack for STB in Mambo, assume at least 4kb in mem */
if (!kernel_size)
@@ -417,15 +452,20 @@ static bool load_kernel(void)
"INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
kernel_size);
- if (kh->ei_ident != ELF_IDENT) {
+ t = vm_map((unsigned long)kh, sizeof(*kh), false);
+ ei_ident = t->ei_ident;
+ ei_class = t->ei_class;
+ vm_unmap((unsigned long)t, sizeof(*kh));
+
+ if (ei_ident != ELF_IDENT) {
prerror("INIT: ELF header not found. Assuming raw binary.\n");
return true;
}
- if (kh->ei_class == ELF_CLASS_64) {
+ if (ei_class == ELF_CLASS_64) {
if (!try_load_elf64(kh))
return false;
- } else if (kh->ei_class == ELF_CLASS_32) {
+ } else if (ei_class == ELF_CLASS_32) {
if (!try_load_elf32(kh))
return false;
} else {
@@ -453,7 +493,7 @@ static void load_initramfs(void)
loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS,
RESOURCE_SUBID_NONE);
-
+ vm_unmap_global((unsigned long)INITRAMFS_LOAD_BASE, INITRAMFS_LOAD_SIZE);
if (loaded != OPAL_SUCCESS || !initramfs_size)
return;
@@ -525,6 +565,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
const struct dt_property *memprop;
const char *cmdline, *stdoutp;
uint64_t mem_top;
+ uint32_t *t;
memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
if (memprop)
@@ -599,11 +640,13 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
+ t = vm_map(kernel_entry, 4, false);
/* Check there is something there before we branch to it */
- if (*(uint32_t *)kernel_entry == 0) {
+ if (*t == 0) {
prlog(PR_EMERG, "FATAL: Kernel is zeros, can't execute!\n");
assert(0);
}
+ vm_unmap(kernel_entry, 4);
if (platform.exit)
platform.exit();
@@ -615,6 +658,9 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
printf("INIT: Starting kernel at 0x%llx, fdt at %p %u bytes\n",
kernel_entry, fdt, fdt_totalsize(fdt));
+ /* Go back to realmode and tear down our VM before booting kernel */
+ vm_destroy();
+
/* Disable machine checks on all */
cpu_disable_ME_RI_all();
@@ -781,34 +827,55 @@ static void setup_branch_null_catcher(void)
void copy_sreset_vector(void)
{
+ static char patch[0x100];
uint32_t *src, *dst;
+ uint32_t *t;
+ uint32_t len = (void *)&reset_patch_end - (void *)&reset_patch_start;
/* Copy the reset code over the entry point. */
src = &reset_patch_start;
+ t = vm_map((unsigned long)src, len, false);
+ memcpy(patch, t, len);
+ vm_unmap((unsigned long)src, len);
+
dst = (uint32_t *)0x100;
- while(src < &reset_patch_end)
- *(dst++) = *(src++);
- sync_icache();
+ t = vm_map((unsigned long)dst, len, true);
+ memcpy(t, patch, len);
+ sync_icache((unsigned long)t);
+ vm_unmap((unsigned long)dst, len);
}
void copy_sreset_vector_fast_reboot(void)
{
+ static char patch[0x100];
uint32_t *src, *dst;
+ uint32_t *t;
+ uint32_t len = (void *)&reset_fast_reboot_patch_end -
+ (void *)&reset_fast_reboot_patch_start;
/* Copy the reset code over the entry point. */
src = &reset_fast_reboot_patch_start;
+ t = vm_map((unsigned long)src, len, false);
+ memcpy(patch, t, len);
+ vm_unmap((unsigned long)src, len);
+
dst = (uint32_t *)0x100;
- while(src < &reset_fast_reboot_patch_end)
- *(dst++) = *(src++);
- sync_icache();
+ t = vm_map((unsigned long)dst, len, true);
+ memcpy(t, patch, len);
+ sync_icache((unsigned long)t);
+ vm_unmap((unsigned long)dst, len);
}
void copy_exception_vectors(void)
{
+ void *t;
+
+ t = vm_map(0x0, 0x2000, true);
+
/* Backup previous vectors as this could contain a kernel
* image.
*/
- memcpy_null(old_vectors, NULL, EXCEPTION_VECTORS_END);
+ memcpy(old_vectors, t, EXCEPTION_VECTORS_END);
/* Copy from 0x100 to EXCEPTION_VECTORS_END, avoid below 0x100 as
* this is the boot flag used by CPUs still potentially entering
@@ -816,9 +883,10 @@ void copy_exception_vectors(void)
*/
BUILD_ASSERT((&reset_patch_end - &reset_patch_start) <
EXCEPTION_VECTORS_END - 0x100);
- memcpy((void *)0x100, (void *)(SKIBOOT_BASE + 0x100),
+ memcpy(t + 0x100, (void *)(SKIBOOT_BASE + 0x100),
EXCEPTION_VECTORS_END - 0x100);
- sync_icache();
+ sync_icache((unsigned long)t);
+ vm_unmap(0x0, 0x2000);
}
static void per_thread_sanity_checks(void)
@@ -888,16 +956,25 @@ static uint32_t romem_csum;
static void checksum_romem(void)
{
+ void *t;
+ unsigned long size;
uint32_t csum;
romem_csum = 0;
if (chip_quirk(QUIRK_SLOW_SIM))
return;
- csum = mem_csum(_start, _romem_end);
+ size = (unsigned long)_romem_end - (unsigned long)_start;
+ t = vm_map((unsigned long)_start, size, false);
+ csum = mem_csum(t, t + size);
romem_csum ^= csum;
- csum = mem_csum(__builtin_kernel_start, __builtin_kernel_end);
+ vm_unmap((unsigned long)_start, size);
+
+ size = (unsigned long)__builtin_kernel_end - (unsigned long)__builtin_kernel_start;
+ t = vm_map((unsigned long)__builtin_kernel_start, size, false);
+ csum = mem_csum(t, t + size);
romem_csum ^= csum;
+ vm_unmap((unsigned long)__builtin_kernel_start, size);
}
bool verify_romem(void)
@@ -968,7 +1045,7 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
prlog(PR_DEBUG, "initial console log level: memory %d, driver %d\n",
(debug_descriptor.console_log_levels >> 4),
(debug_descriptor.console_log_levels & 0x0f));
- prlog(PR_TRACE, "OPAL is Powered By Linked-List Technology.\n");
+ prlog(PR_TRACE, "OPAL is Powered By Linked-List Technology. Now with more indirection.\n");
#ifdef SKIBOOT_GCOV
skiboot_gcov_done();
@@ -980,6 +1057,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
/* Now locks can be used */
init_locks();
+ /* Enter virtual memory mode */
+ vm_init(false);
+
/* Create the OPAL call table early on, entries can be overridden
* later on (FSP console code for example)
*/
@@ -1005,7 +1085,20 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
if (parse_hdat(false) < 0)
abort();
} else {
+ void *t;
+ uint32_t size;
+
+ t = vm_map((unsigned long)fdt, sizeof(struct fdt_header), false);
+ size = fdt_totalsize(t);
+ vm_unmap((unsigned long)fdt, sizeof(struct fdt_header));
+
+ /*
+ * Would be nice to make this a local map, but it seems
+ * to need to be expanded in place.
+ */
+ vm_map_global("fdt", (unsigned long)fdt, size, false, false);
dt_expand(fdt);
+ vm_unmap_global((unsigned long)fdt, size);
}
dt_add_cpufeatures(dt_root);
@@ -1056,6 +1149,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
*/
init_cpu_max_pir();
+ vm_init_stacks();
+
/*
* Now, we init our memory map from the device-tree, and immediately
* reserve areas which we know might contain data coming from
@@ -1295,6 +1390,30 @@ void __noreturn __secondary_cpu_entry(void)
enable_machine_check();
mtmsrd(MSR_RI, 1);
+ vm_init_secondary();
+
+ /* Some XIVE setup */
+ xive_cpu_callin(cpu);
+
+ /* Wait for work to do */
+ while(true) {
+ if (cpu_check_jobs(cpu))
+ cpu_process_jobs();
+ else
+ cpu_idle_job();
+ }
+}
+
+void __noreturn __return_cpu_entry(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ /* Secondary CPU called in */
+ cpu_callin(cpu);
+
+ enable_machine_check();
+ mtmsrd(MSR_RI, 1);
+
/* Some XIVE setup */
xive_cpu_callin(cpu);
diff --git a/core/mem_region.c b/core/mem_region.c
index 47d754455..cf51c241b 100644
--- a/core/mem_region.c
+++ b/core/mem_region.c
@@ -57,24 +57,27 @@ static struct mem_region skiboot_os_reserve = {
.type = REGION_OS,
};
-struct mem_region skiboot_heap = {
- .name = "ibm,firmware-heap",
- .start = HEAP_BASE,
- .len = HEAP_SIZE,
- .type = REGION_SKIBOOT_HEAP,
-};
-
static struct mem_region skiboot_code_and_text = {
.name = "ibm,firmware-code",
.start = SKIBOOT_BASE,
.len = HEAP_BASE - SKIBOOT_BASE,
+ .vm_mapped_len = HEAP_BASE - SKIBOOT_BASE,
.type = REGION_SKIBOOT_FIRMWARE,
};
+struct mem_region skiboot_heap = {
+ .name = "ibm,firmware-heap",
+ .start = HEAP_BASE,
+ .len = HEAP_SIZE,
+ .vm_mapped_len = HEAP_SIZE,
+ .type = REGION_SKIBOOT_HEAP,
+};
+
static struct mem_region skiboot_after_heap = {
.name = "ibm,firmware-data",
.start = HEAP_BASE + HEAP_SIZE,
.len = SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),
+ .vm_mapped_len = SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),
.type = REGION_SKIBOOT_FIRMWARE,
};
@@ -142,16 +145,8 @@ static struct alloc_hdr *next_hdr(const struct mem_region *region,
}
#if POISON_MEM_REGION == 1
-static void mem_poison(struct free_hdr *f)
+static void mem_poison(struct free_hdr *f __unused)
{
- size_t poison_size = (void*)tailer(f) - (void*)(f+1);
-
- /* We only poison up to a limit, as otherwise boot is
- * kinda slow */
- if (poison_size > POISON_MEM_REGION_LIMIT)
- poison_size = POISON_MEM_REGION_LIMIT;
-
- memset(f+1, POISON_MEM_REGION_WITH, poison_size);
}
#endif
@@ -159,23 +154,42 @@ static void mem_poison(struct free_hdr *f)
static void init_allocatable_region(struct mem_region *region)
{
struct free_hdr *f = region_start(region);
+ unsigned long num_longs;
+ unsigned long *t;
+
assert(region->type == REGION_SKIBOOT_HEAP ||
region->type == REGION_MEMORY);
- f->hdr.num_longs = region->len / sizeof(long);
+
+ num_longs = region->len / sizeof(long);
+
+ if (!region->vm_mapped_len) {
+ /* SKIBOOT_BASE-SIZE regions already come mapped */
+ region->vm_mapped_len = PAGE_SIZE;
+ vm_map_global(region->name, region->start, PAGE_SIZE, true, false);
+ }
+
+ assert(PAGE_SIZE >= sizeof(*f));
+ assert(region->len >= PAGE_SIZE*2);
+
+ f->hdr.num_longs = num_longs;
f->hdr.free = true;
f->hdr.prev_free = false;
- *tailer(f) = f->hdr.num_longs;
list_head_init(®ion->free_list);
list_add(®ion->free_list, &f->list);
-#if POISON_MEM_REGION == 1
+#if 0 && POISON_MEM_REGION == 1
mem_poison(f);
#endif
+
+ t = vm_map((unsigned long)tailer(f), sizeof(long), true);
+ *t = num_longs;
+ vm_unmap((unsigned long)tailer(f), sizeof(long));
}
static void make_free(struct mem_region *region, struct free_hdr *f,
const char *location, bool skip_poison)
{
struct alloc_hdr *next;
+ unsigned long *t;
#if POISON_MEM_REGION == 1
if (!skip_poison)
@@ -203,7 +217,9 @@ static void make_free(struct mem_region *region, struct free_hdr *f,
}
/* Fix up tailer. */
- *tailer(f) = f->hdr.num_longs;
+ t = vm_map((unsigned long)tailer(f), sizeof(long), true);
+ *t = f->hdr.num_longs;
+ vm_unmap((unsigned long)tailer(f), sizeof(long));
/* If next is free, coalesce it */
next = next_hdr(region, &f->hdr);
@@ -392,6 +408,7 @@ static void *__mem_alloc(struct mem_region *region, size_t size, size_t align,
size_t alloc_longs, offset;
struct free_hdr *f;
struct alloc_hdr *next;
+ unsigned long newsz;
/* Align must be power of 2. */
assert(!((align - 1) & align));
@@ -447,6 +464,17 @@ found:
next->prev_free = false;
}
+ newsz = ((void *)((unsigned long *)f + alloc_longs + offset) - region_start(region) + sizeof(struct free_hdr));
+ if (newsz > region->vm_mapped_len) {
+ /* TODO: unmap on free */
+ newsz += PAGE_SIZE-1;
+ newsz &= ~(PAGE_SIZE-1);
+ vm_map_global(location,
+ region->start + region->vm_mapped_len,
+ newsz - region->vm_mapped_len, true, false);
+ region->vm_mapped_len = newsz;
+ }
+
if (offset != 0) {
struct free_hdr *pre = f;
@@ -691,6 +719,7 @@ static struct mem_region *new_region(const char *name,
region->name = name;
region->start = start;
region->len = len;
+ region->vm_mapped_len = 0;
region->node = node;
region->type = type;
region->free_list.n.next = NULL;
@@ -1260,9 +1289,13 @@ static void mem_clear_range(uint64_t s, uint64_t e)
return;
}
- prlog(PR_DEBUG, "Clearing region %llx-%llx\n",
- (long long)s, (long long)e);
+ /*
+ * Large clear thrashes the small hash table, with parallel clearing
+ * this can livelock. Clear in real mode.
+ */
+ vm_exit();
memset((void *)s, 0, e - s);
+ vm_enter();
}
struct mem_region_clear_job_args {
@@ -1276,7 +1309,8 @@ static void mem_region_clear_job(void *data)
mem_clear_range(arg->s, arg->e);
}
-#define MEM_REGION_CLEAR_JOB_SIZE (16ULL*(1<<30))
+/* Limited by 256MB segment size (could fix) */
+#define MEM_REGION_CLEAR_JOB_SIZE (128ULL*(1<<20))
static struct cpu_job **mem_clear_jobs;
static struct mem_region_clear_job_args *mem_clear_job_args;
@@ -1370,9 +1404,14 @@ void start_mem_region_clear_unused(void)
job_args[i].job_name,
mem_region_clear_job,
&job_args[i]);
+ if (!jobs[i])
+ jobs[i] = cpu_queue_job(NULL,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ assert(jobs[i]);
i++;
}
- unlock(&mem_region_lock);
cpu_process_local_jobs();
}
@@ -1394,6 +1433,8 @@ void wait_mem_region_clear_unused(void)
l>>30, total>>30);
free(mem_clear_job_args[i].job_name);
}
+ unlock(&mem_region_lock);
+
free(mem_clear_jobs);
free(mem_clear_job_args);
}
diff --git a/core/opal.c b/core/opal.c
index da746e805..3274719f8 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -53,7 +53,16 @@ void opal_table_init(void)
prlog(PR_DEBUG, "OPAL table: %p .. %p, branch table: %p\n",
s, e, opal_branch_table);
while(s < e) {
- opal_branch_table[s->token] = function_entry_address(s->func);
+ uint64_t f;
+ uint64_t *t;
+
+ f = function_entry_address(s->func);
+
+ t = vm_map((unsigned long)&opal_branch_table[s->token], sizeof(*t), true);
+
+ *t = f;
+ vm_unmap((unsigned long)&opal_branch_table[s->token], sizeof(*t));
+
opal_num_args[s->token] = s->nargs;
s++;
}
@@ -316,9 +325,16 @@ opal_call(OPAL_QUIESCE, opal_quiesce, 2);
void __opal_register(uint64_t token, void *func, unsigned int nargs)
{
+ uint64_t f;
+ uint64_t *t;
+
assert(token <= OPAL_LAST);
- opal_branch_table[token] = function_entry_address(func);
+ f = function_entry_address(func);
+
+ t = vm_map((unsigned long)&opal_branch_table[token], sizeof(uint64_t), true);
+ *t = f;
+ vm_unmap((unsigned long)&opal_branch_table[token], sizeof(uint64_t));
opal_num_args[token] = nargs;
}
diff --git a/core/vm.c b/core/vm.c
new file mode 100644
index 000000000..e63649bfc
--- /dev/null
+++ b/core/vm.c
@@ -0,0 +1,829 @@
+/* Copyright 2018 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <ccan/container_of/container_of.h>
+#include <ccan/list/list.h>
+#include <ccan/str/str.h>
+#include <cmpxchg.h>
+#include <cpu.h>
+#include <opal.h>
+#include <skiboot.h>
+#include <stack.h>
+#include <timebase.h>
+#include <trace.h>
+
+/* Set at the end of vm_init() and cleared in vm_destroy() */
+static bool vm_setup = false;
+/* Set by the first vm_init() so the boot-time global mappings are only added once */
+static bool vm_globals_allocated = false;
+
+/* 256MB */
+#define SLB_SZ (256UL*1024*1024)
+/*
+ * SLB slot layout: slb_add() round-robins over slots [0, GLOBAL_SLB_NR),
+ * vm_init_cpu() installs the per-cpu segment at slot LOCAL_SLB_BASE.
+ */
+#define SLB_NR 32
+#define LOCAL_SLB_NR 2
+#define GLOBAL_SLB_NR (SLB_NR - LOCAL_SLB_NR)
+#define LOCAL_SLB_BASE GLOBAL_SLB_NR
+
+/* Effective address range used for the per-cpu mappings created by vm_map() */
+#define LOCAL_EA_BEGIN 0x0800000000000000ULL
+#define LOCAL_EA_END 0x0900000000000000ULL
+
+/*
+ * Write one SLB entry with slbmte.
+ *
+ * esid:  effective segment id, placed in RB together with the valid bit
+ * vsid:  virtual segment id, placed in RS together with Kp
+ * index: SLB slot to write, OR-ed into the low bits of RB
+ */
+static void __nomcount slb_install(unsigned long esid, unsigned long vsid, unsigned int index)
+{
+ unsigned long rs;
+ unsigned long rb;
+
+ rs = vsid << (63-51); /* 256MB VSID */
+ rs |= 1UL << (63-53); /* Kp = 1 */
+
+ rb = esid << (63-35); /* 256MB ESID */
+ rb |= 1UL << (63-36); /* V = 1 */
+ rb |= index;
+
+ asm volatile("slbmte %0,%1" : : "r"(rs), "r"(rb) : "memory");
+}
+
+/* Currently compiled out: invalidate the SLB entry for one ESID with slbie. */
+#if 0
+static void slb_remove(unsigned long esid)
+{
+ asm volatile("isync ; slbie %0 ; isync" : : "r"(esid << 28) : "memory");
+}
+#endif
+
+/*
+ * Invalidate this CPU's SLB: an slbmte with zeroed operands followed by
+ * slbia, bracketed by isync.
+ * NOTE(review): presumably the zero slbmte is there to clear entry 0,
+ * which slbia does not invalidate -- confirm against the ISA.
+ */
+static void slb_remove_all(void)
+{
+ asm volatile("isync ; slbmte %0,%0 ; slbia ; isync" : : "r"(0) : "memory");
+}
+
+/*
+ * Install an SLB entry covering the 256MB segment that contains ea, using
+ * the next slot of this CPU's round-robin allocator.
+ */
+static void __nomcount slb_add(unsigned long ea)
+{
+	struct cpu_thread *c = this_cpu();
+	/* The VSID is simply the ESID: segments are translated 1:1 */
+	uint64_t seg = ea >> 28;
+
+	slb_install(seg, seg, c->vm_slb_rr);
+
+	/* Advance the allocator, wrapping before the slots kept for local use */
+	if (++c->vm_slb_rr == GLOBAL_SLB_NR)
+		c->vm_slb_rr = 0;
+}
+
+/* One hashed page table entry: two doublewords, stored big-endian */
+struct hpte {
+ uint64_t dword[2];
+};
+
+/* A page table entry group: the 8 slots a given hash value selects */
+struct hpteg {
+ struct hpte hpte[8];
+};
+
+/* Hash table allocated in vm_init(); htab_pteg_mask is (number of PTEGs - 1) */
+static struct hpteg *htab;
+static unsigned long htab_shift;
+static unsigned long htab_pteg_mask;
+
+/* Taken by htab_install() and by htab_remove() for non-local entries */
+static struct lock htab_lock;
+
+/*
+ * Install (or replace) the hash table entry translating va to pa.
+ *
+ * rw:    non-zero for a writeable page, otherwise the pp bits are set
+ * ex:    non-zero for an executable page, otherwise N is set
+ * ci:    non-zero sets the additional WIMG bits shown below
+ * local: sets SW[0] in the entry, which htab_remove() matches on
+ *
+ * The PTEG is scanned for a valid entry with the same AVA (replaced in
+ * place) or the first empty slot. If all 8 slots are in use by other
+ * addresses, a slot chosen from timebase bits is overwritten.
+ */
+static void __nomcount htab_install(unsigned long va, unsigned long pa, int rw, int ex, int ci, bool local)
+{
+ unsigned long hash;
+ struct hpteg *hpteg;
+ struct hpte *hpte;
+ unsigned long ava = va >> 23;
+ unsigned long arpn = pa >> 12;
+ unsigned long dw0, dw1;
+ unsigned long _dw0;
+ unsigned long _ava;
+ unsigned int hstart, hend;
+ unsigned int i;
+
+ dw0 = ava << (63-56); /* AVA = ava */
+ dw0 |= 0x1; /* V = 1 */
+ if (local)
+ dw0 |= 0x8; /* SW[0] = 1 */
+
+ dw1 = (arpn << (63-43 - 8)); /* ARPN||LP = arpn */
+ if (!rw)
+ dw1 |= (1UL << (63 - 0)) | (1UL << (63 - 63 + 1)); /* pp = 110 */
+ if (!ex)
+ dw1 |= (1UL << (63 - 61)); /* N = 1 */
+ dw1 |= (1UL << (63 - 60 + 1)); /* WIMG = 0010 */
+ if (ci)
+ dw1 |= (1UL << (63 - 60)) | (1UL << (63 - 60 + 2)); /* WIMG = 0111 */
+ dw1 |= (1UL << (63 - 55)) | (1UL << (63 - 56)); /* R=C=1 */
+
+ /* Hash of the page index within the segment and the segment number */
+ hash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);
+ hpteg = &htab[hash & htab_pteg_mask];
+
+ lock(&htab_lock);
+
+ hstart = 0;
+ hend = 7;
+
+ for (i = hstart; i <= hend; i++) {
+ hpte = &hpteg->hpte[i];
+
+ _dw0 = be64_to_cpu(hpte->dword[0]);
+ if (_dw0 & 1) {
+ _ava = _dw0 >> (63 - 56);
+ if (_ava == ava) {
+ /* Replace insertion */
+ goto install;
+ }
+
+ continue;
+ }
+
+ /* An invalid slot is expected to be entirely zero */
+ assert(!_dw0);
+ goto install;
+ }
+
+ /* PTEG full: pick a victim slot from the timebase */
+ i = mftb();
+ i = (i ^ (i >> 4)) & 0x7;
+ hpte = &hpteg->hpte[i];
+
+install:
+ /* Clear dword 0 (and so V) first, write dword 1, then publish dword 0 */
+ hpte->dword[0] = 0;
+ eieio();
+ hpte->dword[1] = cpu_to_be64(dw1);
+ eieio();
+ hpte->dword[0] = cpu_to_be64(dw0);
+ asm volatile("ptesync" ::: "memory");
+ unlock(&htab_lock);
+}
+
+/*
+ * Clear the hash table entry for va, if one is present, and invalidate
+ * the TLB for that page.
+ *
+ * Local entries (SW[0] set) are cleared with a cmpxchg without taking
+ * htab_lock and flushed with tlbiel. Global entries are cleared under
+ * htab_lock and flushed with tlbie + tlbsync.
+ */
+static void htab_remove(unsigned long va, int local)
+{
+ unsigned long hash;
+ struct hpteg *hpteg;
+ unsigned long ava = va >> 23;
+ unsigned long dw0;
+ unsigned int hstart, hend;
+ unsigned int i;
+
+ /* Rebuild the dword 0 value htab_install() would have written */
+ dw0 = ava << (63-56);
+ dw0 |= 0x1;
+ if (local)
+ dw0 |= 0x8;
+
+ hash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);
+ hpteg = &htab[hash & htab_pteg_mask];
+
+ if (!local)
+ lock(&htab_lock);
+again:
+ hstart = 0;
+ hend = 7;
+
+ for (i = hstart; i <= hend; i++) {
+ struct hpte *hpte = &hpteg->hpte[i];
+ unsigned long _raw_dw0, _dw0;
+
+ _raw_dw0 = hpte->dword[0];
+ _dw0 = be64_to_cpu(_raw_dw0);
+
+ if (!(_dw0 & 1)) {
+ assert(!_raw_dw0);
+ continue;
+ }
+
+ if (_dw0 != dw0) {
+ /* Same AVA with a different local flag would be a bug */
+ assert(_dw0 >> 7 != ava);
+ continue;
+ }
+
+ if (local) {
+ /* Entry changed under us: rescan the group */
+ if (__cmpxchg64(&hpte->dword[0], _raw_dw0, 0) != _raw_dw0)
+ goto again;
+ } else {
+ hpte->dword[0] = 0;
+ }
+
+ break;
+ }
+
+ /* The TLB is flushed whether or not an entry was found above */
+ if (local) {
+ asm volatile("ptesync" ::: "memory");
+ asm volatile("tlbiel %0" : : "r"(va & ~0xfffULL));
+ asm volatile("ptesync" ::: "memory");
+ } else {
+ unlock(&htab_lock);
+ asm volatile("ptesync" ::: "memory");
+ asm volatile("tlbie %0,%1" : : "r"(va & ~0xfffULL), "r"(0));
+ asm volatile("eieio ; tlbsync ; ptesync" ::: "memory");
+ }
+}
+
+/*
+ * Try to fix problems in callers if !strict.
+ * When false, overlapping maps, failed unmaps and faults on unmapped
+ * addresses are logged and worked around instead of asserting.
+ */
+static bool vm_strict = false;
+
+/* Global mappings, inserted in address order by __vm_map(); guarded by vm_maps_lock */
+static struct list_head vm_maps = LIST_HEAD_INIT(vm_maps);
+static struct lock vm_maps_lock;
+static unsigned long nr_vm_maps;
+
+/*
+ * Record a mapping of [addr, addr + len) onto physical address pa.
+ *
+ * local: overwrite this CPU's vm_local_map and return; nothing is
+ *        allocated and no lock is taken.
+ * global: allocate a vm_map and insert it into vm_maps in address order.
+ *        An overlap with an existing map is logged and inserted anyway
+ *        when !vm_strict, and asserts otherwise.
+ *
+ * No hash table entry is installed here; this function only records the
+ * extent (vm_dsi() looks it up when handling a fault).
+ */
+static void __vm_map(const char *name, unsigned long addr, unsigned long len, unsigned long pa, bool r, bool w, bool x, bool ci, bool local)
+{
+ struct cpu_thread *c = this_cpu();
+ /* NOTE: shadows the file-scope vm_setup; this is the per-cpu flag */
+ bool vm_setup = c->vm_setup;
+ struct vm_map *new;
+ struct vm_map *vmm;
+
+ if (local) {
+ new = &c->vm_local_map;
+ new->name = name;
+ new->address = addr;
+ new->length = len;
+ new->pa = pa;
+ new->readable = r;
+ new->writeable = w;
+ new->executable = x;
+ new->ci = ci;
+
+ return;
+ }
+
+ new = zalloc(sizeof(*new));
+ assert(new);
+
+ new->name = name;
+ new->address = addr;
+ new->length = len;
+ new->pa = pa;
+ new->readable = r;
+ new->writeable = w;
+ new->executable = x;
+ new->ci = ci;
+
+ /* Can not take a d-side fault while holding this lock */
+ if (vm_setup)
+ vm_exit();
+ lock(&vm_maps_lock);
+
+ list_for_each(&vm_maps, vmm, list) {
+ /* Existing map lies entirely below the new one: keep looking */
+ if (addr >= vmm->address + vmm->length)
+ continue;
+ /* New map lies entirely below this one: insert in front of it */
+ if (addr + len <= vmm->address) {
+ list_add_before(&vm_maps, &new->list, &vmm->list);
+ goto found;
+ }
+
+ /* Otherwise the two ranges overlap */
+ if (!vm_strict) {
+ prerror("vm_map_global %s %lx-%lx collided with vmm:%s %llx-%llx\n", name, addr, addr + len, vmm->name, vmm->address, vmm->address + vmm->length);
+ list_add_before(&vm_maps, &new->list, &vmm->list);
+ goto found;
+ }
+ assert(0);
+ }
+ list_add_tail(&vm_maps, &new->list);
+found:
+ nr_vm_maps++;
+ unlock(&vm_maps_lock);
+ if (vm_setup)
+ vm_enter();
+}
+
+/*
+ * Drop the mapping recorded for exactly [addr, addr + len) and, if this
+ * CPU has VM enabled, remove its hash table entries page by page.
+ *
+ * local: the range must match this CPU's vm_local_map, which is zeroed.
+ * global: the matching vm_map is unlinked from vm_maps and freed. If no
+ *        map matches, this is logged when !vm_strict and asserts
+ *        otherwise.
+ */
+static void __vm_unmap(unsigned long addr, unsigned long len, bool local)
+{
+ struct cpu_thread *c = this_cpu();
+ /* NOTE: shadows the file-scope vm_setup; this is the per-cpu flag */
+ bool vm_setup = c->vm_setup;
+ unsigned long end = addr + len;
+ struct vm_map *vmm;
+
+ if (local) {
+ vmm = &c->vm_local_map;
+ assert(addr == vmm->address);
+ assert(len == vmm->length);
+ memset(vmm, 0, sizeof(struct vm_map));
+
+ if (vm_setup) {
+ while (addr < end) {
+ htab_remove(addr, local);
+ addr += PAGE_SIZE;
+ }
+ }
+
+ return;
+ }
+
+ /* Can not take a d-side fault while holding this lock */
+ if (vm_setup)
+ vm_exit();
+ lock(&vm_maps_lock);
+ list_for_each(&vm_maps, vmm, list) {
+ if (addr != vmm->address)
+ continue;
+ if (len != vmm->length)
+ continue;
+ goto found;
+ }
+ /* Nothing matched: clear vmm so the free() at the end is skipped */
+ vmm = NULL;
+ unlock(&vm_maps_lock);
+ if (!vm_strict) {
+ prerror("unmap didn't find anything\n");
+ backtrace();
+ goto out;
+ }
+ assert(0);
+
+found:
+ list_del(&vmm->list);
+
+ if (vm_setup) {
+ while (addr < end) {
+ htab_remove(addr, local);
+ addr += PAGE_SIZE;
+ }
+ }
+
+ nr_vm_maps--;
+ unlock(&vm_maps_lock);
+out:
+ if (vm_setup)
+ vm_enter();
+
+ /* Freed only after VM has been re-entered */
+ if (vmm)
+ free(vmm);
+}
+
+
+/*
+ * Add a global 1:1 (pa == addr) data mapping: readable, never executable,
+ * writeable if rw, with the ci flag passed through to the map.
+ */
+void vm_map_global(const char *name, unsigned long addr, unsigned long len, bool rw, bool ci)
+{
+ __vm_map(name, addr, len, addr, true, rw, false, ci, false);
+}
+
+/* Add a global 1:1 mapping that is readable and executable but not writeable. */
+void vm_map_global_text(const char *name, unsigned long addr, unsigned long len)
+{
+ __vm_map(name, addr, len, addr, true, false, true, false, false);
+}
+
+/* Remove a global mapping; addr and len must match the values it was added with. */
+void vm_unmap_global(unsigned long addr, unsigned long len)
+{
+ __vm_unmap(addr, len, false);
+}
+
+
+/*
+ * Create this CPU's single local mapping of [addr, addr + len) and return
+ * the address to access it through.
+ *
+ * The range is widened to whole pages and mapped at this CPU's window in
+ * the LOCAL_EA range; the returned pointer keeps addr's offset within
+ * its page. When the CPU is not in virtual mode, addr itself is returned.
+ * Mappings do not nest: the previous one must be released with vm_unmap()
+ * first.
+ */
+void *vm_map(unsigned long addr, unsigned long len, bool rw)
+{
+	struct cpu_thread *c = this_cpu();
+	unsigned long local_ea = LOCAL_EA_BEGIN + ((unsigned long)c->pir << 30);
+	unsigned long page_off = addr & (PAGE_SIZE - 1);
+	unsigned long first, last;
+
+	/* Can't do nested mappings */
+	assert(!c->vm_local_map_inuse);
+	c->vm_local_map_inuse = true;
+
+	if (!c->vm_setup)
+		return (void *)addr;
+
+	/* Widen to page boundaries */
+	first = addr & ~(PAGE_SIZE - 1);
+	last = (addr + len + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
+	len = last - first;
+
+	assert(len < (1 << 28)); /* same segment */
+
+	__vm_map("local", local_ea, len, first, true, rw, false, false, true);
+
+	return (void *)local_ea + page_off;
+}
+
+/*
+ * Release the local mapping created by vm_map(). addr and len are only
+ * used to recompute the page-rounded length; the mapped address is this
+ * CPU's window in the LOCAL_EA range.
+ */
+void vm_unmap(unsigned long addr, unsigned long len)
+{
+	struct cpu_thread *c = this_cpu();
+	unsigned long local_ea = LOCAL_EA_BEGIN + ((unsigned long)c->pir << 30);
+	unsigned long first, last;
+
+	assert(c->vm_local_map_inuse);
+	c->vm_local_map_inuse = false;
+
+	if (!c->vm_setup)
+		return;
+
+	/* Same page rounding as vm_map() */
+	first = addr & ~(PAGE_SIZE - 1);
+	last = (addr + len + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
+	len = last - first;
+
+	assert(len < (1 << 28)); /* same segment */
+
+	__vm_unmap(local_ea, len, true);
+}
+
+/* Entry of the table whose address vm_init_cpu() writes to SPR_PTCR */
+struct prte {
+ unsigned long dword[2];
+};
+
+/* Allocated in vm_init(); entry 0 holds the hash table base and size */
+static struct prte *prtab;
+
+/*
+ * Program this CPU's translation registers: point PTCR at prtab, zero
+ * LPID/PID/HRMOR and the AMR family, flush the SLB and install the SLB
+ * entry for this CPU's local mapping window at slot LOCAL_SLB_BASE.
+ * Does not switch relocation on; see vm_enter().
+ */
+static void vm_init_cpu(void)
+{
+ struct cpu_thread *c = this_cpu();
+ /* Local window segment, translated 1:1 (esid == vsid) */
+ unsigned long esid = (LOCAL_EA_BEGIN + ((unsigned long)c->pir << 30)) >> 28;
+ unsigned long vsid = (LOCAL_EA_BEGIN + ((unsigned long)c->pir << 30)) >> 28;
+
+ /* NOTE(review): clears LPCR bits 0:3, 41, 43 and 54 -- confirm their meaning against the processor manual */
+ mtspr(SPR_LPCR, mfspr(SPR_LPCR) &
+ ~(PPC_BITMASK(0,3) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(54)));
+ mtspr(SPR_LPID, 0);
+ mtspr(SPR_PID, 0);
+ mtspr(SPR_HRMOR, 0);
+ mtspr(SPR_PTCR, (unsigned long)prtab);
+ mtspr(SPR_AMR, 0);
+ mtspr(SPR_IAMR, 0);
+ mtspr(SPR_AMOR, 0);
+ mtspr(SPR_UAMOR, 0);
+
+ slb_remove_all();
+ slb_install(esid, vsid, LOCAL_SLB_BASE);
+}
+
+/* Set up the calling CPU's translation registers and switch it into virtual mode. */
+void vm_init_secondary(void)
+{
+ vm_init_cpu();
+ vm_enter();
+}
+
+/*
+ * Return true when the calling CPU is not running with virtual memory:
+ * either VM has not been set up globally, or this CPU has not entered it.
+ */
+bool vm_realmode(void)
+{
+	struct cpu_thread *c = this_cpu();
+
+	if (!vm_setup)
+		return true;
+
+	return !c->vm_setup;
+}
+
+/*
+ * Switch the calling CPU into virtual mode by setting MSR[IR] and MSR[DR]
+ * and marking it as set up. Calling it while already in virtual mode is
+ * reported with a backtrace; the MSR bits are set regardless.
+ */
+void vm_enter(void)
+{
+	struct cpu_thread *c = this_cpu();
+	bool was_entered;
+
+	assert(vm_setup);
+
+	was_entered = c->vm_setup;
+	if (!was_entered)
+		c->vm_setup = true;
+
+	mtmsr(mfmsr() | (MSR_IR|MSR_DR));
+
+	if (was_entered) {
+		prerror("CPU:%d vm_enter already entered\n", c->pir);
+		backtrace();
+	}
+}
+
+/*
+ * Switch the calling CPU back to real mode by clearing MSR[IR] and
+ * MSR[DR] and clearing its set-up flag. Calling it while already in real
+ * mode is reported with a backtrace; the MSR bits are cleared regardless.
+ */
+void vm_exit(void)
+{
+	struct cpu_thread *c = this_cpu();
+	bool was_exited;
+
+	assert(vm_setup);
+
+	was_exited = !c->vm_setup;
+	if (!was_exited)
+		c->vm_setup = false;
+
+	mtmsr(mfmsr() & ~(MSR_IR|MSR_DR));
+
+	if (was_exited) {
+		prerror("CPU:%d vm_exit already exited\n", c->pir);
+		backtrace();
+	}
+}
+
+/*
+ * Data-side SLB miss handler: install an SLB entry for the segment
+ * containing dar. The per-cpu vm_setup flag is cleared while the handler
+ * runs and set again before returning. Always returns true.
+ *
+ * nia is unused.
+ */
+bool __nomcount vm_dslb(uint64_t nia, uint64_t dar)
+{
+	struct cpu_thread *c = this_cpu();
+
+	(void)nia;
+
+	/*
+	 * Test the per-cpu flag directly; a local copy named vm_setup
+	 * would shadow the file-scope flag of the same name.
+	 */
+	assert(c->vm_setup);
+	c->vm_setup = false;
+
+	/*
+	 * Per-cpu map ranges are bolted to per-cpu SLBs.
+	 */
+	assert((dar < LOCAL_EA_BEGIN) ||
+		(dar >= LOCAL_EA_END));
+
+	slb_add(dar);
+
+	c->vm_setup = true;
+
+	return true;
+}
+
+/*
+ * Instruction-side SLB miss handler: install an SLB entry for the
+ * segment containing nia. The per-cpu vm_setup flag is cleared while the
+ * handler runs and set again before returning. Always returns true.
+ */
+bool __nomcount vm_islb(uint64_t nia)
+{
+	struct cpu_thread *c = this_cpu();
+
+	/*
+	 * Test the per-cpu flag directly; a local copy named vm_setup
+	 * would shadow the file-scope flag of the same name.
+	 */
+	assert(c->vm_setup);
+	c->vm_setup = false;
+
+	slb_add(nia);
+
+	c->vm_setup = true;
+
+	return true;
+}
+
+/*
+ * Data-side page fault handler: find the mapping covering dar and
+ * install a hash table entry for that page.
+ *
+ * nia:   faulting instruction address, used only in log messages
+ * dar:   faulting data address
+ * store: true if the access was a store
+ *
+ * Addresses in the LOCAL_EA range are checked against this CPU's
+ * vm_local_map without locking; all others are looked up in vm_maps
+ * under vm_maps_lock. With !vm_strict, unmapped addresses and stores to
+ * read-only maps are logged and mapped anyway.
+ *
+ * Returns true if a translation was installed, false otherwise. The
+ * per-cpu vm_setup flag is cleared while the handler runs and set again
+ * on exit.
+ */
+bool __nomcount vm_dsi(uint64_t nia, uint64_t dar, bool store)
+{
+ struct cpu_thread *c = this_cpu();
+ /* NOTE: shadows the file-scope vm_setup; this is the per-cpu flag */
+ bool vm_setup = c->vm_setup;
+ struct vm_map *vmm;
+ uint64_t pa;
+ bool ret = true;
+ bool local;
+
+ (void)nia;
+
+ assert(vm_setup);
+ c->vm_setup = false;
+
+ if ((dar >= LOCAL_EA_BEGIN) && (dar < LOCAL_EA_END)) {
+ local = true;
+ vmm = &c->vm_local_map;
+ if (dar >= vmm->address && dar < vmm->address + vmm->length)
+ goto found;
+ goto not_found;
+ }
+
+ local = false;
+
+ lock(&vm_maps_lock);
+ list_for_each(&vm_maps, vmm, list) {
+ /* Global maps are always 1:1 */
+ assert(vmm->pa == vmm->address);
+ if (dar >= vmm->address && dar < vmm->address + vmm->length)
+ goto found;
+ }
+ /* No global map covers dar */
+ if (!vm_strict) {
+ /* Install a 1:1 writeable, non-executable page anyway and log it */
+ if (dar >= 0x0006000000000000 && dar < 0x0007000000000000)
+ /* MMIO */
+ htab_install(dar, dar, 1, 0, 1, false);
+ else if (dar < LOCAL_EA_BEGIN)
+ htab_install(dar, dar, 1, 0, 0, false);
+ else
+ ret = false;
+ unlock(&vm_maps_lock);
+ prerror("Page fault with no VMM at NIA:0x%016llx DAR:0x%016llx, store:%d\n", nia, dar, store);
+ backtrace();
+ goto out;
+ }
+ unlock(&vm_maps_lock);
+not_found:
+ prerror(" vmm not found\n");
+ ret = false;
+ assert(0);
+ goto out;
+
+found:
+ /* vm_maps_lock is held here unless the match was the local map */
+ pa = vmm->pa + (dar & ~(PAGE_SIZE - 1)) - vmm->address;
+ if (!vmm->readable) {
+ if (!local)
+ unlock(&vm_maps_lock);
+ prerror(" vmm not readable\n");
+ ret = false;
+ assert(0);
+ goto out;
+ }
+ if (store && !vmm->writeable) {
+ if (!vm_strict) {
+ /* Non-strict: grant write access despite the read-only map, and log it */
+ htab_install(dar, pa, store, 0, vmm->ci, local);
+ if (!local)
+ unlock(&vm_maps_lock);
+ prerror("Page fault store to RO VMM:%s at NIA:0x%016llx DAR:0x%016llx\n", vmm->name, nia, dar);
+ backtrace();
+ goto out;
+ }
+ if (!local)
+ unlock(&vm_maps_lock);
+ prerror(" vmm not writeable\n");
+ ret = false;
+ assert(0);
+ goto out;
+ }
+
+ htab_install(dar, pa, vmm->writeable, vmm->executable, vmm->ci, local);
+ if (!local)
+ unlock(&vm_maps_lock);
+
+out:
+ c->vm_setup = true;
+ return ret;
+}
+
+/*
+ * Instruction-side page fault handler. Looks nia up in the global maps
+ * and logs if it is unmapped or mapped non-executable, but in every case
+ * installs a 1:1 read-only executable entry for the page and returns
+ * true. The per-cpu vm_setup flag is cleared while the handler runs and
+ * set again before returning.
+ */
+bool __nomcount vm_isi(uint64_t nia)
+{
+ struct cpu_thread *c = this_cpu();
+ /* NOTE: shadows the file-scope vm_setup; this is the per-cpu flag */
+ bool vm_setup = c->vm_setup;
+ struct vm_map *vmm;
+
+ assert(vm_setup);
+ c->vm_setup = false;
+
+ lock(&vm_maps_lock);
+ list_for_each(&vm_maps, vmm, list) {
+ assert(vmm->pa == vmm->address);
+ if (nia >= vmm->address && nia < vmm->address + vmm->length) {
+ if (!vmm->executable)
+ prerror("Page fault at NIA:0x%016llx NX mapping!\n", nia);
+ goto found;
+ }
+ }
+
+ prerror("Page fault, no mapping for NIA:0x%016llx !\n", nia);
+
+found:
+ unlock(&vm_maps_lock);
+ htab_install(nia, nia, 0, 1, 0, false);
+ c->vm_setup = true;
+
+ return true;
+}
+
+/* CPU job body (also called directly): take the calling CPU out of virtual mode. */
+static void cpu_stop_vm(void *arg __unused)
+{
+ vm_exit();
+}
+
+/* CPU job body (also called directly): flush the calling CPU's SLB and zero its PTCR. */
+static void cpu_cleanup_vm(void *arg __unused)
+{
+ slb_remove_all();
+ mtspr(SPR_PTCR, 0);
+}
+
+/*
+ * Take every available CPU out of virtual mode, then clean up each CPU's
+ * translation state. This runs in two phases so that no CPU has its SLB
+ * and PTCR cleared before all CPUs have left virtual mode. The calling
+ * CPU runs each step directly; the others run it as a queued job.
+ */
+static void cpu_all_destroy_vm(void)
+{
+	struct cpu_thread *cpu;
+	struct cpu_job **jobs;
+
+	/*
+	 * jobs[] is indexed by PIR, so it needs cpu_max_pir + 1 slots.
+	 * The parentheses matter: without them only one extra byte is
+	 * allocated and jobs[cpu_max_pir] is out of bounds.
+	 */
+	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
+	assert(jobs);
+
+	/* Stop all CPUs */
+	for_each_available_cpu(cpu) {
+		if (cpu == this_cpu())
+			continue;
+		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_stop_vm",
+					       cpu_stop_vm, NULL);
+	}
+
+	/* this cpu */
+	cpu_stop_vm(NULL);
+
+	/* Cleanup after all stop */
+	for_each_available_cpu(cpu) {
+		if (jobs[cpu->pir])
+			cpu_wait_job(jobs[cpu->pir], true);
+	}
+
+	/* Every slot filled above is overwritten here; this CPU's stays NULL */
+	for_each_available_cpu(cpu) {
+		if (cpu == this_cpu())
+			continue;
+		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_cleanup_vm",
+					       cpu_cleanup_vm, NULL);
+	}
+
+	/* this cpu */
+	cpu_cleanup_vm(NULL);
+
+	for_each_available_cpu(cpu) {
+		if (jobs[cpu->pir])
+			cpu_wait_job(jobs[cpu->pir], true);
+	}
+
+	free(jobs);
+}
+
+/*
+ * Build the translation tables and switch the calling CPU into virtual
+ * mode.
+ *
+ * Allocates and zeroes a 64KB prtab and a 1MB hash table, points
+ * prtab[0] at the hash table, programs this CPU via vm_init_cpu() and
+ * flushes the TLB. On the first call (fast_reboot must be false) the
+ * fixed global mappings for the OPAL image, heap, consoles and boot
+ * stacks are added; on later calls (fast_reboot must be true) the
+ * existing vm_maps list is reused.
+ */
+void vm_init(bool fast_reboot)
+{
+ unsigned long stack_start = SKIBOOT_BASE + SKIBOOT_SIZE;
+ unsigned long stack_end = stack_start + (cpu_max_pir + 1)*STACK_SIZE;
+ unsigned long htab_nr_bytes;
+ unsigned long htab_nr_ptegs;
+
+ prtab = memalign(64*1024, 64*1024);
+ assert(prtab);
+ memset(prtab, 0, 64*1024);
+
+ htab_shift = 20; /* 1MB table */
+ htab_nr_bytes = 1UL << htab_shift;
+ htab_nr_ptegs = htab_nr_bytes / sizeof(struct hpteg);
+ htab_pteg_mask = htab_nr_ptegs - 1;
+ /* Table is aligned to its own size */
+ htab = memalign(1UL << htab_shift, htab_nr_bytes);
+ assert(htab);
+ memset(htab, 0, htab_nr_bytes);
+
+ /* Entry 0: hash table base address with (htab_shift - 18) in the low bits */
+ prtab[0].dword[0] = cpu_to_be64((unsigned long)htab | (htab_shift - 18));
+ prtab[0].dword[1] = 0;
+
+ eieio();
+
+ vm_init_cpu();
+
+ cleanup_global_tlb();
+
+ if (vm_globals_allocated) {
+ assert(fast_reboot);
+ goto done;
+ }
+
+ assert(!fast_reboot);
+ vm_globals_allocated = true;
+
+ vm_map_global_text("OPAL text", (unsigned long)_stext,
+ (unsigned long)_etext - (unsigned long)_stext);
+ vm_map_global("OPAL rodata", (unsigned long)__rodata_start,
+ (unsigned long)__rodata_end - (unsigned long)__rodata_start,
+ false, false);
+ vm_map_global("OPAL data", (unsigned long)_sdata,
+ (unsigned long)_edata - (unsigned long)_sdata,
+ true, false);
+ vm_map_global("OPAL bss", (unsigned long)_sbss,
+ (unsigned long)_ebss - (unsigned long)_sbss,
+ true, false);
+ vm_map_global("OPAL sym map", (unsigned long)__sym_map_start,
+ (unsigned long)__sym_map_end - (unsigned long)__sym_map_start,
+ false, false);
+ vm_map_global("OPAL heap", HEAP_BASE, HEAP_SIZE, true, false);
+ vm_map_global("Memory console", INMEM_CON_START, INMEM_CON_LEN, true, false);
+ vm_map_global("Hostboot console", HBRT_CON_START, HBRT_CON_LEN, false, false);
+ vm_map_global("SPIRA heap", SPIRA_HEAP_BASE, SPIRA_HEAP_SIZE, false, false);
+ vm_map_global("PSI TCE table", PSI_TCE_TABLE_BASE, PSI_TCE_TABLE_SIZE_P8, false, false);
+ vm_map_global("OPAL boot stacks", stack_start, stack_end - stack_start, true, false);
+
+done:
+ /* Always-on debug dump of the tables and the global map list */
+ if (1) {
+ struct vm_map *vmm;
+ prlog(PR_DEBUG, "VMM: SETUP\n");
+ prlog(PR_DEBUG, " PRTAB:%p\n", prtab);
+ prlog(PR_DEBUG, " HTAB: %p\n", htab);
+ prlog(PR_DEBUG, " Global mappings\n");
+ list_for_each(&vm_maps, vmm, list)
+ prlog(PR_DEBUG, "%28s 0x%08llx-0x%08llx\n", vmm->name,
+ vmm->address, vmm->address + vmm->length);
+ }
+
+ vm_setup = true;
+
+ vm_enter();
+}
+
+/*
+ * Resize the global mapping that overlaps the stack area so that it
+ * covers stacks for PIRs 0..cpu_max_pir (as cpu_max_pir stands at the
+ * time of the call), and rename it to "OPAL stacks". Asserts if no
+ * global mapping overlaps the stack area.
+ */
+void vm_init_stacks(void)
+{
+ unsigned long stack_start = SKIBOOT_BASE + SKIBOOT_SIZE;
+ unsigned long stack_end = stack_start + (cpu_max_pir + 1)*STACK_SIZE;
+ struct cpu_thread *c = this_cpu();
+ struct vm_map *vmm;
+
+ /* Can not take a d-side fault while holding this lock */
+ if (c->vm_setup)
+ mtmsr(mfmsr() & ~MSR_DR);
+ lock(&vm_maps_lock);
+ list_for_each(&vm_maps, vmm, list) {
+ /* Skip maps entirely above or entirely below the stack area */
+ if (vmm->address >= stack_end)
+ continue;
+ if (vmm->address + vmm->length <= stack_start)
+ continue;
+ goto found;
+ }
+ unlock(&vm_maps_lock);
+ assert(0);
+
+found:
+ vmm->name = "OPAL stacks";
+ vmm->address = stack_start;
+ vmm->length = stack_end - stack_start;
+ unlock(&vm_maps_lock);
+ if (c->vm_setup)
+ mtmsr(mfmsr() | MSR_DR);
+}
+
+/*
+ * Tear virtual memory down: log the global mappings, take all CPUs out
+ * of virtual mode and clean up their translation state, then free the
+ * hash table and prtab. The vm_maps list itself is kept (the code that
+ * would free it is disabled).
+ */
+void vm_destroy(void)
+{
+ assert(vm_setup);
+
+ /* Always-on debug dump of the global map list */
+ if (1) {
+ struct vm_map *vmm;
+ prlog(PR_DEBUG, "VMM: TEARDOWN\n");
+ prlog(PR_DEBUG, " Global mappings\n");
+ list_for_each(&vm_maps, vmm, list)
+ prlog(PR_DEBUG, "%28s 0x%08llx-0x%08llx\n", vmm->name,
+ vmm->address, vmm->address + vmm->length);
+ }
+
+ cpu_all_destroy_vm();
+
+ vm_setup = false;
+
+ if (0) { /* XXX: leave for VMM enabled fast-reboot */
+ while (!list_empty(&vm_maps)) {
+ struct vm_map *vmm;
+ vmm = list_pop(&vm_maps, struct vm_map, list);
+ free(vmm);
+ }
+ }
+
+ /* All CPUs are out of virtual mode and have PTCR zeroed by now */
+ free(htab);
+ htab = NULL;
+ free(prtab);
+ prtab = NULL;
+}
diff --git a/hdata/spira.c b/hdata/spira.c
index e118e229f..ce71b9657 100644
--- a/hdata/spira.c
+++ b/hdata/spira.c
@@ -1681,14 +1681,20 @@ static void update_spirah_addr(void)
{
#if !defined(TEST)
extern uint32_t naca;
- uint64_t *spirah_offset = (uint64_t *)&naca;
- uint64_t *spira_offset = (uint64_t *)((u64)(&naca) + 0x30);
+ uint64_t *spirah_offset;
+ uint64_t *spira_offset;
if (proc_gen < proc_gen_p9)
return;
+ spirah_offset = vm_map((u64)&naca, sizeof(u64), true);
*spirah_offset = SPIRAH_OFF;
+ vm_unmap((unsigned long)spirah_offset, sizeof(u64));
+
+ spira_offset = vm_map((u64)&naca + 0x30, sizeof(u64), true);
*spira_offset = SPIRA_OFF;
+ vm_unmap((unsigned long)spira_offset, sizeof(u64));
+
spirah.ntuples.hs_data_area.addr = CPU_TO_BE64(SPIRA_HEAP_BASE - SKIBOOT_BASE);
spirah.ntuples.mdump_res.addr = CPU_TO_BE64(MDRT_TABLE_BASE - SKIBOOT_BASE);
#endif
@@ -1696,14 +1702,22 @@ static void update_spirah_addr(void)
int parse_hdat(bool is_opal)
{
+ int ret = 0;
+
cpu_type = PVR_TYPE(mfspr(SPR_PVR));
prlog(PR_DEBUG, "Parsing HDAT...\n");
+ vm_map_global("SPIRA", SKIBOOT_BASE + SPIRA_OFF, sizeof(spira), true, false);
fixup_spira();
-
+ vm_map_global("SPIRA-H", SKIBOOT_BASE + SPIRAH_OFF, sizeof(spirah), false, false);
update_spirah_addr();
+ /* Downgrade to read-only */
+ vm_unmap_global(SKIBOOT_BASE + SPIRA_OFF, sizeof(spira));
+
+ vm_map_global("SPIRA", SKIBOOT_BASE + SPIRA_OFF, sizeof(spira), false, false);
+
/*
* Basic DT root stuff
*/
@@ -1723,9 +1737,12 @@ int parse_hdat(bool is_opal)
dt_init_led_node();
/* Parse SPPACA and/or PCIA */
- if (!pcia_parse())
- if (paca_parse() < 0)
- return -1;
+ if (!pcia_parse()) {
+ if (paca_parse() < 0) {
+ ret = -1;
+ goto out;
+ }
+ }
/* IPL params */
add_iplparams();
@@ -1771,6 +1788,9 @@ int parse_hdat(bool is_opal)
node_stb_parse();
prlog(PR_DEBUG, "Parsing HDAT...done\n");
+out:
+ vm_unmap_global(SKIBOOT_BASE + SPIRA_OFF, sizeof(spira));
+ vm_unmap_global(SKIBOOT_BASE + SPIRAH_OFF, sizeof(spirah));
- return 0;
+ return ret;
}
diff --git a/hw/fake-nvram.c b/hw/fake-nvram.c
index 6411400eb..bac13a3de 100644
--- a/hw/fake-nvram.c
+++ b/hw/fake-nvram.c
@@ -23,12 +23,16 @@ int fake_nvram_info(uint32_t *total_size)
int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len)
{
+ void *t;
+
if (!nvram_region)
return -ENODEV;
+ t = vm_map(nvram_region->start + src, len, false);
lock(&fake_nvram_lock);
- memcpy(dst, (void *) (nvram_region->start + src), len);
+ memcpy(dst, t, len);
unlock(&fake_nvram_lock);
+ vm_unmap(nvram_region->start + src, len);
nvram_read_complete(true);
@@ -37,12 +41,16 @@ int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len)
int fake_nvram_write(uint32_t offset, void *src, uint32_t size)
{
+ void *t;
+
if (!nvram_region)
return OPAL_HARDWARE;
+ t = vm_map(nvram_region->start + offset, size, true);
lock(&fake_nvram_lock);
- memcpy((void *) (nvram_region->start + offset), src, size);
+ memcpy(t, src, size);
unlock(&fake_nvram_lock);
+ vm_unmap(nvram_region->start + offset, size);
return 0;
}
diff --git a/hw/homer.c b/hw/homer.c
index 96a01fdbc..b1819ede7 100644
--- a/hw/homer.c
+++ b/hw/homer.c
@@ -108,6 +108,9 @@ static void homer_init_chip(struct proc_chip *chip)
chip->homer_base = hbase;
chip->homer_size = hsize;
+ /* slw late init and xive late init want to write to HOMER */
+ /* XXX: make it read only until then? */
+ vm_map_global("HOMER Image", hbase, hsize, true, false);
}
/*
@@ -134,6 +137,7 @@ static void homer_init_chip(struct proc_chip *chip)
chip->slw_base = sbase;
chip->slw_bar_size = ssize;
chip->slw_image_size = ssize; /* will be adjusted later */
+ /* XXX */
}
if (read_pba_bar(chip, bar_occ_common, &obase, &osize)) {
@@ -141,6 +145,7 @@ static void homer_init_chip(struct proc_chip *chip)
obase, osize / 0x100000);
chip->occ_common_base = obase;
chip->occ_common_size = osize;
+ vm_map_global("OCC Common Area", obase, osize, false, false);
}
}
diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
index feca229b6..0a64ac6ce 100644
--- a/hw/lpc-uart.c
+++ b/hw/lpc-uart.c
@@ -591,6 +591,8 @@ void early_uart_init(void)
if (!mmio_uart_base)
return;
+ vm_map_global("UART MMIO", (unsigned long)mmio_uart_base, 8, true, true);
+
clk = dt_prop_get_u32(uart_node, "clock-frequency");
baud = dt_prop_get_u32(uart_node, "current-speed");
@@ -599,6 +601,7 @@ void early_uart_init(void)
prlog(PR_DEBUG, "UART: Using UART at %p\n", mmio_uart_base);
} else {
prerror("UART: Early init failed!");
+ vm_unmap_global((unsigned long)mmio_uart_base, 8);
mmio_uart_base = NULL;
}
}
@@ -610,9 +613,6 @@ void uart_init(void)
char *path __unused;
const be32 *irqp;
- /* Clean up after early_uart_init() */
- mmio_uart_base = NULL;
-
/* UART lock is in the console path and thus must block
* printf re-entrancy
*/
@@ -630,13 +630,28 @@ void uart_init(void)
* directly mapped UARTs in simulation environments
*/
if (n->parent == dt_root) {
+ void *base;
+
printf("UART: Found at root !\n");
- mmio_uart_base = (void *)dt_translate_address(n, 0, NULL);
- if (!mmio_uart_base) {
+
+ base = (void *)dt_translate_address(n, 0, NULL);
+ if (!base) {
printf("UART: Failed to translate address !\n");
return;
}
+ if (mmio_uart_base != base) {
+ void *old;
+
+ vm_map_global("UART MMIO", (unsigned long)base, 8, true, true);
+ old = mmio_uart_base;
+ mmio_uart_base = base;
+
+ /* Clean up after early_uart_init() */
+ if (old)
+ vm_unmap_global((unsigned long)old, 8);
+ }
+
/* If it has an interrupt properly, we consider this to be
* a direct XICS/XIVE interrupt
*/
@@ -665,6 +680,12 @@ void uart_init(void)
lpc_irq = be32_to_cpu(*irqp);
prlog(PR_DEBUG, "UART: Using LPC IRQ %d\n", lpc_irq);
}
+
+ /* Clean up after early_uart_init() */
+ if (mmio_uart_base) {
+ vm_unmap_global((unsigned long)mmio_uart_base, 8);
+ mmio_uart_base = NULL;
+ }
}
diff --git a/hw/lpc.c b/hw/lpc.c
index 354d2b4f0..ffa86c642 100644
--- a/hw/lpc.c
+++ b/hw/lpc.c
@@ -1252,6 +1252,8 @@ static void lpc_init_chip_p9(struct dt_node *opb_node)
if (!lpc_node)
return;
+ vm_map_global("LPC MMIO", addr, 0x100000000UL, true, true);
+
lpc = zalloc(sizeof(struct lpcm));
assert(lpc);
lpc->chip_id = gcid;
diff --git a/hw/phb4.c b/hw/phb4.c
index 3c71427ae..6cbd84aca 100644
--- a/hw/phb4.c
+++ b/hw/phb4.c
@@ -5766,6 +5766,7 @@ static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,
uint64_t val, phb_bar = 0, irq_bar = 0, bar_en;
uint64_t mmio0_bar = 0, mmio0_bmask, mmio0_sz;
uint64_t mmio1_bar = 0, mmio1_bmask, mmio1_sz;
+ uint64_t bar_sz;
uint64_t reg[4];
void *foo;
uint64_t mmio_win[4];
@@ -5795,7 +5796,8 @@ static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,
bar_en = 0;
/* Initialize PHB register BAR */
- phys_map_get(gcid, PHB4_REG_SPC, phb_num, &phb_bar, NULL);
+ phys_map_get(gcid, PHB4_REG_SPC, phb_num, &phb_bar, &bar_sz);
+ vm_map_global("PHB REGS", phb_bar, bar_sz, true, true);
rc = xscom_write(gcid, nest_stack + XPEC_NEST_STK_PHB_REG_BAR,
phb_bar << 8);
@@ -5809,18 +5811,21 @@ static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,
bar_en |= XPEC_NEST_STK_BAR_EN_PHB;
/* Same with INT BAR (ESB) */
- phys_map_get(gcid, PHB4_XIVE_ESB, phb_num, &irq_bar, NULL);
+ phys_map_get(gcid, PHB4_XIVE_ESB, phb_num, &irq_bar, &bar_sz);
+ vm_map_global("PHB IRQ", irq_bar, bar_sz, true, true);
xscom_write(gcid, nest_stack + XPEC_NEST_STK_IRQ_BAR, irq_bar << 8);
bar_en |= XPEC_NEST_STK_BAR_EN_INT;
/* Same with MMIO windows */
phys_map_get(gcid, PHB4_64BIT_MMIO, phb_num, &mmio0_bar, &mmio0_sz);
+ vm_map_global("PHB MMIO0", mmio0_bar, mmio0_sz, true, true);
mmio0_bmask = (~(mmio0_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0, mmio0_bar << 8);
xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0_MASK, mmio0_bmask << 8);
phys_map_get(gcid, PHB4_32BIT_MMIO, phb_num, &mmio1_bar, &mmio1_sz);
+ vm_map_global("PHB MMIO1", mmio1_bar, mmio1_sz, true, true);
mmio1_bmask = (~(mmio1_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1, mmio1_bar << 8);
xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1_MASK, mmio1_bmask << 8);
diff --git a/hw/psi.c b/hw/psi.c
index bc170bbcf..3140001f2 100644
--- a/hw/psi.c
+++ b/hw/psi.c
@@ -907,6 +907,8 @@ static bool psi_init_psihb(struct dt_node *psihb)
list_add(&psis, &psi->list);
+ vm_map_global("PSI", (unsigned long)psi->regs, 0x100, true, true);
+
val = in_be64(psi->regs + PSIHB_CR);
if (val & PSIHB_CR_FSP_LINK_ACTIVE) {
lock(&psi_lock);
diff --git a/hw/slw.c b/hw/slw.c
index ed8a5f9e6..cb012dcb2 100644
--- a/hw/slw.c
+++ b/hw/slw.c
@@ -155,7 +155,7 @@ static void slw_patch_reset(void)
*(sav++) = *(dst);
*(dst++) = *(src++);
}
- sync_icache();
+ sync_icache(0);
}
static void slw_unpatch_reset(void)
@@ -171,7 +171,7 @@ static void slw_unpatch_reset(void)
*(dst++) = *(sav++);
src++;
}
- sync_icache();
+ sync_icache(0);
}
static bool slw_general_init(struct proc_chip *chip, struct cpu_thread *c)
diff --git a/hw/xive.c b/hw/xive.c
index 96a9bc647..0f76257ff 100644
--- a/hw/xive.c
+++ b/hw/xive.c
@@ -1613,6 +1613,7 @@ static bool xive_configure_bars(struct xive *x)
/* IC BAR */
phys_map_get(chip_id, XIVE_IC, 0, (uint64_t *)&x->ic_base, &x->ic_size);
+ vm_map_global("XIVE IC", (unsigned long)x->ic_base, x->ic_size, true, true);
val = (uint64_t)x->ic_base | CQ_IC_BAR_VALID;
if (IC_PAGE_SIZE == 0x10000) {
val |= CQ_IC_BAR_64K;
@@ -1628,6 +1629,8 @@ static bool xive_configure_bars(struct xive *x)
* all phys_map_get(XIVE_TM) calls.
*/
phys_map_get(0, XIVE_TM, 0, (uint64_t *)&x->tm_base, &x->tm_size);
+ if (chip_id == 0)
+ vm_map_global("XIVE TM", (unsigned long)x->tm_base, x->tm_size, true, true);
val = (uint64_t)x->tm_base | CQ_TM_BAR_VALID;
if (TM_PAGE_SIZE == 0x10000) {
x->tm_shift = 16;
@@ -1643,6 +1646,7 @@ static bool xive_configure_bars(struct xive *x)
/* PC BAR. Clear first, write mask, then write value */
phys_map_get(chip_id, XIVE_PC, 0, (uint64_t *)&x->pc_base, &x->pc_size);
+ vm_map_global("XIVE PC", (unsigned long)x->pc_base, x->pc_size, true, true);
xive_regwx(x, CQ_PC_BAR, 0);
if (x->last_reg_error)
return false;
@@ -1657,6 +1661,7 @@ static bool xive_configure_bars(struct xive *x)
/* VC BAR. Clear first, write mask, then write value */
phys_map_get(chip_id, XIVE_VC, 0, (uint64_t *)&x->vc_base, &x->vc_size);
+ vm_map_global("XIVE VC", (unsigned long)x->vc_base, x->vc_size, true, true);
xive_regwx(x, CQ_VC_BAR, 0);
if (x->last_reg_error)
return false;
diff --git a/hw/xscom.c b/hw/xscom.c
index 9b28422d2..a664fefa9 100644
--- a/hw/xscom.c
+++ b/hw/xscom.c
@@ -860,6 +860,8 @@ void xscom_init(void)
assert(reg);
chip->xscom_base = dt_translate_address(xn, 0, NULL);
+ /* XXX: how large is this window? */
+ vm_map_global("XSCOM MMIO", chip->xscom_base, 0x200000000UL, true, true);
/* Grab processor type and EC level */
xscom_init_chip_info(chip);
@@ -873,7 +875,7 @@ void xscom_init(void)
prlog(PR_NOTICE, "CHIP: Chip ID %04x type: %s DD%x.%x%d\n",
gcid, chip_name, chip->ec_level >> 4,
chip->ec_level & 0xf, chip->ec_rev);
- prlog(PR_DEBUG, "XSCOM: Base address: 0x%llx\n", chip->xscom_base);
+ prlog(PR_NOTICE, "XSCOM: Base address: 0x%llx\n", chip->xscom_base);
}
/* Collect details to trigger xstop via XSCOM write */
diff --git a/include/cmpxchg.h b/include/cmpxchg.h
index 3541a41f4..5d518ece6 100644
--- a/include/cmpxchg.h
+++ b/include/cmpxchg.h
@@ -5,6 +5,9 @@
#define __CMPXCHG_H
#ifndef __TEST__
+#include <stdint.h>
+#include <processor.h>
+
/*
* Bare cmpxchg, no barriers.
*/
diff --git a/include/cpu.h b/include/cpu.h
index cda78644d..f7ea9ab5f 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -12,6 +12,19 @@
#include <stack.h>
#include <timer.h>
+/*
+ * One virtual memory mapping: either a global extent linked on the
+ * vm_maps list in core/vm.c, or the single per-cpu local map embedded in
+ * struct cpu_thread.
+ */
+struct vm_map {
+ struct list_node list;
+
+ const char *name; /* label used in log messages */
+ uint64_t address; /* effective address the range is accessed at */
+ uint64_t pa; /* physical address backing `address` */
+ uint64_t length; /* in bytes */
+ bool readable;
+ bool writeable;
+ bool executable;
+ bool ci; /* passed to htab_install() as its ci argument */
+};
+
/*
* cpu_thread is our internal structure representing each
* thread in the system
@@ -70,10 +83,19 @@ struct cpu_thread {
struct bt_entry stack_bot_bt[CPU_BACKTRACE_SIZE];
struct bt_metadata stack_bot_bt_metadata;
#endif
+ /*
+ * Per-thread VM parameters
+ */
+ struct vm_map vm_local_map; /* per-cpu map */
+ bool vm_local_map_inuse;
+ uint8_t vm_slb_rr; /* RR allocator */
+ bool vm_setup; /* virtual memory is up */
+
struct lock job_lock;
struct list_head job_queue;
uint32_t job_count;
bool job_has_no_return;
+
/*
* Per-core mask tracking for threads in HMI handler and
* a cleanup done bit.
diff --git a/include/elf-abi.h b/include/elf-abi.h
index 827f2af19..3e63c06ac 100644
--- a/include/elf-abi.h
+++ b/include/elf-abi.h
@@ -21,7 +21,15 @@
static inline uint64_t function_entry_address(void *func)
{
#ifdef ELF_ABI_v2
- u32 *insn = func;
+ u32 *i;
+ u32 insn;
+ u32 insn2;
+
+ i = vm_map((unsigned long)func, sizeof(insn) * 2, false); /* two instructions are read below */
+ insn = *i;
+ insn2 = *(i+1);
+ vm_unmap((unsigned long)func, sizeof(insn) * 2);
+
/*
* A PPC64 ABIv2 function may have a local and a global entry
* point. We use the local entry point for branch tables called
@@ -38,12 +46,12 @@ static inline uint64_t function_entry_address(void *func)
* lis r2,XXXX
* addi r2,r2,XXXX
*/
- if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
- ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
- ((*(insn+1) & OP_RT_RA_MASK) == ADDI_R2_R2))
- return (uint64_t)(insn + 2);
+ if ((((insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((insn2 & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (uint64_t)func + sizeof(insn) * 2; /* based on func: the vm_map() window is already unmapped */
else
return (uint64_t)func;
#else
return *(uint64_t *)func;
#endif
diff --git a/include/io.h b/include/io.h
index c6203a274..806e4a152 100644
--- a/include/io.h
+++ b/include/io.h
@@ -7,6 +7,7 @@
#ifndef __ASSEMBLY__
#include <compiler.h>
+#include <skiboot.h>
#include <stdint.h>
#include <processor.h>
#include <ccan/endian/endian.h>
@@ -22,8 +23,14 @@
static inline uint8_t __in_8(const volatile uint8_t *addr)
{
uint8_t val;
- asm volatile("lbzcix %0,0,%1" :
+
+ if (vm_realmode())
+ asm volatile("lbzcix %0,0,%1" :
+ "=r"(val) : "r"(addr), "m"(*addr) : "memory");
+ else
+ asm volatile("lbzx %0,0,%1" :
"=r"(val) : "r"(addr), "m"(*addr) : "memory");
+
return val;
}
@@ -36,8 +43,14 @@ static inline uint8_t in_8(const volatile uint8_t *addr)
static inline uint16_t __in_be16(const volatile uint16_t *addr)
{
uint16_t val;
- asm volatile("lhzcix %0,0,%1" :
+
+ if (vm_realmode())
+ asm volatile("lhzcix %0,0,%1" :
"=r"(val) : "r"(addr), "m"(*addr) : "memory");
+ else
+ asm volatile("lhzx %0,0,%1" :
+ "=r"(val) : "r"(addr), "m"(*addr) : "memory");
+
return val;
}
@@ -55,8 +68,14 @@ static inline uint16_t in_le16(const volatile uint16_t *addr)
static inline uint32_t __in_be32(const volatile uint32_t *addr)
{
uint32_t val;
- asm volatile("lwzcix %0,0,%1" :
+
+ if (vm_realmode())
+ asm volatile("lwzcix %0,0,%1" :
+ "=r"(val) : "r"(addr), "m"(*addr) : "memory");
+ else
+ asm volatile("lwzx %0,0,%1" :
"=r"(val) : "r"(addr), "m"(*addr) : "memory");
+
return val;
}
@@ -74,8 +93,14 @@ static inline uint32_t in_le32(const volatile uint32_t *addr)
static inline uint64_t __in_be64(const volatile uint64_t *addr)
{
uint64_t val;
- asm volatile("ldcix %0,0,%1" :
+
+ if (vm_realmode())
+ asm volatile("ldcix %0,0,%1" :
"=r"(val) : "r"(addr), "m"(*addr) : "memory");
+ else
+ asm volatile("ldx %0,0,%1" :
+ "=r"(val) : "r"(addr), "m"(*addr) : "memory");
+
return val;
}
@@ -92,7 +117,11 @@ static inline uint64_t in_le64(const volatile uint64_t *addr)
static inline void __out_8(volatile uint8_t *addr, uint8_t val)
{
- asm volatile("stbcix %0,0,%1"
+ if (vm_realmode())
+ asm volatile("stbcix %0,0,%1"
+ : : "r"(val), "r"(addr), "m"(*addr) : "memory");
+ else
+ asm volatile("stbx %0,0,%1"
: : "r"(val), "r"(addr), "m"(*addr) : "memory");
}
@@ -104,7 +133,11 @@ static inline void out_8(volatile uint8_t *addr, uint8_t val)
static inline void __out_be16(volatile uint16_t *addr, uint16_t val)
{
- asm volatile("sthcix %0,0,%1"
+ if (vm_realmode())
+ asm volatile("sthcix %0,0,%1"
+ : : "r"(val), "r"(addr), "m"(*addr) : "memory");
+ else
+ asm volatile("sthx %0,0,%1"
: : "r"(val), "r"(addr), "m"(*addr) : "memory");
}
@@ -121,7 +154,11 @@ static inline void out_le16(volatile uint16_t *addr, uint16_t val)
static inline void __out_be32(volatile uint32_t *addr, uint32_t val)
{
- asm volatile("stwcix %0,0,%1"
+ if (vm_realmode())
+ asm volatile("stwcix %0,0,%1"
+ : : "r"(val), "r"(addr), "m"(*addr) : "memory");
+ else
+ asm volatile("stwx %0,0,%1"
: : "r"(val), "r"(addr), "m"(*addr) : "memory");
}
@@ -138,7 +175,11 @@ static inline void out_le32(volatile uint32_t *addr, uint32_t val)
static inline void __out_be64(volatile uint64_t *addr, uint64_t val)
{
- asm volatile("stdcix %0,0,%1"
+ if (vm_realmode())
+ asm volatile("stdcix %0,0,%1"
+ : : "r"(val), "r"(addr), "m"(*addr) : "memory");
+ else
+ asm volatile("stdx %0,0,%1"
: : "r"(val), "r"(addr), "m"(*addr) : "memory");
}
diff --git a/include/mem_region.h b/include/mem_region.h
index 65eda4222..df8d03f01 100644
--- a/include/mem_region.h
+++ b/include/mem_region.h
@@ -33,6 +33,7 @@ struct mem_region {
struct list_node list;
const char *name;
uint64_t start, len;
+ uint64_t vm_mapped_len;
struct dt_node *node;
enum mem_region_type type;
struct list_head free_list;
diff --git a/include/processor.h b/include/processor.h
index 352fd1ec4..c8fd57157 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -39,7 +39,9 @@
#define SPR_SRR1 0x01b /* RW: Exception save/restore reg 1 */
#define SPR_CFAR 0x01c /* RW: Come From Address Register */
#define SPR_AMR 0x01d /* RW: Authority Mask Register */
+#define SPR_PID 0x030 /* RW: PID register */
#define SPR_IAMR 0x03d /* RW: Instruction Authority Mask Register */
+#define SPR_UAMOR 0x09d
#define SPR_RPR 0x0ba /* RW: Relative Priority Register */
#define SPR_TBRL 0x10c /* RO: Timebase low */
#define SPR_TBRU 0x10d /* RO: Timebase high */
@@ -61,10 +63,12 @@
#define SPR_HSRR1 0x13b /* RW: HV Exception save/restore reg 1 */
#define SPR_TFMR 0x13d
#define SPR_LPCR 0x13e
+#define SPR_LPID 0x13f /* RW: LPID register */
#define SPR_HMER 0x150 /* Hypervisor Maintenance Exception */
#define SPR_HMEER 0x151 /* HMER interrupt enable mask */
#define SPR_PCR 0x152
#define SPR_AMOR 0x15d
+#define SPR_PTCR 0x1d0 /* RW: Partition table control register */
#define SPR_PSSCR 0x357 /* RW: Stop status and control (ISA 3) */
#define SPR_TSCR 0x399
#define SPR_HID0 0x3f0
@@ -80,6 +84,11 @@
#define SPR_SRR1_PM_WAKE_SRESET 0x100000
#define SPR_SRR1_PM_WAKE_MCE 0x3c0000 /* Use reserved value for MCE */
+/* Bits in DSISR */
+
+#define DSISR_ISSTORE 0x02000000
+
+
/* Bits in LPCR */
/* Powersave Exit Cause Enable is different on each generation */
@@ -309,9 +318,9 @@ static inline void isync(void)
/*
* Cache sync
*/
-static inline void sync_icache(void)
+static inline void sync_icache(unsigned long ptr)
{
- asm volatile("sync; icbi 0,%0; sync; isync" : : "r" (0) : "memory");
+ asm volatile("sync; icbi 0,%0; sync; isync" : : "r" (ptr) : "memory");
}
/*
diff --git a/include/skiboot.h b/include/skiboot.h
index 96d25b83d..ed7784359 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -43,8 +43,13 @@ extern char __sym_map_end[];
extern char _romem_end[];
#ifndef __TESTING__
+extern char _stext[], _etext[];
/* Readonly section start and end. */
extern char __rodata_start[], __rodata_end[];
+extern char _sdata[], _edata[];
+extern char __sym_map_start[], __sym_map_end[];
+extern char _sbss[], _ebss[];
+extern char _end[];
static inline bool is_rodata(const void *p)
{
@@ -183,6 +188,7 @@ extern void disable_fast_reboot(const char *reason);
extern void add_fast_reboot_dt_entries(void);
extern void fast_reboot(void);
extern void __noreturn __secondary_cpu_entry(void);
+extern void __noreturn __return_cpu_entry(void);
extern void __noreturn load_and_boot_kernel(bool is_reboot);
extern void cleanup_local_tlb(void);
extern void cleanup_global_tlb(void);
@@ -335,4 +341,26 @@ extern int fake_nvram_info(uint32_t *total_size);
extern int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len);
extern int fake_nvram_write(uint32_t offset, void *src, uint32_t size);
+/* core/vm.c */
+#define PAGE_SIZE 4096
+
+bool vm_realmode(void);
+void vm_map_global(const char *name, unsigned long addr, unsigned long len, bool rw, bool ci);
+void vm_map_global_text(const char *name, unsigned long addr, unsigned long len);
+void vm_unmap_global(unsigned long addr, unsigned long len);
+void *vm_map(unsigned long addr, unsigned long len, bool rw);
+void vm_unmap(unsigned long addr, unsigned long len);
+void vm_init(bool fast_reboot);
+void vm_init_stacks(void);
+void vm_destroy(void);
+void vm_init_secondary(void);
+void vm_enter(void);
+void vm_exit(void);
+void vm_exit_cleanup(void);
+void vm_map_stacks(void);
+bool vm_dslb(uint64_t nia, uint64_t dar);
+bool vm_islb(uint64_t nia);
+bool vm_dsi(uint64_t nia, uint64_t dar, bool store);
+bool vm_isi(uint64_t nia);
+
#endif /* __SKIBOOT_H */
diff --git a/libstb/container.c b/libstb/container.c
index 58fd18f9e..63c99406e 100644
--- a/libstb/container.c
+++ b/libstb/container.c
@@ -6,14 +6,20 @@
bool stb_is_container(const void *buf, size_t size)
{
+ uint32_t *t;
ROM_container_raw *c;
+ bool ret = true;
c = (ROM_container_raw*) buf;
if (!buf || size < SECURE_BOOT_HEADERS_SIZE)
return false;
- if (be32_to_cpu(c->magic_number) != ROM_MAGIC_NUMBER )
- return false;
- return true;
+
+ t = vm_map((unsigned long)&c->magic_number, sizeof(*t), false);
+ if (be32_to_cpu(*t) != ROM_MAGIC_NUMBER)
+ ret = false;
+ vm_unmap((unsigned long)&c->magic_number, sizeof(*t));
+
+ return ret;
}
uint32_t stb_payload_magic(const void *buf, size_t size)
diff --git a/libstb/cvc.c b/libstb/cvc.c
index dca4ac857..46fba0e52 100644
--- a/libstb/cvc.c
+++ b/libstb/cvc.c
@@ -154,6 +154,9 @@ static int cvc_reserved_mem_init(struct dt_node *parent) {
return -1;
}
addr = dt_get_address(cvc_resv_mem, 0, &size);
+ if (size == 0) // MAMBO HACK
+ size = 64*1024;
+ vm_map_global_text("STB-CVC", addr, size);
cvc_register(addr, addr + size-1);
/*
diff --git a/libstb/secureboot.c b/libstb/secureboot.c
index bfc98f2ae..a3bc04833 100644
--- a/libstb/secureboot.c
+++ b/libstb/secureboot.c
@@ -163,6 +163,7 @@ void secureboot_init(void)
int secureboot_verify(enum resource_id id, void *buf, size_t len)
{
const char *name;
+ void *vbuf;
uint64_t log;
int rc = -1;
@@ -181,7 +182,9 @@ int secureboot_verify(enum resource_id id, void *buf, size_t len)
return -1;
}
- rc = call_cvc_verify(buf, len, hw_key_hash, hw_key_hash_size, &log);
+ vbuf = vm_map((unsigned long)buf, len, false);
+ rc = call_cvc_verify(vbuf, len, hw_key_hash, hw_key_hash_size, &log);
+ vm_unmap((unsigned long)buf, len);
if (rc == OPAL_SUCCESS) {
prlog(PR_NOTICE, "%s verified\n", name);
diff --git a/libstb/trustedboot.c b/libstb/trustedboot.c
index 3f977de10..cd5b5207d 100644
--- a/libstb/trustedboot.c
+++ b/libstb/trustedboot.c
@@ -161,7 +161,7 @@ out_free:
int trustedboot_measure(enum resource_id id, void *buf, size_t len)
{
uint8_t digest[SHA512_DIGEST_LENGTH];
- void *buf_aux;
+ void *buf_aux, *vbuf;
size_t len_aux;
const char *name;
TPM_Pcr pcr;
@@ -219,7 +219,9 @@ int trustedboot_measure(enum resource_id id, void *buf, size_t len)
len_aux = len;
}
- rc = call_cvc_sha512(buf_aux, len_aux, digest, SHA512_DIGEST_LENGTH);
+ vbuf = vm_map((unsigned long)buf_aux, len_aux, false);
+ rc = call_cvc_sha512(vbuf, len_aux, digest, SHA512_DIGEST_LENGTH);
+ vm_unmap((unsigned long)buf_aux, len_aux);
if (rc == OPAL_SUCCESS) {
prlog(PR_NOTICE, "%s hash calculated\n", name);
diff --git a/skiboot.lds.S b/skiboot.lds.S
index 5b4bb41a2..3b0fbcef2 100644
--- a/skiboot.lds.S
+++ b/skiboot.lds.S
@@ -94,18 +94,33 @@ SECTIONS
KEEP(*(.cpuctrl.data))
}
+ /* Relocations */
. = ALIGN(0x10);
+ .dynamic : {
+ __dynamic_start = .;
+ *(.dynamic)
+ __dynamic_end = .;
+ }
+
+ . = ALIGN(0x10);
+ .rela.dyn : {
+ __rela_dyn_start = .;
+ *(.rela*)
+ __rela_dyn_end = .;
+ }
+
+ . = ALIGN(0x1000);
_stext = .;
.text : {
*(.text*)
*(.sfpr .glink)
}
_etext = .;
+ . = ALIGN(0x1000);
+ __rodata_start = .;
.rodata : {
- __rodata_start = .;
*(.rodata .rodata.*)
- __rodata_end = .;
}
. = ALIGN(0x10);
@@ -129,38 +144,21 @@ SECTIONS
*(.toc)
}
- . = ALIGN(0x10);
- .opal_table : {
- __opal_table_start = .;
- KEEP(*(.opal_table))
- __opal_table_end = .;
- }
-
.platforms : {
__platforms_start = .;
KEEP(*(.platforms))
__platforms_end = .;
}
- /* Do I need to keep these ? */
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
-
- /* Relocations */
. = ALIGN(0x10);
- .dynamic : {
- __dynamic_start = .;
- *(.dynamic)
- __dynamic_end = .;
+ .opal_table : {
+ __opal_table_start = .;
+ KEEP(*(.opal_table))
+ __opal_table_end = .;
}
+ __rodata_end = .;
- . = ALIGN(0x10);
- .rela.dyn : {
- __rela_dyn_start = .;
- *(.rela*)
- __rela_dyn_end = .;
- }
- .plt : { *(.plt) *(.iplt) }
+ . = ALIGN(0x1000);
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
@@ -170,7 +168,6 @@ SECTIONS
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
- . = ALIGN(0x10);
.sym_map : {
__sym_map_start = . ;
KEEP(*(.sym_map))
@@ -183,6 +180,9 @@ SECTIONS
*/
_romem_end = .;
+ . = ALIGN(0x1000);
+
+ _sdata = .;
.data : {
/*
* A couple of things that need to be 4K aligned and
@@ -199,6 +199,10 @@ SECTIONS
*(.toc1)
*(.branch_lt)
}
+ .plt : { *(.plt) *(.iplt) }
+ _edata = .;
+
+ . = ALIGN(0x1000);
/* We locate the BSS at 4M to leave room for the symbol map */
. = 0x400000;
--
2.22.0
More information about the Skiboot
mailing list