[PATCH 5/8] bootwrapper: Add kexec callable zImage wrapper

Milton Miller miltonm at bga.com
Wed Apr 11 18:33:34 EST 2007


This code creates a 32 bit zImage wrapper for a 32 or 64 bit PowerPC
Linux kernel.   This allows you to kexec a zImage (instead of a kernel).
This can reduce the memory needed to reboot, or to provide a common
image for boot and reboot.

It also provides rules to pretend the binary is a 64-bit elf, with
the same calling convention as the 64-bit kernel.

In theory this code should be usable in any environment that supplies
a flat device tree to describe the usable memory of the machine.

Limitations:

The memory node off the root with a name starting with "memory" must
contain enough free memory (not in the reserved ranges) in the first
reg range to uncompress the kernel with padding.

Signed-off-by: Milton Miller <miltonm at bga.com>
---
Status: Successfully boots from and to a 64 bit kernel when loaded
at 0 and after the kernel _end, when initrds and various other data
reserved are loaded discontiguously above the size of the kernel.

The memory search should be converted to use the address translation
framework and find_node_by_type, although we have a limited malloc
space during the search.  That points out the need for a read-only
scan of the tree.

The next patch is needed to call from kexec-tools without supplying
an externally generated flat device tree.

kexec.c is added as a library in the belief that it can be used by
multiple platforms.

I didn't test any serial drivers in this environment.  kexec will
clear out the mmu, so relying on initialized translations will fail.


Index: kernel/arch/powerpc/boot/kexec.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kernel/arch/powerpc/boot/kexec.c	2007-04-10 21:35:03.000000000 -0500
@@ -0,0 +1,255 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corporation 2007
+ *
+ * Authors: Milton Miller <miltonm at bga.com>
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "flatdevtree.h"
+#include "page.h"
+#include "types.h"
+
+extern char _start[];
+extern char _end[];
+
+BSS_STACK(16*1024);
+
+/*
+ * Attempt to set up a console by calling serial_console_init().
+ * A non-zero return is deliberately ignored.
+ */
+static void find_console_from_tree(void)
+{
+	/* no console, oh well: the result is intentionally not acted on */
+	(void)serial_console_init();
+}
+
+/*
+ * find_rmo_end - set rmo_end from the first reg entry of the memory node.
+ *
+ * Reads #address-cells and #size-cells from the root node, looks the
+ * memory node up under a few fixed names and fetches its "reg" property.
+ * When every address cell and every size cell but the last is zero, rp
+ * ends on the last size cell and that value becomes rmo_end.  A missing
+ * node or property, or an unusable cell count, is reported via fatal().
+ *
+ * fixme: use find_device_by_type "memory" and xlate_reg
+ */
+static void find_rmo_end(void)
+{
+	unsigned int na, ns, reg[4], *rp;
+	void *devp;
+	int rc;
+
+	devp = finddevice("/");
+	if (!devp)
+		fatal("Ack, device-tree root");
+	rc = getprop(devp, "#address-cells", &na, sizeof(na));
+	if (rc != sizeof(na))
+		fatal("Ack, no #address-cells in root");
+	rc = getprop(devp, "#size-cells", &ns, sizeof(ns));
+	if (rc != sizeof(ns))
+		fatal("Ack, no #size-cells in root");
+	/* both counts must be non-zero and one entry must fit in reg[] */
+	if (!na || !ns || na + ns > ARRAY_SIZE(reg))
+		fatal("#addr-cells or #size-cells unusable");
+	do {
+		/* unit addresses are written with '@' in device tree paths */
+		devp = finddevice("/memory@0");
+		if (!devp)
+			devp = finddevice("/memory");
+		if (!devp)
+			devp = finddevice("/memory@00000000");
+		if (!devp)
+			devp = finddevice("/memory@0000000000000000");
+		if (!devp)
+			fatal("Ack, can't find  memory");
+		rc = getprop(devp, "reg", reg, sizeof(reg));
+		/*
+		 * Test for an error return first: rc is signed, and comparing
+		 * a negative value against the unsigned size would let it pass.
+		 */
+		if (rc < 0 || (unsigned int)rc < (na + ns) * sizeof(int))
+			fatal("Ack, no valid reg property in memory");
+
+		/*
+		 * Step over the address cells, then over all but the last
+		 * size cell.
+		 *
+		 * NOTE(review): a non-zero cell takes the `continue`, which
+		 * applies to the inner while loop and skips rp++.  rp then
+		 * stops short of the last size cell and rmo_end is read from
+		 * an earlier cell.  The intended handling of a non-zero base
+		 * or a size of 4G and up (fail, try another node, clamp)
+		 * needs to be confirmed.
+		 */
+		rp = &reg[0];
+		while (na--) {
+			if (*rp)
+				continue;
+			rp++;
+		}
+		while (--ns) {
+			if (*rp)
+				continue;
+			rp++;
+		}
+	} while (0);
+
+	rmo_end = (void *)*rp;
+}
+
+/*
+ * find_dt_initrd - pick up an initrd described in /chosen.
+ *
+ * Reads linux,initrd-start and linux,initrd-end, each of which may be
+ * sizeof(unsigned long) or sizeof(unsigned long long) bytes long, and
+ * fills in loader_info.initrd_addr and loader_info.initrd_size when the
+ * start is non-zero, lies below the end, and the end does not exceed
+ * UINT_MAX.  Returns without touching loader_info otherwise.
+ */
+static void find_dt_initrd(void)
+{
+	unsigned long long initrd_start, initrd_end;
+	unsigned long narrow;
+	void *chosen;
+	int len;
+
+	chosen = finddevice("/chosen");
+	if (!chosen)
+		return;
+
+	/* The properties had to be 8 bytes until 2.6.22  */
+	len = getprop(chosen, "linux,initrd-start", &initrd_start,
+		sizeof(initrd_start));
+	if (len < 0)
+		return;
+	if (len == sizeof(unsigned long)) {
+		/* short form: widen the leading bytes that were filled in */
+		memcpy(&narrow, &initrd_start, len);
+		initrd_start = narrow;
+	} else if (len != sizeof(initrd_start)) {
+		printf("unexpected length of linux,initrd_start in /chosen!\n\r");
+		return;
+	}
+
+	len = getprop(chosen, "linux,initrd-end", &initrd_end, sizeof(initrd_end));
+	if (len < 0) {
+		printf("chosen has linux,initrd_start but no linux,initrd_end!\n\r");
+		return;
+	}
+	if (len == sizeof(unsigned long)) {
+		memcpy(&narrow, &initrd_end, len);
+		initrd_end = narrow;
+	} else if (len != sizeof(initrd_end)) {
+		printf("unexpected length of linux,initrd_end in /chosen!\n\r");
+		return;
+	}
+
+	/* nothing to record without a start address */
+	if (!initrd_start)
+		return;
+
+	/* if the initrd is above 4G, its untouchable in 32 bit mode */
+	if (initrd_end > UINT_MAX || initrd_start >= initrd_end)
+		return;
+
+	loader_info.initrd_addr = initrd_start;
+	loader_info.initrd_size = initrd_end - initrd_start;
+}
+
+/**
+ * setup_initial_heap - hand a small static buffer to simple_alloc_init
+ *
+ * An 8k static array backs the allocator used while the device tree is
+ * first scanned and still read-only.  Seven eighths of the array is passed
+ * as the heap size, with sizeof(long) granularity and at most 64
+ * allocations.  Keeping this early heap small means there is little data
+ * or bss to clear.  fatal() is called when the value returned by
+ * simple_alloc_init (treated as the heap end) lies beyond the array.
+ */
+static void setup_initial_heap(void)
+{
+	static char initial_heap[8*1024];
+	void *base = &initial_heap[0];
+	void *limit;
+
+	limit = simple_alloc_init(initial_heap, sizeof(initial_heap) * 7 / 8,
+			sizeof(long), 64);
+
+	if (base < limit - sizeof(initial_heap))
+		fatal("Initial heap too big\n\r");
+}
+
+/*
+ * early_scan_flat_tree - first look at the flat device tree.
+ *
+ * Initializes the flat tree library on dt_blob with a limit of 50 as the
+ * third ft_init argument, then calls find_rmo_end() and find_dt_initrd().
+ * A non-zero result from ft_init is reported via fatal().
+ */
+static void early_scan_flat_tree(struct boot_param_header *dt_blob)
+{
+	if (ft_init(dt_blob, dt_blob->totalsize, 50))
+		fatal("couldn't initialize device-tree\n\r");
+
+	find_rmo_end();
+	find_dt_initrd();
+}
+
+/*
+ * init_flat_tree - initialize the flat tree library on dt_blob with a
+ * max_finddevice limit of 1024.  A non-zero result from ft_init is
+ * reported via fatal().
+ */
+static void init_flat_tree(struct boot_param_header *dt_blob)
+{
+	const int max_finddevice = 1024;
+
+	if (ft_init(dt_blob, dt_blob->totalsize, max_finddevice))
+		fatal("Unable to initialize device_tree library!\n\r");
+}
+
+/* address returned by the last kexec_vmlinux_alloc(); read by kexec_finalize() */
+static void *saved_vmlinux_addr;
+
+/*
+ * kexec_vmlinux_alloc - allocate room for the kernel image.
+ *
+ * Obtains vmsize bytes from ranges_vmlinux_alloc(), plus SMP_SLAVE_SIZE
+ * extra bytes when vmsize is larger than the address of _start.  In that
+ * case the slaves are moved into the extra space.  The address handed
+ * back for the kernel is stored in saved_vmlinux_addr and returned.
+ */
+static void *kexec_vmlinux_alloc(unsigned long vmsize)
+{
+	unsigned long size = vmsize;
+	void *addr;
+
+	/*
+	 * If we are running where the kernel will decompress itself,
+	 * tack some more space onto the allocations and move the slaves
+	 * there.  This avoids the kernel decompressing before the slaves
+	 * catch on that they should move down to 0x60.
+	 */
+	if (size > (unsigned long)_start)
+		size += SMP_SLAVE_SIZE;
+
+	addr = ranges_vmlinux_alloc(size);
+
+	/* true only when the slave area was added above */
+	if (size > vmsize) {
+		if (addr < (void *)_start) {
+			/*
+			 * The kernel will memmove itself down.  The extra
+			 * space is at the end, make sure it is aligned.
+			 * We don't care if the kernel overwrites the first
+			 * instruction, that is the master entry point.
+			 *
+			 * NOTE(review): the mask rounds vmsize down to a
+			 * multiple of 4, so the slave area starts before
+			 * addr + vmsize when vmsize is not already aligned
+			 * -- confirm that is acceptable.
+			 */
+			vmsize &= ~3UL;
+			move_slaves_here(addr + vmsize);
+		} else {
+			/* nice aligned space at the beginning */
+			move_slaves_here(addr);
+			/* the kernel goes right after the slave area */
+			addr += SMP_SLAVE_SIZE;
+		}
+	}
+
+	saved_vmlinux_addr = addr;
+	return addr;
+}
+
+/*
+ * kexec_fixups - installed as platform_ops.fixups by kexec_platform_init();
+ * calls wait_slaves_moved().
+ */
+static void kexec_fixups(void)
+{
+	wait_slaves_moved();
+}
+
+/* dt_ops.finalize as it was before kexec_platform_init() replaced it */
+static unsigned long (*finalize_chain)(void);
+
+/*
+ * kexec_finalize - installed as dt_ops.finalize by kexec_platform_init().
+ *
+ * Passes the address saved by kexec_vmlinux_alloc() to
+ * send_slaves_to_kernel(), then returns whatever the previously installed
+ * finalize hook returns.
+ */
+static unsigned long kexec_finalize(void)
+{
+	send_slaves_to_kernel(saved_vmlinux_addr);
+
+	return finalize_chain();
+}
+
+/*
+ * kexec_platform_init - platform setup for the kexec zImage wrapper.
+ *
+ * @dt_blob: flat device tree header handed to the wrapper.
+ *
+ * Marshals the slave cpus, scans the tree once using a small static heap,
+ * optionally sets up a console, initializes the memory ranges and malloc,
+ * re-initializes the tree library with a larger limit, and finally
+ * installs the kexec hooks in platform_ops and dt_ops.
+ */
+void kexec_platform_init(struct boot_param_header *dt_blob)
+{
+	slaves_are_low();
+	move_slaves_up();
+
+	/* first pass: small heap, finds rmo_end and the initrd */
+	setup_initial_heap();
+	early_scan_flat_tree(dt_blob);
+
+	/* drivers can malloc and read the tree, but not realloc later
+	 * or modify the tree now.
+	 */
+	/* only look for a console when no write hook is installed yet */
+	if (!console_ops.write)
+		find_console_from_tree();
+
+	add_known_ranges(dt_blob);
+	ranges_init_malloc();
+	/* second ft_init on the same blob, this time with a limit of 1024 */
+	init_flat_tree(dt_blob);
+
+	platform_ops.vmlinux_alloc = kexec_vmlinux_alloc;
+	platform_ops.fixups = kexec_fixups;
+	/* keep the existing finalize hook so kexec_finalize() can call it */
+	finalize_chain = dt_ops.finalize;
+	dt_ops.finalize = kexec_finalize;
+}
Index: kernel/arch/powerpc/boot/crt0_kexec.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kernel/arch/powerpc/boot/crt0_kexec.S	2007-04-10 21:35:03.000000000 -0500
@@ -0,0 +1,46 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2007 IBM Corporation.
+ *
+ * Authors: Milton Miller <miltonm at bga.com>
+ *
+ */
+	/*
+	 * The kernel calls out to the first image with
+	 * r3 = boot cpu, r4 = entrypoint, r5 = 0
+	 *
+	 * kexec-tools purgatory calls this as it would a linux kernel:
+	 * r3 = boot block, r4 = entrypoint, r5 = 0
+	 *
+	 * The boot block boot_cpu field has been filled in.
+	 *
+	 * kexec-tools and its purgatory are supposed to copy SMP_SLAVE_SIZE
+	 * bytes from the entry point, but apparently instead it copies
+	 * from the image start.
+	 */
+	/* image entry label; the code of marshal_low.S follows directly */
+	.globl _zimage_start
+_zimage_start:
+
+#include "marshal_low.S"
+
+	/*
+	 * platform_init branches (without link) to the C routine
+	 * kexec_platform_init, so that routine returns straight to
+	 * platform_init's caller.
+	 */
+	.globl	platform_init
+platform_init:
+	b	kexec_platform_init
+
+
+	/*
+	 * _zimage_start_plat simply branches to _zimage_start_32_64, which
+	 * is not defined in this file (presumably it comes from
+	 * marshal_low.S or another crt0 object -- confirm).
+	 */
+	.globl	_zimage_start_plat
+_zimage_start_plat:
+	b	_zimage_start_32_64
Index: kernel/arch/powerpc/boot/wrapper
===================================================================
--- kernel.orig/arch/powerpc/boot/wrapper	2007-04-10 21:28:30.000000000 -0500
+++ kernel/arch/powerpc/boot/wrapper	2007-04-10 21:35:03.000000000 -0500
@@ -133,6 +133,12 @@ pmaccoff)
     platformo=$object/of.o
     lds=$object/zImage.coff.lds
     ;;
+kexec)
+    platformo=$object/crt0_kexec.o
+    ;;
+kexec64)
+    platformo="-e _zimage_start64 $object/crt0_kexec.o"
+    ;;
 miboot|uboot)
     # miboot and U-boot want just the bare bits, not an ELF binary
     ext=bin
@@ -216,4 +222,7 @@ pmaccoff)
     ${CROSS}objcopy -O aixcoff-rs6000 --set-start "$entry" "$ofile"
     $object/hack-coff "$ofile"
     ;;
+kexec64)
+    ${CROSS}objcopy -O elf64-powerpc $ofile
+    ;;
 esac
Index: kernel/arch/powerpc/boot/Makefile
===================================================================
--- kernel.orig/arch/powerpc/boot/Makefile	2007-04-10 21:34:43.000000000 -0500
+++ kernel/arch/powerpc/boot/Makefile	2007-04-10 21:35:03.000000000 -0500
@@ -43,10 +43,11 @@ $(addprefix $(obj)/,$(zlib) main.o): $(a
 src-wlib := string.S crt0.S stdio.c main.c flatdevtree.c flatdevtree_misc.c \
 		marshal.c memranges.c misc64.S \
 		ns16550.c serial.c simple_alloc.c div64.S util.S \
-		gunzip_util.c $(zlib)
+		gunzip_util.c $(zlib) kexec.c
 src-plat := of.c
-src-boot := $(src-wlib) $(src-plat) empty.c
+src-plat += crt0_kexec.S
 
+src-boot := $(src-wlib) $(src-plat) empty.c
 src-boot := $(addprefix $(obj)/, $(src-boot))
 obj-boot := $(addsuffix .o, $(basename $(src-boot)))
 obj-wlib := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-wlib))))
@@ -122,6 +123,9 @@ quiet_cmd_wrap	= WRAP    $@
       cmd_wrap	=$(CONFIG_SHELL) $(wrapper) -c -o $@ -p $2 $(CROSSWRAP) \
 		$(if $3, -s $3)$(if $4, -d $4)$(if $5, -i $5) vmlinux
 
+kexec-$(CONFIG_PPC32)			+= zImage.kexec
+kexec-$(CONFIG_PPC64)			+= zImage.kexec64
+
 image-$(CONFIG_PPC_PSERIES)		+= zImage.pseries
 image-$(CONFIG_PPC_MAPLE)		+= zImage.pseries
 image-$(CONFIG_PPC_IBM_CELL_BLADE)	+= zImage.pseries
@@ -131,6 +135,7 @@ image-$(CONFIG_PPC_CHRP)		+= zImage.chrp
 image-$(CONFIG_PPC_EFIKA)		+= zImage.chrp
 image-$(CONFIG_PPC_PMAC)		+= zImage.pmac
 image-$(CONFIG_DEFAULT_UIMAGE)		+= uImage
+image-$(CONFIG_KEXEC)			+= $(kexec-y)
 
 # For 32-bit powermacs, build the COFF and miboot images
 # as well as the ELF images.
@@ -138,7 +143,7 @@ ifeq ($(CONFIG_PPC32),y)
 image-$(CONFIG_PPC_PMAC)	+= zImage.coff zImage.miboot
 endif
 
-initrd-  := $(patsubst zImage%, zImage.initrd%, $(image-n) $(image-))
+initrd-  := $(patsubst zImage%, zImage.initrd%, $(image-n) $(image-) $(kexec-))
 initrd-y := $(patsubst zImage%, zImage.initrd%, $(image-y))
 initrd-y := $(filter-out $(image-y), $(initrd-y))
 targets	+= $(image-y) $(initrd-y)
@@ -172,7 +177,7 @@ install: $(CONFIGURE) $(image-y)
 	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $<
 
 # anything not in $(targets)
-clean-files += $(image-) $(initrd-) zImage zImage.initrd
+clean-files += $(image-) $(initrd-) $(kexec-) zImage zImage.initrd
 
 # clean up files cached by wrapper
 clean-kernel := vmlinux.strip vmlinux.bin
Index: kernel/arch/powerpc/boot/ops.h
===================================================================
--- kernel.orig/arch/powerpc/boot/ops.h	2007-04-10 21:34:18.000000000 -0500
+++ kernel/arch/powerpc/boot/ops.h	2007-04-10 21:35:03.000000000 -0500
@@ -83,6 +83,7 @@ int ns16550_console_init(void *devp, str
 void *simple_alloc_init(char *base, u32 heap_size, u32 granularity,
 		u32 max_allocs);
 void flush_cache(void *, unsigned long);
+void kexec_platform_init(struct boot_param_header *dt_blob);
 
 /* marshal slave cpus around to kernel */
 void move_slaves_up(void);



More information about the Linuxppc-dev mailing list