[2/5][POWERPC] boot: Add kexec callable zImage wrapper

Milton Miller miltonm at bga.com
Thu Jun 29 23:25:58 EST 2006


This code creates a 32 bit zImage wrapper for a 32 or 64 bit PowerPC
Linux kernel.   It does not support initrd at present.  What it does
give you is a compressed kernel image that is expanded when used.

Limitations:

The memory node off the root with a name starting with "memory" must
contain enough free memory (not in the reserved ranges) in the first
reg range to uncompress the kernel with padding.

The claim allocator does not record its handouts.

Signed-off-by: Milton Miller <miltonm at bga.com>

Index: kernel/arch/powerpc/boot/kexec.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kernel/arch/powerpc/boot/kexec.c	2006-06-29 03:47:06.865856735 -0500
@@ -0,0 +1,476 @@
+/*
+ * Copyright (C) 1997 Paul Mackerras 1997.
+ * Copyright (C) 2006 Milton Miller, IBM Corporation.
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "string.h"
+#include "stdio.h"
+#include "prom.h"
+#include "page.h"
+#include "dt.h"
+#include "stringify.h"
+
+#define BUG_ON(x) if (x) { printf("zImage BUG %s\n\r", __stringify(x)); exit();}
+
+extern unsigned int gohere;	/* slaves poll and branch when told */
+
+int (*prom)(void *);
+phandle chosen_handle;
+ihandle stdout;
+
+#define MAX_MEM 0x80000000
+static unsigned int memsize;
+static unsigned int na=2, ns=1;
+
+
+static struct boot_param_header *initial_boot_params;
+
+static int (*rtas)(void *, void *);
+static void *rtas_data;
+static int rtas_put_term_char;
+static int rtas_display_char;
+
+/*
+ * Call an RTAS service through the entry point stored in rtas, passing
+ * rtas_data as the second argument.
+ *
+ * token is the service token, nargs the number of variadic input words
+ * that follow nret, and nret the number of return words.  At most 12
+ * words in total fit in the argument buffer.
+ *
+ * Returns the first return word, 0 if nret is 0, or -1 when no RTAS
+ * entry point or token is known or the counts are out of range.
+ */
+int call_rtas(int token, int nargs, int nret, ...)
+{
+	int i;
+	struct rtas_args {
+		int token;
+		int nargs;
+		int nret;
+		unsigned int args[12];
+	} args;
+	va_list list;
+
+	if (!rtas || !token)
+		return -1;	/* Hardware error */
+	/* negative counts would index in front of args.args[] */
+	if (nargs < 0 || nret < 0 || nargs + nret > 12)
+		return -1;	/* Hardware error */
+
+	args.token = token;
+	args.nargs = nargs;
+	args.nret = nret;
+
+	va_start(list, nret);
+	for (i = 0; i < nargs; i++)
+		args.args[i] = va_arg(list, unsigned int);
+	va_end(list);
+
+	/* clear the return slots so stale stack data is never reported */
+	for (i = 0; i < nret; i++)
+		args.args[nargs+i] = 0;
+
+	rtas(&args, rtas_data);
+
+	/* without return slots args.args[nargs] was never written */
+	return nret ? args.args[nargs] : 0;
+}
+
+
+/*
+ * Write nb bytes from ptr, one character per RTAS call, using the
+ * put-term-char token.  handle is ignored.
+ *
+ * Returns the number of bytes written before the first failing call
+ * (nb when all succeed), or -1 when no output service is available.
+ */
+int write(void *handle, void *ptr, int nb)
+{
+	int i = 0;
+	int err = 0;
+	char *p = ptr;
+
+	if (rtas_put_term_char)
+		for (; i < nb && !err; i++)
+			err = call_rtas(rtas_put_term_char, 1, 1, p[i]);
+#if 0
+	/* not tested, doesn't check line limits, do we care? */
+	else if (rtas_display_char)
+		for (; i < nb && !err; i++)
+			err = call_rtas(rtas_display_char, 1, 1, p[i]);
+#endif
+	else
+		return -1;
+
+	/*
+	 * The loop increments i once more after a failing call, so the
+	 * byte that failed must not be counted; on success i is nb.
+	 */
+	return err ? i - 1 : i;
+}
+
+
+/* returns 1 when s1 begins with all of s2 (an empty s2 always matches) */
+static int string_match(const char *s1, const char *s2)
+{
+	while (*s2 != '\0') {
+		if (*s1 != *s2)
+			return 0;
+		s1++;
+		s2++;
+	}
+	return 1;
+}
+
+
+/*
+ * Check whether a block of memory may be used.
+ *
+ * virt is rounded up to align (an align of 0 is treated as 1) and the
+ * range [aligned, aligned + size) is then rejected if it wraps the
+ * address space, if it ends above memsize (when memsize is known), or
+ * if it collides with an entry of the device tree memory reserve map.
+ *
+ * Nothing is recorded: a later call can be handed the same address.
+ *
+ * return -1 on fail, address on success
+ */
+void *claim(unsigned long virt, unsigned long size, unsigned long align)
+{
+
+	unsigned long al = align ? align : 1;
+	unsigned long try = _ALIGN_UP(virt, al);
+	unsigned int *r;
+
+#if 0 /* Debug */
+
+	printf("claim: request %lx size %lx align %lx, aligned %p ",
+			virt, size, align,  (void *)try);
+
+
+#define BUSY(s) printf("%s\n\r", s), ((void *)(-1))
+#define RANGE_PRINT printf("range %08x %08x ",r[1], r[3]);
+#define RANGE_CLEAR printf("is clear\n\r");
+#else
+/* non-debug build: BUSY() is just the failure value, the rest are no-ops */
+#define BUSY(s) ((void *)(-1))
+#define RANGE_PRINT do {} while(0)
+#define RANGE_CLEAR do {} while(0)
+#endif
+
+
+	/* end of the request wrapped past the top of the address space */
+	if (try + size < try)
+		return BUSY("wraps");
+
+	/* memsize of 0 means the limit is not known yet, so skip the check */
+	if (memsize && try + size > memsize)
+		return BUSY("overflows");
+
+	/*
+	 * Walk the reserve map four 32 bit words at a time; the loop stops
+	 * at the entry whose size words r[2] and r[3] are both zero.
+	 * r[0]/r[1] are used as the upper/lower word of the base and
+	 * r[2]/r[3] as the upper/lower word of the size (assumes the
+	 * big endian u64 base/size pairs described in dt.h).
+	 */
+	for (r = (unsigned int *)(((unsigned long)initial_boot_params)
+				+ initial_boot_params->off_mem_rsvmap);
+				r[2] || r[3]; r += 4) {
+
+		/* base above 4GB, or above the end of the request */
+		if (r[0] || r[1] > try + size)
+			continue;		/* starts after */
+		/* size of 4GB or more, or base + size wraps 32 bits */
+		if (r[2] || r[1]+r[3] < r[1])
+			return BUSY("BEYOND");	/* extends beyond */
+		if (r[1] + r[3] < try)
+			continue;		/* contained before */
+
+		RANGE_PRINT;
+		return BUSY("overlaps");		/* overlaps */
+	}
+
+	/*
+	 * XXX: Fixme: we should keep track here and not hand out the
+	 * same address again.
+	 */
+
+	RANGE_CLEAR;
+	return (void *)(try);
+}
+
+
+
+/* code lifted from arch/powerpc/kernel/prom.c */
+
+/* Return the string located offset bytes into the device tree strings block */
+static inline char *find_flat_dt_string(u32 offset)
+{
+	char *blob = (char *)initial_boot_params;
+
+	return blob + initial_boot_params->off_dt_strings + offset;
+}
+
+/**
+ * This function is used to scan the flattened device-tree, it is
+ * used to extract the memory informations at boot before we can
+ * unflatten the tree
+ *
+ * The callback it is invoked once per node with:
+ *   node  - address of the first tag following the (padded) node name,
+ *           suitable for passing to of_get_flat_dt_prop()
+ *   uname - the node name; when the stored name starts with '/' only
+ *           the part after the last '/' is passed
+ *   depth - nesting level, 0 for the first node encountered
+ *   data  - the data pointer given to this function
+ *
+ * The scan stops at the first non-zero callback result and returns it.
+ * Returns 0 if the end tag is reached first, or -1 on an unknown tag.
+ */
+int of_scan_flat_dt(int (*it)(unsigned long node,
+				     const char *uname, int depth,
+				     void *data),
+			   void *data)
+{
+	unsigned long p = ((unsigned long)initial_boot_params) +
+		initial_boot_params->off_dt_struct;
+	int rc = 0;
+	int depth = -1;
+
+	do {
+		u32 tag = *((u32 *)p);
+		char *pathp;
+
+		p += 4;
+		if (tag == OF_DT_END_NODE) {
+			depth --;
+			continue;
+		}
+		if (tag == OF_DT_NOP)
+			continue;
+		if (tag == OF_DT_END)
+			break;
+		if (tag == OF_DT_PROP) {
+			/* skip the property: size word, name offset, value */
+			u32 sz = *((u32 *)p);
+			p += 8;
+			/* before version 0x10 values of 8+ bytes are 8 byte aligned */
+			if (initial_boot_params->version < 0x10)
+				p = _ALIGN(p, sz >= 8 ? 8 : 4);
+			p += sz;
+			p = _ALIGN(p, 4);
+			continue;
+		}
+		if (tag != OF_DT_BEGIN_NODE) {
+			printf("Invalid tag %x scanning flattened"
+			       " device tree !\n\r", tag);
+			return -1;
+		}
+		depth++;
+		pathp = (char *)p;
+		/* step over the NUL terminated name and its padding */
+		p = _ALIGN(p + strlen(pathp) + 1, 4);
+		if ((*pathp) == '/') {
+			/* full path stored: keep only the last component */
+			char *lp, *np;
+			for (lp = NULL, np = pathp; *np; np++)
+				if ((*np) == '/')
+					lp = np+1;
+			if (lp != NULL)
+				pathp = lp;
+		}
+		rc = it(p, pathp, depth, data);
+		if (rc != 0)
+			break;
+	} while(1);
+
+	return rc;
+}
+
+/*
+ * Return the address of the first tag inside the root node, i.e. the
+ * position just past the root node's name and padding.  Leading NOP
+ * tags are skipped; anything other than a begin-node tag after them
+ * trips BUG_ON.
+ */
+unsigned long of_get_flat_dt_root(void)
+{
+	unsigned long p = ((unsigned long)initial_boot_params) +
+		initial_boot_params->off_dt_struct;
+
+	while(*((u32 *)p) == OF_DT_NOP)
+		p += 4;
+	BUG_ON (*((u32 *)p) != OF_DT_BEGIN_NODE);
+	p += 4;
+	/* step over the NUL terminated name, rounded up to 4 bytes */
+	return _ALIGN(p + strlen((char *)p) + 1, 4);
+}
+
+/**
+ * This  function can be used within scan_flattened_dt callback to get
+ * access to properties
+ *
+ * node is the address of the first tag after a node's name, as handed
+ * to the scan callback or returned by of_get_flat_dt_root().  The
+ * property tags that follow are compared against name in turn.
+ *
+ * Returns a pointer to the property value inside the device tree and,
+ * if size is not NULL, stores the value length in *size.  Returns NULL
+ * when a tag other than a property or NOP is reached first; *size is
+ * left untouched in that case.
+ */
+void* of_get_flat_dt_prop(unsigned long node, const char *name,
+				 unsigned long *size)
+{
+	unsigned long p = node;
+
+	do {
+		u32 tag = *((u32 *)p);
+		u32 sz, noff;
+		const char *nstr;
+
+		p += 4;
+		if (tag == OF_DT_NOP)
+			continue;
+		/* properties come first; any other tag ends the search */
+		if (tag != OF_DT_PROP)
+			return NULL;
+
+		/* value length, then offset of the name in the strings block */
+		sz = *((u32 *)p);
+		noff = *((u32 *)(p + 4));
+		p += 8;
+		/* before version 0x10 values of 8+ bytes are 8 byte aligned */
+		if (initial_boot_params->version < 0x10)
+			p = _ALIGN(p, sz >= 8 ? 8 : 4);
+
+		nstr = find_flat_dt_string(noff);
+		if (nstr == NULL) {
+			printf("Can't find property index"
+			       " name !\n\r");
+			return NULL;
+		}
+		if (strcmp(name, nstr) == 0) {
+			if (size)
+				*size = sz;
+			return (void *)p;
+		}
+		/* no match: skip the value and its padding */
+		p += sz;
+		p = _ALIGN(p, 4);
+	} while(1);
+}
+
+/* end code lifted from arch/powerpc/kernel/prom.c */
+
+
+/* Scan callback that accepts only nodes directly below the root
+ * (depth 1) whose name begins with the string passed in data; any
+ * such partial match counts.  Yields the node address converted to
+ * int on a match and 0 otherwise.
+ */
+static int match_device_node(unsigned long node, const char *uname,
+				int depth, void *data)
+{
+	if (depth != 1)
+		return 0;
+	if (!string_match(uname, data))
+		return 0;
+	return node;
+}
+
+/* Find Real Memory (offset) region.   The first reg, size pair of the
+ * memory node defines the memory that is accessible in real mode on
+ * PPC64 LPAR systems.  Assume that will be enough everywhere.
+ *
+ * Scan callback: data is the node name prefix to look for.  Returns 0
+ * (keep scanning) when the node does not match, its reg property can
+ * not be parsed, or its first range does not start at address 0.
+ * Otherwise returns the size of the first range, or MAX_MEM when any
+ * of the upper size cells is non-zero.
+ */
+static int find_rmo(unsigned long node, const char *uname, int depth,
+		     void *data)
+{
+	unsigned long size;
+	unsigned int *reg;
+	unsigned int i;
+
+	if (!match_device_node(node, uname, depth, data))
+		return 0;
+
+	reg = of_get_flat_dt_prop(node, "reg", &size);
+	/* ns == 0 would make the unsigned "ns - 1" bound below wrap */
+	if (!reg || !ns || (size < (na + ns)*4)) {
+		printf("Warning: Can't parse reg property on %s\n\r",
+				uname);
+		return 0;
+	}
+
+	/* ignore if not address 0 */
+	for (i = 0; i < na; i++)
+		if (reg[i])
+			return 0;
+
+	/* if more than 1 cell then MAX_MEM */
+	for (i = 0; i < ns - 1; i++)
+		if (reg[na + i])
+			return MAX_MEM;
+
+	/* else return size of first reg (i is now the last size cell) */
+	return reg[na + i];
+}
+
+/*
+ * Dummy prom that recognises the services used elsewhere and answers
+ * them from the flattened device tree.
+ *
+ * "finddevice" (1 arg, 1 ret): the argument is a path; a leading '/'
+ *	is dropped and the rest is matched against the nodes directly
+ *	below the root.  Returns the node handle, or -1 if none matches.
+ * "getprop" (4 args, 1 ret): node, property name, buffer and buffer
+ *	length.  Copies at most the buffer length and returns the size
+ *	of the property, or -1 if the node has no such property.
+ * "exit" and any other service: never returns.
+ *
+ * Service names are compared with string_match(), i.e. by prefix.
+ */
+int call_prom(const char *service, int nargs, int nret, ...)
+{
+	va_list list;
+
+	if (string_match(service, "finddevice")) {
+		char *path;
+		int node;
+
+		if ((nargs != 1) || (nret != 1)) {
+			printf("finddevice: unexpected arg counts %d %d\n\r",
+					nargs, nret);
+			return -1;
+		}
+		va_start(list, nret);
+		path = (void *)va_arg(list, unsigned int);
+		va_end(list);
+
+		if (path[0] == '/')
+			path++;
+
+		node = of_scan_flat_dt(match_device_node, path);
+
+		/*
+		 * The scan yields 0 when nothing matched, but init_prom()
+		 * tests the handle against -1, so report failure that way.
+		 */
+		return node ? node : -1;
+
+	} else if (string_match(service, "getprop")) {
+		unsigned long node;
+		char *prop;
+		char *buf;
+		unsigned int len;
+		char *ret;
+		unsigned long size = 0;
+
+		if ((nargs != 4) || (nret != 1)) {
+			printf("getprop: unexpected arg counts %d %d\n\r",
+					nargs, nret);
+			return -1;
+		}
+
+		va_start(list, nret);
+		node = va_arg(list, unsigned int);
+		prop = (void *)va_arg(list, unsigned int);
+		buf = (void *)va_arg(list, unsigned int);
+		len = va_arg(list, unsigned int);
+		va_end(list);
+
+		ret = of_get_flat_dt_prop(node, prop, &size);
+		/* size is not filled in when the property is missing */
+		if (!ret)
+			return -1;
+
+		memcpy(buf, ret, len < size ? len : size);
+		return size;
+	} else if (!string_match(service, "exit")) {
+		printf("Unimplemented prom service %s(%d, %d) called\n\r",
+			service, nargs, nret);
+		/* fall through */
+	}
+	for(;;)
+		;	/* Forever */
+}
+
+/*
+ * Initialise the wrapper from a flattened device tree.
+ *
+ * a1 is the address of the device tree header, a2 is not used, and p
+ * must be NULL because this version doesn't support a prom interface.
+ *
+ * Checks the header, looks up /chosen, picks up the RTAS entry point,
+ * base and console tokens from the rtas node if there is one, reads
+ * the root node's cell counts (the defaults na=2, ns=1 are kept when
+ * they are missing), and finally sets memsize and ram_end from the
+ * first range of the memory node, capped at MAX_MEM.
+ */
+void init_prom(unsigned long a1, unsigned long a2, void *p)
+{
+	unsigned long node;
+
+	if (p)		/* this version doesn't support a prom interface */
+		exit();
+
+	initial_boot_params = (void *) a1;
+
+	BUG_ON(initial_boot_params->magic != OF_DT_HEADER);
+	BUG_ON(initial_boot_params->last_comp_version > 0x10);
+
+	chosen_handle = finddevice("/chosen");
+	if (chosen_handle == (void *) -1)
+		exit();
+
+	node = of_scan_flat_dt(match_device_node, "rtas");
+
+	if (node) {
+		unsigned int *cell;
+
+		cell = of_get_flat_dt_prop(node,
+				"linux,rtas-base", NULL);
+		if (cell)
+			rtas_data = (void *)*cell;
+		cell = of_get_flat_dt_prop(node,
+				"linux,rtas-entry", NULL);
+		if (cell)
+			rtas = (void *)*cell;
+		cell = of_get_flat_dt_prop(node,
+				"put-term-char", NULL);
+		if (cell)
+			rtas_put_term_char = *cell;
+		cell = of_get_flat_dt_prop(node,
+				"display-character", NULL);
+		if (cell)
+			rtas_display_char = *cell;
+	}
+
+	node = of_get_flat_dt_root();
+	if (node) {
+		unsigned long len;
+		void *val;
+
+		/*
+		 * of_get_flat_dt_prop() leaves the length untouched when
+		 * the property is missing, so preset it and check the
+		 * pointer as well.
+		 */
+		len = 0;
+		val = of_get_flat_dt_prop(node, "#address-cells", &len);
+		if (val && len == 4)
+			na = *(int *)val;
+		else
+			printf("Warning, could't find or parse #address-cells\n\r");
+
+		len = 0;
+		val = of_get_flat_dt_prop(node, "#size-cells", &len);
+		if (val && len == 4)
+			ns = *(int *)val;
+		else
+			printf("Warning, could't find or parse #size-cells\n\r");
+	}
+
+	memsize = of_scan_flat_dt(find_rmo, "memory");
+
+	/* the string literal is always true: BUG_ON prints it and exits */
+	if (!memsize)
+		BUG_ON("Couldn't find RMO memory size\n\r");
+
+	if (memsize > MAX_MEM)
+		memsize = MAX_MEM;
+
+	printf("Can use %x bytes of memory in RMO\n\r", memsize);
+
+	ram_end = memsize;
+}
+
+/*
+ * Kexec smp hook:
+ * the first 0x100 bytes found at kernel_start are copied to address
+ * zero and flushed, after which gohere is set to 0x60 so that any
+ * slaves polling it branch to that address.
+ */
+void prom_smp_hook(unsigned long kernel_start)
+{
+	char *dest = (char *)0;
+	char *src = (char *)kernel_start;
+
+	memcpy(dest, src, 0x100);
+	flush_cache(dest, 0x100);
+
+	gohere = 0x60;
+}
Index: kernel/arch/powerpc/boot/crt0_kexec.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kernel/arch/powerpc/boot/crt0_kexec.S	2006-06-29 03:47:06.868856261 -0500
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 1997 Paul Mackerras.
+ * Copyright (C) 2006 Milton Miller, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * NOTE: This code runs in 32 bit mode and is linked as ELF32.
+ * It will switch from 64 to 32 bit mode with some assumptions.
+ */
+
+#include "ppc_asm.h"
+
+	.text
+	/* This is the actual "entry" point as mentioned in the headers.
+	 * It is this code that should be copied down to zero.
+	 * This code must be loaded above 0x100 or the slaves will
+	 * crash when we copy the kernels entry point down.
+	 */
+	.globl	master
+master:
+	/* Work out the offset between the address we were linked at
+	 * and the address where we're running.
+	 * Set 32 bit mode while we are at it.
+	 */
+	bl	set32
+1:	mflr	r0		/* run time address of 1b */
+	lis	r9,1b@ha
+	addi	r9,r9,1b@l	/* link time address of 1b */
+	subf.	r0,r9,r0	/* r0 = run time - link time offset */
+	beq	3f		/* if running at same address as linked */
+
+	/* The .got2 section contains a list of addresses, so add
+	   the address offset onto each entry. */
+	lis	r9,__got2_start@ha
+	addi	r9,r9,__got2_start@l
+	lis	r8,__got2_end@ha
+	addi	r8,r8,__got2_end@l
+	subf.	r8,r9,r8	/* size of .got2 in bytes */
+	beq	3f
+	srwi.	r8,r8,2		/* number of 4 byte entries */
+	mtctr	r8
+	add	r9,r0,r9	/* run time address of .got2 */
+2:	lwz	r8,0(r9)
+	add	r8,r8,r0
+	stw	r8,0(r9)
+	addi	r9,r9,4
+	bdnz	2b
+3:
+	b	3f		/* on to the next 3: label, past the slave loop */
+
+
+
+
+	/* the slaves may be in 32 or 64 bit mode, we don't care */
+	/*
+	 * Slave entry, fixed at offset 0x60 from master.  Spins until
+	 * the polled gohere word becomes non-zero, then branches to the
+	 * address it holds.
+	 */
+	.org	master+0x60
+	.globl	slave
+slave:
+	li	4,gohere-master			/* read 0-relative */
+waiting:
+99:	lwz	r6,0(r4)
+	cmpwi	0,r6,0
+	beq	99b
+	mtctr	r6
+	/*
+	 * When r6 is the address of a "waiting" label, adding the
+	 * gohere-waiting distance makes r4 point at that copy's gohere.
+	 */
+	addi	r4,r6,gohere-waiting		/* read from gohere in image */
+	bctr
+
+	.global gohere
+gohere:	.long	0	# when set the slave moves
+
+
+
+flushit:
+	/* Do a cache flush for our text, in case OF didn't */
+	/* r0 still holds the run time - link time offset from master */
+3:	lis	r9,_start@ha
+	addi	r9,r9,_start@l
+	add	r9,r0,r9
+	lis	r8,_etext@ha
+	addi	r8,r8,_etext@l
+	add	r8,r0,r8
+4:	dcbf	r0,r9
+	icbi	r0,r9
+	addi	r9,r9,0x20	/* step 0x20 bytes per iteration */
+	cmplw	cr0,r9,r8
+	blt	4b
+	sync
+	isync
+
+	/* fill out a stack for c code */
+	lis	r1,__stack_end@ha
+	addi	r1,r1,__stack_end@l
+	add	r1,r1,r0
+	stwu	r5,-16(r1)	/* r5 should be 0 */
+
+	/* tell slave to come to our copy */
+	lis	r8,waiting@ha
+	addi	r8,r8,waiting@l
+	add	r8,r8,r0
+	stw	r8,gohere-master(0)	/* the gohere word polled at address 0 */
+
+	mr	r6,r1
+	b	start
+
+
+	/*
+	 * Check if the processor is running in 32 bit mode, using
+	 * only 32 bit instructions which should be safe on 32 and
+	 * 64 bit processors.
+	 *
+	 * The caller is assuming that the lr is used to return.
+	 * r0 and r6 are overwritten.
+	 */
+set32:	mfmsr	r0		/* grab whole msr		*/
+	rlwinm	r6,r0,0,0,31	/* extract bottom word		*/
+	subf.	r6,r6,r0	/* subtract, same?		*/
+	beqlr			/* yes: we are 32 bit mode	*/
+
+	/* Since the compare found other bits, we must be in 64 bit mode
+	 * on a 64 bit processor.  Since MSR[SF] is in the bits we masked
+	 * off the compare will always fail in 64 bit mode, and will always
+	 * be equal in 32 bit mode (the size of the implicit compare).
+	 *
+	 * This program must run in 32 bit mode, so switch now.
+	 * Assume we are actually running in low 32 bits of memory space,
+	 * so we can just turn off MSR[SF] which is bit 0.
+	 */
+	.machine push
+	.machine "ppc64"
+	rldicl	r0,r0,0,1	/* clear bit 0 of the msr value	*/
+	sync
+	mtmsrd	r0
+	isync
+	.machine pop
+	blr
+
+	.org	master+0x100		/* make sure we don't go backwards */
+
+
+	/* this code needs a stack allocated in the image */
+	/* 4096 bytes in a writable, allocated section with no file contents */
+	.section .stack,"aw",@nobits
+	.space 	4096
+
+	.data
+
+	/* a procedure descriptor used when pretending to be elf64_powerpc */
+	/* first 64 bit slot: a zero upper word followed by the 32 bit address of master */
+	.balign	8
+	.global	_master64
+_master64:
+	.long	0, master	/* big endian, supported reloc ppc32 */
+	.quad	0, 0, 0
+
+	/* a procedure descriptor used when booting this as a COFF file */
+	/* four 32 bit words, the first being the address of master */
+	.global	_master_opd
+_master_opd:
+	.long	master, 0, 0, 0
Index: kernel/arch/powerpc/boot/dt.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kernel/arch/powerpc/boot/dt.h	2006-06-29 03:47:06.870855944 -0500
@@ -0,0 +1,46 @@
+#ifndef _PPC_BOOT_DT_H_
+#define _PPC_BOOT_DT_H_
+
+typedef unsigned int u32;
+
+/* Definitions used by the flattened device tree */
+/* OF_DT_HEADER is the expected value of boot_param_header.magic; the
+ * tag values below are the 32 bit words found in the structure block.
+ */
+#define OF_DT_HEADER		0xd00dfeed	/* marker */
+#define OF_DT_BEGIN_NODE	0x1		/* Start of node, full name */
+#define OF_DT_END_NODE		0x2		/* End node */
+#define OF_DT_PROP		0x3		/* Property: name off, size,
+						 * content */
+#define OF_DT_NOP		0x4		/* nop */
+#define OF_DT_END		0x9		/* ends the structure block */
+
+/* NOTE(review): presumably the format version described here - confirm */
+#define OF_DT_VERSION		0x10
+
+/*
+ * This is what gets passed to the kernel by prom_init or kexec
+ *
+ * The dt struct contains the device tree structure, full paths and
+ * property contents. The dt strings contain a separate block with just
+ * the strings for the property names, and is fully page aligned and
+ * self contained in a page, so that it can be kept around by the kernel,
+ * each property name appears only once in this page (cheap compression)
+ *
+ * the mem_rsvmap contains a map of reserved ranges of physical memory,
+ * passing it here instead of in the device-tree itself greatly simplifies
+ * the job of everybody. It's just a list of u64 pairs (base/size) that
+ * ends when size is 0
+ *
+ * All off_* fields are byte offsets from the start of this header.
+ */
+struct boot_param_header
+{
+	u32	magic;			/* magic word OF_DT_HEADER */
+	u32	totalsize;		/* total size of DT block */
+	u32	off_dt_struct;		/* offset to structure */
+	u32	off_dt_strings;		/* offset to strings */
+	u32	off_mem_rsvmap;		/* offset to memory reserve map */
+	u32	version;		/* format version */
+	u32	last_comp_version;	/* last compatible version */
+	/* version 2 fields below */
+	u32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
+	/* version 3 fields below */
+	u32	dt_strings_size;	/* size of the DT strings block */
+};
+
+#endif				/* _PPC_BOOT_DT_H_ */



More information about the Linuxppc-dev mailing list