[PATCH 2/2] kdump shutdown hook support
Michael Neuling
mikey at neuling.org
Thu Dec 13 11:04:02 EST 2007
In message <1197500839.7695.19.camel at concordia> you wrote:
>
> --=-Kza0KCx0MG8nsjfq7kOz
> Content-Type: text/plain
> Content-Transfer-Encoding: quoted-printable
>
> On Wed, 2007-12-12 at 16:45 +1100, Michael Neuling wrote:
> > This adds hooks into the default_machine_crash_shutdown so drivers can
> > register a function to be run in the first kernel before we hand off
> > to the second kernel. This should only be used in exceptional
> > circumstances, like where the device can't be reset in the second
> > kernel alone (as is the case with eHEA). To emphasize this, the
> > number of handles allowed to be registered is currently #defined to 1.
> >
> > This uses the setjmp/longjmp code to call out to the registered hooks,
> > so any bogus exceptions we encounter will hopefully be recoverable.
> >
> > I've tested with bogus data and instruction exceptions.
> >=20
> > Signed-off-by: Michael Neuling <mikey at neuling.org>
> > ---
> >
> > arch/powerpc/kernel/crash.c | 90 +++++++++++++++++++++++++++++++++++++++++---
> > include/asm-powerpc/kexec.h | 3 +
> > 2 files changed, 88 insertions(+), 5 deletions(-)
> >
> > Index: linux-2.6-ozlabs/arch/powerpc/kernel/crash.c
> > ===================================================================
> > --- linux-2.6-ozlabs.orig/arch/powerpc/kernel/crash.c
> > +++ linux-2.6-ozlabs/arch/powerpc/kernel/crash.c
> > @@ -32,6 +32,8 @@
> > #include <asm/lmb.h>
> > #include <asm/firmware.h>
> > #include <asm/smp.h>
> > +#include <asm/system.h>
> > +#include <asm/setjmp.h>
> > =20
> > #ifdef DEBUG
> > #include <asm/udbg.h>
> > @@ -45,6 +47,11 @@ int crashing_cpu =3D -1;
> > static cpumask_t cpus_in_crash =3D CPU_MASK_NONE;
> > cpumask_t cpus_in_sr =3D CPU_MASK_NONE;
> > =20
> > +#define CRASH_SHUTDOWN_HANDLES_NUM 1
>
> CRASH_HANDLER_MAX ?
yep, MAX is probably a more representative name
>
> > +/* NULL terminated list of shutdown handles */
> > +static crash_shutdown_t crash_shutdown_handles[CRASH_SHUTDOWN_HANDLES_NUM+1];
> > +static DEFINE_SPINLOCK(crash_handles_lock);
> > +
> > #ifdef CONFIG_SMP
> > static atomic_t enter_on_soft_reset =3D ATOMIC_INIT(0);
> > =20
> > @@ -285,9 +292,69 @@ static inline void crash_kexec_stop_spus
> > }
> > #endif /* CONFIG_SPU_BASE */
> > =20
> > +/*=20
> > + * Register a function to be called on shutdown. Only use this if you
> > + * can't reset your device in the second kernel.
> > + */
> > +int crash_shutdown_register(crash_shutdown_t handler)
> > +{
> > + unsigned int i, rc;
> > +
> > + spin_lock(&crash_handles_lock);
>
> > + for(i =3D 0 ; i <=3D CRASH_SHUTDOWN_HANDLES_NUM; i++) {
> > + if (!crash_shutdown_handles[i]) {
> > + /* Insert handle at end */
> > + crash_shutdown_handles[i] =3D handler;
> > + rc =3D 0;
> > + break;
> > + }
> > + }
> >=20
> > + if (i =3D=3D CRASH_SHUTDOWN_HANDLES_NUM){
> > + printk(KERN_ERR "Crash shutdown handles full, "
> > + "not registered.\n");
> > + rc =3D 1;
> > + }
> > +
> > + spin_unlock(&crash_handles_lock);
>
> Perhaps?
Yep, looks nicer. I'll fix the one below too.
>
> > + return rc;
> > +}
> > +EXPORT_SYMBOL(crash_shutdown_register);
> > +
> > +int crash_shutdown_unregister(crash_shutdown_t handler)
> > +{
> > + unsigned int i;
> > +
> > + spin_lock(&crash_handles_lock);
> > + for(i =3D 0 ; i <=3D CRASH_SHUTDOWN_HANDLES_NUM; i++)
> > + if (crash_shutdown_handles[i] =3D=3D handler)
> > + break;
> > +
> > + if (i =3D=3D CRASH_SHUTDOWN_HANDLES_NUM){
> > + printk(KERN_ERR "Crash shutdown handle not found\n");
> > + spin_unlock(&crash_handles_lock);
> > + return 1;
> > + }
> > +
> > + /* Shift handles down */
> > + while(crash_shutdown_handles[i]) {
> > + crash_shutdown_handles[i] =3D crash_shutdown_handles[i+1];
> > + i++;
> > + }
> > + spin_unlock(&crash_handles_lock);
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(crash_shutdown_unregister);
> > +
> > +static long crash_shutdown_buf[SETJMP_BUF_LEN];
>
> unsigned long?
yep
>
> > +
> > +static int handle_fault(struct pt_regs *regs)
> > +{
> > + longjmp(crash_shutdown_buf, 1);
> > + return 0;
> > +}
> > +
> > void default_machine_crash_shutdown(struct pt_regs *regs)
> > {
> > - unsigned int irq;
> > + unsigned int i;
> > =20
> > /*
> > * This function is only called after the system
> > @@ -301,14 +368,27 @@ void default_machine_crash_shutdown(stru
> > */
> > hard_irq_disable();
> > =20
> > - for_each_irq(irq) {
> > - struct irq_desc *desc =3D irq_desc + irq;
> > + for_each_irq(i) {
> > + struct irq_desc *desc =3D irq_desc + i;
> > =20
> > if (desc->status & IRQ_INPROGRESS)
> > - desc->chip->eoi(irq);
> > + desc->chip->eoi(i);
> > =20
> > if (!(desc->status & IRQ_DISABLED))
> > - desc->chip->disable(irq);
> > + desc->chip->disable(i);
> > + }
> > +
> > + /* Call registered shutdown routines */
> > + __debugger_fault_handler =3D handle_fault;
> > + i =3D 0;
> > + while(crash_shutdown_handles[i]){
> > + if (setjmp(crash_shutdown_buf) =3D=3D 0) {
> > + asm volatile("sync; isync");
> > + crash_shutdown_handles[i]();
> > + asm volatile("sync; isync");
> > + __delay(200);
> > + }
> > + i++;
> > }
>
> You should probably reset __debugger_fault_handler, just to be safe.
Agreed.
Mikey
More information about the Linuxppc-dev mailing list