[PATCH] fix percpu heartbeats
Troy Benjegerdes
hozer at drgw.net
Mon Aug 19 14:49:00 EST 2002
Okay, good.. does anyone else have a comment on this? I had the impression
I was the only one that cared.
My take is this approach is much better than what we had. If no one else
comments or checks it in by the time I actually get the board I'm testing
to work again and need heartbeat to work, I'll go ahead and check it in
(after I've tested it this time) :P
On Sun, Aug 18, 2002 at 11:19:01PM -0500, Milton Miller wrote:
> The changeset of Cort's patch that Troy checked in is broken in several ways:
>
> 1) The slave cpus no longer call heartbeats at all because there was a
> previous continue in the loop. (reordered and used remembered cpu number)
>
> 2) The rtas call rate is per-minute not per-second. (correct the rate)
>
> 3) All the rtas calls will occur at the same time, modulo the contended
> rtas lock. (wait to divide the rate until smp_kick_cpus).
>
> 4) It added NRCPUS*3 words to ppc_md. It also pointed out we were modifing
> this structre repeatedly at runtime. [moved the counts to the cache hot
> and dirty per-cpu irqstat and allow only one choice of function, just per
> cpu countdown and period. The count is always decremented, but only when
> it reaches zero do we decide to call the function by checking the reset
> value for non-zero. The period is reset after the call, so aperiodic
> users (like 4xx watchdog maybe?) could clear the count during their callback.]
>
> I know there are other alternatives, but the cost to non-users is minimal
> (an extra branch and compare every 2*32 jiffies), and the the periodic
> call usage is set it up and forget it.
>
>
> I'll read comments through the lists, so no need to cc me.
>
> milton
>
> ===== arch/ppc/kernel/ppc4xx_setup.c 1.46 vs 1.47 =====
> --- 1.46/arch/ppc/kernel/ppc4xx_setup.c Fri Aug 2 11:30:01 2002
> +++ 1.47/arch/ppc/kernel/ppc4xx_setup.c Sun Aug 18 21:03:03 2002
> @@ -383,6 +383,8 @@
> strcpy(cmd_line, (char *) (r6 + KERNELBASE));
> }
> #if defined(CONFIG_PPC405_WDT)
> + ppc_md.heartbeat = ppc4xx_wdt_heartbeat;
> +
> /* Look for wdt= option on command line */
> if (strstr(cmd_line, "wdt=")) {
> int valid_wdt = 0;
> @@ -396,6 +398,26 @@
> ++q;
> }
> wdt_enable = valid_wdt;
> +
> + /* XXX why is the wdt functoin modifing count and not
> + * using reset ?? If this code wants kernel heartbeat
> + * calls then:
> + *
> + * #include <asm/hwirq.h>
> + * heartbeat_reset(cpu) = period_in_jiffies;
> + * heartbeat_count(cpu) = period_in_jiffies;
> + *
> + * The function will be called from the timer interrupt
> + * when the counts reaches 0, afterwards the count will
> + * be set to the heartbeat_reset(cpu) value (which
> + * could be cleard during call for one-shot mode).
> + *
> + * If the code doesn't want the count manageed it
> + * should not use the kernel callbacks (or set reset
> + * to 1 and use its own counters).
> + */
> +#error sanitize wdt and kernel heartbeat interactions
> +
> }
> #endif
>
> @@ -411,11 +433,6 @@
> ppc_md.halt = ppc4xx_halt;
>
> ppc_md.calibrate_decr = ppc4xx_calibrate_decr;
> -
> -#ifdef CONFIG_PPC405_WDT
> - ppc_md.heartbeat = ppc4xx_wdt_heartbeat;
> -#endif
> - ppc_md.heartbeat_count = 0;
>
> ppc_md.find_end_of_memory = ppc4xx_find_end_of_memory;
> ppc_md.setup_io_mappings = m4xx_map_io;
> ===== arch/ppc/kernel/time.c 1.43 vs 1.44 =====
> --- 1.43/arch/ppc/kernel/time.c Sat Aug 17 21:55:14 2002
> +++ 1.44/arch/ppc/kernel/time.c Sun Aug 18 21:42:02 2002
> @@ -1,5 +1,5 @@
> /*
> - * BK Id: SCCS/s.time.c 1.43 08/18/02 12:55:14 paulus
> + * BK Id: SCCS/s.time.c 1.44 08/18/02 21:42:02 miltonm
> */
> /*
> * Common time routines among all ppc machines.
> @@ -164,7 +164,12 @@
> jiffy_stamp += tb_ticks_per_jiffy;
> if (!user_mode(regs))
> ppc_do_profile(instruction_pointer(regs));
> - if (smp_processor_id())
> + if ( unlikely(!heartbeat_count(cpu)--)
> + && heartbeat_reset(cpu) ) {
> + ppc_md.heartbeat();
> + heartbeat_count(cpu) = heartbeat_reset(cpu);
> + }
> + if (cpu)
> continue;
>
> /* We are in an interrupt, no need to save/restore flags */
> @@ -200,18 +205,9 @@
> }
> write_unlock(&xtime_lock);
>
> - /*
> - * We need to make sure we catch up with lost interrupts
> - * so we do it in this loop rather than after this
> - * loop like we used to do.
> - * -- Cort <cort at fsmlabs.com>
> - */
> - if ( ppc_md.heartbeat[smp_processor_id()] &&
> - !ppc_md.heartbeat_count[smp_processor_id()]-- )
> - ppc_md.heartbeat[smp_processor_id()]();
>
> }
> - if ( !disarm_decr[smp_processor_id()] )
> + if ( !disarm_decr[cpu] )
> set_dec(next_dec);
> last_jiffy_stamp(cpu) = jiffy_stamp;
>
> ===== arch/ppc/platforms/chrp_setup.c 1.65 vs 1.66 =====
> --- 1.65/arch/ppc/platforms/chrp_setup.c Sat Aug 17 21:55:14 2002
> +++ 1.66/arch/ppc/platforms/chrp_setup.c Sun Aug 18 22:51:24 2002
> @@ -1,5 +1,5 @@
> /*
> - * BK Id: SCCS/s.chrp_setup.c 1.65 08/18/02 12:55:14 paulus
> + * BK Id: SCCS/s.chrp_setup.c 1.66 08/18/02 22:51:24 miltonm
> */
> /*
> * arch/ppc/platforms/setup.c
> @@ -225,8 +225,6 @@
> chrp_setup_arch(void)
> {
> struct device_node *device;
> - int i;
> - extern unsigned long smp_chrp_cpu_nr;
>
> /* init to some ~sane value until calibrate_delay() runs */
> loops_per_jiffy = 50000000/HZ;
> @@ -268,54 +266,34 @@
> /* Get the event scan rate for the rtas so we know how
> * often it expects a heartbeat. -- Cort
> */
> - if ( rtas_data )
> - {
> + if ( rtas_data ) {
> struct property *p;
> + unsigned long rate, count;
> device = find_devices("rtas");
> for ( p = device->properties;
> p && strncmp(p->name, "rtas-event-scan-rate", 20);
> p = p->next )
> /* nothing */ ;
> - if ( p && *(unsigned long *)p->value )
> - {
> -
> + if ( p && ( rate = *(unsigned long *)p->value ) > 0) {
> /*
> - * The CHRP RTAS note on multiprocessor systems:
> - * "In a multiprocessor system, each processor should
> - * call event-scan periodically, not always the same
> - * one. The event-scan function needs to be called a
> - * total of rtas-event-scan-rate times a minute"
> - *
> - * My interpretation of the Chrp docs on the
> - * scan rate is that not every CPU has to do
> - * the event scan. It seems to suggest that
> - * occasionally a different CPU should scan
> - * RTAS but this is so vague I think the authors
> - * considered it unimportant.
> - *
> - * Paul disagrees, so I run the event scan on each
> - * CPU. This is probably safe since it won't hurt
> - * and will only slow things down a bit.
> + * The value is the number of times per minute.
> + * For now assign the full workload here to cpu 0.
> *
> - * -- Cort <cort at fsmlabs.com>
> + * We now split the rate and spread the heartbeats
> + * when we kick secondary cpus so we can spread
> + * the calls evenly.
> */
> - int rate;
> + ppc_md.heartbeat = chrp_event_scan;
> +
> + count = (60*HZ) / rate;
> + if (!count) /* XXX insane */
> + count = 1;
> +
> + heartbeat_reset(0) = count;
> + heartbeat_count(0) = 1;
>
> - /* avoid div 0 problems */
> - if ( smp_chrp_cpu_nr )
> - rate = *(ulong *)p->value / smp_chrp_cpu_nr;
> - else
> - rate = *(ulong *)p->value;
> -
> - for ( i = 0; i < smp_chrp_cpu_nr ; i++ )
> - {
> - ppc_md.heartbeat[i] = chrp_event_scan;
> - ppc_md.heartbeat_reset[i] = (HZ/rate)-1;
> - ppc_md.heartbeat_count[i] = 1;
> - }
> printk("RTAS Event Scan Rate: %lu calls/minute "
> - "(%lu jiffies/cpu)\n",
> - *(unsigned long *)p->value, rate );
> + "(every %lu jiffies)\n", rate, count );
> }
> }
>
> @@ -330,8 +308,6 @@
> /* XXX: we should loop until the hardware says no more error logs -- Cort */
> call_rtas( "event-scan", 4, 1, &ret, 0xffffffff, 0,
> __pa(log), 1024 );
> - ppc_md.heartbeat_count[smp_processor_id()] =
> - ppc_md.heartbeat_reset[smp_processor_id()];
> }
>
> void __chrp
> ===== arch/ppc/platforms/chrp_smp.c 1.8 vs 1.9 =====
> --- 1.8/arch/ppc/platforms/chrp_smp.c Tue Aug 13 06:52:54 2002
> +++ 1.9/arch/ppc/platforms/chrp_smp.c Sun Aug 18 22:32:11 2002
> @@ -41,6 +41,35 @@
>
> extern unsigned long smp_chrp_cpu_nr;
>
> +/*
> + * The CHRP RTAS note on multiprocessor systems:
> + * "In a multiprocessor system, each processor should
> + * call event-scan periodically, not always the same
> + * one. The event-scan function needs to be called a
> + * total of rtas-event-scan-rate times a minute"
> + *
> + * We must call on each cpu in on a regular basis
> + * so that firmware can watch for cpu unique errors.
> + */
> +static void spread_heartbeat(void)
> +{
> + unsigned count = heartbeat_count(0);
> + unsigned offset = count;
> + int i;
> +
> + if ( !count || smp_chrp_cpu_nr < 2)
> + return;
> +
> + count *= smp_chrp_cpu_nr;
> +
> + for ( i = 0; i < smp_chrp_cpu_nr ; i++ )
> + {
> + heartbeat_reset(i) = count;
> + heartbeat_count(i) = i * offset;
> + }
> + printk("RTAS Event Scan now every %u jiffes on each cpu\n", count);
> +}
> +
> static int __init
> smp_chrp_probe(void)
> {
> @@ -96,6 +125,8 @@
>
> if (OpenPIC_Addr)
> do_openpic_setup_cpu();
> +
> + spread_heartbeat();
> }
>
> /* CHRP with openpic */
> ===== arch/ppc/platforms/ev64260_setup.c 1.15 vs 1.16 =====
> --- 1.15/arch/ppc/platforms/ev64260_setup.c Wed Aug 7 14:18:05 2002
> +++ 1.16/arch/ppc/platforms/ev64260_setup.c Sun Aug 18 21:03:03 2002
> @@ -523,10 +523,6 @@
> ppc_md.nvram_read_val = todc_direct_read_val;
> ppc_md.nvram_write_val = todc_direct_write_val;
>
> - ppc_md.heartbeat = NULL;
> - ppc_md.heartbeat_reset = 0;
> - ppc_md.heartbeat_count = 0;
> -
> #ifdef CONFIG_SERIAL_TEXT_DEBUG
> ev64260_set_bat();
> #ifdef CONFIG_GT64260_CONSOLE
> ===== arch/ppc/platforms/gemini_setup.c 1.33 vs 1.34 =====
> --- 1.33/arch/ppc/platforms/gemini_setup.c Sat Aug 17 21:55:15 2002
> +++ 1.34/arch/ppc/platforms/gemini_setup.c Sun Aug 18 21:03:03 2002
> @@ -1,5 +1,5 @@
> /*
> - * BK Id: SCCS/s.gemini_setup.c 1.33 08/18/02 12:55:15 paulus
> + * BK Id: SCCS/s.gemini_setup.c 1.34 08/18/02 21:03:03 miltonm
> */
> /*
> * arch/ppc/platforms/setup.c
> @@ -37,6 +37,7 @@
> #include <asm/time.h>
> #include <asm/open_pic.h>
> #include <asm/bootinfo.h>
> +#include <asm/hardirq.h> /* for heartbeat */
>
> void gemini_find_bridges(void);
> static int gemini_get_clock_speed(void);
> @@ -151,17 +152,22 @@
> static unsigned long led = GEMINI_LEDBASE+(4*8);
> static char direction = 8;
>
> -
> +#if 1
> /* We only want to do this on 1 CPU */
> - if (smp_processor_id())
> + if (smp_processor_id()) {
> + static short ratelimit;
> + if (!ratelimit++)
> + printk(KERN_ERR "%s: unexpected heartbeat on cpu %d\n",
> + __FUNCTION__, smp_processor_id());
> return;
> + }
> +#endif
> *(char *)led = 0;
> if ( (led + direction) > (GEMINI_LEDBASE+(7*8)) ||
> (led + direction) < (GEMINI_LEDBASE+(4*8)) )
> direction *= -1;
> led += direction;
> *(char *)led = 0xff;
> - ppc_md.heartbeat_count[0] = ppc_md.heartbeat_reset[0];
> }
>
> void __init gemini_setup_arch(void)
> @@ -184,9 +190,10 @@
>
> printk("Boot arguments: %s\n", cmd_line);
>
> - ppc_md.heartbeat[0] = gemini_heartbeat;
> - ppc_md.heartbeat_reset[0] = HZ/8;
> - ppc_md.heartbeat_count[0] = 1;
> + ppc_md.heartbeat = gemini_heartbeat;
> + /* only run on cpu 0 */
> + heartbeat_reset(0) = HZ/8;
> + heartbeat_count(0) = 1;
>
> /* Lookup PCI hosts */
> gemini_find_bridges();
> ===== arch/ppc/platforms/mcpn765_setup.c 1.26 vs 1.27 =====
> --- 1.26/arch/ppc/platforms/mcpn765_setup.c Wed Aug 7 14:18:06 2002
> +++ 1.27/arch/ppc/platforms/mcpn765_setup.c Sun Aug 18 21:03:03 2002
> @@ -389,10 +389,6 @@
> ppc_md.nvram_read_val = todc_m48txx_read_val;
> ppc_md.nvram_write_val = todc_m48txx_write_val;
>
> - ppc_md.heartbeat = NULL;
> - ppc_md.heartbeat_reset = 0;
> - ppc_md.heartbeat_count = 0;
> -
> #ifdef CONFIG_SERIAL_TEXT_DEBUG
> ppc_md.progress = mcpn765_progress;
> #endif
> ===== arch/ppc/platforms/sandpoint_setup.c 1.27 vs 1.28 =====
> --- 1.27/arch/ppc/platforms/sandpoint_setup.c Wed Aug 7 14:18:07 2002
> +++ 1.28/arch/ppc/platforms/sandpoint_setup.c Sun Aug 18 21:03:03 2002
> @@ -660,10 +660,6 @@
> ppc_md.nvram_read_val = todc_mc146818_read_val;
> ppc_md.nvram_write_val = todc_mc146818_write_val;
>
> - ppc_md.heartbeat = NULL;
> - ppc_md.heartbeat_reset = 0;
> - ppc_md.heartbeat_count = 0;
> -
> #ifdef CONFIG_SERIAL_TEXT_DEBUG
> ppc_md.progress = sandpoint_progress;
> #endif
> ===== arch/ppc/platforms/zx4500_setup.c 1.18 vs 1.19 =====
> --- 1.18/arch/ppc/platforms/zx4500_setup.c Wed Aug 7 14:18:07 2002
> +++ 1.19/arch/ppc/platforms/zx4500_setup.c Sun Aug 18 21:03:03 2002
> @@ -346,10 +346,6 @@
>
> ppc_md.calibrate_decr = zx4500_calibrate_decr;
>
> - ppc_md.heartbeat = NULL;
> - ppc_md.heartbeat_reset = 0;
> - ppc_md.heartbeat_count = 0;
> -
> #ifdef CONFIG_SERIAL_TEXT_DEBUG
> ppc_md.progress = zx4500_progress;
> #else /* !CONFIG_SERIAL_TEXT_DEBUG */
> ===== include/asm-ppc/hardirq.h 1.16 vs 1.17 =====
> --- 1.16/include/asm-ppc/hardirq.h Tue Nov 27 20:29:57 2001
> +++ 1.17/include/asm-ppc/hardirq.h Sun Aug 18 21:03:03 2002
> @@ -1,5 +1,5 @@
> /*
> - * BK Id: SCCS/s.hardirq.h 1.16 11/28/01 13:29:57 paulus
> + * BK Id: SCCS/s.hardirq.h 1.17 08/18/02 21:03:03 miltonm
> */
> #ifdef __KERNEL__
> #ifndef __ASM_HARDIRQ_H
> @@ -21,11 +21,15 @@
> unsigned int __syscall_count;
> struct task_struct * __ksoftirqd_task;
> unsigned int __last_jiffy_stamp;
> + unsigned int __heartbeat_count;
> + unsigned int __heartbeat_reset;
> } ____cacheline_aligned irq_cpustat_t;
>
> #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
>
> #define last_jiffy_stamp(cpu) __IRQ_STAT((cpu), __last_jiffy_stamp)
> +#define heartbeat_count(cpu) __IRQ_STAT((cpu), __heartbeat_count)
> +#define heartbeat_reset(cpu) __IRQ_STAT((cpu), __heartbeat_reset)
> /*
> * Are we in an interrupt context? Either doing bottom half
> * or hardware interrupt processing?
> ===== include/asm-ppc/machdep.h 1.43 vs 1.44 =====
> --- 1.43/include/asm-ppc/machdep.h Sat Aug 17 21:55:15 2002
> +++ 1.44/include/asm-ppc/machdep.h Sun Aug 18 21:03:03 2002
> @@ -1,5 +1,5 @@
> /*
> - * BK Id: SCCS/s.machdep.h 1.43 08/18/02 12:55:15 paulus
> + * BK Id: SCCS/s.machdep.h 1.44 08/18/02 21:03:03 miltonm
> */
> #ifdef __KERNEL__
> #ifndef _PPC_MACHDEP_H
> @@ -44,10 +44,7 @@
> int (*set_rtc_time)(unsigned long nowtime);
> unsigned long (*get_rtc_time)(void);
> void (*calibrate_decr)(void);
> -
> - void (*heartbeat[NR_CPUS])(void);
> - unsigned long heartbeat_reset[NR_CPUS];
> - unsigned long heartbeat_count[NR_CPUS];
> + void (*heartbeat)(void);
>
> unsigned long (*find_end_of_memory)(void);
> void (*setup_io_mappings)(void);
>
--
Troy Benjegerdes | master of mispeeling | 'da hozer' | hozer at drgw.net
-----"If this message isn't misspelled, I didn't write it" -- Me -----
"Why do musicians compose symphonies and poets write poems? They do it
because life wouldn't have any meaning for them if they didn't. That's
why I draw cartoons. It's my life." -- Charles Schulz
** Sent via the linuxppc-dev mail list. See http://lists.linuxppc.org/
More information about the Linuxppc-dev
mailing list