From paulus at samba.org Sun Aug 1 05:27:21 2004 From: paulus at samba.org (Paul Mackerras) Date: Sat, 31 Jul 2004 14:27:21 -0500 Subject: [RFC][PATCH] ppc64: better handling of H_ENTER failures In-Reply-To: <1091164951.2077.34.camel@gaston> References: <1091164951.2077.34.camel@gaston> Message-ID: <16651.62105.156445.534596@cargo.ozlabs.ibm.com> Benjamin Herrenschmidt writes: > This patch changes the hash insertion routines to return an error > instead of calling panic() when HV refuses to insert a HPTE. Looks good, do you have a test program? It should be possible to provoke the error with xmon -m /dev/mem, if nothing else. :) Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From benh at kernel.crashing.org Sun Aug 1 11:52:04 2004 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Sun, 01 Aug 2004 11:52:04 +1000 Subject: [RFC][PATCH] ppc64: better handling of H_ENTER failures In-Reply-To: <16651.62105.156445.534596@cargo.ozlabs.ibm.com> References: <1091164951.2077.34.camel@gaston> <16651.62105.156445.534596@cargo.ozlabs.ibm.com> Message-ID: <1091325123.7389.45.camel@gaston> On Sun, 2004-08-01 at 05:27, Paul Mackerras wrote: > Benjamin Herrenschmidt writes: > > > This patch changes the hash insertion routines to return an error > > instead of calling panic() when HV refuses to insert a HPTE. > > > Looks good, do you have a test program? It should be possible to > provoke the error with xmon -m /dev/mem, if nothing else. :) I had a test case with the VGA driver back in Austin, I didn't test from userland. Do you have a box at hand where HV will refuse a HPTE insertion for legacy ISA space ? that's probably the easiest way to trigger it. Ben. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Sun Aug 1 13:26:16 2004 From: anton at samba.org (Anton Blanchard) Date: Sun, 1 Aug 2004 13:26:16 +1000 Subject: [patch 3/4] Rework secondary SMT thread setup at boot In-Reply-To: <200407302145.i6ULjNc0063654@austin.ibm.com> References: <200407302145.i6ULjNc0063654@austin.ibm.com> Message-ID: <20040801032616.GE30253@krispykreme> Hi Nathan, > Our (ab)use of cpu_possible_map in setup_system to start secondary SMT > threads bothers me. Mark such threads in cpu_possible_map during > early boot; let RTAS tell us which present cpus are still offline > later so we can start them. I worry that some machines may not have a query-cpu-stopped-state rtas call... just checked and the s7a doesnt. If we fix query_cpu_stopped to not BUG and instead return error when it doesnt exist that should work. > I'm not totally sure about this one, it might be better to set up > cpu_sibling_map in prom_hold_cpus and use that in setup_system. > > Signed-off-by: Nathan Lynch > > > --- > > > diff -puN arch/ppc64/kernel/setup.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/setup.c > --- 2.6.8-rc2-mm1/arch/ppc64/kernel/setup.c~ppc64-fix-secondary-smt-thread-setup 2004-07-30 16:32:16.000000000 -0500 > +++ 2.6.8-rc2-mm1-nathanl/arch/ppc64/kernel/setup.c 2004-07-30 16:32:16.000000000 -0500 > @@ -232,16 +232,17 @@ void setup_system(unsigned long r3, unsi > chrp_init(r3, r4, r5, r6, r7); > > #ifdef CONFIG_SMP > - /* Start secondary threads on SMT systems */ > - for (i = 0; i < NR_CPUS; i++) { > - if (cpu_available(i) && !cpu_possible(i)) { > + /* Start secondary threads on SMT systems; primary threads > + * are already in the running state. 
> + */ > + for_each_present_cpu(i) { > + if (query_cpu_stopped > + (get_hard_smp_processor_id(i)) == 0) { > printk("%16.16x : starting thread\n", i); > rtas_call(rtas_token("start-cpu"), 3, 1, &ret, > get_hard_smp_processor_id(i), > (u32)*((unsigned long *)pseries_secondary_smp_init), > i); > - cpu_set(i, cpu_possible_map); > - systemcfg->processorCount++; > } > } > #endif /* CONFIG_SMP */ > diff -puN arch/ppc64/kernel/prom.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/prom.c > --- 2.6.8-rc2-mm1/arch/ppc64/kernel/prom.c~ppc64-fix-secondary-smt-thread-setup 2004-07-30 16:32:16.000000000 -0500 > +++ 2.6.8-rc2-mm1-nathanl/arch/ppc64/kernel/prom.c 2004-07-30 16:32:16.000000000 -0500 > @@ -1076,6 +1076,8 @@ next: > cpu_set(cpuid, RELOC(cpu_available_map)); > cpu_set(cpuid, RELOC(cpu_present_at_boot)); > cpu_set(cpuid, RELOC(cpu_present_map)); > + cpu_set(cpuid, RELOC(cpu_possible_map)); > + _systemcfg->processorCount++; > prom_printf("available\n"); > } else { > prom_printf("not available\n"); > diff -puN arch/ppc64/kernel/smp.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/smp.c > --- 2.6.8-rc2-mm1/arch/ppc64/kernel/smp.c~ppc64-fix-secondary-smt-thread-setup 2004-07-30 16:32:16.000000000 -0500 > +++ 2.6.8-rc2-mm1-nathanl/arch/ppc64/kernel/smp.c 2004-07-30 16:32:16.000000000 -0500 > @@ -228,7 +228,6 @@ static void __devinit smp_openpic_setup_ > do_openpic_setup_cpu(); > } > > -#ifdef CONFIG_HOTPLUG_CPU > /* Get state of physical CPU. > * Return codes: > * 0 - The processor is in the RTAS stopped state > @@ -237,7 +236,7 @@ static void __devinit smp_openpic_setup_ > * -1 - Hardware Error > * -2 - Hardware Busy, Try again later. > */ > -static int query_cpu_stopped(unsigned int pcpu) > +int query_cpu_stopped(unsigned int pcpu) > { > int cpu_status; > int status, qcss_tok; > @@ -254,6 +253,8 @@ static int query_cpu_stopped(unsigned in > return cpu_status; > } > > +#ifdef CONFIG_HOTPLUG_CPU > + > int __cpu_disable(void) > { > /* FIXME: go put this in a header somewhere */ > diff -puN include/asm-ppc64/smp.h~ppc64-fix-secondary-smt-thread-setup include/asm-ppc64/smp.h > --- 2.6.8-rc2-mm1/include/asm-ppc64/smp.h~ppc64-fix-secondary-smt-thread-setup 2004-07-30 16:32:16.000000000 -0500 > +++ 2.6.8-rc2-mm1-nathanl/include/asm-ppc64/smp.h 2004-07-30 16:32:16.000000000 -0500 > @@ -73,6 +73,7 @@ void smp_init_pSeries(void); > extern int __cpu_disable(void); > extern void __cpu_die(unsigned int cpu); > extern void cpu_die(void) __attribute__((noreturn)); > +extern int query_cpu_stopped(unsigned int pcpu); > #ifdef CONFIG_SCHED_SMT > extern cpumask_t cpu_sibling_map[NR_CPUS]; > #endif > > _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Sun Aug 1 13:49:20 2004 From: anton at samba.org (Anton Blanchard) Date: Sun, 1 Aug 2004 13:49:20 +1000 Subject: [patch 4/4] Remove unnecessary cpu maps (available, present_at_boot) In-Reply-To: <200407302145.i6ULjXc0055970@austin.ibm.com> References: <200407302145.i6ULjXc0055970@austin.ibm.com> Message-ID: <20040801034920.GF30253@krispykreme> Hi Nathan, > With cpu_present_map, we don't need these any longer. Thanks for all those patches. I tested them on current BK together with Srivatsa's cpu up race and managed to get an oops: cpu 1 (hwid 1) Ready to die... cpu 3 (hwid 3) Ready to die... 
cpu 0x5: Vector: 300 (Data Access) at [c000000002d2f4b0] pc: c00000000004b794: .find_busiest_group+0x290/0x450 lr: c00000000004b6c0: .find_busiest_group+0x1bc/0x450 sp: c000000002d2f730 msr: 8000000000001032 dar: 18 dsisr: 40000000 current = 0xc000000002909320 paca = 0xc00000000054ed00 pid = 2928, comm = kstopmachine enter ? for help 5:mon> Which is probably due to the SMT scheduler, at least it disappeared after I disabled the SMT scheduler config option. So it should go away once we get your hotplug fixes for sched domains merged. These 4 look good to merge once we work out the query cpu state rtas call issue. Anton ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Sun Aug 1 13:57:04 2004 From: anton at samba.org (Anton Blanchard) Date: Sun, 1 Aug 2004 13:57:04 +1000 Subject: [PATCH] [ppc64] fix hotplug irq migration code Message-ID: <20040801035704.GG30253@krispykreme> Hi, In migrate_irqs_away we werent converting a virtual irq to a real one. We ended up passing the wrong irq numbers to the hypervisor and migration of affinitised irqs on cpu hot unplug didnt work. Also clarify the rtas_stop_self printk. Signed-off-by: Anton Blanchard diff -puN arch/ppc64/kernel/xics.c~migrate-irqs-away arch/ppc64/kernel/xics.c --- foobar2/arch/ppc64/kernel/xics.c~migrate-irqs-away 2004-08-01 11:44:41.412353159 +1000 +++ foobar2-anton/arch/ppc64/kernel/xics.c 2004-08-01 11:53:43.050041725 +1000 @@ -657,9 +657,7 @@ void xics_migrate_irqs_away(void) int set_indicator = rtas_token("set-indicator"); const unsigned int giqs = 9005UL; /* Global Interrupt Queue Server */ int status = 0; - unsigned int irq, cpu = smp_processor_id(); - int xics_status[2]; - unsigned long flags; + unsigned int irq, virq, cpu = smp_processor_id(); BUG_ON(set_indicator == RTAS_UNKNOWN_SERVICE); @@ -676,12 +674,20 @@ void xics_migrate_irqs_away(void) ops->cppr_info(cpu, DEFAULT_PRIORITY); iosync(); - printk(KERN_WARNING "HOTPLUG: Migrating IRQs away\n"); - for_each_irq(irq) { - irq_desc_t *desc = get_irq_desc(irq); + for_each_irq(virq) { + irq_desc_t *desc; + int xics_status[2]; + unsigned long flags; + + /* We cant set affinity on ISA interrupts */ + if (virq < irq_offset_value()) + continue; + + desc = get_irq_desc(virq); + irq = virt_irq_to_real(irq_offset_down(virq)); /* We need to get IPIs still. 
*/ - if (irq_offset_down(irq) == XICS_IPI) + if (irq == XICS_IPI || irq == NO_IRQ) continue; /* We only need to migrate enabled IRQS */ @@ -696,7 +702,7 @@ void xics_migrate_irqs_away(void) if (status) { printk(KERN_ERR "migrate_irqs_away: irq=%d " "ibm,get-xive returns %d\n", - irq, status); + virq, status); goto unlock; } @@ -709,21 +715,20 @@ void xics_migrate_irqs_away(void) goto unlock; printk(KERN_WARNING "IRQ %d affinity broken off cpu %u\n", - irq, cpu); + virq, cpu); /* Reset affinity to all cpus */ xics_status[0] = default_distrib_server; - status = rtas_call(ibm_set_xive, 3, 1, NULL, - irq, xics_status[0], xics_status[1]); + status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, + xics_status[0], xics_status[1]); if (status) printk(KERN_ERR "migrate_irqs_away irq=%d " "ibm,set-xive returns %d\n", - irq, status); + virq, status); unlock: spin_unlock_irqrestore(&desc->lock, flags); } - } #endif diff -L xics.c -puN /dev/null /dev/null diff -puN arch/ppc64/kernel/rtas.c~migrate-irqs-away arch/ppc64/kernel/rtas.c --- foobar2/arch/ppc64/kernel/rtas.c~migrate-irqs-away 2004-08-01 12:39:46.199965774 +1000 +++ foobar2-anton/arch/ppc64/kernel/rtas.c 2004-08-01 12:40:28.722010572 +1000 @@ -500,7 +500,7 @@ void rtas_stop_self(void) BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE); - printk("%u %u Ready to die...\n", + printk("cpu %u (hwid %u) Ready to die...\n", smp_processor_id(), hard_smp_processor_id()); enter_rtas(__pa(rtas_args)); _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Sun Aug 1 14:59:53 2004 From: anton at samba.org (Anton Blanchard) Date: Sun, 1 Aug 2004 14:59:53 +1000 Subject: [patch 4/4] Remove unnecessary cpu maps (available, present_at_boot) In-Reply-To: <20040801034920.GF30253@krispykreme> References: <200407302145.i6ULjXc0055970@austin.ibm.com> <20040801034920.GF30253@krispykreme> Message-ID: <20040801045953.GH30253@krispykreme> Hi, > Thanks for all those patches. I tested them on current BK together with > Srivatsa's cpu up race and managed to get an oops: > > cpu 1 (hwid 1) Ready to die... > cpu 3 (hwid 3) Ready to die... > cpu 0x5: Vector: 300 (Data Access) at [c000000002d2f4b0] > pc: c00000000004b794: .find_busiest_group+0x290/0x450 > lr: c00000000004b6c0: .find_busiest_group+0x1bc/0x450 > sp: c000000002d2f730 > msr: 8000000000001032 > dar: 18 > dsisr: 40000000 > current = 0xc000000002909320 > paca = 0xc00000000054ed00 > pid = 2928, comm = kstopmachine > enter ? for help > 5:mon> > > Which is probably due to the SMT scheduler, at least it disappeared > after I disabled the SMT scheduler config option. So it should go away > once we get your hotplug fixes for sched domains merged. Managed to make it pop with SMT sched off. Still, its probably sched domains topology setup/teardown I think. Werent we using stop machine to synchronise updates to the sched domains topology on sles9? Anton cpu 5 (hwid 5) Ready to die... cpu 0x7: Vector: 300 (Data Access) at [c00000000230b4b0] pc: c000000000049224: .find_busiest_group+0x290/0x450 lr: c000000000049150: .find_busiest_group+0x1bc/0x450 sp: c00000000230b730 msr: 8000000000001032 dar: 18 dsisr: 40000000 current = 0xc0000000029ce9b0 paca = 0xc00000000054ff00 pid = 3438, comm = kstopmachine enter ? 
for help 7:mon> t [c00000000230b730] c00000000230b7c0 (unreliable) [c00000000230b850] c00000000004aff0 .rebalance_tick+0x12c/0x2d4 [c00000000230b940] c00000000005bf98 .update_process_times+0xc4/0x154 [c00000000230b9e0] c000000000038dc0 .smp_local_timer_interrupt+0x3c/0x58 [c00000000230ba50] c00000000001529c .timer_interrupt+0x11c/0x3fc [c00000000230bb30] c00000000000a2b4 Decrementer_common+0xb4/0x100 --- Exception: 901 (Decrementer) at c000000000073ad0 .do_stop+0x26c/0x27c [c00000000230be20] c000000000073ab4 .do_stop+0x250/0x27c (unreliable) [c00000000230bed0] c00000000006b1c0 .kthread+0x178/0x1c8 [c00000000230bf90] c000000000017dac .kernel_thread+0x4c/0x68 ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:12:35 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:12:35 +1000 Subject: [0/5] STAB cleanup Message-ID: <20040803021235.GB3056@zax> This series of patches clean up the segment table code in the PPC64 kernel. I've given the patches basic testing on POWER3 (RS/6000 270) and RS64 iSeries. Paul, if you don't see any problems with these, please forward upstream. The five patches are: 1/5 stabs-move-to-mm - Move stab code to arch/ppc64/mm 2/5 stabs-kill-bitfields - Remove ugly bitfields 3/5 stabs-random-cleanups - Various simple code cleanups 4/5 stabs-checks-in-raw-ste-allocate - Remove duplication of various address check 5/5 stabs-switch-stab - Give flush_stab() a better name -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:13:28 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:13:28 +1000 Subject: [1/5] STAB cleanup - move to arch/ppc64/mm In-Reply-To: <20040803021235.GB3056@zax> References: <20040803021235.GB3056@zax> Message-ID: <20040803021328.GC3056@zax> Move the segment table handling code from arch/ppc64/kernel to arch/ppc64/mm where it better belongs. This patch doesn't actually change the code at all. Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/kernel/Makefile =================================================================== --- working-2.6.orig/arch/ppc64/kernel/Makefile +++ working-2.6/arch/ppc64/kernel/Makefile @@ -7,7 +7,7 @@ obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ - align.o semaphore.o bitops.o stab.o pacaData.o \ + align.o semaphore.o bitops.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ Index: working-2.6/arch/ppc64/kernel/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/stab.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * PowerPC64 Segment Translation Support. - * - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * - * Copyright (C) 2002 Anton Blanchard , IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -static int make_ste(unsigned long stab, unsigned long esid, - unsigned long vsid); - -void slb_initialize(void); - -/* - * Build an entry for the base kernel segment and put it into - * the segment table or SLB. All other segment table or SLB - * entries are faulted in. - */ -void stab_initialize(unsigned long stab) -{ - unsigned long vsid = get_kernel_vsid(KERNELBASE); - - if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { - slb_initialize(); - } else { - asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(KERNELBASE), vsid); - - /* Order update */ - asm volatile("sync":::"memory"); - } -} - -/* Both the segment table and SLB code uses the following cache */ -#define NR_STAB_CACHE_ENTRIES 8 -DEFINE_PER_CPU(long, stab_cache_ptr); -DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); - -/* - * Segment table stuff - */ - -/* - * Create a segment table entry for the given esid/vsid pair. - */ -static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) -{ - unsigned long entry, group, old_esid, castout_entry, i; - unsigned int global_entry; - STE *ste, *castout_ste; - unsigned long kernel_segment = (REGION_ID(esid << SID_SHIFT) != - USER_REGION_ID); - - /* Search the primary group first. */ - global_entry = (esid & 0x1f) << 3; - ste = (STE *)(stab | ((esid & 0x1f) << 7)); - - /* Find an empty entry, if one exists. */ - for (group = 0; group < 2; group++) { - for (entry = 0; entry < 8; entry++, ste++) { - if (!(ste->dw0.dw0.v)) { - ste->dw0.dword0 = 0; - ste->dw1.dword1 = 0; - ste->dw1.dw1.vsid = vsid; - ste->dw0.dw0.esid = esid; - ste->dw0.dw0.kp = 1; - if (!kernel_segment) - ste->dw0.dw0.ks = 1; - asm volatile("eieio":::"memory"); - ste->dw0.dw0.v = 1; - return (global_entry | entry); - } - } - /* Now search the secondary group. */ - global_entry = ((~esid) & 0x1f) << 3; - ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); - } - - /* - * Could not find empty entry, pick one with a round robin selection. - * Search all entries in the two groups. - */ - castout_entry = get_paca()->stab_rr; - for (i = 0; i < 16; i++) { - if (castout_entry < 8) { - global_entry = (esid & 0x1f) << 3; - ste = (STE *)(stab | ((esid & 0x1f) << 7)); - castout_ste = ste + castout_entry; - } else { - global_entry = ((~esid) & 0x1f) << 3; - ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); - castout_ste = ste + (castout_entry - 8); - } - - /* Dont cast out the first kernel segment */ - if (castout_ste->dw0.dw0.esid != GET_ESID(KERNELBASE)) - break; - - castout_entry = (castout_entry + 1) & 0xf; - } - - get_paca()->stab_rr = (castout_entry + 1) & 0xf; - - /* Modify the old entry to the new value. */ - - /* Force previous translations to complete. 
DRENG */ - asm volatile("isync" : : : "memory"); - - castout_ste->dw0.dw0.v = 0; - asm volatile("sync" : : : "memory"); /* Order update */ - - castout_ste->dw0.dword0 = 0; - castout_ste->dw1.dword1 = 0; - castout_ste->dw1.dw1.vsid = vsid; - old_esid = castout_ste->dw0.dw0.esid; - castout_ste->dw0.dw0.esid = esid; - castout_ste->dw0.dw0.kp = 1; - if (!kernel_segment) - castout_ste->dw0.dw0.ks = 1; - asm volatile("eieio" : : : "memory"); /* Order update */ - castout_ste->dw0.dw0.v = 1; - asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); - /* Ensure completion of slbie */ - asm volatile("sync" : : : "memory"); - - return (global_entry | (castout_entry & 0x7)); -} - -static inline void __ste_allocate(unsigned long esid, unsigned long vsid) -{ - unsigned char stab_entry; - unsigned long offset; - int region_id = REGION_ID(esid << SID_SHIFT); - - stab_entry = make_ste(get_paca()->stab_addr, esid, vsid); - - if (region_id != USER_REGION_ID) - return; - - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = stab_entry; - else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; -} - -/* - * Allocate a segment table entry for the given ea. - */ -int ste_allocate(unsigned long ea) -{ - unsigned long vsid, esid; - mm_context_t context; - - /* Check for invalid effective addresses. */ - if (!IS_VALID_EA(ea)) - return 1; - - /* Kernel or user address? */ - if (REGION_ID(ea) >= KERNEL_REGION_ID) { - vsid = get_kernel_vsid(ea); - context = KERNEL_CONTEXT(ea); - } else { - if (!current->mm) - return 1; - - context = current->mm->context; - vsid = get_vsid(context.id, ea); - } - - esid = GET_ESID(ea); - __ste_allocate(esid, vsid); - /* Order update */ - asm volatile("sync":::"memory"); - - return 0; -} - -/* - * preload some userspace segments into the segment table. - */ -static void preload_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - unsigned long pc_esid = GET_ESID(pc); - unsigned long stack_esid = GET_ESID(stack); - unsigned long unmapped_base_esid; - unsigned long vsid; - - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - unmapped_base_esid = GET_ESID(unmapped_base); - - if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, pc); - __ste_allocate(pc_esid, vsid); - - if (pc_esid == stack_esid) - return; - - if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, stack); - __ste_allocate(stack_esid, vsid); - - if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid) - return; - - if (!IS_VALID_EA(unmapped_base) || - (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, unmapped_base); - __ste_allocate(unmapped_base_esid, vsid); - - /* Order update */ - asm volatile("sync" : : : "memory"); -} - -/* Flush all user entries from the segment table of the current processor. */ -void flush_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - STE *stab = (STE *) get_paca()->stab_addr; - STE *ste; - unsigned long offset = __get_cpu_var(stab_cache_ptr); - - /* Force previous translations to complete. 
DRENG */ - asm volatile("isync" : : : "memory"); - - if (offset <= NR_STAB_CACHE_ENTRIES) { - int i; - - for (i = 0; i < offset; i++) { - ste = stab + __get_cpu_var(stab_cache[i]); - ste->dw0.dw0.v = 0; - } - } else { - unsigned long entry; - - /* Invalidate all entries. */ - ste = stab; - - /* Never flush the first entry. */ - ste += 1; - for (entry = 1; - entry < (PAGE_SIZE / sizeof(STE)); - entry++, ste++) { - unsigned long ea; - ea = ste->dw0.dw0.esid << SID_SHIFT; - if (ea < KERNELBASE) { - ste->dw0.dw0.v = 0; - } - } - } - - asm volatile("sync; slbia; sync":::"memory"); - - __get_cpu_var(stab_cache_ptr) = 0; - - preload_stab(tsk, mm); -} Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- /dev/null +++ working-2.6/arch/ppc64/mm/stab.c @@ -0,0 +1,281 @@ +/* + * PowerPC64 Segment Translation Support. + * + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +static int make_ste(unsigned long stab, unsigned long esid, + unsigned long vsid); + +void slb_initialize(void); + +/* + * Build an entry for the base kernel segment and put it into + * the segment table or SLB. All other segment table or SLB + * entries are faulted in. + */ +void stab_initialize(unsigned long stab) +{ + unsigned long vsid = get_kernel_vsid(KERNELBASE); + + if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { + slb_initialize(); + } else { + asm volatile("isync; slbia; isync":::"memory"); + make_ste(stab, GET_ESID(KERNELBASE), vsid); + + /* Order update */ + asm volatile("sync":::"memory"); + } +} + +/* Both the segment table and SLB code uses the following cache */ +#define NR_STAB_CACHE_ENTRIES 8 +DEFINE_PER_CPU(long, stab_cache_ptr); +DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); + +/* + * Segment table stuff + */ + +/* + * Create a segment table entry for the given esid/vsid pair. + */ +static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) +{ + unsigned long entry, group, old_esid, castout_entry, i; + unsigned int global_entry; + STE *ste, *castout_ste; + unsigned long kernel_segment = (REGION_ID(esid << SID_SHIFT) != + USER_REGION_ID); + + /* Search the primary group first. */ + global_entry = (esid & 0x1f) << 3; + ste = (STE *)(stab | ((esid & 0x1f) << 7)); + + /* Find an empty entry, if one exists. */ + for (group = 0; group < 2; group++) { + for (entry = 0; entry < 8; entry++, ste++) { + if (!(ste->dw0.dw0.v)) { + ste->dw0.dword0 = 0; + ste->dw1.dword1 = 0; + ste->dw1.dw1.vsid = vsid; + ste->dw0.dw0.esid = esid; + ste->dw0.dw0.kp = 1; + if (!kernel_segment) + ste->dw0.dw0.ks = 1; + asm volatile("eieio":::"memory"); + ste->dw0.dw0.v = 1; + return (global_entry | entry); + } + } + /* Now search the secondary group. */ + global_entry = ((~esid) & 0x1f) << 3; + ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + } + + /* + * Could not find empty entry, pick one with a round robin selection. + * Search all entries in the two groups. 
+ */ + castout_entry = get_paca()->stab_rr; + for (i = 0; i < 16; i++) { + if (castout_entry < 8) { + global_entry = (esid & 0x1f) << 3; + ste = (STE *)(stab | ((esid & 0x1f) << 7)); + castout_ste = ste + castout_entry; + } else { + global_entry = ((~esid) & 0x1f) << 3; + ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + castout_ste = ste + (castout_entry - 8); + } + + /* Dont cast out the first kernel segment */ + if (castout_ste->dw0.dw0.esid != GET_ESID(KERNELBASE)) + break; + + castout_entry = (castout_entry + 1) & 0xf; + } + + get_paca()->stab_rr = (castout_entry + 1) & 0xf; + + /* Modify the old entry to the new value. */ + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + castout_ste->dw0.dw0.v = 0; + asm volatile("sync" : : : "memory"); /* Order update */ + + castout_ste->dw0.dword0 = 0; + castout_ste->dw1.dword1 = 0; + castout_ste->dw1.dw1.vsid = vsid; + old_esid = castout_ste->dw0.dw0.esid; + castout_ste->dw0.dw0.esid = esid; + castout_ste->dw0.dw0.kp = 1; + if (!kernel_segment) + castout_ste->dw0.dw0.ks = 1; + asm volatile("eieio" : : : "memory"); /* Order update */ + castout_ste->dw0.dw0.v = 1; + asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); + /* Ensure completion of slbie */ + asm volatile("sync" : : : "memory"); + + return (global_entry | (castout_entry & 0x7)); +} + +static inline void __ste_allocate(unsigned long esid, unsigned long vsid) +{ + unsigned char stab_entry; + unsigned long offset; + int region_id = REGION_ID(esid << SID_SHIFT); + + stab_entry = make_ste(get_paca()->stab_addr, esid, vsid); + + if (region_id != USER_REGION_ID) + return; + + offset = __get_cpu_var(stab_cache_ptr); + if (offset < NR_STAB_CACHE_ENTRIES) + __get_cpu_var(stab_cache[offset++]) = stab_entry; + else + offset = NR_STAB_CACHE_ENTRIES+1; + __get_cpu_var(stab_cache_ptr) = offset; +} + +/* + * Allocate a segment table entry for the given ea. + */ +int ste_allocate(unsigned long ea) +{ + unsigned long vsid, esid; + mm_context_t context; + + /* Check for invalid effective addresses. */ + if (!IS_VALID_EA(ea)) + return 1; + + /* Kernel or user address? */ + if (REGION_ID(ea) >= KERNEL_REGION_ID) { + vsid = get_kernel_vsid(ea); + context = KERNEL_CONTEXT(ea); + } else { + if (!current->mm) + return 1; + + context = current->mm->context; + vsid = get_vsid(context.id, ea); + } + + esid = GET_ESID(ea); + __ste_allocate(esid, vsid); + /* Order update */ + asm volatile("sync":::"memory"); + + return 0; +} + +/* + * preload some userspace segments into the segment table. 
+ */ +static void preload_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + unsigned long pc_esid = GET_ESID(pc); + unsigned long stack_esid = GET_ESID(stack); + unsigned long unmapped_base_esid; + unsigned long vsid; + + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + unmapped_base_esid = GET_ESID(unmapped_base); + + if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context.id, pc); + __ste_allocate(pc_esid, vsid); + + if (pc_esid == stack_esid) + return; + + if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context.id, stack); + __ste_allocate(stack_esid, vsid); + + if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid) + return; + + if (!IS_VALID_EA(unmapped_base) || + (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context.id, unmapped_base); + __ste_allocate(unmapped_base_esid, vsid); + + /* Order update */ + asm volatile("sync" : : : "memory"); +} + +/* Flush all user entries from the segment table of the current processor. */ +void flush_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + STE *stab = (STE *) get_paca()->stab_addr; + STE *ste; + unsigned long offset = __get_cpu_var(stab_cache_ptr); + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + if (offset <= NR_STAB_CACHE_ENTRIES) { + int i; + + for (i = 0; i < offset; i++) { + ste = stab + __get_cpu_var(stab_cache[i]); + ste->dw0.dw0.v = 0; + } + } else { + unsigned long entry; + + /* Invalidate all entries. */ + ste = stab; + + /* Never flush the first entry. */ + ste += 1; + for (entry = 1; + entry < (PAGE_SIZE / sizeof(STE)); + entry++, ste++) { + unsigned long ea; + ea = ste->dw0.dw0.esid << SID_SHIFT; + if (ea < KERNELBASE) { + ste->dw0.dw0.v = 0; + } + } + } + + asm volatile("sync; slbia; sync":::"memory"); + + __get_cpu_var(stab_cache_ptr) = 0; + + preload_stab(tsk, mm); +} Index: working-2.6/arch/ppc64/mm/Makefile =================================================================== --- working-2.6.orig/arch/ppc64/mm/Makefile +++ working-2.6/arch/ppc64/mm/Makefile @@ -4,6 +4,7 @@ EXTRA_CFLAGS += -mno-minimal-toc -obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o slb_low.o slb.o +obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ + slb_low.o slb.o stab.o obj-$(CONFIG_DISCONTIGMEM) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:13:47 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:13:47 +1000 Subject: [2/5] STAB cleanup - kill bitfields In-Reply-To: <20040803021328.GC3056@zax> References: <20040803021235.GB3056@zax> <20040803021328.GC3056@zax> Message-ID: <20040803021347.GD3056@zax> Remove the overly verbose and hard to follow use of bitfields in the PPC64 segment table code, replacing it with explicit bitmask operations. 
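To make the before/after concrete, here is a standalone user-space toy of the new representation. The STE_* masks and struct stab_entry are the ones added by this patch; SID_SHIFT is assumed to be 28 (256MB segments) purely for illustration, and the eieio barrier between the two stores is dropped since this isn't the real kernel code:

        /* Illustrative sketch only -- builds in user space on a 64-bit box. */
        #include <stdio.h>

        #define SID_SHIFT       28      /* assumed here, not part of the patch */
        #define STE_ESID_V      0x80
        #define STE_ESID_KS     0x20
        #define STE_ESID_KP     0x10
        #define STE_VSID_SHIFT  12

        struct stab_entry {
                unsigned long esid_data;
                unsigned long vsid_data;
        };

        static void ste_fill(struct stab_entry *ste, unsigned long esid,
                             unsigned long vsid, int kernel_segment)
        {
                unsigned long esid_data;

                esid_data = (esid << SID_SHIFT) | STE_ESID_KP | STE_ESID_V;
                if (!kernel_segment)
                        esid_data |= STE_ESID_KS;

                /* VSID word first, then publish the ESID word with the valid
                 * bit set; the kernel code orders the two with an eieio. */
                ste->vsid_data = vsid << STE_VSID_SHIFT;
                ste->esid_data = esid_data;
        }

        int main(void)
        {
                struct stab_entry ste;

                ste_fill(&ste, 0x1, 0x12345, 0);
                printf("esid_data=%#lx vsid_data=%#lx\n",
                       ste.esid_data, ste.vsid_data);
                return 0;
        }
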
Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/stab.c 2004-07-29 16:14:46.201804936 +1000 +++ working-2.6/arch/ppc64/mm/stab.c 2004-07-29 16:14:46.642871056 +1000 @@ -61,33 +61,32 @@ { unsigned long entry, group, old_esid, castout_entry, i; unsigned int global_entry; - STE *ste, *castout_ste; + struct stab_entry *ste, *castout_ste; unsigned long kernel_segment = (REGION_ID(esid << SID_SHIFT) != USER_REGION_ID); + unsigned long esid_data; /* Search the primary group first. */ global_entry = (esid & 0x1f) << 3; - ste = (STE *)(stab | ((esid & 0x1f) << 7)); + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); /* Find an empty entry, if one exists. */ for (group = 0; group < 2; group++) { for (entry = 0; entry < 8; entry++, ste++) { - if (!(ste->dw0.dw0.v)) { - ste->dw0.dword0 = 0; - ste->dw1.dword1 = 0; - ste->dw1.dw1.vsid = vsid; - ste->dw0.dw0.esid = esid; - ste->dw0.dw0.kp = 1; - if (!kernel_segment) - ste->dw0.dw0.ks = 1; + if (!(ste->esid_data & STE_ESID_V)) { + ste->vsid_data = vsid << STE_VSID_SHIFT; asm volatile("eieio":::"memory"); - ste->dw0.dw0.v = 1; + esid_data = esid << SID_SHIFT; + esid_data |= STE_ESID_KP | STE_ESID_V; + if (! kernel_segment) + esid_data |= STE_ESID_KS; + ste->esid_data = esid_data; return (global_entry | entry); } } /* Now search the secondary group. */ global_entry = ((~esid) & 0x1f) << 3; - ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); } /* @@ -98,16 +97,16 @@ for (i = 0; i < 16; i++) { if (castout_entry < 8) { global_entry = (esid & 0x1f) << 3; - ste = (STE *)(stab | ((esid & 0x1f) << 7)); + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); castout_ste = ste + castout_entry; } else { global_entry = ((~esid) & 0x1f) << 3; - ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); castout_ste = ste + (castout_entry - 8); } /* Dont cast out the first kernel segment */ - if (castout_ste->dw0.dw0.esid != GET_ESID(KERNELBASE)) + if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) break; castout_entry = (castout_entry + 1) & 0xf; @@ -120,19 +119,21 @@ /* Force previous translations to complete. DRENG */ asm volatile("isync" : : : "memory"); - castout_ste->dw0.dw0.v = 0; + old_esid = castout_ste->esid_data >> SID_SHIFT; + castout_ste->esid_data = 0; /* Invalidate old entry */ + asm volatile("sync" : : : "memory"); /* Order update */ - castout_ste->dw0.dword0 = 0; - castout_ste->dw1.dword1 = 0; - castout_ste->dw1.dw1.vsid = vsid; - old_esid = castout_ste->dw0.dw0.esid; - castout_ste->dw0.dw0.esid = esid; - castout_ste->dw0.dw0.kp = 1; - if (!kernel_segment) - castout_ste->dw0.dw0.ks = 1; + castout_ste->vsid_data = vsid << STE_VSID_SHIFT; + asm volatile("eieio" : : : "memory"); /* Order update */ - castout_ste->dw0.dw0.v = 1; + + esid_data = esid << SID_SHIFT; + esid_data |= STE_ESID_KP | STE_ESID_V; + if (!kernel_segment) + esid_data |= STE_ESID_KS; + castout_ste->esid_data = esid_data; + asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); /* Ensure completion of slbie */ asm volatile("sync" : : : "memory"); @@ -240,8 +241,8 @@ /* Flush all user entries from the segment table of the current processor. 
*/ void flush_stab(struct task_struct *tsk, struct mm_struct *mm) { - STE *stab = (STE *) get_paca()->stab_addr; - STE *ste; + struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; + struct stab_entry *ste; unsigned long offset = __get_cpu_var(stab_cache_ptr); /* Force previous translations to complete. DRENG */ @@ -252,7 +253,7 @@ for (i = 0; i < offset; i++) { ste = stab + __get_cpu_var(stab_cache[i]); - ste->dw0.dw0.v = 0; + ste->esid_data = 0; /* invalidate entry */ } } else { unsigned long entry; @@ -263,12 +264,12 @@ /* Never flush the first entry. */ ste += 1; for (entry = 1; - entry < (PAGE_SIZE / sizeof(STE)); + entry < (PAGE_SIZE / sizeof(struct stab_entry)); entry++, ste++) { unsigned long ea; - ea = ste->dw0.dw0.esid << SID_SHIFT; + ea = ste->esid_data & ESID_MASK; if (ea < KERNELBASE) { - ste->dw0.dw0.v = 0; + ste->esid_data = 0; } } } Index: working-2.6/include/asm-ppc64/mmu.h =================================================================== --- working-2.6.orig/include/asm-ppc64/mmu.h 2004-07-29 16:14:41.040790952 +1000 +++ working-2.6/include/asm-ppc64/mmu.h 2004-07-29 16:14:46.643870904 +1000 @@ -37,33 +37,17 @@ mm_context_t ctx = { .id = REGION_ID(ea), KERNEL_LOW_HPAGES}; \ ctx; }) -typedef struct { - unsigned long esid: 36; /* Effective segment ID */ - unsigned long resv0:20; /* Reserved */ - unsigned long v: 1; /* Entry valid (v=1) or invalid */ - unsigned long resv1: 1; /* Reserved */ - unsigned long ks: 1; /* Supervisor (privileged) state storage key */ - unsigned long kp: 1; /* Problem state storage key */ - unsigned long n: 1; /* No-execute if n=1 */ - unsigned long resv2: 3; /* padding to a 64b boundary */ -} ste_dword0; - -typedef struct { - unsigned long vsid: 52; /* Virtual segment ID */ - unsigned long resv0:12; /* Padding to a 64b boundary */ -} ste_dword1; - -typedef struct _STE { - union { - unsigned long dword0; - ste_dword0 dw0; - } dw0; - - union { - unsigned long dword1; - ste_dword1 dw1; - } dw1; -} STE; +#define STE_ESID_V 0x80 +#define STE_ESID_KS 0x20 +#define STE_ESID_KP 0x10 +#define STE_ESID_N 0x08 + +#define STE_VSID_SHIFT 12 + +struct stab_entry { + unsigned long esid_data; + unsigned long vsid_data; +}; /* Hardware Page Table Entry */ -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:14:09 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:14:09 +1000 Subject: [3/5] STAB cleanup - assorted cleanups In-Reply-To: <20040803021347.GD3056@zax> References: <20040803021235.GB3056@zax> <20040803021328.GC3056@zax> <20040803021347.GD3056@zax> Message-ID: <20040803021409.GE3056@zax> Clean up various aspects of the PPC64 segment table management code: move code around to suit call order, remove redundant computations, and extra intermediate variables which don't really add to clarity. Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/stab.c +++ working-2.6/arch/ppc64/mm/stab.c @@ -20,51 +20,26 @@ #include #include -static int make_ste(unsigned long stab, unsigned long esid, - unsigned long vsid); - -void slb_initialize(void); - -/* - * Build an entry for the base kernel segment and put it into - * the segment table or SLB. 
All other segment table or SLB - * entries are faulted in. - */ -void stab_initialize(unsigned long stab) -{ - unsigned long vsid = get_kernel_vsid(KERNELBASE); - - if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { - slb_initialize(); - } else { - asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(KERNELBASE), vsid); - - /* Order update */ - asm volatile("sync":::"memory"); - } -} - /* Both the segment table and SLB code uses the following cache */ #define NR_STAB_CACHE_ENTRIES 8 DEFINE_PER_CPU(long, stab_cache_ptr); DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); /* - * Segment table stuff - */ - -/* * Create a segment table entry for the given esid/vsid pair. */ static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) { + unsigned long esid_data, vsid_data; unsigned long entry, group, old_esid, castout_entry, i; unsigned int global_entry; struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (REGION_ID(esid << SID_SHIFT) != - USER_REGION_ID); - unsigned long esid_data; + unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + + vsid_data = vsid << STE_VSID_SHIFT; + esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; + if (! kernel_segment) + esid_data |= STE_ESID_KS; /* Search the primary group first. */ global_entry = (esid & 0x1f) << 3; @@ -74,12 +49,8 @@ for (group = 0; group < 2; group++) { for (entry = 0; entry < 8; entry++, ste++) { if (!(ste->esid_data & STE_ESID_V)) { - ste->vsid_data = vsid << STE_VSID_SHIFT; + ste->vsid_data = vsid_data; asm volatile("eieio":::"memory"); - esid_data = esid << SID_SHIFT; - esid_data |= STE_ESID_KP | STE_ESID_V; - if (! kernel_segment) - esid_data |= STE_ESID_KS; ste->esid_data = esid_data; return (global_entry | entry); } @@ -124,14 +95,8 @@ asm volatile("sync" : : : "memory"); /* Order update */ - castout_ste->vsid_data = vsid << STE_VSID_SHIFT; - + castout_ste->vsid_data = vsid_data; asm volatile("eieio" : : : "memory"); /* Order update */ - - esid_data = esid << SID_SHIFT; - esid_data |= STE_ESID_KP | STE_ESID_V; - if (!kernel_segment) - esid_data |= STE_ESID_KS; castout_ste->esid_data = esid_data; asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); @@ -145,11 +110,10 @@ { unsigned char stab_entry; unsigned long offset; - int region_id = REGION_ID(esid << SID_SHIFT); stab_entry = make_ste(get_paca()->stab_addr, esid, vsid); - if (region_id != USER_REGION_ID) + if ((esid << SID_SHIFT) >= KERNELBASE) return; offset = __get_cpu_var(stab_cache_ptr); @@ -165,27 +129,23 @@ */ int ste_allocate(unsigned long ea) { - unsigned long vsid, esid; - mm_context_t context; + unsigned long vsid; /* Check for invalid effective addresses. */ if (!IS_VALID_EA(ea)) return 1; /* Kernel or user address? 
*/ - if (REGION_ID(ea) >= KERNEL_REGION_ID) { + if (ea >= KERNELBASE) { vsid = get_kernel_vsid(ea); - context = KERNEL_CONTEXT(ea); } else { if (!current->mm) return 1; - context = current->mm->context; - vsid = get_vsid(context.id, ea); + vsid = get_vsid(current->mm->context.id, ea); } - esid = GET_ESID(ea); - __ste_allocate(esid, vsid); + __ste_allocate(GET_ESID(ea), vsid); /* Order update */ asm volatile("sync":::"memory"); @@ -200,39 +160,34 @@ unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; - unsigned long pc_esid = GET_ESID(pc); - unsigned long stack_esid = GET_ESID(stack); - unsigned long unmapped_base_esid; unsigned long vsid; if (test_tsk_thread_flag(tsk, TIF_32BIT)) unmapped_base = TASK_UNMAPPED_BASE_USER32; else unmapped_base = TASK_UNMAPPED_BASE_USER64; - - unmapped_base_esid = GET_ESID(unmapped_base); - - if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) + + if (!IS_VALID_EA(pc) || (pc >= KERNELBASE)) return; vsid = get_vsid(mm->context.id, pc); - __ste_allocate(pc_esid, vsid); - - if (pc_esid == stack_esid) + __ste_allocate(GET_ESID(pc), vsid); + + if (GET_ESID(pc) == GET_ESID(stack)) return; - - if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) + + if (!IS_VALID_EA(stack) || (stack >= KERNELBASE)) return; vsid = get_vsid(mm->context.id, stack); - __ste_allocate(stack_esid, vsid); - - if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid) + __ste_allocate(GET_ESID(stack), vsid); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) return; - - if (!IS_VALID_EA(unmapped_base) || - (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) + + if (!IS_VALID_EA(unmapped_base) || (unmapped_base >= KERNELBASE)) return; vsid = get_vsid(mm->context.id, unmapped_base); - __ste_allocate(unmapped_base_esid, vsid); + __ste_allocate(GET_ESID(unmapped_base), vsid); /* Order update */ asm volatile("sync" : : : "memory"); @@ -280,3 +235,25 @@ preload_stab(tsk, mm); } + +extern void slb_initialize(void); + +/* + * Build an entry for the base kernel segment and put it into + * the segment table or SLB. All other segment table or SLB + * entries are faulted in. + */ +void stab_initialize(unsigned long stab) +{ + unsigned long vsid = get_kernel_vsid(KERNELBASE); + + if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { + slb_initialize(); + } else { + asm volatile("isync; slbia; isync":::"memory"); + make_ste(stab, GET_ESID(KERNELBASE), vsid); + + /* Order update */ + asm volatile("sync":::"memory"); + } +} -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:15:24 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:15:24 +1000 Subject: [4/5] STAB cleanup - remove check duplication In-Reply-To: <20040803021409.GE3056@zax> References: <20040803021235.GB3056@zax> <20040803021328.GC3056@zax> <20040803021347.GD3056@zax> <20040803021409.GE3056@zax> Message-ID: <20040803021524.GF3056@zax> Rearrange the ste_allocate()/__ste_allocate() path in the PPC64 segment table code more sensibly. This moves various valid address checks into the lower-level __ste_allocate(), meaning the checks don't need to be duplicated in preload_stab(). 
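The shape of the change, as a stripped-down sketch (hypothetical names and limits, nothing ppc64-specific -- the point is just that the validity check lives in the low-level helper, so the fault path and the preload path no longer each repeat it):

        #include <stdio.h>

        #define ADDR_LIMIT      0x1000UL        /* made-up validity limit */

        /* All checking happens here; callers can call it blindly. */
        static int low_level_alloc(unsigned long ea)
        {
                if (ea >= ADDR_LIMIT)
                        return 1;
                printf("allocated entry for %#lx\n", ea);
                return 0;
        }

        /* fault path: propagate the error to the caller */
        static int fault_path(unsigned long ea)
        {
                return low_level_alloc(ea);
        }

        /* preload path: a rejected address is harmless, just ignore it */
        static void preload_path(unsigned long pc, unsigned long stack)
        {
                low_level_alloc(pc);
                low_level_alloc(stack);
        }

        int main(void)
        {
                preload_path(0x100, 0x2000);    /* second call silently rejected */
                return fault_path(0x200);
        }
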
Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/stab.c 2004-07-30 13:17:34.828858504 +1000 +++ working-2.6/arch/ppc64/mm/stab.c 2004-07-30 13:39:57.338854248 +1000 @@ -106,30 +106,14 @@ return (global_entry | (castout_entry & 0x7)); } -static inline void __ste_allocate(unsigned long esid, unsigned long vsid) -{ - unsigned char stab_entry; - unsigned long offset; - - stab_entry = make_ste(get_paca()->stab_addr, esid, vsid); - - if ((esid << SID_SHIFT) >= KERNELBASE) - return; - - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = stab_entry; - else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; -} - /* - * Allocate a segment table entry for the given ea. + * Allocate a segment table entry for the given ea and mm */ -int ste_allocate(unsigned long ea) +static int __ste_allocate(unsigned long ea, struct mm_struct *mm) { unsigned long vsid; + unsigned char stab_entry; + unsigned long offset; /* Check for invalid effective addresses. */ if (!IS_VALID_EA(ea)) @@ -139,19 +123,34 @@ if (ea >= KERNELBASE) { vsid = get_kernel_vsid(ea); } else { - if (!current->mm) + if (! mm) return 1; - vsid = get_vsid(current->mm->context.id, ea); + vsid = get_vsid(mm->context.id, ea); } - __ste_allocate(GET_ESID(ea), vsid); - /* Order update */ - asm volatile("sync":::"memory"); + stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); + + if (ea < KERNELBASE) { + offset = __get_cpu_var(stab_cache_ptr); + if (offset < NR_STAB_CACHE_ENTRIES) + __get_cpu_var(stab_cache[offset++]) = stab_entry; + else + offset = NR_STAB_CACHE_ENTRIES+1; + __get_cpu_var(stab_cache_ptr) = offset; + + /* Order update */ + asm volatile("sync":::"memory"); + } return 0; } +int ste_allocate(unsigned long ea) +{ + return __ste_allocate(ea, current->mm); +} + /* * preload some userspace segments into the segment table. */ @@ -160,34 +159,24 @@ unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; - unsigned long vsid; if (test_tsk_thread_flag(tsk, TIF_32BIT)) unmapped_base = TASK_UNMAPPED_BASE_USER32; else unmapped_base = TASK_UNMAPPED_BASE_USER64; - if (!IS_VALID_EA(pc) || (pc >= KERNELBASE)) - return; - vsid = get_vsid(mm->context.id, pc); - __ste_allocate(GET_ESID(pc), vsid); + __ste_allocate(pc, mm); if (GET_ESID(pc) == GET_ESID(stack)) return; - if (!IS_VALID_EA(stack) || (stack >= KERNELBASE)) - return; - vsid = get_vsid(mm->context.id, stack); - __ste_allocate(GET_ESID(stack), vsid); + __ste_allocate(stack, mm); if ((GET_ESID(pc) == GET_ESID(unmapped_base)) || (GET_ESID(stack) == GET_ESID(unmapped_base))) return; - if (!IS_VALID_EA(unmapped_base) || (unmapped_base >= KERNELBASE)) - return; - vsid = get_vsid(mm->context.id, unmapped_base); - __ste_allocate(GET_ESID(unmapped_base), vsid); + __ste_allocate(unmapped_base, mm); /* Order update */ asm volatile("sync" : : : "memory"); -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:15:57 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:15:57 +1000 Subject: [5/5] STAB cleanup - replace flush_stab() with switch_stab() In-Reply-To: <20040803021524.GF3056@zax> References: <20040803021235.GB3056@zax> <20040803021328.GC3056@zax> <20040803021347.GD3056@zax> <20040803021409.GE3056@zax> <20040803021524.GF3056@zax> Message-ID: <20040803021557.GG3056@zax> preload_stab() is only ever called (once) from flush_stab(), and flush_stab() is only ever called from switch_mm(). So, combine both functions into the more accurately named switch_stab(), called from switch_mm(). Signed-off-by: David Gibson Index: working-2.6/include/asm-ppc64/mmu_context.h =================================================================== --- working-2.6.orig/include/asm-ppc64/mmu_context.h 2004-07-30 14:13:32.932890776 +1000 +++ working-2.6/include/asm-ppc64/mmu_context.h 2004-07-30 14:13:35.017837008 +1000 @@ -135,7 +135,7 @@ spin_unlock_irqrestore(&mmu_context_queue.lock, flags); } -extern void flush_stab(struct task_struct *tsk, struct mm_struct *mm); +extern void switch_stab(struct task_struct *tsk, struct mm_struct *mm); extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); /* @@ -163,7 +163,7 @@ if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) switch_slb(tsk, next); else - flush_stab(tsk, next); + switch_stab(tsk, next); } #define deactivate_mm(tsk,mm) do { } while (0) Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/stab.c 2004-07-30 14:13:34.968844456 +1000 +++ working-2.6/arch/ppc64/mm/stab.c 2004-07-30 14:13:55.314811992 +1000 @@ -152,42 +148,18 @@ } /* - * preload some userspace segments into the segment table. + * Do the segment table work for a context switch: flush all user + * entries from the table, then preload some probably useful entries + * for the new task */ -static void preload_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - __ste_allocate(pc, mm); - - if (GET_ESID(pc) == GET_ESID(stack)) - return; - - __ste_allocate(stack, mm); - - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) - return; - - __ste_allocate(unmapped_base, mm); - - /* Order update */ - asm volatile("sync" : : : "memory"); -} - -/* Flush all user entries from the segment table of the current processor. */ -void flush_stab(struct task_struct *tsk, struct mm_struct *mm) +void switch_stab(struct task_struct *tsk, struct mm_struct *mm) { struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; struct stab_entry *ste; unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; /* Force previous translations to complete. 
DRENG */ asm volatile("isync" : : : "memory"); @@ -222,7 +194,27 @@ __get_cpu_var(stab_cache_ptr) = 0; - preload_stab(tsk, mm); + /* Now preload some entries for the new task */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + __ste_allocate(pc, mm); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + __ste_allocate(stack, mm); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + __ste_allocate(unmapped_base, mm); + + /* Order update */ + asm volatile("sync" : : : "memory"); } extern void slb_initialize(void); -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From haveblue at us.ibm.com Tue Aug 3 18:21:41 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Tue, 03 Aug 2004 01:21:41 -0700 Subject: [PATCH] suppress 'store_purr' unused warning Message-ID: <1091521301.27397.3660.camel@nighthawk> Some new sysfs macros declare a store_purr() function that never gets used, which makes a compiler warning happen. Suppress the warning with an unused attribute. Can you tell that I'm using compiler warnings heavily in my CONFIG_NONLINEAR debugging? :) -- Dave -------------- next part -------------- A non-text attachment was scrubbed... Name: A3-ppc64-store_purr_unused.patch Type: text/x-patch Size: 842 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040803/2a3c317e/attachment.bin From sharada at in.ibm.com Tue Aug 3 23:15:27 2004 From: sharada at in.ibm.com (R Sharada) Date: Tue, 3 Aug 2004 18:45:27 +0530 Subject: cpumask move patch - RFC Message-ID: <20040803131527.GA4056@in.ibm.com> Hello, This is part of an attempt to clean up some of the kernel data structure initialisations from prom.c and move to later boot code. This patch moves the cpumask data structure initialisations from prom_hold_cpus() to setup_system(). The patch is against the 2.6.8-rc2 linus bitkeeper tree. Kindly request review and comments on this patch. 
Thanks and Regards, Sharada -------------- next part -------------- diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c 2004-08-03 02:12:58.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c 2004-08-04 07:15:25.747997352 -0700 @@ -77,6 +77,8 @@ void pSeries_calibrate_decr(void); void fwnmi_init(void); extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* from head.S */ +void cpumask_setup(void); + int fwnmi_active; /* TRUE if an FWNMI handler is present */ dev_t boot_dev; @@ -468,3 +470,91 @@ setup_default_decr(); } + +void cpumask_setup() +{ + unsigned long ind; + struct device_node *np = NULL; + int cpuid = 0; + unsigned int *reg = NULL; + char *statusp = NULL; + int *propsize = NULL; + unsigned int cpu_threads; + + printk(KERN_INFO "cpumask_setup\n"); + /* On pmac, we just fill out the various global bitmasks and + * arrays indicating our CPUs are here, they are actually started + * later on from pmac_smp + */ + if (systemcfg->platform == PLATFORM_POWERMAC) { + while ((np = of_find_node_by_type(np, "cpu"))) { + reg = (unsigned int *)get_property(np, "reg", NULL); +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (*reg == 0) + cpu_set(cpuid, cpu_online_map); +#endif /* CONFIG_SMP */ + cpuid++; + } + of_node_put(np); + return; + } + + while ((np = of_find_node_by_type(np, "cpu"))) { + + statusp = (char *)get_property(np, "status", NULL); + if (strcmp(statusp, "okay") != 0) + continue; + + reg = (unsigned int *)get_property(np, "reg", NULL); + + propsize = (int *)get_property(np, "ibm,ppc-interrupt-server#s", NULL); + if (*propsize < 0) { + /* no property. old hardware has no SMT */ + cpu_threads = 1; + } else { + /* We have a threaded processor */ + cpu_threads = *propsize / sizeof(u32); + if (cpu_threads > 2) + cpu_threads = 1; /* ToDo: panic? */ + } + +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (cpuid == boot_cpuid) + cpu_set(cpuid, cpu_online_map); + + /* set the secondary threads into the cpuid mask */ + for (ind=1; ind < cpu_threads; ind++) { + cpuid++; + if (cpuid >= NR_CPUS) + continue; + if (naca->smt_state) { + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_present_at_boot); + } + } +#endif + cpuid++; + } + of_node_put(np); + +#ifdef CONFIG_HMT + /* Only enable HMT on processors that provide support. */ + if (__is_processor(PV_PULSAR) || + __is_processor(PV_ICESTAR) || + __is_processor(PV_SSTAR)) { + + for (ind = 0; ind < NR_CPUS; ind += 2) { + if (!cpu_online(ind)) + continue; + cpu_set(ind+1, cpu_possible_map); + } + } +#endif + return; +} diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c 2004-08-04 06:10:30.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c 2004-08-04 06:12:56.000000000 -0700 @@ -939,13 +939,6 @@ prom_getprop(node, "reg", ®, sizeof(reg)); lpaca[cpuid].hw_cpu_id = reg; -#ifdef CONFIG_SMP - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); - if (reg == 0) - cpu_set(cpuid, RELOC(cpu_online_map)); -#endif /* CONFIG_SMP */ cpuid++; } return; @@ -1042,9 +1035,6 @@ #ifdef CONFIG_SMP /* Set the number of active processors. 
*/ _systemcfg->processorCount++; - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); #endif } else { prom_printf("... failed: %x\n", *acknowledge); @@ -1053,10 +1043,6 @@ #ifdef CONFIG_SMP else { prom_printf("%x : booting cpu %s\n", cpuid, path); - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_online_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); } #endif next: @@ -1070,8 +1056,6 @@ prom_printf("%x : preparing thread ... ", interrupt_server[i]); if (_naca->smt_state) { - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); prom_printf("available\n"); } else { prom_printf("not available\n"); @@ -1102,7 +1086,6 @@ } } /* cpu_set(i+1, cpu_online_map); */ - cpu_set(i+1, RELOC(cpu_possible_map)); } _systemcfg->processorCount *= 2; } else { diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c 2004-08-03 02:12:59.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c 2004-08-04 06:15:27.000000000 -0700 @@ -76,6 +76,7 @@ extern void pseries_secondary_smp_init(unsigned long); extern int idle_setup(void); extern void vpa_init(int cpu); +extern void cpumask_setup(void); unsigned long decr_overclock = 1; unsigned long decr_overclock_proc0 = 1; @@ -229,6 +230,7 @@ register_console(&udbg_console); __irq_offset_value = NUM_ISA_INTERRUPTS; finish_device_tree(); + cpumask_setup(); chrp_init(r3, r4, r5, r6, r7); #ifdef CONFIG_SMP @@ -251,6 +253,7 @@ #ifdef CONFIG_PPC_PMAC if (systemcfg->platform == PLATFORM_POWERMAC) { finish_device_tree(); + cpumask_setup(); pmac_init(r3, r4, r5, r6, r7); } #endif /* CONFIG_PPC_PMAC */ From nathanl at austin.ibm.com Wed Aug 4 09:18:28 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Tue, 3 Aug 2004 18:18:28 -0500 Subject: [patch 1/4] Use platform numbering of cpus for hypervisor calls Message-ID: <200408032317.i73NHksP073408@austin.ibm.com> We were using Linux's cpu numbering for cpu-related hypervisor calls (e.g. vpa registration, H_CONFER). It happened to work most of the time because Linux and the hypervisor usually, but not always, have the same numbering for cpus. 
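A toy model of what was going wrong (user-space sketch with made-up ids; the actual fix is simply translating with get_hard_smp_processor_id() before the hcall, as in the diff below):

        #include <stdio.h>

        /* Pretend mapping from Linux logical cpu ids to the ids the
         * hypervisor knows about.  On most boxes the two happen to match,
         * which is why passing the Linux id usually went unnoticed. */
        static const int hard_id[] = { 0, 2, 4, 6 };    /* made-up values */

        static int get_hard_id(int cpu)
        {
                return hard_id[cpu];
        }

        static void hcall_confer(int hv_cpu)
        {
                printf("H_CONFER directed at hypervisor cpu %d\n", hv_cpu);
        }

        int main(void)
        {
                int holder_cpu = 1;

                hcall_confer(holder_cpu);               /* wrong: Linux numbering */
                hcall_confer(get_hard_id(holder_cpu));  /* right: platform numbering */
                return 0;
        }
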
Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/smp.c~ppc64_fix_hcall_cpuids arch/ppc64/kernel/smp.c --- 2.6-tip/arch/ppc64/kernel/smp.c~ppc64_fix_hcall_cpuids 2004-08-03 18:06:53.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/smp.c 2004-08-03 18:06:53.000000000 -0500 @@ -487,11 +487,11 @@ static void __init smp_space_timers(unsi #ifdef CONFIG_PPC_PSERIES void vpa_init(int cpu) { - unsigned long flags; + unsigned long flags, pcpu = get_hard_smp_processor_id(cpu); /* Register the Virtual Processor Area (VPA) */ flags = 1UL << (63 - 18); - register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].lppaca))); + register_vpa(flags, pcpu, __pa((unsigned long)&(paca[cpu].lppaca))); } static inline void smp_xics_do_message(int cpu, int msg) diff -puN arch/ppc64/lib/locks.c~ppc64_fix_hcall_cpuids arch/ppc64/lib/locks.c --- 2.6-tip/arch/ppc64/lib/locks.c~ppc64_fix_hcall_cpuids 2004-08-03 18:06:53.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/lib/locks.c 2004-08-03 18:06:53.000000000 -0500 @@ -63,7 +63,8 @@ void __spin_yield(spinlock_t *lock) HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, ((u64)holder_cpu << 32) | yield_count); #else - plpar_hcall_norets(H_CONFER, holder_cpu, yield_count); + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), + yield_count); #endif } @@ -179,7 +180,8 @@ void __rw_yield(rwlock_t *rw) HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, ((u64)holder_cpu << 32) | yield_count); #else - plpar_hcall_norets(H_CONFER, holder_cpu, yield_count); + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), + yield_count); #endif } _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 4 09:18:34 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Tue, 3 Aug 2004 18:18:34 -0500 Subject: [patch 2/4] Use cpu_present_map in ppc64 Message-ID: <200408032317.i73NHqsP079046@austin.ibm.com> Adopt the "standard" cpu_present_map for describing cpus which are present in the system, but not necessarily online. cpu_present_map is meant to be a superset of cpu_online_map and a subset of cpu_possible_map. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/prom.c~ppc64-add-cpu_present_map arch/ppc64/kernel/prom.c --- 2.6-tip/arch/ppc64/kernel/prom.c~ppc64-add-cpu_present_map 2004-08-03 18:07:06.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/prom.c 2004-08-03 18:07:06.000000000 -0500 @@ -943,6 +943,7 @@ static void __init prom_hold_cpus(unsign cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); + cpu_set(cpuid, RELOC(cpu_present_map)); if (reg == 0) cpu_set(cpuid, RELOC(cpu_online_map)); #endif /* CONFIG_SMP */ @@ -1045,6 +1046,7 @@ static void __init prom_hold_cpus(unsign cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); + cpu_set(cpuid, RELOC(cpu_present_map)); #endif } else { prom_printf("... 
failed: %x\n", *acknowledge); @@ -1057,6 +1059,7 @@ static void __init prom_hold_cpus(unsign cpu_set(cpuid, RELOC(cpu_possible_map)); cpu_set(cpuid, RELOC(cpu_online_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); + cpu_set(cpuid, RELOC(cpu_present_map)); } #endif next: @@ -1072,6 +1075,7 @@ next: if (_naca->smt_state) { cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); + cpu_set(cpuid, RELOC(cpu_present_map)); prom_printf("available\n"); } else { prom_printf("not available\n"); @@ -1103,6 +1107,7 @@ next: } /* cpu_set(i+1, cpu_online_map); */ cpu_set(i+1, RELOC(cpu_possible_map)); + cpu_set(i+1, RELOC(cpu_present_map)); } _systemcfg->processorCount *= 2; } else { diff -puN arch/ppc64/kernel/smp.c~ppc64-add-cpu_present_map arch/ppc64/kernel/smp.c --- 2.6-tip/arch/ppc64/kernel/smp.c~ppc64-add-cpu_present_map 2004-08-03 18:07:06.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/smp.c 2004-08-03 18:07:06.000000000 -0500 @@ -127,6 +127,7 @@ static int smp_iSeries_numProcs(void) cpu_set(i, cpu_available_map); cpu_set(i, cpu_possible_map); cpu_set(i, cpu_present_at_boot); + cpu_set(i, cpu_present_map); ++np; } } _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 4 09:18:40 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Tue, 3 Aug 2004 18:18:40 -0500 Subject: [patch 3/4] Rework secondary SMT thread setup at boot Message-ID: <200408032317.i73NHwsP080586@austin.ibm.com> Our (ab)use of cpu_possible_map in setup_system to start secondary SMT threads bothers me. Mark such threads in cpu_possible_map during early boot; let RTAS tell us which present cpus are still offline later so we can start them. Also change query_cpu_stopped to not BUG when the RTAS query-cpu-stopped-state token is not available. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/prom.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/prom.c --- 2.6-tip/arch/ppc64/kernel/prom.c~ppc64-fix-secondary-smt-thread-setup 2004-08-03 18:07:13.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/prom.c 2004-08-03 18:07:13.000000000 -0500 @@ -1076,6 +1076,8 @@ next: cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); + cpu_set(cpuid, RELOC(cpu_possible_map)); + _systemcfg->processorCount++; prom_printf("available\n"); } else { prom_printf("not available\n"); diff -puN arch/ppc64/kernel/setup.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/setup.c --- 2.6-tip/arch/ppc64/kernel/setup.c~ppc64-fix-secondary-smt-thread-setup 2004-08-03 18:07:13.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/setup.c 2004-08-03 18:07:13.000000000 -0500 @@ -232,16 +232,17 @@ void setup_system(unsigned long r3, unsi chrp_init(r3, r4, r5, r6, r7); #ifdef CONFIG_SMP - /* Start secondary threads on SMT systems */ - for (i = 0; i < NR_CPUS; i++) { - if (cpu_available(i) && !cpu_possible(i)) { + /* Start secondary threads on SMT systems; primary threads + * are already in the running state. 
+ */ + for_each_present_cpu(i) { + if (query_cpu_stopped + (get_hard_smp_processor_id(i)) == 0) { printk("%16.16x : starting thread\n", i); rtas_call(rtas_token("start-cpu"), 3, 1, &ret, get_hard_smp_processor_id(i), (u32)*((unsigned long *)pseries_secondary_smp_init), i); - cpu_set(i, cpu_possible_map); - systemcfg->processorCount++; } } #endif /* CONFIG_SMP */ diff -puN arch/ppc64/kernel/smp.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/smp.c --- 2.6-tip/arch/ppc64/kernel/smp.c~ppc64-fix-secondary-smt-thread-setup 2004-08-03 18:07:13.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/smp.c 2004-08-03 18:07:13.000000000 -0500 @@ -225,7 +225,6 @@ static void __devinit smp_openpic_setup_ do_openpic_setup_cpu(); } -#ifdef CONFIG_HOTPLUG_CPU /* Get state of physical CPU. * Return codes: * 0 - The processor is in the RTAS stopped state @@ -234,13 +233,14 @@ static void __devinit smp_openpic_setup_ * -1 - Hardware Error * -2 - Hardware Busy, Try again later. */ -static int query_cpu_stopped(unsigned int pcpu) +int query_cpu_stopped(unsigned int pcpu) { int cpu_status; int status, qcss_tok; qcss_tok = rtas_token("query-cpu-stopped-state"); - BUG_ON(qcss_tok == RTAS_UNKNOWN_SERVICE); + if (qcss_tok == RTAS_UNKNOWN_SERVICE) + return -1; status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); if (status != 0) { printk(KERN_ERR @@ -251,6 +251,8 @@ static int query_cpu_stopped(unsigned in return cpu_status; } +#ifdef CONFIG_HOTPLUG_CPU + int __cpu_disable(void) { /* FIXME: go put this in a header somewhere */ diff -puN include/asm-ppc64/smp.h~ppc64-fix-secondary-smt-thread-setup include/asm-ppc64/smp.h --- 2.6-tip/include/asm-ppc64/smp.h~ppc64-fix-secondary-smt-thread-setup 2004-08-03 18:07:13.000000000 -0500 +++ 2.6-tip-nathanl/include/asm-ppc64/smp.h 2004-08-03 18:07:13.000000000 -0500 @@ -73,6 +73,7 @@ void smp_init_pSeries(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern void cpu_die(void) __attribute__((noreturn)); +extern int query_cpu_stopped(unsigned int pcpu); #endif /* !(CONFIG_SMP) */ #define get_hard_smp_processor_id(CPU) (paca[(CPU)].hw_cpu_id) _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 4 09:18:46 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Tue, 3 Aug 2004 18:18:46 -0500 Subject: [patch 4/4] Remove unnecessary cpu maps (available, present_at_boot) Message-ID: <200408032318.i73NI4sP072766@austin.ibm.com> With cpu_present_map, we don't need these any longer. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/prom.c~ppc64-remove-unnecessary-cpu-maps arch/ppc64/kernel/prom.c --- 2.6-tip/arch/ppc64/kernel/prom.c~ppc64-remove-unnecessary-cpu-maps 2004-08-03 18:07:20.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/prom.c 2004-08-03 18:07:20.000000000 -0500 @@ -940,9 +940,7 @@ static void __init prom_hold_cpus(unsign lpaca[cpuid].hw_cpu_id = reg; #ifdef CONFIG_SMP - cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); if (reg == 0) cpu_set(cpuid, RELOC(cpu_online_map)); @@ -1043,9 +1041,7 @@ static void __init prom_hold_cpus(unsign #ifdef CONFIG_SMP /* Set the number of active processors. 
*/ _systemcfg->processorCount++; - cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); #endif } else { @@ -1055,10 +1051,8 @@ static void __init prom_hold_cpus(unsign #ifdef CONFIG_SMP else { prom_printf("%x : booting cpu %s\n", cpuid, path); - cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); cpu_set(cpuid, RELOC(cpu_online_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); } #endif @@ -1073,8 +1067,6 @@ next: prom_printf("%x : preparing thread ... ", interrupt_server[i]); if (_naca->smt_state) { - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); _systemcfg->processorCount++; diff -puN arch/ppc64/kernel/smp.c~ppc64-remove-unnecessary-cpu-maps arch/ppc64/kernel/smp.c --- 2.6-tip/arch/ppc64/kernel/smp.c~ppc64-remove-unnecessary-cpu-maps 2004-08-03 18:07:20.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/smp.c 2004-08-03 18:07:20.000000000 -0500 @@ -59,8 +59,6 @@ unsigned long cache_decay_ticks; cpumask_t cpu_possible_map = CPU_MASK_NONE; cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t cpu_available_map = CPU_MASK_NONE; -cpumask_t cpu_present_at_boot = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(cpu_possible_map); @@ -124,9 +122,7 @@ static int smp_iSeries_numProcs(void) np = 0; for (i=0; i < NR_CPUS; ++i) { if (paca[i].lppaca.xDynProcStatus < 2) { - cpu_set(i, cpu_available_map); cpu_set(i, cpu_possible_map); - cpu_set(i, cpu_present_at_boot); cpu_set(i, cpu_present_map); ++np; } @@ -878,7 +874,7 @@ int __devinit __cpu_up(unsigned int cpu) int c; /* At boot, don't bother with non-present cpus -JSCHOPP */ - if (system_state == SYSTEM_BOOTING && !cpu_present_at_boot(cpu)) + if (system_state == SYSTEM_BOOTING && !cpu_present(cpu)) return -ENOENT; paca[cpu].prof_counter = 1; diff -puN arch/ppc64/kernel/xics.c~ppc64-remove-unnecessary-cpu-maps arch/ppc64/kernel/xics.c --- 2.6-tip/arch/ppc64/kernel/xics.c~ppc64-remove-unnecessary-cpu-maps 2004-08-03 18:07:20.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/xics.c 2004-08-03 18:07:20.000000000 -0500 @@ -548,7 +548,7 @@ nextnode: #ifdef CONFIG_SMP for_each_cpu(i) { /* FIXME: Do this dynamically! --RR */ - if (!cpu_present_at_boot(i)) + if (!cpu_present(i)) continue; xics_per_cpu[i] = __ioremap((ulong)inodes[get_hard_smp_processor_id(i)].addr, (ulong)inodes[get_hard_smp_processor_id(i)].size, diff -puN include/asm-ppc64/smp.h~ppc64-remove-unnecessary-cpu-maps include/asm-ppc64/smp.h --- 2.6-tip/include/asm-ppc64/smp.h~ppc64-remove-unnecessary-cpu-maps 2004-08-03 18:07:20.000000000 -0500 +++ 2.6-tip-nathanl/include/asm-ppc64/smp.h 2004-08-03 18:07:20.000000000 -0500 @@ -36,23 +36,6 @@ extern void smp_message_recv(int, struct #define smp_processor_id() (get_paca()->paca_index) #define hard_smp_processor_id() (get_paca()->hw_cpu_id) -/* - * Retrieve the state of a CPU: - * online: CPU is in a normal run state - * possible: CPU is a candidate to be made online - * available: CPU is candidate for the 'possible' pool - * Used to get SMT threads started at boot time. - * present_at_boot: CPU was available at boot time. Used in DLPAR - * code to handle special cases for processor start up. 
- */ -extern cpumask_t cpu_present_at_boot; -extern cpumask_t cpu_online_map; -extern cpumask_t cpu_possible_map; -extern cpumask_t cpu_available_map; - -#define cpu_present_at_boot(cpu) cpu_isset(cpu, cpu_present_at_boot) -#define cpu_available(cpu) cpu_isset(cpu, cpu_available_map) - /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. * * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 4 09:19:42 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Tue, 03 Aug 2004 18:19:42 -0500 Subject: [patch 1/4] Use platform numbering of cpus for hypervisor calls In-Reply-To: <200408032317.i73NHksP073408@austin.ibm.com> References: <200408032317.i73NHksP073408@austin.ibm.com> Message-ID: <1091575182.30125.1.camel@biclops.private.network> I should note that these patches are against 2.6.8-rc2-bk13. Nathan ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jhe at us.ibm.com Thu Aug 5 07:02:43 2004 From: jhe at us.ibm.com (John Engel) Date: Wed, 04 Aug 2004 16:02:43 -0500 Subject: [PATCH] 32-bit ptrace geteventmsg fix Message-ID: <41114EF3.5030607@us.ibm.com> Here's a fix for the 32-bit PTRACE_GETEVENTMSG ptrace call that handles the passing of a 32-bit user address. Please apply, thanks. Signed-off-by: John Engel --- linux-2.6.5-7.97/arch/ppc64/kernel/ptrace32.c.orig 2004-07-29 13:52:42.000000000 -0500 +++ linux-2.6.5-7.97/arch/ppc64/kernel/ptrace32.c 2004-07-29 13:43:02.000000000 -0500 @@ -413,7 +413,9 @@ break; } - + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) data); + break; default: ret = ptrace_request(child, request, addr, data); ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Thu Aug 5 07:10:46 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 5 Aug 2004 07:10:46 +1000 Subject: [PATCH] 32-bit ptrace geteventmsg fix In-Reply-To: <41114EF3.5030607@us.ibm.com> References: <41114EF3.5030607@us.ibm.com> Message-ID: <20040804211046.GU30253@krispykreme> Hi John, Looks good. Anton -- John Engel : Here's a fix for the 32-bit PTRACE_GETEVENTMSG ptrace call that handles the passing of a 32-bit user address. Please apply, thanks. Signed-off-by: John Engel Signed-off-by: Anton Blanchard ===== arch/ppc64/kernel/ptrace32.c 1.11 vs edited ===== --- 1.11/arch/ppc64/kernel/ptrace32.c Thu Jun 10 16:21:41 2004 +++ edited/arch/ppc64/kernel/ptrace32.c Thu Aug 5 07:08:01 2004 @@ -404,7 +404,9 @@ break; } - + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) data); + break; default: ret = ptrace_request(child, request, addr, data); ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jhe at us.ibm.com Thu Aug 5 07:16:19 2004 From: jhe at us.ibm.com (John Engel) Date: Wed, 04 Aug 2004 16:16:19 -0500 Subject: [PATCH] [trivial] clear iSeries refcode field Message-ID: <41115223.6090305@us.ibm.com> This patch clears the refcode field when booting an iSeries partition. Please apply if there are no problems. Thanks. 
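The mechanism is just a late_initcall(), which runs near the end of boot after the device initcalls, blanking the progress/SRC line. In outline (a sketch with an invented function name, assuming ppc_md.progress is populated on iSeries):

	#include <linux/init.h>
	#include <asm/machdep.h>

	static int __init clear_refcode(void)
	{
		/* write a blank progress string to clear the panel */
		ppc_md.progress(" ", 0xffff);
		return 0;
	}
	late_initcall(clear_refcode);
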
Signed-off-by: John Engel --- arch/ppc64/kernel/iSeries_setup.c.orig 2004-05-17 15:48:53.000000000 -0500 +++ arch/ppc64/kernel/iSeries_setup.c 2004-05-17 16:32:46.000000000 -0500 @@ -852,3 +852,12 @@ } } } + +int __init iSeries_src_init(void) +{ + /* clear the progress line */ + ppc_md.progress(" ", 0xffff); + return 0; +} + +late_initcall(iSeries_src_init); ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Thu Aug 5 07:30:54 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 5 Aug 2004 07:30:54 +1000 Subject: [PATCH] [trivial] clear iSeries refcode field In-Reply-To: <41115223.6090305@us.ibm.com> References: <41115223.6090305@us.ibm.com> Message-ID: <20040804213054.GV30253@krispykreme> Hi John, > This patch clears the refcode field when booting an iSeries partition. > Please apply if there are no problems. Thanks. > > Signed-off-by: John Engel Thanks, but it looks like this one is already in current BK. Anton > --- arch/ppc64/kernel/iSeries_setup.c.orig 2004-05-17 15:48:53.000000000 > -0500 > +++ arch/ppc64/kernel/iSeries_setup.c 2004-05-17 16:32:46.000000000 -0500 > @@ -852,3 +852,12 @@ > } > } > } > + > +int __init iSeries_src_init(void) > +{ > + /* clear the progress line */ > + ppc_md.progress(" ", 0xffff); > + return 0; > +} > + > +late_initcall(iSeries_src_init); ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Thu Aug 5 08:58:07 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 5 Aug 2004 08:58:07 +1000 Subject: [PATCH] suppress 'store_purr' unused warning In-Reply-To: <1091521301.27397.3660.camel@nighthawk> References: <1091521301.27397.3660.camel@nighthawk> Message-ID: <20040804225807.GW30253@krispykreme> Hi, > Some new sysfs macros declare a store_purr() function that never gets > used, which makes a compiler warning happen. Suppress the warning with > an unused attribute. Looks good to me, but could you use the __attribute_used__ compiler.h macro to guard against the ever changing gcc mind? Anton > Can you tell that I'm using compiler warnings heavily in my > CONFIG_NONLINEAR debugging? :) > > -- Dave > > > > > --- > > memhotplug-dave/arch/ppc64/kernel/sysfs.c | 4 ++-- > 1 files changed, 2 insertions(+), 2 deletions(-) > > diff -puN arch/ppc64/kernel/sysfs.c~A3-ppc64-store_purr_unused arch/ppc64/kernel/sysfs.c > --- memhotplug/arch/ppc64/kernel/sysfs.c~A3-ppc64-store_purr_unused Tue Aug 3 01:15:51 2004 > +++ memhotplug-dave/arch/ppc64/kernel/sysfs.c Tue Aug 3 01:17:17 2004 > @@ -209,8 +209,8 @@ static ssize_t show_##NAME(struct sys_de > unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ > return sprintf(buf, "%lx\n", val); \ > } \ > -static ssize_t store_##NAME(struct sys_device *dev, const char *buf, \ > - size_t count) \ > +static ssize_t __attribute__((unused)) \ > + store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ > { \ > struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ > unsigned long val; \ > > _ ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From haveblue at us.ibm.com Thu Aug 5 09:13:40 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Wed, 04 Aug 2004 16:13:40 -0700 Subject: [PATCH] suppress 'store_purr' unused warning In-Reply-To: <20040804225807.GW30253@krispykreme> References: <1091521301.27397.3660.camel@nighthawk> <20040804225807.GW30253@krispykreme> Message-ID: <1091661220.27397.6860.camel@nighthawk> On Wed, 2004-08-04 at 15:58, Anton Blanchard wrote: > Hi, > > > Some new sysfs macros declare a store_purr() function that never gets > > used, which makes a compiler warning happen. Suppress the warning with > > an unused attribute. > > Looks good to me, but could you use the __attribute_used__ compiler.h > macro to guard against the ever changing gcc mind? Updated patch attached. -- Dave -------------- next part -------------- A non-text attachment was scrubbed... Name: A3-ppc64-store_purr_unused.patch Type: text/x-patch Size: 1050 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040804/5a5735d5/attachment.bin From anton at samba.org Thu Aug 5 12:56:49 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 5 Aug 2004 12:56:49 +1000 Subject: [PATCH] suppress 'store_purr' unused warning In-Reply-To: <1091661220.27397.6860.camel@nighthawk> References: <1091521301.27397.3660.camel@nighthawk> <20040804225807.GW30253@krispykreme> <1091661220.27397.6860.camel@nighthawk> Message-ID: <20040805025649.GE30253@krispykreme> > Updated patch attached. Thanks Dave, Anton -- Some new sysfs macros declare a store_purr() function that never gets used, which makes a compiler warning happen. Suppress the warning with the used attribute. Signed-off-by: Dave Hansen Signed-off-by: Anton Blanchard diff -puN arch/ppc64/kernel/sysfs.c~A3-ppc64-store_purr_unused arch/ppc64/kernel/sysfs.c --- memhotplug/arch/ppc64/kernel/sysfs.c~A3-ppc64-store_purr_unused Wed Aug 4 16:08:52 2004 +++ memhotplug-dave/arch/ppc64/kernel/sysfs.c Wed Aug 4 16:12:35 2004 @@ -209,8 +209,8 @@ static ssize_t show_##NAME(struct sys_de unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ return sprintf(buf, "%lx\n", val); \ } \ -static ssize_t store_##NAME(struct sys_device *dev, const char *buf, \ - size_t count) \ +static ssize_t __attribute_used__ \ + store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ unsigned long val; \ _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Fri Aug 6 02:01:13 2004 From: anton at samba.org (Anton Blanchard) Date: Fri, 6 Aug 2004 02:01:13 +1000 Subject: [PATCH] remove linux,pci-domain from OFDT In-Reply-To: <1090883181.5914.4.camel@sinatra.austin.ibm.com> References: <1090883181.5914.4.camel@sinatra.austin.ibm.com> Message-ID: <20040805160112.GJ30253@krispykreme> Hi, > The patch below scraps the creation of the "linux,pci-domain" property in the > OF device tree for each PCI Host Bridge. This seems appropriate for the > following reasons: I think Martin wanted it for some userspace tools. However, if it isnt even making it into the device-tree, then I agree its a good candidate for removal :) Anton > 1) It isn't referenced/used in the kernel. > 2) It isn't exported to userspace, since it's added after /proc/device-tree > is created. > 3) Even if it was correctly exported to userspace, the same info is already > available in sysfs. > > Please apply, if there are no problems. 
> > Thanks- > John > > Signed-off-by: John Rose > > diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c > --- a/arch/ppc64/kernel/pSeries_pci.c Mon Jul 26 17:50:29 2004 > +++ b/arch/ppc64/kernel/pSeries_pci.c Mon Jul 26 17:50:29 2004 > @@ -402,7 +402,6 @@ > int *bus_range; > char *model; > enum phb_types phb_type; > - struct property *of_prop; > > model = (char *)get_property(dev, "model", NULL); > > @@ -448,21 +447,6 @@ > kfree(phb); > return NULL; > } > - > - of_prop = (struct property *)alloc_bootmem(sizeof(struct property) + > - sizeof(phb->global_number)); > - > - if (!of_prop) { > - kfree(phb); > - return NULL; > - } > - > - memset(of_prop, 0, sizeof(struct property)); > - of_prop->name = "linux,pci-domain"; > - of_prop->length = sizeof(phb->global_number); > - of_prop->value = (unsigned char *)&of_prop[1]; > - memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number)); > - prom_add_property(dev, of_prop); > > phb->first_busno = bus_range[0]; > phb->last_busno = bus_range[1]; ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From johnrose at austin.ibm.com Fri Aug 6 03:21:13 2004 From: johnrose at austin.ibm.com (John Rose) Date: Thu, 05 Aug 2004 12:21:13 -0500 Subject: [PATCH] remove linux,pci-domain from OFDT In-Reply-To: <20040805160112.GJ30253@krispykreme> References: <1090883181.5914.4.camel@sinatra.austin.ibm.com> <20040805160112.GJ30253@krispykreme> Message-ID: <1091726473.27121.11.camel@sinatra.austin.ibm.com> Hey - > I think Martin wanted it for some userspace tools. However, if it isnt > even making it into the device-tree, then I agree its a good candidate > for removal :) Talked to Martin. Apparently it is being exported through /proc, and his tools use it to correlate OF device tree path to pci name "xxxx:xx:xx.x". My mistake, please disregard my request to remove it :) Thanks- John ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Fri Aug 6 09:14:24 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Thu, 05 Aug 2004 18:14:24 -0500 Subject: cpumask move patch - RFC In-Reply-To: <20040803131527.GA4056@in.ibm.com> References: <20040803131527.GA4056@in.ibm.com> Message-ID: <1091747664.438.139.camel@pants.austin.ibm.com> On Tue, 2004-08-03 at 08:15, R Sharada wrote: > Hello, > This is part of an attempt to clean up some of the kernel data > structure initialisations from prom.c and move to later boot code. +void cpumask_setup() +{ + unsigned long ind; + struct device_node *np = NULL; + int cpuid = 0; + unsigned int *reg = NULL; + char *statusp = NULL; + int *propsize = NULL; The reg, statusp, and propsize initializations seem unnecessary. + cpuid++; + } + of_node_put(np); + return; Most of these of_node_put's are superfluous unless there's a chance you have explicitly broken out of the loop. + propsize = (int *)get_property(np, "ibm,ppc-interrupt-server#s", NULL); + if (*propsize < 0) { + /* no property. old hardware has no SMT */ + cpu_threads = 1; + } else { + /* We have a threaded processor */ + cpu_threads = *propsize / sizeof(u32); + if (cpu_threads > 2) + cpu_threads = 1; /* ToDo: panic? */ This is incorrect -- get_property does not return the size of the property; it stores the size in the third argument. The return value of get_property is a pointer to the kernel's copy of the property itself. While I agree in theory with removing all the cpumask initializations from prom_hold_cpus, I don't think simply transplanting the mess is the way to do it. 
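For the record, the lookup should read roughly like this (just a sketch of the calling convention; the variable names are arbitrary):

	int plen;
	u32 *intserv;

	intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &plen);
	if (intserv == NULL) {
		/* no property: old hardware, no SMT */
		cpu_threads = 1;
	} else {
		/* plen was filled in with the property size, in bytes */
		cpu_threads = plen / sizeof(u32);
	}
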
Wouldn't it be nice to have one loop which works on pmac and pSeries, SMP and UP, without all those #ifdef's? Nathan ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Fri Aug 6 11:54:05 2004 From: paulus at samba.org (Paul Mackerras) Date: Fri, 6 Aug 2004 11:54:05 +1000 Subject: cpumask move patch - RFC In-Reply-To: <1091747664.438.139.camel@pants.austin.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> Message-ID: <16658.58557.455887.141510@cargo.ozlabs.ibm.com> Nathan Lynch writes: > This is incorrect -- get_property does not return the size of the > property; it stores the size in the third argument. The return value of > get_property is a pointer to the kernel's copy of the property itself. Good point. I notice also that we need to check for statusp != NULL in this code: + statusp = (char *)get_property(np, "status", NULL); + if (strcmp(statusp, "okay") != 0) + continue; > While I agree in theory with removing all the cpumask initializations > from prom_hold_cpus, I don't think simply transplanting the mess is the > way to do it. Wouldn't it be nice to have one loop which works on pmac > and pSeries, SMP and UP, without all those #ifdef's? It would be nice, if it turns out to be possible. :) It's not clear that there is enough consistency between the device tree representations on the different platforms to be able to achieve that. I think what Sharada has done is a useful first step. At least now we can get rid of a bunch of RELOCs. Sharada, could you do a revised patch? Thanks, Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From sharada at in.ibm.com Fri Aug 6 14:26:27 2004 From: sharada at in.ibm.com (R Sharada) Date: Fri, 6 Aug 2004 09:56:27 +0530 Subject: cpumask move patch - RFC In-Reply-To: <16658.58557.455887.141510@cargo.ozlabs.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> Message-ID: <20040806042627.GA1288@in.ibm.com> Nathan, Paul, Thanks for your valuable feedback. I noted your comments and shall work on a revised patch and put it up for review soon. Thanks and Regards, Sharada On Fri, Aug 06, 2004 at 11:54:05AM +1000, Paul Mackerras wrote: > Nathan Lynch writes: > > > This is incorrect -- get_property does not return the size of the > > property; it stores the size in the third argument. The return value of > > get_property is a pointer to the kernel's copy of the property itself. > > Good point. > > I notice also that we need to check for statusp != NULL in this code: > > + statusp = (char *)get_property(np, "status", NULL); > + if (strcmp(statusp, "okay") != 0) > + continue; > > > > While I agree in theory with removing all the cpumask initializations > > from prom_hold_cpus, I don't think simply transplanting the mess is the > > way to do it. Wouldn't it be nice to have one loop which works on pmac > > and pSeries, SMP and UP, without all those #ifdef's? > > It would be nice, if it turns out to be possible. :) It's not clear > that there is enough consistency between the device tree > representations on the different platforms to be able to achieve that. > > I think what Sharada has done is a useful first step. At least now we > can get rid of a bunch of RELOCs. Sharada, could you do a revised > patch? > > Thanks, > Paul. ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From sharada at in.ibm.com Mon Aug 9 16:30:16 2004 From: sharada at in.ibm.com (R Sharada) Date: Mon, 9 Aug 2004 12:00:16 +0530 Subject: cpumask move patch - RFC In-Reply-To: <1091747664.438.139.camel@pants.austin.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> Message-ID: <20040809063016.GA2256@in.ibm.com> Hello, On Thu, Aug 05, 2004 at 06:14:24PM -0500, Nathan Lynch wrote: > > On Tue, 2004-08-03 at 08:15, R Sharada wrote: > > Hello, > > This is part of an attempt to clean up some of the kernel data > > structure initialisations from prom.c and move to later boot code. > > +void cpumask_setup() > +{ > + unsigned long ind; > + struct device_node *np = NULL; > + int cpuid = 0; > + unsigned int *reg = NULL; > + char *statusp = NULL; > + int *propsize = NULL; > > The reg, statusp, and propsize initializations seem unnecessary. Don't we need to initialize the pointers for cleaniness sake? That was the only idea behind setting them to NULL. > > + cpuid++; > + } > + of_node_put(np); > + return; > > Most of these of_node_put's are superfluous unless there's a chance you > have explicitly broken out of the loop. One question here though. The of_node_put() calls in the patch are actually out of the while loop. I see that of_find_node_by_type() actually increments the node->users via the of_node_get() call, and decrements for the parent node; hence for the last node, we would still need to decrement the refcount, by calling of_node_put explicitly outside of the while loop, is it not? Or did I miss something? > > + propsize = (int *)get_property(np, "ibm,ppc-interrupt-server#s", NULL); > + if (*propsize < 0) { > + /* no property. old hardware has no SMT */ > + cpu_threads = 1; > + } else { > + /* We have a threaded processor */ > + cpu_threads = *propsize / sizeof(u32); > + if (cpu_threads > 2) > + cpu_threads = 1; /* ToDo: panic? */ > > This is incorrect -- get_property does not return the size of the > property; it stores the size in the third argument. The return value of > get_property is a pointer to the kernel's copy of the property itself. Thanks and yes, that was my mistake. I will change this to read the property size correctly from the get_property() call. > > While I agree in theory with removing all the cpumask initializations > from prom_hold_cpus, I don't think simply transplanting the mess is the > way to do it. Wouldn't it be nice to have one loop which works on pmac > and pSeries, SMP and UP, without all those #ifdef's? > I agree that what you suggest is the cleaner way to go instead of a plain copy. Well, I could remove the #ifdef SMP in the code. However, as regards merging pmac and pseries, the code for pmac does not seem to really check for the cpu status, etc. Is it not needed on pmac? I am not too aware of pmac and need to see the devicetree for pmac and understand if it is different from the pseries tree. > Nathan > > I am working on the changes and will post a revised patch soon. I might need to look a little more to merge the pseries and pmac stuff together. Thanks and Regards, Sharada ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From haveblue at us.ibm.com Wed Aug 11 09:42:48 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Tue, 10 Aug 2004 16:42:48 -0700 Subject: [PATCH] include profile.c in kernel/irq.c Message-ID: <1092181368.2813.26.camel@nighthawk> This is against 2.6.8-rc4-mm1 arch/ppc64/kernel/irq.c: In function `init_irq_proc': arch/ppc64/kernel/irq.c:797: warning: implicit declaration of function `create_prof_cpu_mask' -- Dave -------------- next part -------------- A non-text attachment was scrubbed... Name: ppc64-irq.c-include-profile.h.patch Type: text/x-patch Size: 376 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040810/ce18bccc/attachment.bin From johnrose at austin.ibm.com Thu Aug 12 02:01:10 2004 From: johnrose at austin.ibm.com (John Rose) Date: Wed, 11 Aug 2004 11:01:10 -0500 Subject: [PATCH] promote hose_list to an "official" list Message-ID: <1092240070.3940.6.camel@sinatra.austin.ibm.com> This patch changes hose_list from a simple linked list to a "list.h"-style list. This is in preparation for the runtime addition/removal of PCI Host Bridges. Thanks- John Signed-off-by: John Rose diff -Nru a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pSeries_iommu.c Tue Aug 10 18:14:50 2004 @@ -90,7 +90,7 @@ static void iommu_buses_init(void) { - struct pci_controller* phb; + struct pci_controller *phb, *tmp; struct device_node *dn, *first_dn; int num_slots, num_slots_ilog2; int first_phb = 1; @@ -109,7 +109,7 @@ /* XXX Should we be using pci_root_buses instead? -ojn */ - for (phb=hose_head; phb; phb=phb->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { first_dn = ((struct device_node *)phb->arch_data)->child; /* Carve 2GB into the largest dma_window_size possible */ diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c --- a/arch/ppc64/kernel/pSeries_pci.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pSeries_pci.c Tue Aug 10 18:14:50 2004 @@ -712,9 +712,9 @@ static void phbs_fixup_io(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose=hose_head;hose;hose=hose->next) + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) remap_bus_range(hose->bus); } @@ -747,8 +747,8 @@ pci_find_hose_for_OF_device(struct device_node *node) { while (node) { - struct pci_controller *hose; - for (hose=hose_head;hose;hose=hose->next) + struct pci_controller *hose, *tmp; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) if (hose->arch_data == node) return hose; node=node->parent; diff -Nru a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c --- a/arch/ppc64/kernel/pci.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pci.c Tue Aug 10 18:14:50 2004 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -63,8 +64,7 @@ void iSeries_pcibios_init(void); -struct pci_controller *hose_head; -struct pci_controller **hose_tail = &hose_head; +LIST_HEAD(hose_list); struct pci_dma_ops pci_dma_ops; EXPORT_SYMBOL(pci_dma_ops); @@ -240,8 +240,8 @@ hose->type = controller_type; hose->global_number = global_phb_number++; - *hose_tail = hose; - hose_tail = &hose->next; + list_add_tail(&hose->list_node, &hose_list); + return hose; } @@ -281,7 +281,7 @@ static int __init pcibios_init(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; struct pci_bus *bus; #ifdef CONFIG_PPC_ISERIES @@ -292,7 +292,7 @@ printk("PCI: 
Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. */ - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { hose->last_busno = 0xff; bus = pci_scan_bus(hose->first_busno, hose->ops, hose->arch_data); diff -Nru a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h --- a/arch/ppc64/kernel/pci.h Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pci.h Tue Aug 10 18:14:50 2004 @@ -17,8 +17,6 @@ extern struct pci_controller* pci_alloc_pci_controller(enum phb_types controller_type); extern struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node); -extern struct pci_controller* hose_head; -extern struct pci_controller** hose_tail; extern int global_phb_number; diff -Nru a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c --- a/arch/ppc64/kernel/pci_dn.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pci_dn.c Tue Aug 10 18:14:50 2004 @@ -129,10 +129,10 @@ */ static void *traverse_all_pci_devices(traverse_func pre) { - struct pci_controller *phb; + struct pci_controller *phb, *tmp; void *ret; - for (phb = hose_head; phb; phb = phb->next) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) if ((ret = traverse_pci_devices(phb->arch_data, pre, phb)) != NULL) return ret; diff -Nru a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c --- a/arch/ppc64/kernel/pmac_pci.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pmac_pci.c Tue Aug 10 18:14:50 2004 @@ -672,9 +672,9 @@ static void __init pmac_fixup_phb_resources(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; hose->io_resource.start += offset; hose->io_resource.end += offset; diff -Nru a/include/asm-ppc64/pci-bridge.h b/include/asm-ppc64/pci-bridge.h --- a/include/asm-ppc64/pci-bridge.h Tue Aug 10 18:14:50 2004 +++ b/include/asm-ppc64/pci-bridge.h Tue Aug 10 18:14:50 2004 @@ -33,9 +33,9 @@ struct pci_controller { char what[8]; /* Eye catcher */ enum phb_types type; /* Type of hardware */ - struct pci_controller *next; struct pci_bus *bus; void *arch_data; + struct list_head list_node; int first_busno; int last_busno; diff -Nru a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h --- a/include/asm-ppc64/pci.h Tue Aug 10 18:14:50 2004 +++ b/include/asm-ppc64/pci.h Tue Aug 10 18:14:50 2004 @@ -233,6 +233,8 @@ extern void pcibios_add_platform_entries(struct pci_dev *dev); +extern struct list_head hose_list; + #endif /* __KERNEL__ */ #endif /* __PPC64_PCI_H */ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Thu Aug 12 02:07:10 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Wed, 11 Aug 2004 11:07:10 -0500 Subject: [patch 1/2] Avoid calling scheduler from timer_interrupt on "offline" cpu Message-ID: <200408111607.i7BG7CSY039962@austin.ibm.com> When taking a cpu offline, once the cpu has been removed from cpu_online_map, it is not supposed to service any more interrupts. This presents a problem on ppc64 because we cannot truly disable the decrementer. There used to be cpu_is_offline() checks in several scheduler functions (e.g. rebalance_tick()) which papered over this issue, but these checks were removed recently. 
So with recent 2.6 kernels, an attempt to offline a cpu can result in a crash in find_busiest_group(): Turning cpu 2 to 0 cpu 0x2: Vector: 300 (Data Access) at [c00000003a4033e0] pc: c00000000004b988: .find_busiest_group+0x234/0x420 lr: c00000000004b8bc: .find_busiest_group+0x168/0x420 sp: c00000003a403660 msr: 8000000000001032 dar: 18 dsisr: 40000000 current = 0xc000000031fdf420 paca = 0xc000000000421200 pid = 8515, comm = kstopmachine enter ? for help 2:mon> t [c00000003a403660] c00000003a403720 (unreliable) [c00000003a403780] c00000000004bcf4 .load_balance+0x78/0x2c0 [c00000003a403840] c00000000004c3e4 .rebalance_tick+0x124/0x148 [c00000003a4038f0] c000000000060170 .update_process_times+0x44/0x60 [c00000003a403980] c00000000003ab64 .smp_local_timer_interrupt+0x40/0x50 [c00000003a4039f0] c000000000015eb4 .timer_interrupt+0x100/0x40c [c00000003a403ae0] c00000000000a2b4 Decrementer_common+0xb4/0x100 Exception: 901 (Decrementer) at c00000000007b008 .restart_machine+0x20/0x30 [c00000003a403dd0] 0000000000000000 (unreliable) [c00000003a403e50] c00000000007b0dc .do_stop+0xc4/0xc8 [c00000003a403ed0] c000000000070cc8 .kthread+0x11c/0x128 [c00000003a403f90] c0000000000194dc .kernel_thread+0x4c/0x68 This patch prevents such crashes. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/time.c~ppc64-timer_interrupt-handle-offline-cpu arch/ppc64/kernel/time.c --- 2.6.8-rc4/arch/ppc64/kernel/time.c~ppc64-timer_interrupt-handle-offline-cpu 2004-08-11 10:44:27.000000000 -0500 +++ 2.6.8-rc4-nathanl/arch/ppc64/kernel/time.c 2004-08-11 10:44:27.000000000 -0500 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -281,8 +282,20 @@ int timer_interrupt(struct pt_regs * reg while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) { #ifdef CONFIG_SMP - smp_local_timer_interrupt(regs); + /* + * We cannot disable the decrementer, so in the period + * between this cpu's being marked offline in cpu_online_map + * and calling stop-self, it is taking timer interrupts. + * Avoid calling into the scheduler rebalancing code if this + * is the case. + */ + if (!cpu_is_offline(cpu)) + smp_local_timer_interrupt(regs); #endif + /* + * No need to check whether cpu is offline here; boot_cpuid + * should have been fixed up by now. + */ if (cpu == boot_cpuid) { write_seqlock(&xtime_lock); tb_last_stamp = lpaca->next_jiffy_update_tb; _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Thu Aug 12 02:07:17 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Wed, 11 Aug 2004 11:07:17 -0500 Subject: [patch 2/2] fix idle loop for offline cpu Message-ID: <200408111607.i7BG7JSY044136@austin.ibm.com> In the default_idle and dedicated_idle loops, there are some inner loops out of which we should break if the cpu is marked offline. Otherwise, it is possible for the cpu to get stuck and never actually go offline. shared_idle is unaffected. 
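In outline, both loops end up with the same shape; a condensed sketch (not the literal diff below):

	unsigned int cpu = smp_processor_id();

	while (1) {
		/* snooze until there is work to do or we are being unplugged */
		while (!need_resched() && !cpu_is_offline(cpu))
			HMT_low();

		HMT_medium();
		schedule();
		if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
			cpu_die();
	}
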
Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu arch/ppc64/kernel/idle.c --- 2.6.8-rc4/arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu 2004-08-11 10:44:29.000000000 -0500 +++ 2.6.8-rc4-nathanl/arch/ppc64/kernel/idle.c 2004-08-11 10:44:29.000000000 -0500 @@ -132,6 +132,7 @@ int iSeries_idle(void) int default_idle(void) { long oldval; + unsigned int cpu = smp_processor_id(); while (1) { oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); @@ -139,7 +140,7 @@ int default_idle(void) if (!oldval) { set_thread_flag(TIF_POLLING_NRFLAG); - while (!need_resched()) { + while (!need_resched() && !cpu_is_offline(cpu)) { barrier(); HMT_low(); } @@ -151,8 +152,7 @@ int default_idle(void) } schedule(); - if (cpu_is_offline(smp_processor_id()) && - system_state == SYSTEM_RUNNING) + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -169,8 +169,9 @@ int dedicated_idle(void) struct paca_struct *lpaca = get_paca(), *ppaca; unsigned long start_snooze; unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + unsigned int cpu = smp_processor_id(); - ppaca = &paca[smp_processor_id() ^ 1]; + ppaca = &paca[cpu ^ 1]; while (1) { /* Indicate to the HV that we are idle. Now would be @@ -182,7 +183,7 @@ int dedicated_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); start_snooze = __get_tb() + *smt_snooze_delay * tb_ticks_per_usec; - while (!need_resched()) { + while (!need_resched() && !cpu_is_offline(cpu)) { /* need_resched could be 1 or 0 at this * point. If it is 0, set it to 0, so * an IPI/Prod is sent. If it is 1, keep @@ -241,8 +242,7 @@ int dedicated_idle(void) HMT_medium(); lpaca->lppaca.xIdle = 0; schedule(); - if (cpu_is_offline(smp_processor_id()) && - system_state == SYSTEM_RUNNING) + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } return 0; _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From johnrose at austin.ibm.com Thu Aug 12 02:22:20 2004 From: johnrose at austin.ibm.com (John Rose) Date: Wed, 11 Aug 2004 11:22:20 -0500 Subject: [PATCH] [correction] promote hose_list to an "official" list Message-ID: <1092241340.3940.12.camel@sinatra.austin.ibm.com> On second thought, no need to externalize the list to include/asm-ppc64/pci.h. Here's a corrected patch. This patch changes hose_list from a simple linked list to a "list.h"-style list. This is in preparation for the runtime addition/removal of PCI Host Bridges. Thanks- John diff -Nru a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pSeries_iommu.c Wed Aug 11 10:54:25 2004 @@ -90,7 +90,7 @@ static void iommu_buses_init(void) { - struct pci_controller* phb; + struct pci_controller *phb, *tmp; struct device_node *dn, *first_dn; int num_slots, num_slots_ilog2; int first_phb = 1; @@ -109,7 +109,7 @@ /* XXX Should we be using pci_root_buses instead? 
-ojn */ - for (phb=hose_head; phb; phb=phb->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { first_dn = ((struct device_node *)phb->arch_data)->child; /* Carve 2GB into the largest dma_window_size possible */ diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c --- a/arch/ppc64/kernel/pSeries_pci.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pSeries_pci.c Wed Aug 11 10:54:25 2004 @@ -712,9 +712,9 @@ static void phbs_fixup_io(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose=hose_head;hose;hose=hose->next) + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) remap_bus_range(hose->bus); } @@ -747,8 +747,8 @@ pci_find_hose_for_OF_device(struct device_node *node) { while (node) { - struct pci_controller *hose; - for (hose=hose_head;hose;hose=hose->next) + struct pci_controller *hose, *tmp; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) if (hose->arch_data == node) return hose; node=node->parent; diff -Nru a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c --- a/arch/ppc64/kernel/pci.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pci.c Wed Aug 11 10:54:25 2004 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -63,8 +64,7 @@ void iSeries_pcibios_init(void); -struct pci_controller *hose_head; -struct pci_controller **hose_tail = &hose_head; +LIST_HEAD(hose_list); struct pci_dma_ops pci_dma_ops; EXPORT_SYMBOL(pci_dma_ops); @@ -240,8 +240,8 @@ hose->type = controller_type; hose->global_number = global_phb_number++; - *hose_tail = hose; - hose_tail = &hose->next; + list_add_tail(&hose->list_node, &hose_list); + return hose; } @@ -281,7 +281,7 @@ static int __init pcibios_init(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; struct pci_bus *bus; #ifdef CONFIG_PPC_ISERIES @@ -292,7 +292,7 @@ printk("PCI: Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. 
*/ - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { hose->last_busno = 0xff; bus = pci_scan_bus(hose->first_busno, hose->ops, hose->arch_data); diff -Nru a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h --- a/arch/ppc64/kernel/pci.h Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pci.h Wed Aug 11 10:54:25 2004 @@ -17,9 +17,7 @@ extern struct pci_controller* pci_alloc_pci_controller(enum phb_types controller_type); extern struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node); -extern struct pci_controller* hose_head; -extern struct pci_controller** hose_tail; - +extern struct list_head hose_list; extern int global_phb_number; /******************************************************************* diff -Nru a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c --- a/arch/ppc64/kernel/pci_dn.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pci_dn.c Wed Aug 11 10:54:25 2004 @@ -129,10 +129,10 @@ */ static void *traverse_all_pci_devices(traverse_func pre) { - struct pci_controller *phb; + struct pci_controller *phb, *tmp; void *ret; - for (phb = hose_head; phb; phb = phb->next) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) if ((ret = traverse_pci_devices(phb->arch_data, pre, phb)) != NULL) return ret; diff -Nru a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c --- a/arch/ppc64/kernel/pmac_pci.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pmac_pci.c Wed Aug 11 10:54:25 2004 @@ -672,9 +672,9 @@ static void __init pmac_fixup_phb_resources(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; hose->io_resource.start += offset; hose->io_resource.end += offset; diff -Nru a/include/asm-ppc64/pci-bridge.h b/include/asm-ppc64/pci-bridge.h --- a/include/asm-ppc64/pci-bridge.h Wed Aug 11 10:54:25 2004 +++ b/include/asm-ppc64/pci-bridge.h Wed Aug 11 10:54:25 2004 @@ -33,9 +33,9 @@ struct pci_controller { char what[8]; /* Eye catcher */ enum phb_types type; /* Type of hardware */ - struct pci_controller *next; struct pci_bus *bus; void *arch_data; + struct list_head list_node; int first_busno; int last_busno; ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Thu Aug 12 04:30:59 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Wed, 11 Aug 2004 13:30:59 -0500 Subject: [patch 2/2] fix idle loop for offline cpu In-Reply-To: <200408111607.i7BG7JSY044136@austin.ibm.com> References: <200408111607.i7BG7JSY044136@austin.ibm.com> Message-ID: <411A65E3.8040101@austin.ibm.com> iSeries is not cpu DLPAR capable in Linux. The "i5" machine itself is, but it is not supported by #define CONFIG_PPC_ISERIES. You actually run the pSeries Linux on it. Because of this I think it is wasteful to check cpu_is_offline here. Other than that I think both patches are great. 
> diff -puN arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu arch/ppc64/kernel/idle.c > --- 2.6.8-rc4/arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu 2004-08-11 10:44:29.000000000 -0500 > +++ 2.6.8-rc4-nathanl/arch/ppc64/kernel/idle.c 2004-08-11 10:44:29.000000000 -0500 > @@ -132,6 +132,7 @@ int iSeries_idle(void) > int default_idle(void) > { > long oldval; > + unsigned int cpu = smp_processor_id(); > > while (1) { > oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); > @@ -139,7 +140,7 @@ int default_idle(void) > if (!oldval) { > set_thread_flag(TIF_POLLING_NRFLAG); > > - while (!need_resched()) { > + while (!need_resched() && !cpu_is_offline(cpu)) { > barrier(); > HMT_low(); > } > @@ -151,8 +152,7 @@ int default_idle(void) > } > > schedule(); > - if (cpu_is_offline(smp_processor_id()) && > - system_state == SYSTEM_RUNNING) > + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) > cpu_die(); > } > ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Thu Aug 12 04:45:54 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Wed, 11 Aug 2004 13:45:54 -0500 Subject: [patch 2/2] fix idle loop for offline cpu In-Reply-To: <411A65E3.8040101@austin.ibm.com> References: <200408111607.i7BG7JSY044136@austin.ibm.com> <411A65E3.8040101@austin.ibm.com> Message-ID: <1092249954.19239.2.camel@pants.austin.ibm.com> On Wed, 2004-08-11 at 13:30, Joel Schopp wrote: > iSeries is not cpu DLPAR capable in Linux. The "i5" machine itself is, > but it is not supported by #define CONFIG_PPC_ISERIES. You actually run > the pSeries Linux on it. Because of this I think it is wasteful to > check cpu_is_offline here. > > Other than that I think both patches are great. > > > diff -puN arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu arch/ppc64/kernel/idle.c > > --- 2.6.8-rc4/arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu 2004-08-11 10:44:29.000000000 -0500 > > +++ 2.6.8-rc4-nathanl/arch/ppc64/kernel/idle.c 2004-08-11 10:44:29.000000000 -0500 > > @@ -132,6 +132,7 @@ int iSeries_idle(void) > > int default_idle(void) > > { > > long oldval; > > + unsigned int cpu = smp_processor_id(); > > The diff output is confusing -- I did not modify iSeries_idle; the hunk to which you refer actually changes default_idle, which is used on Power 4 partitions. Nathan ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Thu Aug 12 05:20:37 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Wed, 11 Aug 2004 14:20:37 -0500 Subject: [patch 2/2] fix idle loop for offline cpu In-Reply-To: <1092249954.19239.2.camel@pants.austin.ibm.com> References: <200408111607.i7BG7JSY044136@austin.ibm.com> <411A65E3.8040101@austin.ibm.com> <1092249954.19239.2.camel@pants.austin.ibm.com> Message-ID: <411A7185.5070002@austin.ibm.com> > The diff output is confusing -- I did not modify iSeries_idle; the hunk > to which you refer actually changes default_idle, which is used on Power > 4 partitions. You are right. Diff lied to me, it was default_idle, which is fine to modify. Patches look great. ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From sharada at in.ibm.com Thu Aug 12 22:17:03 2004 From: sharada at in.ibm.com (R Sharada) Date: Thu, 12 Aug 2004 17:47:03 +0530 Subject: cpumask move patch revised - RFC In-Reply-To: <20040806042627.GA1288@in.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> <20040806042627.GA1288@in.ibm.com> Message-ID: <20040812121703.GA9760@in.ibm.com> Hello, Based on the feedback, here is the revised cpumask patch that moves the cpumask initialization from prom_hold_cpus() to later boot, in setup_system(). The patch is against the 2.6.8-rc2 linus bitkeeper tree. - The get_property call has been corrected to obtain the property size from the correct argument. - The unnecessary variable initializations have been removed - check for NULL value of status incorporated. I have not removed the #ifdefs for SMP, as all the cpumask data structures, as I see them in code now, are defined for SMP systems and does not seem to be defined for UP. The merge of the POWERMAC and PSERIES #ifdefs is also deferred as I don't know a lot about the POWERMAC initialization and startup to see if the two cases can be merged. Please review and comment on the patch. Thanks and Regards, Sharada -------------- next part -------------- diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c 2004-08-03 02:12:58.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c 2004-08-13 06:02:22.808964544 -0700 @@ -77,6 +77,8 @@ void pSeries_calibrate_decr(void); void fwnmi_init(void); extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* from head.S */ +void cpumask_setup(void); + int fwnmi_active; /* TRUE if an FWNMI handler is present */ dev_t boot_dev; @@ -468,3 +470,92 @@ setup_default_decr(); } + +void cpumask_setup() +{ + unsigned long ind; + struct device_node *np = NULL; + int cpuid = 0; + unsigned int *reg; + char *statusp; + int prop; + int *propsize = ∝ + unsigned int cpu_threads; + + printk(KERN_INFO "cpumask_setup\n"); + /* On pmac, we just fill out the various global bitmasks and + * arrays indicating our CPUs are here, they are actually started + * later on from pmac_smp + */ + if (systemcfg->platform == PLATFORM_POWERMAC) { + while ((np = of_find_node_by_type(np, "cpu"))) { + reg = (unsigned int *)get_property(np, "reg", NULL); +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (*reg == 0) + cpu_set(cpuid, cpu_online_map); +#endif /* CONFIG_SMP */ + cpuid++; + } + of_node_put(np); + return; + } + + while ((np = of_find_node_by_type(np, "cpu"))) { + + statusp = (char *)get_property(np, "status", NULL); + if ((statusp == NULL) || (statusp && strcmp(statusp, "okay") != 0)) + continue; + + reg = (unsigned int *)get_property(np, "reg", NULL); + + get_property(np, "ibm,ppc-interrupt-server#s", propsize); + if (*propsize < 0) { + /* no property. old hardware has no SMT */ + cpu_threads = 1; + } else { + /* We have a threaded processor */ + cpu_threads = *propsize / sizeof(u32); + if (cpu_threads > 2) + cpu_threads = 1; /* ToDo: panic? 
*/ + } + +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (cpuid == boot_cpuid) + cpu_set(cpuid, cpu_online_map); + + /* set the secondary threads into the cpuid mask */ + for (ind=1; ind < cpu_threads; ind++) { + cpuid++; + if (cpuid >= NR_CPUS) + continue; + if (naca->smt_state) { + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_present_at_boot); + } + } +#endif /* CONFIG_SMP */ + cpuid++; + } + of_node_put(np); + +#ifdef CONFIG_HMT + /* Only enable HMT on processors that provide support. */ + if (__is_processor(PV_PULSAR) || + __is_processor(PV_ICESTAR) || + __is_processor(PV_SSTAR)) { + + for (ind = 0; ind < NR_CPUS; ind += 2) { + if (!cpu_online(ind)) + continue; + cpu_set(ind+1, cpu_possible_map); + } + } +#endif + return; +} diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c 2004-08-04 06:10:30.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c 2004-08-12 23:52:47.000000000 -0700 @@ -939,13 +939,6 @@ prom_getprop(node, "reg", ®, sizeof(reg)); lpaca[cpuid].hw_cpu_id = reg; -#ifdef CONFIG_SMP - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); - if (reg == 0) - cpu_set(cpuid, RELOC(cpu_online_map)); -#endif /* CONFIG_SMP */ cpuid++; } return; @@ -1042,9 +1035,6 @@ #ifdef CONFIG_SMP /* Set the number of active processors. */ _systemcfg->processorCount++; - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); #endif } else { prom_printf("... failed: %x\n", *acknowledge); @@ -1053,10 +1043,6 @@ #ifdef CONFIG_SMP else { prom_printf("%x : booting cpu %s\n", cpuid, path); - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_online_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); } #endif next: @@ -1069,13 +1055,6 @@ lpaca[cpuid].hw_cpu_id = interrupt_server[i]; prom_printf("%x : preparing thread ... 
", interrupt_server[i]); - if (_naca->smt_state) { - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); - prom_printf("available\n"); - } else { - prom_printf("not available\n"); - } } #endif cpuid++; @@ -1101,8 +1080,6 @@ pir & 0x3ff; } } -/* cpu_set(i+1, cpu_online_map); */ - cpu_set(i+1, RELOC(cpu_possible_map)); } _systemcfg->processorCount *= 2; } else { diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c 2004-08-03 02:12:59.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c 2004-08-04 06:15:27.000000000 -0700 @@ -76,6 +76,7 @@ extern void pseries_secondary_smp_init(unsigned long); extern int idle_setup(void); extern void vpa_init(int cpu); +extern void cpumask_setup(void); unsigned long decr_overclock = 1; unsigned long decr_overclock_proc0 = 1; @@ -229,6 +230,7 @@ register_console(&udbg_console); __irq_offset_value = NUM_ISA_INTERRUPTS; finish_device_tree(); + cpumask_setup(); chrp_init(r3, r4, r5, r6, r7); #ifdef CONFIG_SMP @@ -251,6 +253,7 @@ #ifdef CONFIG_PPC_PMAC if (systemcfg->platform == PLATFORM_POWERMAC) { finish_device_tree(); + cpumask_setup(); pmac_init(r3, r4, r5, r6, r7); } #endif /* CONFIG_PPC_PMAC */ From olh at suse.de Fri Aug 13 01:02:29 2004 From: olh at suse.de (Olaf Hering) Date: Thu, 12 Aug 2004 17:02:29 +0200 Subject: max openfirmware property size Message-ID: <20040812150229.GC28577@suse.de> Does the openfirmware spec say something about the maximum size of the 'name' and 'compatible' property? I guess the content lenght is undefined. -- USB is for mice, FireWire is for men! sUse lINUX ag, n?RNBERG ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From segher at kernel.crashing.org Fri Aug 13 01:52:50 2004 From: segher at kernel.crashing.org (Segher Boessenkool) Date: Thu, 12 Aug 2004 17:52:50 +0200 Subject: max openfirmware property size In-Reply-To: <20040812150229.GC28577@suse.de> References: <20040812150229.GC28577@suse.de> Message-ID: > Does the openfirmware spec say something about the maximum size of the > 'name' and 'compatible' property? > I guess the content lenght is undefined. It says nothing specific to "name" or "compatible". So their maximum size is the maximum size that fits into a cell, i.e. 0xffffffff on most systems. Segher ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Fri Aug 13 05:05:03 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Thu, 12 Aug 2004 14:05:03 -0500 Subject: cpumask move patch revised - RFC In-Reply-To: <20040812121703.GA9760@in.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> <20040806042627.GA1288@in.ibm.com> <20040812121703.GA9760@in.ibm.com> Message-ID: <411BBF5F.3070901@austin.ibm.com> This will surely conflict with Nathan's recent patch "[patch 4/4] Remove unnecessary cpu maps (available, present_at_boot)". I think Nathan's patch should go in first and yours reworked to match. Other comments inline below. R Sharada wrote: > Hello, > Based on the feedback, here is the revised cpumask patch that > moves the cpumask initialization from prom_hold_cpus() to later boot, in > setup_system(). > The patch is against the 2.6.8-rc2 linus bitkeeper tree. > > - The get_property call has been corrected to obtain the property size from > the correct argument. 
> - The unnecessary variable initializations have been removed > - check for NULL value of status incorporated. > > I have not removed the #ifdefs for SMP, as all the cpumask data structures, > as I see them in code now, are defined for SMP systems and does not seem to be > defined for UP. > The merge of the POWERMAC and PSERIES #ifdefs is also deferred as I don't > know a lot about the POWERMAC initialization and startup to see if the two > cases can be merged. > > Please review and comment on the patch. > > Thanks and Regards, > Sharada > > > ------------------------------------------------------------------------ > > diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c > --- linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c 2004-08-03 02:12:58.000000000 -0700 > +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c 2004-08-13 06:02:22.808964544 -0700 > @@ -77,6 +77,8 @@ > void pSeries_calibrate_decr(void); > void fwnmi_init(void); > extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* from head.S */ > +void cpumask_setup(void); > + Is this really necessary? Might it go better in a .h file somewhere? > int fwnmi_active; /* TRUE if an FWNMI handler is present */ > > dev_t boot_dev; > @@ -468,3 +470,92 @@ > > setup_default_decr(); > } > + > +void cpumask_setup() > +{ > + unsigned long ind; > + struct device_node *np = NULL; > + int cpuid = 0; > + unsigned int *reg; > + char *statusp; > + int prop; > + int *propsize = ∝ > + unsigned int cpu_threads; > + > + printk(KERN_INFO "cpumask_setup\n"); > + /* On pmac, we just fill out the various global bitmasks and > + * arrays indicating our CPUs are here, they are actually started > + * later on from pmac_smp > + */ > + if (systemcfg->platform == PLATFORM_POWERMAC) { > + while ((np = of_find_node_by_type(np, "cpu"))) { > + reg = (unsigned int *)get_property(np, "reg", NULL); > +#ifdef CONFIG_SMP > + cpu_set(cpuid, cpu_available_map); > + cpu_set(cpuid, cpu_possible_map); > + cpu_set(cpuid, cpu_present_at_boot); > + if (*reg == 0) > + cpu_set(cpuid, cpu_online_map); > +#endif /* CONFIG_SMP */ > + cpuid++; > + } Shouldn't the whole while loop and of_node_put be in the #ifdef CONFIG_SMP, as otherwise all we do is iterate over the cpus not doing anything? > + of_node_put(np); > + return; > + } > + > + while ((np = of_find_node_by_type(np, "cpu"))) { > + > + statusp = (char *)get_property(np, "status", NULL); > + if ((statusp == NULL) || (statusp && strcmp(statusp, "okay") != 0)) > + continue; > + > + reg = (unsigned int *)get_property(np, "reg", NULL); > + > + get_property(np, "ibm,ppc-interrupt-server#s", propsize); > + if (*propsize < 0) { > + /* no property. old hardware has no SMT */ > + cpu_threads = 1; > + } else { > + /* We have a threaded processor */ > + cpu_threads = *propsize / sizeof(u32); > + if (cpu_threads > 2) > + cpu_threads = 1; /* ToDo: panic? */ I think it is about time we start making code that will deal with more than 2 cpu_threads, as the processors seem inevitable and not too far off. 
> + } > + > +#ifdef CONFIG_SMP > + cpu_set(cpuid, cpu_available_map); > + cpu_set(cpuid, cpu_possible_map); > + cpu_set(cpuid, cpu_present_at_boot); > + if (cpuid == boot_cpuid) > + cpu_set(cpuid, cpu_online_map); > + > + /* set the secondary threads into the cpuid mask */ > + for (ind=1; ind < cpu_threads; ind++) { > + cpuid++; > + if (cpuid >= NR_CPUS) > + continue; > + if (naca->smt_state) { > + cpu_set(cpuid, cpu_available_map); > + cpu_set(cpuid, cpu_present_at_boot); > + } > + } > +#endif /* CONFIG_SMP */ Again I'd have the CONFIG_SMP cover more. The whole while loop and the of_node_put. > + cpuid++; > + } > + of_node_put(np); > + > +#ifdef CONFIG_HMT > + /* Only enable HMT on processors that provide support. */ > + if (__is_processor(PV_PULSAR) || > + __is_processor(PV_ICESTAR) || > + __is_processor(PV_SSTAR)) { > + > + for (ind = 0; ind < NR_CPUS; ind += 2) { > + if (!cpu_online(ind)) > + continue; > + cpu_set(ind+1, cpu_possible_map); > + } > + } > +#endif > + return; > +} > diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c > --- linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c 2004-08-04 06:10:30.000000000 -0700 > +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c 2004-08-12 23:52:47.000000000 -0700 > @@ -939,13 +939,6 @@ > prom_getprop(node, "reg", ®, sizeof(reg)); > lpaca[cpuid].hw_cpu_id = reg; > > -#ifdef CONFIG_SMP > - cpu_set(cpuid, RELOC(cpu_available_map)); > - cpu_set(cpuid, RELOC(cpu_possible_map)); > - cpu_set(cpuid, RELOC(cpu_present_at_boot)); > - if (reg == 0) > - cpu_set(cpuid, RELOC(cpu_online_map)); > -#endif /* CONFIG_SMP */ > cpuid++; > } > return; > @@ -1042,9 +1035,6 @@ > #ifdef CONFIG_SMP > /* Set the number of active processors. */ > _systemcfg->processorCount++; > - cpu_set(cpuid, RELOC(cpu_available_map)); > - cpu_set(cpuid, RELOC(cpu_possible_map)); > - cpu_set(cpuid, RELOC(cpu_present_at_boot)); > #endif > } else { > prom_printf("... failed: %x\n", *acknowledge); > @@ -1053,10 +1043,6 @@ > #ifdef CONFIG_SMP > else { > prom_printf("%x : booting cpu %s\n", cpuid, path); > - cpu_set(cpuid, RELOC(cpu_available_map)); > - cpu_set(cpuid, RELOC(cpu_possible_map)); > - cpu_set(cpuid, RELOC(cpu_online_map)); > - cpu_set(cpuid, RELOC(cpu_present_at_boot)); > } > #endif > next: > @@ -1069,13 +1055,6 @@ > lpaca[cpuid].hw_cpu_id = interrupt_server[i]; > prom_printf("%x : preparing thread ... ", > interrupt_server[i]); > - if (_naca->smt_state) { > - cpu_set(cpuid, RELOC(cpu_available_map)); > - cpu_set(cpuid, RELOC(cpu_present_at_boot)); > - prom_printf("available\n"); > - } else { > - prom_printf("not available\n"); > - } > } > #endif > cpuid++; > @@ -1101,8 +1080,6 @@ > pir & 0x3ff; > } > } > -/* cpu_set(i+1, cpu_online_map); */ > - cpu_set(i+1, RELOC(cpu_possible_map)); > } > _systemcfg->processorCount *= 2; > } else { > diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c > --- linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c 2004-08-03 02:12:59.000000000 -0700 > +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c 2004-08-04 06:15:27.000000000 -0700 > @@ -76,6 +76,7 @@ > extern void pseries_secondary_smp_init(unsigned long); > extern int idle_setup(void); > extern void vpa_init(int cpu); > +extern void cpumask_setup(void); Could this go in a .h file somewhere? 
> > unsigned long decr_overclock = 1; > unsigned long decr_overclock_proc0 = 1; > @@ -229,6 +230,7 @@ > register_console(&udbg_console); > __irq_offset_value = NUM_ISA_INTERRUPTS; > finish_device_tree(); > + cpumask_setup(); > chrp_init(r3, r4, r5, r6, r7); > > #ifdef CONFIG_SMP > @@ -251,6 +253,7 @@ > #ifdef CONFIG_PPC_PMAC > if (systemcfg->platform == PLATFORM_POWERMAC) { > finish_device_tree(); > + cpumask_setup(); > pmac_init(r3, r4, r5, r6, r7); > } > #endif /* CONFIG_PPC_PMAC */ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From roland at topspin.com Fri Aug 13 05:23:58 2004 From: roland at topspin.com (Roland Dreier) Date: Thu, 12 Aug 2004 12:23:58 -0700 Subject: JS20 kernel Message-ID: <52pt5w17ox.fsf@topspin.com> Should I expect mainline 2.6 to work on an IBM JS20? Should I start with pSeries_defconfig? Thanks, Roland ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From moilanen at austin.ibm.com Fri Aug 13 05:56:26 2004 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Thu, 12 Aug 2004 14:56:26 -0500 Subject: JS20 kernel In-Reply-To: <52pt5w17ox.fsf@topspin.com> References: <52pt5w17ox.fsf@topspin.com> Message-ID: <20040812145626.0e517ef2@localhost> > > Should I expect mainline 2.6 to work on an IBM JS20? Should I start > with pSeries_defconfig? > The mainline should work w/ everything except the broadcom adapter. It seems that the tg3 driver got regressed somewhere in the following patch. Brian Rzycki is working on getting the correct fix. Thanks, Jake # This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet 1.1722.162.1 -> 1.1722.162.2 # drivers/net/tg3.h 1.42 -> 1.43 # drivers/net/tg3.c 1.184 -> 1.185 # # -------------------------------------------- # 04/07/02 davem at nuts.davemloft.net 1.1722.162.2 # [TG3]: Fibre PHY fixes from Sun. # # - Support HW autoneg on 5704. # - On serdes, no MII reg ioctl support. # -------------------------------------------- # diff -Nru a/drivers/net/tg3.c b/drivers/net/tg3.c --- a/drivers/net/tg3.c Mon Jul 19 11:05:33 2004 +++ b/drivers/net/tg3.c Mon Jul 19 11:05:33 2004 @@ -1,8 +1,9 @@ /* * tg3.c: Broadcom Tigon3 ethernet driver. * - * Copyright (C) 2001, 2002, 2003 David S. Miller (davem at redhat.com) + * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem at redhat.com) * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik at pobox.com) + * Copyright (C) 2004 Sun Microsystems Inc. 
*/ #include @@ -1961,6 +1962,67 @@ return ret; } +static int fiber_autoneg(struct tg3 *tp, u32 *flags) +{ + int res = 0; + + if (tp->tg3_flags2 & TG3_FLG2_HW_AUTONEG) { + u32 dig_status; + + dig_status = tr32(SG_DIG_STATUS); + *flags = 0; + if (dig_status & SG_DIG_PARTNER_ASYM_PAUSE) + *flags |= MR_LP_ADV_ASYM_PAUSE; + if (dig_status & SG_DIG_PARTNER_PAUSE_CAPABLE) + *flags |= MR_LP_ADV_SYM_PAUSE; + + if ((dig_status & SG_DIG_AUTONEG_COMPLETE) && + !(dig_status & (SG_DIG_AUTONEG_ERROR | + SG_DIG_PARTNER_FAULT_MASK))) + res = 1; + } else { + struct tg3_fiber_aneginfo aninfo; + int status = ANEG_FAILED; + unsigned int tick; + u32 tmp; + + tw32_f(MAC_TX_AUTO_NEG, 0); + + tmp = tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK; + tw32_f(MAC_MODE, tmp | MAC_MODE_PORT_MODE_GMII); + udelay(40); + + tw32_f(MAC_MODE, tp->mac_mode | MAC_MODE_SEND_CONFIGS); + udelay(40); + + memset(&aninfo, 0, sizeof(aninfo)); + aninfo.flags |= MR_AN_ENABLE; + aninfo.state = ANEG_STATE_UNKNOWN; + aninfo.cur_time = 0; + tick = 0; + while (++tick < 195000) { + status = tg3_fiber_aneg_smachine(tp, &aninfo); + if (status == ANEG_DONE || status == ANEG_FAILED) + break; + + udelay(1); + } + + tp->mac_mode &= ~MAC_MODE_SEND_CONFIGS; + tw32_f(MAC_MODE, tp->mac_mode); + udelay(40); + + *flags = aninfo.flags; + + if (status == ANEG_DONE && + (aninfo.flags & (MR_AN_COMPLETE | MR_LINK_OK | + MR_LP_ADV_FULL_DUPLEX))) + res = 1; + } + + return res; +} + static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset) { u32 orig_pause_cfg; @@ -1980,6 +2042,20 @@ tw32_f(MAC_MODE, tp->mac_mode); udelay(40); + if (tp->tg3_flags2 & TG3_FLG2_HW_AUTONEG) { + /* Allow time for the hardware to auto-negotiate (195ms) */ + unsigned int tick = 0; + + while (++tick < 195000) { + if (tr32(SG_DIG_STATUS) & SG_DIG_AUTONEG_COMPLETE) + break; + udelay(1); + } + if (tick >= 195000) + printk(KERN_INFO PFX "%s: HW autoneg failed !\n", + tp->dev->name); + } + /* Reset when initting first time or we have a link. 
*/ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) || (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED)) { @@ -2031,53 +2107,18 @@ udelay(40); current_link_up = 0; - if (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) { - if (tp->link_config.autoneg == AUTONEG_ENABLE && - !(tp->tg3_flags & TG3_FLAG_GOT_SERDES_FLOWCTL)) { - struct tg3_fiber_aneginfo aninfo; - int status = ANEG_FAILED; - unsigned int tick; - u32 tmp; - - memset(&aninfo, 0, sizeof(aninfo)); - aninfo.flags |= (MR_AN_ENABLE); - - tw32(MAC_TX_AUTO_NEG, 0); - - tmp = tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK; - tw32_f(MAC_MODE, tmp | MAC_MODE_PORT_MODE_GMII); - udelay(40); - - tw32_f(MAC_MODE, tp->mac_mode | MAC_MODE_SEND_CONFIGS); - udelay(40); - - aninfo.state = ANEG_STATE_UNKNOWN; - aninfo.cur_time = 0; - tick = 0; - while (++tick < 195000) { - status = tg3_fiber_aneg_smachine(tp, &aninfo); - if (status == ANEG_DONE || - status == ANEG_FAILED) - break; - - udelay(1); - } - - tp->mac_mode &= ~MAC_MODE_SEND_CONFIGS; - tw32_f(MAC_MODE, tp->mac_mode); - udelay(40); - - if (status == ANEG_DONE && - (aninfo.flags & - (MR_AN_COMPLETE | MR_LINK_OK | - MR_LP_ADV_FULL_DUPLEX))) { + if (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) { + if (tp->link_config.autoneg == AUTONEG_ENABLE) { + u32 flags; + + if (fiber_autoneg(tp, &flags)) { u32 local_adv, remote_adv; local_adv = ADVERTISE_PAUSE_CAP; remote_adv = 0; - if (aninfo.flags & MR_LP_ADV_SYM_PAUSE) - remote_adv |= LPA_PAUSE_CAP; - if (aninfo.flags & MR_LP_ADV_ASYM_PAUSE) + if (flags & MR_LP_ADV_SYM_PAUSE) + remote_adv |= LPA_PAUSE_CAP; + if (flags & MR_LP_ADV_ASYM_PAUSE) remote_adv |= LPA_PAUSE_ASYM; tg3_setup_flow_control(tp, local_adv, remote_adv); @@ -2104,8 +2145,10 @@ } else { /* Forcing 1000FD link up. */ current_link_up = 1; + tp->tg3_flags |= TG3_FLAG_GOT_SERDES_FLOWCTL; } - } + } else + tp->tg3_flags &= ~TG3_FLAG_GOT_SERDES_FLOWCTL; tp->mac_mode &= ~MAC_MODE_LINK_POLARITY; tw32_f(MAC_MODE, tp->mac_mode); @@ -5203,6 +5246,26 @@ */ tw32_f(MAC_LOW_WMARK_MAX_RX_FRAME, 2); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 && + tp->phy_id == PHY_ID_SERDES) { + /* Enable hardware link auto-negotiation */ + u32 digctrl, txctrl; + + digctrl = SG_DIG_USING_HW_AUTONEG | SG_DIG_CRC16_CLEAR_N | + SG_DIG_LOCAL_DUPLEX_STATUS | SG_DIG_LOCAL_LINK_STATUS | + (2 << SG_DIG_SPEED_STATUS_SHIFT) | SG_DIG_FIBER_MODE | + SG_DIG_GBIC_ENABLE; + + txctrl = tr32(MAC_SERDES_CFG); + tw32_f(MAC_SERDES_CFG, txctrl | MAC_SERDES_CFG_EDGE_SELECT); + tw32_f(SG_DIG_CTRL, digctrl | SG_DIG_SOFT_RESET); + tr32(SG_DIG_CTRL); + udelay(5); + tw32_f(SG_DIG_CTRL, digctrl); + + tp->tg3_flags2 |= TG3_FLG2_HW_AUTONEG; + } + err = tg3_setup_phy(tp, 1); if (err) return err; @@ -6536,6 +6599,9 @@ case SIOCGMIIREG: { u32 mii_regval; + if (tp->phy_id == PHY_ID_SERDES) + break; /* We have no PHY */ + spin_lock_irq(&tp->lock); err = tg3_readphy(tp, data->reg_num & 0x1f, &mii_regval); spin_unlock_irq(&tp->lock); @@ -6546,6 +6612,9 @@ } case SIOCSMIIREG: + if (tp->phy_id == PHY_ID_SERDES) + break; /* We have no PHY */ + if (!capable(CAP_NET_ADMIN)) return -EPERM; diff -Nru a/drivers/net/tg3.h b/drivers/net/tg3.h --- a/drivers/net/tg3.h Mon Jul 19 11:05:33 2004 +++ b/drivers/net/tg3.h Mon Jul 19 11:05:33 2004 @@ -1,8 +1,9 @@ /* $Id: tg3.h,v 1.37.2.32 2002/03/11 12:18:18 davem Exp $ * tg3.h: Definitions for Broadcom Tigon3 ethernet driver. * - * Copyright (C) 2001, 2002 David S. Miller (davem at redhat.com) + * Copyright (C) 2001, 2002, 2003, 2004 David S. 
Miller (davem at redhat.com) * Copyright (C) 2001 Jeff Garzik (jgarzik at pobox.com) + * Copyright (C) 2004 Sun Microsystems Inc. */ #ifndef _T3_H @@ -116,6 +117,7 @@ #define CHIPREV_ID_5704_A0 0x2000 #define CHIPREV_ID_5704_A1 0x2001 #define CHIPREV_ID_5704_A2 0x2002 +#define CHIPREV_ID_5704_A3 0x2003 #define CHIPREV_ID_5705_A0 0x3000 #define CHIPREV_ID_5705_A1 0x3001 #define CHIPREV_ID_5705_A2 0x3002 @@ -518,8 +520,50 @@ #define MAC_EXTADDR_11_HIGH 0x00000588 #define MAC_EXTADDR_11_LOW 0x0000058c #define MAC_SERDES_CFG 0x00000590 +#define MAC_SERDES_CFG_EDGE_SELECT 0x00001000 #define MAC_SERDES_STAT 0x00000594 -/* 0x598 --> 0x600 unused */ +/* 0x598 --> 0x5b0 unused */ +#define SG_DIG_CTRL 0x000005b0 +#define SG_DIG_USING_HW_AUTONEG 0x80000000 +#define SG_DIG_SOFT_RESET 0x40000000 +#define SG_DIG_DISABLE_LINKRDY 0x20000000 +#define SG_DIG_CRC16_CLEAR_N 0x01000000 +#define SG_DIG_EN10B 0x00800000 +#define SG_DIG_CLEAR_STATUS 0x00400000 +#define SG_DIG_LOCAL_DUPLEX_STATUS 0x00200000 +#define SG_DIG_LOCAL_LINK_STATUS 0x00100000 +#define SG_DIG_SPEED_STATUS_MASK 0x000c0000 +#define SG_DIG_SPEED_STATUS_SHIFT 18 +#define SG_DIG_JUMBO_PACKET_DISABLE 0x00020000 +#define SG_DIG_RESTART_AUTONEG 0x00010000 +#define SG_DIG_FIBER_MODE 0x00008000 +#define SG_DIG_REMOTE_FAULT_MASK 0x00006000 +#define SG_DIG_PAUSE_MASK 0x00001800 +#define SG_DIG_GBIC_ENABLE 0x00000400 +#define SG_DIG_CHECK_END_ENABLE 0x00000200 +#define SG_DIG_SGMII_AUTONEG_TIMER 0x00000100 +#define SG_DIG_CLOCK_PHASE_SELECT 0x00000080 +#define SG_DIG_GMII_INPUT_SELECT 0x00000040 +#define SG_DIG_MRADV_CRC16_SELECT 0x00000020 +#define SG_DIG_COMMA_DETECT_ENABLE 0x00000010 +#define SG_DIG_AUTONEG_TIMER_REDUCE 0x00000008 +#define SG_DIG_AUTONEG_LOW_ENABLE 0x00000004 +#define SG_DIG_REMOTE_LOOPBACK 0x00000002 +#define SG_DIG_LOOPBACK 0x00000001 +#define SG_DIG_STATUS 0x000005b4 +#define SG_DIG_CRC16_BUS_MASK 0xffff0000 +#define SG_DIG_PARTNER_FAULT_MASK 0x00600000 /* If !MRADV_CRC16_SELECT */ +#define SG_DIG_PARTNER_ASYM_PAUSE 0x00100000 /* If !MRADV_CRC16_SELECT */ +#define SG_DIG_PARTNER_PAUSE_CAPABLE 0x00080000 /* If !MRADV_CRC16_SELECT */ +#define SG_DIG_PARTNER_HALF_DUPLEX 0x00040000 /* If !MRADV_CRC16_SELECT */ +#define SG_DIG_PARTNER_FULL_DUPLEX 0x00020000 /* If !MRADV_CRC16_SELECT */ +#define SG_DIG_PARTNER_NEXT_PAGE 0x00010000 /* If !MRADV_CRC16_SELECT */ +#define SG_DIG_AUTONEG_STATE_MASK 0x00000ff0 +#define SG_DIG_COMMA_DETECTOR 0x00000008 +#define SG_DIG_MAC_ACK_STATUS 0x00000004 +#define SG_DIG_AUTONEG_COMPLETE 0x00000002 +#define SG_DIG_AUTONEG_ERROR 0x00000001 +/* 0x5b8 --> 0x600 unused */ #define MAC_TX_MAC_STATE_BASE 0x00000600 /* 16 bytes */ #define MAC_RX_MAC_STATE_BASE 0x00000610 /* 20 bytes */ /* 0x624 --> 0x800 unused */ @@ -2044,6 +2088,7 @@ #define TG3_FLG2_PHY_BER_BUG 0x00000100 #define TG3_FLG2_PCI_EXPRESS 0x00000200 #define TG3_FLG2_ASF_NEW_HANDSHAKE 0x00000400 +#define TG3_FLG2_HW_AUTONEG 0x00000800 u32 split_mode_max_reqs; #define SPLIT_MODE_5704_MAX_REQ 3 ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From sharada at in.ibm.com Fri Aug 13 14:30:00 2004 From: sharada at in.ibm.com (R Sharada) Date: Fri, 13 Aug 2004 10:00:00 +0530 Subject: cpumask move patch revised - RFC In-Reply-To: <411BBF5F.3070901@austin.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> <20040806042627.GA1288@in.ibm.com> <20040812121703.GA9760@in.ibm.com> <411BBF5F.3070901@austin.ibm.com> Message-ID: <20040813043000.GA1168@in.ibm.com> Hello Joel, Thanks for your feedback and comments. Responses inline I shall send out a revised patch soon. On Thu, Aug 12, 2004 at 02:05:03PM -0500, Joel Schopp wrote: > This will surely conflict with Nathan's recent patch "[patch 4/4] Remove > unnecessary cpu maps (available, present_at_boot)". I think Nathan's > patch should go in first and yours reworked to match. Other comments > inline below. Yes, you are correct. I did see Nathan's patch on the removal of the unnecessary cpu maps. And yes, I am waiting for his patch to go first and then have this reworked to match that change. > > R Sharada wrote: > > >Hello, > > Based on the feedback, here is the revised cpumask patch that > >moves the cpumask initialization from prom_hold_cpus() to later boot, in > >setup_system(). > >The patch is against the 2.6.8-rc2 linus bitkeeper tree. > > > >- The get_property call has been corrected to obtain the property size from > >the correct argument. > >- The unnecessary variable initializations have been removed > >- check for NULL value of status incorporated. > > > >I have not removed the #ifdefs for SMP, as all the cpumask data structures, > >as I see them in code now, are defined for SMP systems and does not seem > >to be > >defined for UP. > >The merge of the POWERMAC and PSERIES #ifdefs is also deferred as I don't > >know a lot about the POWERMAC initialization and startup to see if the two > >cases can be merged. > > > >Please review and comment on the patch. > > > >Thanks and Regards, > >Sharada > > > > > >------------------------------------------------------------------------ > > > >diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c > >linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c > >--- linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c 2004-08-03 > >02:12:58.000000000 -0700 > >+++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c 2004-08-13 > >06:02:22.808964544 -0700 > >@@ -77,6 +77,8 @@ > > void pSeries_calibrate_decr(void); > > void fwnmi_init(void); > > extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* > > from head.S */ > >+void cpumask_setup(void); > >+ > > Is this really necessary? Might it go better in a .h file somewhere? Well, yes, perhaps it could be put in some .h file. 
However, the idea here was that, I just followed the conventions for other functions in chrp_setup.c file > > > int fwnmi_active; /* TRUE if an FWNMI handler is present */ > > > > dev_t boot_dev; > >@@ -468,3 +470,92 @@ > > > > setup_default_decr(); > > } > >+ > >+void cpumask_setup() > >+{ > >+ unsigned long ind; > >+ struct device_node *np = NULL; > >+ int cpuid = 0; > >+ unsigned int *reg; > >+ char *statusp; > >+ int prop; > >+ int *propsize = ∝ > >+ unsigned int cpu_threads; > >+ > >+ printk(KERN_INFO "cpumask_setup\n"); > >+ /* On pmac, we just fill out the various global bitmasks and > >+ * arrays indicating our CPUs are here, they are actually started > >+ * later on from pmac_smp > >+ */ > >+ if (systemcfg->platform == PLATFORM_POWERMAC) { > >+ while ((np = of_find_node_by_type(np, "cpu"))) { > >+ reg = (unsigned int *)get_property(np, "reg", NULL); > >+#ifdef CONFIG_SMP > >+ cpu_set(cpuid, cpu_available_map); > >+ cpu_set(cpuid, cpu_possible_map); > >+ cpu_set(cpuid, cpu_present_at_boot); > >+ if (*reg == 0) > >+ cpu_set(cpuid, cpu_online_map); > >+#endif /* CONFIG_SMP */ > >+ cpuid++; > >+ } > > Shouldn't the whole while loop and of_node_put be in the #ifdef > CONFIG_SMP, as otherwise all we do is iterate over the cpus not doing > anything? Hmm.. Well, yes, I suppose we could move all of the while loop under #ifdef SMP. The idea as I understood it was that the cpumask data structures are defined for SMP alone and hencel flanked within the #ifdef SMP. But looking at it again, perhaps what you are suggesting is a good idea. I don't see the while loop doing anything else, anyways, in the non-SMP case. As regards the of_node_put, discussing with Nathan, I realized that it isn't really necessary, even for the last cpu node data structure in the while loop. So, this of_node_put will be gone soon, in the next patch. > > >+ of_node_put(np); > >+ return; > >+ } > >+ > >+ while ((np = of_find_node_by_type(np, "cpu"))) { > >+ > >+ statusp = (char *)get_property(np, "status", NULL); > >+ if ((statusp == NULL) || (statusp && strcmp(statusp, "okay") > >!= 0)) > >+ continue; > >+ > >+ reg = (unsigned int *)get_property(np, "reg", NULL); > >+ > >+ get_property(np, "ibm,ppc-interrupt-server#s", propsize); > >+ if (*propsize < 0) { > >+ /* no property. old hardware has no SMT */ > >+ cpu_threads = 1; > >+ } else { > >+ /* We have a threaded processor */ > >+ cpu_threads = *propsize / sizeof(u32); > >+ if (cpu_threads > 2) > >+ cpu_threads = 1; /* ToDo: panic? */ > > I think it is about time we start making code that will deal with more > than 2 cpu_threads, as the processors seem inevitable and not too far off. > So, can SMT/HMT have more than 2 threads now? or planned in the near future? > >+ } > >+ > >+#ifdef CONFIG_SMP > >+ cpu_set(cpuid, cpu_available_map); > >+ cpu_set(cpuid, cpu_possible_map); > >+ cpu_set(cpuid, cpu_present_at_boot); > >+ if (cpuid == boot_cpuid) > >+ cpu_set(cpuid, cpu_online_map); > >+ > >+ /* set the secondary threads into the cpuid mask */ > >+ for (ind=1; ind < cpu_threads; ind++) { > >+ cpuid++; > >+ if (cpuid >= NR_CPUS) > >+ continue; > >+ if (naca->smt_state) { > >+ cpu_set(cpuid, cpu_available_map); > >+ cpu_set(cpuid, cpu_present_at_boot); > >+ } > >+ } > >+#endif /* CONFIG_SMP */ > > Again I'd have the CONFIG_SMP cover more. The whole while loop and the > of_node_put. > However, here we still need to be able to check cpu node status and interrupt-server#s property, etc. for non-SMP (UP) systems as well, is it not? 
In that case, we can't really move the while loop inside the #ifdef SMP, can we? The case that you are talking about ( iterating over the cpus and not doing anything ) would occur only in the case of a SMP machine running a UP kernel, is it not? That seems unlikely? Or are there other scenarios? > >+ cpuid++; > >+ } > >+ of_node_put(np); > >+ > >+#ifdef CONFIG_HMT > >+ /* Only enable HMT on processors that provide support. */ > >+ if (__is_processor(PV_PULSAR) || > >+ __is_processor(PV_ICESTAR) || > >+ __is_processor(PV_SSTAR)) { > >+ > >+ for (ind = 0; ind < NR_CPUS; ind += 2) { > >+ if (!cpu_online(ind)) > >+ continue; > >+ cpu_set(ind+1, cpu_possible_map); > >+ } > >+ } > >+#endif > >+ return; > >+} > >diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c > >linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c > >--- linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c 2004-08-04 > >06:10:30.000000000 -0700 > >+++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c 2004-08-12 > >23:52:47.000000000 -0700 > >@@ -939,13 +939,6 @@ > > prom_getprop(node, "reg", ®, sizeof(reg)); > > lpaca[cpuid].hw_cpu_id = reg; > > > >-#ifdef CONFIG_SMP > >- cpu_set(cpuid, RELOC(cpu_available_map)); > >- cpu_set(cpuid, RELOC(cpu_possible_map)); > >- cpu_set(cpuid, RELOC(cpu_present_at_boot)); > >- if (reg == 0) > >- cpu_set(cpuid, RELOC(cpu_online_map)); > >-#endif /* CONFIG_SMP */ > > cpuid++; > > } > > return; > >@@ -1042,9 +1035,6 @@ > > #ifdef CONFIG_SMP > > /* Set the number of active processors. */ > > _systemcfg->processorCount++; > >- cpu_set(cpuid, RELOC(cpu_available_map)); > >- cpu_set(cpuid, RELOC(cpu_possible_map)); > >- cpu_set(cpuid, RELOC(cpu_present_at_boot)); > > #endif > > } else { > > prom_printf("... failed: %x\n", > > *acknowledge); > >@@ -1053,10 +1043,6 @@ > > #ifdef CONFIG_SMP > > else { > > prom_printf("%x : booting cpu %s\n", cpuid, path); > >- cpu_set(cpuid, RELOC(cpu_available_map)); > >- cpu_set(cpuid, RELOC(cpu_possible_map)); > >- cpu_set(cpuid, RELOC(cpu_online_map)); > >- cpu_set(cpuid, RELOC(cpu_present_at_boot)); > > } > > #endif > > next: > >@@ -1069,13 +1055,6 @@ > > lpaca[cpuid].hw_cpu_id = interrupt_server[i]; > > prom_printf("%x : preparing thread ... ", > > interrupt_server[i]); > >- if (_naca->smt_state) { > >- cpu_set(cpuid, RELOC(cpu_available_map)); > >- cpu_set(cpuid, RELOC(cpu_present_at_boot)); > >- prom_printf("available\n"); > >- } else { > >- prom_printf("not available\n"); > >- } > > } > > #endif > > cpuid++; > >@@ -1101,8 +1080,6 @@ > > pir & 0x3ff; > > } > > } > >-/* cpu_set(i+1, cpu_online_map); */ > >- cpu_set(i+1, RELOC(cpu_possible_map)); > > } > > _systemcfg->processorCount *= 2; > > } else { > >diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c > >linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c > >--- linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c 2004-08-03 > >02:12:59.000000000 -0700 > >+++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c 2004-08-04 > >06:15:27.000000000 -0700 > >@@ -76,6 +76,7 @@ > > extern void pseries_secondary_smp_init(unsigned long); > > extern int idle_setup(void); > > extern void vpa_init(int cpu); > >+extern void cpumask_setup(void); > > Could this go in a .h file somewhere? Again, the conventions used for other functions in the file was used for this function declaration. Was it the idea behind declaring this way, that we declare the functions only in the required source files and do away stacking up too many declarations of functions (that are not called from many places anyways) in common .h files? 
> > > > > unsigned long decr_overclock = 1; > > unsigned long decr_overclock_proc0 = 1; > >@@ -229,6 +230,7 @@ > > register_console(&udbg_console); > > __irq_offset_value = NUM_ISA_INTERRUPTS; > > finish_device_tree(); > >+ cpumask_setup(); > > chrp_init(r3, r4, r5, r6, r7); > > > > #ifdef CONFIG_SMP > >@@ -251,6 +253,7 @@ > > #ifdef CONFIG_PPC_PMAC > > if (systemcfg->platform == PLATFORM_POWERMAC) { > > finish_device_tree(); > >+ cpumask_setup(); > > pmac_init(r3, r4, r5, r6, r7); > > } > > #endif /* CONFIG_PPC_PMAC */ Thanks and Regards, Sharada ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Fri Aug 13 16:20:22 2004 From: paulus at samba.org (Paul Mackerras) Date: Fri, 13 Aug 2004 16:20:22 +1000 Subject: cpumask move patch revised - RFC In-Reply-To: <20040813043000.GA1168@in.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> <20040806042627.GA1288@in.ibm.com> <20040812121703.GA9760@in.ibm.com> <411BBF5F.3070901@austin.ibm.com> <20040813043000.GA1168@in.ibm.com> Message-ID: <16668.23974.392788.23268@cargo.ozlabs.ibm.com> R Sharada writes: > Yes, you are correct. I did see Nathan's patch on the removal of the > unnecessary cpu maps. And yes, I am waiting for his patch to go first > and then have this reworked to match that change. I have just sent Nathan's patches on to Andrew Morton. > > Is this really necessary? Might it go better in a .h file somewhere? > > Well, yes, perhaps it could be put in some .h file. However, the idea here > was that, I just followed the conventions for other functions in chrp_setup.c > file Hmmm. Anything that is defined in one file and referenced in another should be declared in a header, not in the individual C files. Put it in asm-ppc64/smp.h (unless you can think of a better place). Either that or move cpumask_setup() into setup.c. > As regards the of_node_put, discussing with Nathan, I realized that it isn't > really necessary, even for the last cpu node data structure in the while > loop. So, this of_node_put will be gone soon, in the next patch. Note that it is not necessary because np is NULL by the time you exit the loop. > > I think it is about time we start making code that will deal with more > > than 2 cpu_threads, as the processors seem inevitable and not too far off. > > > So, can SMT/HMT have more than 2 threads now? or planned in the near future? Not that I know of. :) There are diminishing returns from having more than 2 threads. If we ever get more than 2 threads we can change the code then, but that won't be in the next few years at least. > > Again I'd have the CONFIG_SMP cover more. The whole while loop and the > > of_node_put. > > > However, here we still need to be able to check cpu node status and > interrupt-server#s property, etc. for non-SMP (UP) systems as well, > is it not? In that case, we can't really move the while loop inside the > #ifdef SMP, can we? > The case that you are talking about ( iterating over the cpus and not doing > anything ) would occur only in the case of a SMP machine running a UP > kernel, is it not? That seems unlikely? Or are there other scenarios? That would be an uncommon case, and performance is not critical. I would like to see such optimizations as a second patch after we have moved the code and tested it. Regards, Paul. ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From moilanen at austin.ibm.com Sat Aug 14 04:06:40 2004 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Fri, 13 Aug 2004 13:06:40 -0500 Subject: [PATCH] log machine check errors Message-ID: <20040813130640.08ccff25@localhost> Somewhere along the line it looks like logging machine check errors never got put in 2.6. Machine check error logs were one of the main reasons for storing logs to nvram. Here's a forward port of the 2.4 code the Dave Altobelli originally wrote. Thanks, Jake Signed-off-by: Dave Altobelli Signed-off-by: Jake Moilanen --- diff -puN arch/ppc64/kernel/traps.c~machine-check-logging arch/ppc64/kernel/traps.c --- linux-2.6-ames/arch/ppc64/kernel/traps.c~machine-check-logging Fri Aug 13 08:01:00 2004 +++ linux-2.6-ames-moilanen/arch/ppc64/kernel/traps.c Fri Aug 13 09:01:37 2004 @@ -37,10 +37,14 @@ #include #include #include +#include #ifdef CONFIG_PPC_PSERIES /* This is true if we are using the firmware NMI handler (typically LPAR) */ extern int fwnmi_active; + +char mce_data_buf[RTAS_ERROR_LOG_MAX]__page_aligned; + #endif #ifdef CONFIG_DEBUGGER @@ -149,6 +153,13 @@ _exception(int signr, siginfo_t *info, s * FWNMI vectors. The pt_regs' r3 will be updated to reflect * the actual r3 if possible, and a ptr to the error log entry * will be returned if found. + * + * The mce_data_buf does not have any locks or protection around it, + * if a second machine check comes in, or a system reset is done + * before we have logged the error, then we will get corruption in the + * error log. This is preferable over holding off on calling + * ibm,nmi-interlock which would result in us checkstopping if a + * second machine check did come in. */ static struct rtas_error_log *FWNMI_get_errinfo(struct pt_regs *regs) { @@ -160,7 +171,9 @@ static struct rtas_error_log *FWNMI_get_ (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) { savep = __va(errdata); regs->gpr[3] = savep[0]; /* restore original r3 */ - errhdr = (struct rtas_error_log *)(savep + 1); + memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX); + memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX); + errhdr = (struct rtas_error_log *)mce_data_buf; } else { printk("FWNMI: corrupt r3\n"); } @@ -211,19 +224,20 @@ SystemResetException(struct pt_regs *reg * Return 1 if corrected (or delivered a signal). * Return 0 if there is nothing we can do. 
*/ -static int recover_mce(struct pt_regs *regs, struct rtas_error_log err) +static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) { siginfo_t info; + int nonfatal = 0; - if (err.disposition == DISP_FULLY_RECOVERED) { + if (err->disposition == DISP_FULLY_RECOVERED) { /* Platform corrected itself */ - return 1; + nonfatal = 1; } else if ((regs->msr & MSR_RI) && user_mode(regs) && - err.severity == SEVERITY_ERROR_SYNC && - err.disposition == DISP_NOT_RECOVERED && - err.target == TARGET_MEMORY && - err.type == TYPE_ECC_UNCORR && + err->severity == SEVERITY_ERROR_SYNC && + err->disposition == DISP_NOT_RECOVERED && + err->target == TARGET_MEMORY && + err->type == TYPE_ECC_UNCORR && !(current->pid == 0 || current->pid == 1)) { /* Kill off a user process with an ECC error */ info.si_signo = SIGBUS; @@ -234,9 +248,12 @@ static int recover_mce(struct pt_regs *r printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", current->pid); _exception(SIGBUS, &info, regs); - return 1; + nonfatal = 1; } - return 0; + + log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal); + + return nonfatal; } #endif @@ -254,14 +271,12 @@ void MachineCheckException(struct pt_regs *regs) { #ifdef CONFIG_PPC_PSERIES - struct rtas_error_log err, *errp; + struct rtas_error_log *errp; if (fwnmi_active) { errp = FWNMI_get_errinfo(regs); - if (errp) - err = *errp; - FWNMI_release_errinfo(); /* frees errp */ - if (errp && recover_mce(regs, err)) + FWNMI_release_errinfo(); + if (errp && recover_mce(regs, errp)) return; } #endif _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From kravetz at us.ibm.com Sat Aug 14 04:25:47 2004 From: kravetz at us.ibm.com (Mike Kravetz) Date: Fri, 13 Aug 2004 11:25:47 -0700 Subject: disabling kernel access to memory Message-ID: <20040813182547.GA4228@w-mikek2.beaverton.ibm.com> I'm playing around with memory hotplug. Currently, I'm trying to get the nonlinear translation initialization to be based on the LMB layout for ppc64. To make my life easy, I'm simulating various LMB layouts by modifying the code in prom_initialize_lmb() so that it appears the layout is different than what it really is. For example, I'm creating 'holes' in the physical address space that do not occur naturally on my system. This all seems to work well and the 'downstream' memory management code works with the simulated layouts rather than the real one. When creating artificial 'holes' in the physical address space, I would really like to disable all access to this memory in an effort to catch anyone making incorrect accesses. Of course, these holes are at least 16MB (min LMB size) in size. Is there an 'easy' way to prevent all access to these holes? -- Mike ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From olh at suse.de Mon Aug 16 04:40:24 2004 From: olh at suse.de (Olaf Hering) Date: Sun, 15 Aug 2004 20:40:24 +0200 Subject: PATCH: sched_{s,g}etaffinity compat In-Reply-To: <20040510030153.GB27713@krispykreme> References: <20040509115205.GA28943@suse.de> <20040510030153.GB27713@krispykreme> Message-ID: <20040815184024.GA5580@suse.de> On Mon, May 10, Anton Blanchard wrote: > > Hi, > > > On "real" hardware with NR_CPUS > sizeof(long)*8, the > > sys_sched_setaffinity and getaffinity compatibility functions break, > > because they just convert long masks instead of the full CPU masks. > > > > This patches fixes this problem. > > > > Spotted by a ppc32 glibc make check, on a ppc64 Kernel. 
> > Unfortunately thats not enough :) We need to change between 32bit and > 64bit bitfields (just like we do on the 32bit compat select call). Here > is a patch from Milton from a while ago that should do it. Probably not correct, bot -EWORKSFORME. diff -purNX /suse/olh/kernel/kernel_exclude.txt linux-2.6.8.1.orig/kernel/compat.c linux-2.6.8.1-olh/kernel/compat.c --- linux-2.6.8.1.orig/kernel/compat.c 2004-08-14 12:55:32.000000000 +0200 +++ linux-2.6.8.1-olh/kernel/compat.c 2004-08-15 20:23:05.000000000 +0200 @@ -383,16 +383,56 @@ compat_sys_wait4(compat_pid_t pid, compa } } +/* for maximum compatability, we allow programs to use a single (compat) + * unsigned long bitmask if all cpus will fit. If not, you have to have + * at least the kernel size available. + */ +#define USE_COMPAT_ULONG_CPUMASK (NR_CPUS <= 8*sizeof(compat_ulong_t)) + asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, unsigned int len, compat_ulong_t __user *user_mask_ptr) { - unsigned long kern_mask; + cpumask_t kern_mask; mm_segment_t old_fs; int ret; - if (get_user(kern_mask, user_mask_ptr)) - return -EFAULT; + if (USE_COMPAT_ULONG_CPUMASK) { + compat_ulong_t user_mask; + + if (len < sizeof(user_mask)) + return -EINVAL; + + if (get_user(user_mask, user_mask_ptr)) + return -EFAULT; + + kern_mask.bits[0] = user_mask; + } else { + if (len < sizeof(kern_mask)) + return -EINVAL; + + if (!access_ok(VERIFY_READ, user_mask_ptr, sizeof(kern_mask))) + return -EFAULT; + else { + int i, j; + unsigned long *k, m; + compat_ulong_t um; + + k = &kern_mask.bits[0]; + + for (i=0; i < sizeof(kern_mask)/sizeof(m); i++) { + m = 0; + + for (j = 0; j < sizeof(m)/sizeof(um); j++ ) { + if (__get_user(um, user_mask_ptr)) + return -EFAULT; + user_mask_ptr++; + m |= (unsigned long)um << (8*sizeof(um)*j); + } + *k++ = m; + } + } + } old_fs = get_fs(); set_fs(KERNEL_DS); @@ -407,10 +447,14 @@ asmlinkage long compat_sys_sched_setaffi asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len, compat_ulong_t __user *user_mask_ptr) { - unsigned long kern_mask; + cpumask_t kern_mask; mm_segment_t old_fs; int ret; + if (len < (USE_COMPAT_ULONG_CPUMASK ? sizeof(compat_ulong_t) + : sizeof(kern_mask))) + return -EINVAL; + old_fs = get_fs(); set_fs(KERNEL_DS); ret = sys_sched_getaffinity(pid, @@ -419,9 +463,33 @@ asmlinkage long compat_sys_sched_getaffi set_fs(old_fs); if (ret > 0) { - ret = sizeof(compat_ulong_t); - if (put_user(kern_mask, user_mask_ptr)) - return -EFAULT; + if (USE_COMPAT_ULONG_CPUMASK) { + ret = sizeof(compat_ulong_t); + if (put_user(kern_mask.bits[0], user_mask_ptr)) + return -EFAULT; + } else { + int i, j, err; + unsigned long *k, m; + compat_ulong_t um; + + err = ! access_ok(VERIFY_WRITE, user_mask_ptr, ret); + + k = &kern_mask.bits[0]; + + for (i=0; i < sizeof(kern_mask)/sizeof(m) && !err; i++) { + m = *k++; + + for (j = 0; j < sizeof(m)/sizeof(compat_ulong_t) && !err; j++ ) { + um = m; + err |= __put_user(um, user_mask_ptr); + user_mask_ptr++; + m >>= 4*sizeof(compat_ulong_t); + m >>= 4*sizeof(compat_ulong_t); + } + } + if (err) + ret = -EFAULT; + } } return ret; -- USB is for mice, FireWire is for men! sUse lINUX ag, n?RNBERG ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From olh at suse.de Mon Aug 16 07:51:42 2004 From: olh at suse.de (Olaf Hering) Date: Sun, 15 Aug 2004 23:51:42 +0200 Subject: glibc fails in tst-timer4, __SI_TIMER needs its own case Message-ID: <20040815215142.GA18611@suse.de> Ben, can you have a look at this one? 
glibc make check fails since the tst-timer4 test was added in April this year. I think __ST_TIMER needs its own case. This patch against 2.6.8.1 fixes it for me. @@ -472,10 +527,14 @@ static long copy_siginfo_to_user32(compa &d->si_addr); break; case __SI_POLL >> 16: - case __SI_TIMER >> 16: err |= __put_user(s->si_band, &d->si_band); err |= __put_user(s->si_fd, &d->si_fd); break; + case __SI_TIMER >> 16: + err |= __put_user(s->_sifields._rt._pid, &d->_sifields._rt._pid); + err |= __put_user(s->_sifields._rt._uid, &d->_sifields._rt._uid); + err |= __put_user((u32)(u64)s->_sifields._rt._sigval.sival_int, &d->_sifields._rt._sigval.sival_int); + break; case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ case __SI_MESGQ >> 16: err |= __put_user(s->si_int, &d->si_int); -- USB is for mice, FireWire is for men! sUse lINUX ag, n?RNBERG ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Mon Aug 16 08:19:51 2004 From: anton at samba.org (Anton Blanchard) Date: Mon, 16 Aug 2004 08:19:51 +1000 Subject: rtas_call uses kmalloc before the memory subsystem is up Message-ID: <20040815221951.GK5637@krispykreme> Marcus and Olaf hit this problem: reserve_bootmem 0 870b30 reserve_bootmem 4400000 154000 reserve_bootmem f92c000 6d4000 reserve_bootmem 20ffbc000 42000 reserve_bootmem 20fffef08 10f8 Boot arguments: root=/dev/sdb3 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=128 NUMA PSERIES LPAR NIP: C00000000008571C XER: 0000000000000000 LR: C00000000003470C REGS: c0000000003fb760 TRAP: 0300 Not tainted (2.6.8.1-tst-timer4) MSR: 8000000000001032 EE: 0 PR: 0 FP: 0 ME: 1 IR/DR: 11 DAR: 0000000000000000, DSISR: 0000000040000000 TASK: c00000000049c160[0] 'swapper' THREAD: c0000000003f8000 CPU: 0 GPR00: 0000000000000000 C0000000003FB9E0 C0000000005E8540 0000000000000000 GPR04: 0000000000000020 0000000000000000 0000000000000000 0000000000000005 GPR08: C00000000066D16C C0000000004BA918 8000000000001032 0000000000000000 GPR12: 000000000FD4D8A8 C0000000003FC000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000230000 C0000000003FBD70 000000000006040F 0000000000000001 GPR24: 0000000000000001 0000000000000000 0000000000000000 000000000000002B GPR28: 8000000000001032 0000000000000001 FFFFFFFFFFFFFFFF 8000000000001032 NIP [c00000000008571c] .kmem_cache_alloc+0x28/0x78 LR [c00000000003470c] .rtas_call+0x260/0x284 Call Trace: [c0000000003fba60] [c00000000003470c] .rtas_call+0x260/0x284 [c0000000003fbb10] [c00000000002eb6c] .early_enable_eeh+0x2c8/0x3cc [c0000000003fbc60] [c00000000002ca4c] .traverse_pci_devices+0x178/0x188 [c0000000003fbd00] [c0000000003cf5c4] .eeh_init+0x170/0x204 [c0000000003fbda0] [c0000000003d1010] .chrp_setup_arch+0x60/0x154 [c0000000003fbe30] [c0000000003cce08] .setup_arch+0x170/0x200 [c0000000003fbed0] [c0000000003c7520] .start_kernel+0x78/0x33c [c0000000003fbf90] [c00000000000c038] .__setup_cpu_power3+0x0/0x4 rtas_call is doing a kmalloc before the memory subsystem is up, but only when we hit an error. Anton ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From paulus at samba.org Mon Aug 16 08:20:06 2004 From: paulus at samba.org (Paul Mackerras) Date: Mon, 16 Aug 2004 08:20:06 +1000 Subject: disabling kernel access to memory In-Reply-To: <20040813182547.GA4228@w-mikek2.beaverton.ibm.com> References: <20040813182547.GA4228@w-mikek2.beaverton.ibm.com> Message-ID: <16671.57750.537610.762907@cargo.ozlabs.ibm.com> Mike Kravetz writes: > When creating artificial 'holes' in the physical address space, > I would really like to disable all access to this memory in an > effort to catch anyone making incorrect accesses. Of course, > these holes are at least 16MB (min LMB size) in size. Is there > an 'easy' way to prevent all access to these holes? You might be able to hand them back to the hypervisor using the DLPAR calls. Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Mon Aug 16 15:18:19 2004 From: paulus at samba.org (Paul Mackerras) Date: Mon, 16 Aug 2004 15:18:19 +1000 Subject: rtas_call uses kmalloc before the memory subsystem is up In-Reply-To: <20040815221951.GK5637@krispykreme> References: <20040815221951.GK5637@krispykreme> Message-ID: <16672.17307.578763.854775@cargo.ozlabs.ibm.com> Anton Blanchard writes: > rtas_call is doing a kmalloc before the memory subsystem is up, but only > when we hit an error. This is a quick-n-dirty hack to fix the problem. It's not completely obvious what the proper solution looks like, unfortunately. Paul. diff -urN linux-2.5/arch/ppc64/kernel/rtas.c test25/arch/ppc64/kernel/rtas.c --- linux-2.5/arch/ppc64/kernel/rtas.c 2004-08-03 08:07:43.000000000 +1000 +++ test25/arch/ppc64/kernel/rtas.c 2004-08-16 14:47:22.147162600 +1000 @@ -165,9 +165,12 @@ /* Log the error in the unlikely case that there was one. */ if (unlikely(logit)) { - buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); - if (buff_copy) { - memcpy(buff_copy, rtas_err_buf, RTAS_ERROR_LOG_MAX); + buff_copy = rtas_err_buf; + if (mem_init_done) { + buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); + if (buff_copy) + memcpy(buff_copy, rtas_err_buf, + RTAS_ERROR_LOG_MAX); } } @@ -176,7 +179,8 @@ if (buff_copy) { log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); - kfree(buff_copy); + if (mem_init_done) + kfree(buff_copy); } return ret; } ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From akpm at osdl.org Mon Aug 16 17:01:46 2004 From: akpm at osdl.org (Andrew Morton) Date: Mon, 16 Aug 2004 00:01:46 -0700 Subject: recent sym2 breakage Message-ID: <20040816000146.3f484151.akpm@osdl.org> So I'm putting together 2.6.8.1-mm1, which is bigger than Ben Hur and it seems that someone broke the sym2 driver, on ppc64 at least. It runs OK, but a `halt -p' spits nasty messages, then hangs. Can someone suggest what changes caused this? Shutting down network interfaces: eth0 done Shutting down sound driver done Shutting down service at daemon done sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. 
sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: ABORT operation started. sym0:8:0: ABORT operation timed-out. sym0:8:0: DEVICE RESET operation started. sym0:8:0: DEVICE RESET operation timed-out. sym0:8:0: BUS RESET operation started. sym0:8:0: BUS RESET operation timed-out. sym0:8:0: HOST RESET operation started. sym0: SCSI BUS has been reset. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From olh at suse.de Mon Aug 16 21:37:05 2004 From: olh at suse.de (Olaf Hering) Date: Mon, 16 Aug 2004 13:37:05 +0200 Subject: recent sym2 breakage In-Reply-To: <20040816000146.3f484151.akpm@osdl.org> References: <20040816000146.3f484151.akpm@osdl.org> Message-ID: <20040816113705.GA6015@suse.de> On Mon, Aug 16, Andrew Morton wrote: > > So I'm putting together 2.6.8.1-mm1, which is bigger than Ben Hur and it > seems that someone broke the sym2 driver, on ppc64 at least. > > It runs OK, but a `halt -p' spits nasty messages, then hangs. sym2 is a bit flaky in 2.6. Does the vanilla kernel work ok for you? You might need at least this patch in case of rtas errors: diff -purNX /suse/olh/kernel/kernel_exclude.txt linux-2.6.8.1.orig/arch/ppc64/kernel/rtas.c linux-2.6.8.1-olh/arch/ppc64/kernel/rtas.c --- linux-2.6.8.1.orig/arch/ppc64/kernel/rtas.c 2004-08-14 12:54:50.000000000 +0200 +++ linux-2.6.8.1-olh/arch/ppc64/kernel/rtas.c 2004-08-15 19:19:49.000000000 +0200 @@ -31,6 +31,8 @@ #include #include +extern int mem_init_done; + struct flash_block_list_header rtas_firmware_flash_list = {0, NULL}; struct rtas_t rtas = { @@ -165,6 +167,7 @@ int rtas_call(int token, int nargs, int /* Log the error in the unlikely case that there was one. */ if (unlikely(logit)) { + if (mem_init_done) buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); if (buff_copy) { memcpy(buff_copy, rtas_err_buf, RTAS_ERROR_LOG_MAX); -- USB is for mice, FireWire is for men! sUse lINUX ag, n?RNBERG ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From willy at debian.org Mon Aug 16 21:42:27 2004 From: willy at debian.org (Matthew Wilcox) Date: Mon, 16 Aug 2004 12:42:27 +0100 Subject: recent sym2 breakage In-Reply-To: <20040816000146.3f484151.akpm@osdl.org> References: <20040816000146.3f484151.akpm@osdl.org> Message-ID: <20040816114227.GD28995@parcelfarce.linux.theplanet.co.uk> On Mon, Aug 16, 2004 at 12:01:46AM -0700, Andrew Morton wrote: > It runs OK, but a `halt -p' spits nasty messages, then hangs. > > Can someone suggest what changes caused this? Reviewing the diff between 2.6.7 and 2.6.8.1 doesn't show any significant changes to the sym2 driver (lots of 0/NULL changes, and ppc64 is weird, but not /that/ weird ;-) So it must be something sym2 is using. The obvious candidate is the change to drivers/scsi/scsi_transport_spi.c in the spi_dv_retrain function. If you pull that out, does it work better? ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From nfont at austin.ibm.com Tue Aug 17 01:57:30 2004 From: nfont at austin.ibm.com (Nathan Fontenot) Date: Mon, 16 Aug 2004 10:57:30 -0500 Subject: [PATCH] RTAS spam reduction Message-ID: <4120D96A.5020909@austin.ibm.com> It seems my first patch didn't go too far, so here is a revised version of the rtas spam patch. The big change in this patch is the replacing the big printk hexdump of RTAS events with a single line such as kernel: RTAS: event: 1, Type: Dump Notification Event, Severity: 1 The full hexdump is always available in /var/log/platform (logged by the rtas_errd daemon) or you can boot with the option rtasmsgs=[on/off] to turn on or off full printk hexdumping of RTAS events. Signed-off-by: Nathan Fontenot -- Nathan Fontenot Power Linux Platform Serviceability Home: IBM Austin 908/1E-036 Phone: 512.838.3377 (T/L 678.3377) Email: nfont at austin.ibm.com -------------- next part -------------- A non-text attachment was scrubbed... Name: rtas-spam.patch Type: text/x-patch Size: 9606 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040816/d82751de/attachment.bin From jschopp at austin.ibm.com Tue Aug 17 02:53:55 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Mon, 16 Aug 2004 11:53:55 -0500 Subject: disabling kernel access to memory In-Reply-To: <16671.57750.537610.762907@cargo.ozlabs.ibm.com> References: <20040813182547.GA4228@w-mikek2.beaverton.ibm.com> <16671.57750.537610.762907@cargo.ozlabs.ibm.com> Message-ID: <4120E6A3.2040305@austin.ibm.com> >>When creating artificial 'holes' in the physical address space, >>I would really like to disable all access to this memory in an >>effort to catch anyone making incorrect accesses. Of course, >>these holes are at least 16MB (min LMB size) in size. Is there >>an 'easy' way to prevent all access to these holes? > > > You might be able to hand them back to the hypervisor using the DLPAR > calls. I think the idea is that DLPAR calls are the hard part. Doing an isolate requires all the PTEs and TCEs don't reference the memory for instance. Back in my userspace days I remember threading libraries liked to use mprotect for such things. Linux kernel has mprotect and sys_mprotect, not sure if they will fit your needs or not. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 17 04:08:47 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 17 Aug 2004 04:08:47 +1000 Subject: [PATCH] [ppc64] POWER4 oprofile update Message-ID: <20040816180847.GM5637@krispykreme> POWER4 oprofile updates from Carl Love. - Create mmcr0, mmcr1, mmcra oprofilefs files. - Use kernel and user profile disable bits. (Some modifications by me) Signed-off-by: Anton Blanchard diff -puN arch/ppc64/oprofile/common.c~oprofile-1 arch/ppc64/oprofile/common.c --- foobar2/arch/ppc64/oprofile/common.c~oprofile-1 2004-08-15 07:28:56.850539856 +1000 +++ foobar2-anton/arch/ppc64/oprofile/common.c 2004-08-16 06:00:30.420792426 +1000 @@ -90,6 +90,14 @@ static int op_ppc64_create_files(struct { int i; + /* + * There is one mmcr0, mmcr1 and mmcra for setting the events for + * all of the counters. 
+ */ + oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0); + oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1); + oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra); + for (i = 0; i < model->num_counters; ++i) { struct dentry *dir; char buf[3]; @@ -112,6 +120,10 @@ static int op_ppc64_create_files(struct oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel); oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user); + /* Default to tracing both kernel and user */ + sys.enable_kernel = 1; + sys.enable_user = 1; + return 0; } diff -puN arch/ppc64/oprofile/op_impl.h~oprofile-1 arch/ppc64/oprofile/op_impl.h --- foobar2/arch/ppc64/oprofile/op_impl.h~oprofile-1 2004-08-15 07:28:56.856539395 +1000 +++ foobar2-anton/arch/ppc64/oprofile/op_impl.h 2004-08-16 06:02:45.825786713 +1000 @@ -19,6 +19,12 @@ /* freeze counters. set to 1 on a perfmon exception */ #define MMCR0_FC (1UL << (31 - 0)) +/* freeze in supervisor state */ +#define MMCR0_KERNEL_DISABLE (1UL << (31 - 1)) + +/* freeze in problem state */ +#define MMCR0_PROBLEM_DISABLE (1UL << (31 - 2)) + /* freeze counters while MSR mark = 1 */ #define MMCR0_FCM1 (1UL << (31 - 3)) @@ -28,15 +34,15 @@ /* freeze counters on enabled condition or event */ #define MMCR0_FCECE (1UL << (31 - 6)) -/* performance monitor alert has occurred, set to 0 after handling exception */ -#define MMCR0_PMAO (1UL << (31 - 24)) - /* PMC1 count enable*/ #define MMCR0_PMC1INTCONTROL (1UL << (31 - 16)) /* PMCn count enable*/ #define MMCR0_PMCNINTCONTROL (1UL << (31 - 17)) +/* performance monitor alert has occurred, set to 0 after handling exception */ +#define MMCR0_PMAO (1UL << (31 - 24)) + /* state of MSR HV when SIAR set */ #define MMCRA_SIHV (1UL << (63 - 35)) @@ -60,6 +66,9 @@ struct op_counter_config { /* System-wide configuration as set via oprofilefs. */ struct op_system_config { + unsigned long mmcr0; + unsigned long mmcr1; + unsigned long mmcra; unsigned long enable_kernel; unsigned long enable_user; }; diff -puN arch/ppc64/oprofile/op_model_power4.c~oprofile-1 arch/ppc64/oprofile/op_model_power4.c --- foobar2/arch/ppc64/oprofile/op_model_power4.c~oprofile-1 2004-08-15 07:28:56.863538857 +1000 +++ foobar2-anton/arch/ppc64/oprofile/op_model_power4.c 2004-08-16 06:00:31.582969777 +1000 @@ -27,6 +27,11 @@ static int num_counters; static int oprofile_running; static int mmcra_has_sihv; +/* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */ +static u32 mmcr0_val; +static u64 mmcr1_val; +static u32 mmcra_val; + static void power4_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) @@ -45,18 +50,36 @@ static void power4_reg_setup(struct op_c if (cur_cpu_spec->cpu_features & CPU_FTR_MMCRA_SIHV) mmcra_has_sihv = 1; + /* + * The performance counter event settings are given in the mmcr0, + * mmcr1 and mmcra values passed from the user in the + * op_system_config structure (sys variable). 
+ */ + mmcr0_val = sys->mmcr0; + mmcr1_val = sys->mmcr1; + mmcra_val = sys->mmcra; + for (i = 0; i < num_counters; ++i) reset_value[i] = 0x80000000UL - ctr[i].count; - /* XXX setup user and kernel profiling */ + /* setup user and kernel profiling */ + if (sys->enable_kernel) + mmcr0_val &= ~MMCR0_KERNEL_DISABLE; + else + mmcr0_val |= MMCR0_KERNEL_DISABLE; + + if (sys->enable_user) + mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; + else + mmcr0_val |= MMCR0_PROBLEM_DISABLE; } extern void ppc64_enable_pmcs(void); static void power4_cpu_setup(void *unused) { - unsigned int mmcr0 = mfspr(SPRN_MMCR0); - unsigned long mmcra = mfspr(SPRN_MMCRA); + unsigned int mmcr0 = mmcr0_val; + unsigned long mmcra = mmcra_val; ppc64_enable_pmcs(); @@ -68,6 +91,8 @@ static void power4_cpu_setup(void *unuse mmcr0 |= MMCR0_PMC1INTCONTROL|MMCR0_PMCNINTCONTROL; mtspr(SPRN_MMCR0, mmcr0); + mtspr(SPRN_MMCR1, mmcr1_val); + mmcra |= MMCRA_SAMPLE_ENABLE; mtspr(SPRN_MMCRA, mmcra); _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 17 04:09:57 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 17 Aug 2004 04:09:57 +1000 Subject: [PATCH] [ppc64] disable oprofile debug messages In-Reply-To: <20040816180847.GM5637@krispykreme> References: <20040816180847.GM5637@krispykreme> Message-ID: <20040816180957.GN5637@krispykreme> Disable oprofile debug messages. They arent much use now things are working reliably. Signed-off-by: Anton Blanchard diff -puN arch/ppc64/oprofile/op_model_power4.c~oprofile-2 arch/ppc64/oprofile/op_model_power4.c --- linux-2.5/arch/ppc64/oprofile/op_model_power4.c~oprofile-2 2004-08-15 05:54:32.550954834 +1000 +++ linux-2.5-anton/arch/ppc64/oprofile/op_model_power4.c 2004-08-15 05:54:46.923754121 +1000 @@ -17,7 +17,7 @@ #include #include -#define dbg(args...) printk(args) +#define dbg(args...) #include "op_impl.h" diff -puN arch/ppc64/oprofile/op_model_rs64.c~oprofile-2 arch/ppc64/oprofile/op_model_rs64.c --- linux-2.5/arch/ppc64/oprofile/op_model_rs64.c~oprofile-2 2004-08-15 05:54:59.089584057 +1000 +++ linux-2.5-anton/arch/ppc64/oprofile/op_model_rs64.c 2004-08-15 05:55:04.913502591 +1000 @@ -15,7 +15,7 @@ #include #include -#define dbg(args...) printk(args) +#define dbg(args...) #include "op_impl.h" _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 17 04:11:34 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 17 Aug 2004 04:11:34 +1000 Subject: [PATCH] [ppc64] allow oprofile module to be safely unloaded In-Reply-To: <20040816180957.GN5637@krispykreme> References: <20040816180847.GM5637@krispykreme> <20040816180957.GN5637@krispykreme> Message-ID: <20040816181134.GO5637@krispykreme> Allow the oprofile module to be unloaded, before we never removed the oprofile specific interrupt handler. Handle the pending exception case in the dummy interrupt handler instead. 
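For illustration, a minimal sketch (not part of the patch) of the load/unload handler-swap pattern that this change makes safe to complete. Only perf_irq is the real hook from arch/ppc64/kernel/traps.c; the module-side names below are hypothetical. The point is that the dummy handler being restored now quiesces the PMU itself by clearing MMCR0_PMXE, so restoring it at unload is safe even if an exception is still pending.

#include <linux/init.h>
#include <linux/module.h>
#include <asm/ptrace.h>

extern void (*perf_irq)(struct pt_regs *);      /* real hook in traps.c */

static void (*saved_perf_irq)(struct pt_regs *);

static void my_pmc_handler(struct pt_regs *regs)        /* hypothetical */
{
        /* ... read the performance monitor counters, record a sample ... */
}

static int __init my_init(void)
{
        saved_perf_irq = perf_irq;      /* remember the dummy handler */
        perf_irq = my_pmc_handler;
        return 0;
}

static void __exit my_exit(void)
{
        /* Safe now: the dummy handler clears MMCR0_PMXE itself instead of
         * relying on module code that may already be unloaded. */
        perf_irq = saved_perf_irq;
}

module_init(my_init);
module_exit(my_exit);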
Signed-off-by: Anton Blanchard diff -puN arch/ppc64/kernel/traps.c~oprofile-3 arch/ppc64/kernel/traps.c --- linux-2.5/arch/ppc64/kernel/traps.c~oprofile-3 2004-08-15 06:04:59.800226959 +1000 +++ linux-2.5-anton/arch/ppc64/kernel/traps.c 2004-08-15 06:17:33.918867410 +1000 @@ -492,8 +492,15 @@ static inline void emulate_single_step(s SingleStepException(regs); } +/* Ensure exceptions are disabled */ +#define MMCR0_PMXE (1UL << (31 - 5)) + static void dummy_perf(struct pt_regs *regs) { + unsigned int mmcr0 = mfspr(SPRN_MMCR0); + + mmcr0 &= ~MMCR0_PMXE; + mtspr(SPRN_MMCR0, mmcr0); } void (*perf_irq)(struct pt_regs *) = dummy_perf; diff -puN arch/ppc64/oprofile/op_model_power4.c~oprofile-3 arch/ppc64/oprofile/op_model_power4.c diff -puN arch/ppc64/oprofile/common.c~oprofile-3 arch/ppc64/oprofile/common.c --- linux-2.5/arch/ppc64/oprofile/common.c~oprofile-3 2004-08-15 06:18:15.778542250 +1000 +++ linux-2.5-anton/arch/ppc64/oprofile/common.c 2004-08-15 06:19:13.001083615 +1000 @@ -52,17 +52,10 @@ static int op_ppc64_setup(void) static void op_ppc64_shutdown(void) { - /* - * We need to be sure we have cleared all pending exceptions before - * removing the interrupt handler. For the moment we play it safe and - * leave it in - */ -#if 0 mb(); /* Remove our interrupt handler. We may be removing this module. */ perf_irq = save_perf_irq; -#endif } static void op_ppc64_cpu_start(void *dummy) _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 17 04:12:38 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 17 Aug 2004 04:12:38 +1000 Subject: [PATCH] [ppc64] add missing EXPORT_SYMBOLS for oprofile In-Reply-To: <20040816181134.GO5637@krispykreme> References: <20040816180847.GM5637@krispykreme> <20040816180957.GN5637@krispykreme> <20040816181134.GO5637@krispykreme> Message-ID: <20040816181238.GP5637@krispykreme> Add some missing exports, required for oprofile to be compiled as a module. 
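As background, a minimal sketch (hypothetical file and symbol names, not part of the patch) of why the exports are needed: a symbol defined in built-in kernel code is only resolvable from a loadable module if it is explicitly exported, otherwise loading the module fails with an unresolved symbol.

/* Built-in code, e.g. arch/ppc64/kernel/example.c (hypothetical) */
#include <linux/module.h>

int example_core_helper(int arg)        /* hypothetical symbol */
{
        return arg + 1;
}
EXPORT_SYMBOL(example_core_helper);     /* without this line, a module that
                                         * calls example_core_helper() cannot
                                         * be loaded */

The patch below does exactly this for rtas, smp_call_function, ppc64_enable_pmcs and perf_irq, which the oprofile module uses.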
Signed-off-by: Anton Blanchard diff -puN arch/ppc64/kernel/rtas.c~oprofile-4 arch/ppc64/kernel/rtas.c --- foobar2/arch/ppc64/kernel/rtas.c~oprofile-4 2004-08-16 02:50:42.661849555 +1000 +++ foobar2-anton/arch/ppc64/kernel/rtas.c 2004-08-16 02:50:42.705846173 +1000 @@ -37,6 +37,8 @@ struct rtas_t rtas = { .lock = SPIN_LOCK_UNLOCKED }; +EXPORT_SYMBOL(rtas); + char rtas_err_buf[RTAS_ERROR_LOG_MAX]; spinlock_t rtas_data_buf_lock = SPIN_LOCK_UNLOCKED; diff -puN arch/ppc64/kernel/smp.c~oprofile-4 arch/ppc64/kernel/smp.c --- foobar2/arch/ppc64/kernel/smp.c~oprofile-4 2004-08-16 02:50:42.668849017 +1000 +++ foobar2-anton/arch/ppc64/kernel/smp.c 2004-08-16 02:50:42.701846481 +1000 @@ -753,6 +753,8 @@ out: return ret; } +EXPORT_SYMBOL(smp_call_function); + void smp_call_function_interrupt(void) { void (*func) (void *info); diff -puN arch/ppc64/kernel/sysfs.c~oprofile-4 arch/ppc64/kernel/sysfs.c --- foobar2/arch/ppc64/kernel/sysfs.c~oprofile-4 2004-08-16 02:50:42.674848556 +1000 +++ foobar2-anton/arch/ppc64/kernel/sysfs.c 2004-08-16 02:50:42.706846097 +1000 @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -172,6 +173,8 @@ void ppc64_enable_pmcs(void) } #endif +EXPORT_SYMBOL(ppc64_enable_pmcs); + /* XXX convert to rusty's on_one_cpu */ static unsigned long run_on_cpu(unsigned long cpu, unsigned long (*func)(unsigned long), diff -puN arch/ppc64/kernel/traps.c~oprofile-4 arch/ppc64/kernel/traps.c --- foobar2/arch/ppc64/kernel/traps.c~oprofile-4 2004-08-16 02:50:42.680848095 +1000 +++ foobar2-anton/arch/ppc64/kernel/traps.c 2004-08-16 02:50:42.703846327 +1000 @@ -505,6 +505,8 @@ static void dummy_perf(struct pt_regs *r void (*perf_irq)(struct pt_regs *) = dummy_perf; +EXPORT_SYMBOL(perf_irq); + void PerformanceMonitorException(struct pt_regs *regs) { _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 17 04:13:29 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 17 Aug 2004 04:13:29 +1000 Subject: [PATCH] [ppc64] Fix oprofile error messages In-Reply-To: <20040816181238.GP5637@krispykreme> References: <20040816180847.GM5637@krispykreme> <20040816180957.GN5637@krispykreme> <20040816181134.GO5637@krispykreme> <20040816181238.GP5637@krispykreme> Message-ID: <20040816181329.GQ5637@krispykreme> Clean up an oprofile error message, it was missing a newline. Signed-off-by: Anton Blanchard diff -puN arch/ppc64/kernel/sysfs.c~oprofile-5 arch/ppc64/kernel/sysfs.c --- foobar2/arch/ppc64/kernel/sysfs.c~oprofile-5 2004-08-16 03:10:39.622631282 +1000 +++ foobar2-anton/arch/ppc64/kernel/sysfs.c 2004-08-16 03:23:08.337622810 +1000 @@ -147,8 +147,9 @@ void ppc64_enable_pmcs(void) reset = 0; ret = plpar_hcall_norets(H_PERFMON, set, reset); if (ret) - printk(KERN_ERR "H_PERFMON call returned %d", - ret); + printk(KERN_ERR "H_PERFMON call on cpu %u " + "returned %d\n", + smp_processor_id(), ret); break; default: _ ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From haveblue at us.ibm.com Tue Aug 17 05:00:31 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Mon, 16 Aug 2004 12:00:31 -0700 Subject: disabling kernel access to memory In-Reply-To: <4120E6A3.2040305@austin.ibm.com> References: <20040813182547.GA4228@w-mikek2.beaverton.ibm.com> <16671.57750.537610.762907@cargo.ozlabs.ibm.com> <4120E6A3.2040305@austin.ibm.com> Message-ID: <1092682831.15667.4370.camel@nighthawk> On Mon, 2004-08-16 at 09:53, Joel Schopp wrote: > >>When creating artificial 'holes' in the physical address space, > >>I would really like to disable all access to this memory in an > >>effort to catch anyone making incorrect accesses. Of course, > >>these holes are at least 16MB (min LMB size) in size. Is there > >>an 'easy' way to prevent all access to these holes? > > > > > > You might be able to hand them back to the hypervisor using the DLPAR > > calls. > > I think the idea is that DLPAR calls are the hard part. Doing an > isolate requires all the PTEs and TCEs don't reference the memory for > instance. That shouldn't be too hard. This is for debugging anyway, right Mike? Put a check in the htab_pte_insert() (or whatever the hardware insertion call) is to scream if you attempt to set a hardware pte to one of the empty areas. Then go kill all the Linux ptes, effectively (if these were real functions (you shouldn't have to do this anyway if all of the pages from that area are out of the allocator): for_each_task() for_each_pgd() for_each_pmd() ... Then, kill the kernel mappings. > Back in my userspace days I remember threading libraries liked to use > mprotect for such things. Linux kernel has mprotect and sys_mprotect, > not sure if they will fit your needs or not. Those only apply to userspace pages. I think Mike want to make sure to kill all accesses to the memory, including from the kernel. -- Dave ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From kravetz at us.ibm.com Tue Aug 17 09:48:16 2004 From: kravetz at us.ibm.com (Mike Kravetz) Date: Mon, 16 Aug 2004 16:48:16 -0700 Subject: disabling kernel access to memory In-Reply-To: <1092682831.15667.4370.camel@nighthawk> References: <20040813182547.GA4228@w-mikek2.beaverton.ibm.com> <16671.57750.537610.762907@cargo.ozlabs.ibm.com> <4120E6A3.2040305@austin.ibm.com> <1092682831.15667.4370.camel@nighthawk> Message-ID: <20040816234816.GF4423@w-mikek2.beaverton.ibm.com> On Mon, Aug 16, 2004 at 12:00:31PM -0700, Dave Hansen wrote: > > That shouldn't be too hard. This is for debugging anyway, right Mike? > Put a check in the htab_pte_insert() (or whatever the hardware insertion > call) is to scream if you attempt to set a hardware pte to one of the > empty areas. Then go kill all the Linux ptes, effectively (if these > were real functions (you shouldn't have to do this anyway if all of the > pages from that area are out of the allocator): > > for_each_task() > for_each_pgd() > for_each_pmd() > ... > > Then, kill the kernel mappings. > Thanks, I'll give that a try. > > Back in my userspace days I remember threading libraries liked to use > > mprotect for such things. Linux kernel has mprotect and sys_mprotect, > > not sure if they will fit your needs or not. > > Those only apply to userspace pages. I think Mike want to make sure to > kill all accesses to the memory, including from the kernel. Yeah, I'm mostly/only interested in access from the kernel. Right now, I'm slowly finding places in the kernel that assume the phys -> virt translations are linear. 
When I can get a kernel to boot where the translations are nonlinear, then I'll look into the debug stuff. -- Mike ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Tue Aug 17 12:54:55 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Mon, 16 Aug 2004 21:54:55 -0500 Subject: UP load_up_fpu crash (2.6.8-rc2) In-Reply-To: <16647.1612.244643.858755@cargo.ozlabs.ibm.com> References: <1090651390.3592.11.camel@booger> <16647.1612.244643.858755@cargo.ozlabs.ibm.com> Message-ID: <1092711295.3081.22.camel@booger> On Tue, 2004-07-27 at 20:50, Paul Mackerras wrote: > Nathan Lynch writes: > > > We seem to be broken with CONFIG_SMP=n on 2.6.8-rc2 and 2.6.8-rc1-mm1: > > > > Freeing unused kernel memory: 280k freed > > INIT: version 2.85 booting > > Vector: 300 (Data Access) at [c00000003f043bb0] > > pc: c00000000000bab0: .load_up_fpu+0xb0/0x16c > > lr: 00000000400272b8 > > sp: c00000003f043e30 > > msr: 8000000000003032 > > dar: 108 > > dsisr: 40000000 > > current = 0xc00000003f03d440 > > paca = 0xc0000000003cc000 > > pid = 327, comm = hotplug > > enter ? for help > > mon> t > > [c00000003f043e30] c00000000000b4d8 .handle_page_fault+0x20/0x40 > > (unreliable) > > --- Exception: 801 (FPU Unavailable) at 000000004000b908 > > SP (ffffe480) is in userspace > > This is very puzzling. It appears that we have taken a FPU > unavailable trap from userspace, which is fine, but then it looks like > we think some other task owns the FPU at the moment, and that task is > a kernel thread. > > We are crashing because last_task_used_math->thread.regs is NULL. > That should only happen for a kernel thread, but last_task_used_math > should never point to a kernel thread. The only place that > last_task_used_math gets set to a non-NULL value is in load_up_fpu, > and that should only be called if we get a FPU unavailable trap from > usermode. > > It would be very useful to see what last_task_used_math contains at > the time of the crash, and see what last_task_used_math->comm is, so > we can work out whether the task that owns the FPU is in fact a kernel > thread - in which case we need to work out how last_task_used_math is > getting to point at it - or if it isn't a kernel thread, in which case > we need to work out why task->thread.regs is NULL for that task. Sorry to take so long to investigate this further. 
Still happens with 2.6.8.1-mm1: Vector: 300 (Data Access) at [c0000003df1bfbb0] pc: c00000000000b8b0: .load_up_fpu+0xb0/0x16c lr: 00000000400272b8 sp: c0000003df1bfe30 msr: 8000000000003032 dar: 108 dsisr: 40000000 current = 0xc0000003df1bb440 paca = 0xc0000000003f0000 pid = 836, comm = hotplug mon> t [c0000003df1bfe30] c00000000000b2d8 .handle_page_fault+0x20/0x40 (unreliable) --- Exception: 801 (FPU Unavailable) at 000000004000b908 SP (ffffe480) is in userspace mon> ls last_task_used_math last_task_used_math: c0000000005ede90 mon> d c0000000005ede90 c0000000005ede90 c00000000f6151c0 0000000000000000 |.....aQ.........| c0000000005edea0 0000000000000000 0000000000000000 |................| c0000000005edeb0 0000000000000000 0000000000000000 |................| c0000000005edec0 0000000000000000 0000000000000000 |................| mon> d c00000000f6151c0 c00000000f6151c0 0000000000000002 c00000000f618000 |.............a..| c00000000f6151d0 0000000400000000 0000000000000100 |................| c00000000f6151e0 0000000000000000 ffffffff00000073 |...............s| c00000000f6151f0 0000007800000000 0000000000100100 |...x............| mon> c00000000f615200 0000000000200200 0000000000000000 |..... ..........| c00000000f615210 000000003b9ac985 0000000000000065 |....;..........e| c00000000f615220 00001a46ac04c868 0000000000000000 |...F...h........| c00000000f615230 0000000000000000 0000000000000001 |................| mon> c00000000f615240 0000004d00000000 c00000000f614988 |...M.........aI.| c00000000f615250 c00000000044f8a8 c00000000f615258 |.....D.......aRX| c00000000f615260 c00000000f615258 c00000000f615268 |.....aRX.....aRh| c00000000f615270 c00000000f615268 c0000000003e0bb0 |.....aRh.....>..| mon> c00000000f615280 c0000000003e0bb0 c000000000448d30 |.....>.......D.0| c00000000f615290 0000000000000000 0000000000000000 |................| c00000000f6152a0 0000000000000000 8000000000000001 |................| c00000000f6152b0 0000000100000000 c00000000044f820 |.............D. | mon> c00000000f6152c0 c00000000044f820 c00000000f614a18 |.....D. .....aJ.| c00000000f6152d0 c00000000f57e358 c00000000044f928 |.....W.X.....D.(| c00000000f6152e0 c00000000044f928 c00000000f6151c0 |.....D.(.....aQ.| c00000000f6152f0 c00000000f615318 c00000000f615318 |.....aS......aS.| mon> c00000000f615300 c00000000f615308 0000000100000001 |.....aS.........| c00000000f615310 c00000000f6151c0 c00000000f6152f0 |.....aQ......aR.| c00000000f615320 c00000000f6152f0 c00000000e1294b0 |.....aR.........| c00000000f615330 c00000000e1294b0 c00000000f615360 |.............aS`| mon> c00000000f615340 c00000000f615360 c00000000f615350 |.....aS`.....aSP| c00000000f615350 0000000100000001 c00000000f6151c0 |.............aQ.| c00000000f615360 c00000000f615338 c00000000f615338 |.....aS8.....aS8| c00000000f615370 c00000000e1394b0 c00000000e1394b0 |................| mon> c00000000f615380 c00000000f614ac0 c00000000044f9e0 |.....aJ......D..| c00000000f615390 c00000000044f9f8 0000000000000001 |.....D..........| c00000000f6153a0 c00000000044f820 c00000000044f9e0 |.....D. .....D..| c00000000f6153b0 c00000000044f9e0 c00000000e13f680 |.....D..........| mon> c00000000f6153c0 c00000000e13f680 c00000000f614b08 |.............aK.| c00000000f6153d0 c00000000044fa28 c00000000044fa40 |.....D.(.....D.@| c00000000f6153e0 0000000000000001 c00000000044f820 |.............D. 
| c00000000f6153f0 c00000000044fa28 c00000000044fa28 |.....D.(.....D.(| mon> c00000000f615400 c00000000e14f680 c00000000e14f680 |................| c00000000f615410 c00000000f615410 c00000000f615410 |.....aT......aT.| c00000000f615420 0000000000000000 0000000000000000 |................| c00000000f615430 0000000000000000 0000000000000000 |................| mon> c00000000f615440 0000000000000000 0000000000000000 |................| c00000000f615450 0000000000000000 0000000000000000 |................| c00000000f615460 0000000000000000 0000000000000000 |................| c00000000f615470 0000000000000000 0000000000000000 |................| mon> c00000000f615480 0000000000000000 000000004b87ad6e |............K..n| c00000000f615490 c0000000005ae2c0 c00000000f6151c0 |.....Z.......aQ.| c00000000f6154a0 0000000000000000 0000000000000000 |................| c00000000f6154b0 0000000000000329 0000000000000000 |.......)........| mon> c00000000f6154c0 0000000000000000 00000000000005c1 |................| c00000000f6154d0 0000000000000000 0000000000000000 |................| c00000000f6154e0 0000000000000000 0000000000000148 |...............H| c00000000f6154f0 000000000000002a 0000000000000005 |.......*........| mon> c00000000f615500 0000000000000000 0000000000000000 |................| c00000000f615510 0000000000000000 0000000000000000 |................| c00000000f615520 0000000000000000 0000000000000000 |................| c00000000f615530 c000000000469108 fffffeff00000000 |.....F..........| mon> c00000000f615540 ffffffff00000000 c0000000004690b8 |.............F..| c00000000f615550 ffffffffffffffff ffffffffffffffff |................| c00000000f615560 ffffffffffffffff ffffffffffffffff |................| c00000000f615570 ffffffffffffffff ffffffffffffffff |................| mon> c00000000f615580 0000000000800000 ffffffffffffffff |................| c00000000f615590 0000000000000000 ffffffffffffffff |................| c00000000f6155a0 ffffffffffffffff ffffffffffffffff |................| c00000000f6155b0 000000000000f800 000000000000f800 |................| c00000000f6155c0 0000000000000400 0000000000000400 |................| c00000000f6155d0 0000000000008000 0000000000008000 |................| c00000000f6155e0 ffffffffffffffff ffffffffffffffff |................| c00000000f6155f0 ffffffffffffffff ffffffffffffffff |................| mon> c00000000f615600 0000000000000400 0000000000000400 |................| c00000000f615610 00000000000c8000 00000000000c8000 |................| c00000000f615620 0000696e69740065 7200000000000000 |..init.er.......| c00000000f615630 0000000000000000 0000000000000000 |................| So it looks like last_task_used_math points to init? Nathan ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 17 13:40:48 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 17 Aug 2004 13:40:48 +1000 Subject: Real mode SLB miss Message-ID: <20040817034048.GD9637@zax> This probably wants testing on Power4 iSeries, at least, before pushing to Andrew. This patch makes the PPC64 SLB miss handler run in real mode (MMU off) for its whole duration, on pSeries machines. Avoiding the rfid used to turn relocation on saves some 70-80 cycles on Power4 and Power5. Not having to save and restore SRR0 and SRR1 saves a few more, and means we no longer need an extra save space (for r3) for the SLB miss. Overall there's around a 27% speedup on a p630 (for a userspace SLB miss). 
Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/kernel/head.S =================================================================== --- working-2.6.orig/arch/ppc64/kernel/head.S 2004-08-09 09:51:38.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/head.S 2004-08-17 13:33:11.541468824 +1000 @@ -199,6 +199,7 @@ #define EX_R12 24 #define EX_R13 32 #define EX_SRR0 40 +#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ #define EX_DAR 48 #define EX_LR 48 /* SLB miss saves LR, but not DAR */ #define EX_DSISR 56 @@ -446,21 +447,13 @@ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACASLBR3(r13) + std r3,PACA_EXSLB+EX_R3(r13) mfspr r9,SPRG1 std r9,PACA_EXSLB+EX_R13(r13) mfcr r9 - clrrdi r12,r13,32 /* get high part of &label */ - mfmsr r10 - mfspr r11,SRR0 /* save SRR0 */ - ori r12,r12,(.do_slb_miss)@l - ori r10,r10,MSR_IR|MSR_DR /* DON'T set RI for SLB miss */ - mtspr SRR0,r12 mfspr r12,SRR1 /* and SRR1 */ - mtspr SRR1,r10 mfspr r3,DAR - rfid - b . /* prevent speculative execution */ + b .do_slb_miss /* Rel. branch works in real mode */ STD_EXCEPTION_PSERIES(0x400, InstructionAccess) @@ -474,21 +467,13 @@ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACASLBR3(r13) + std r3,PACA_EXSLB+EX_R3(r13) mfspr r9,SPRG1 std r9,PACA_EXSLB+EX_R13(r13) mfcr r9 - clrrdi r12,r13,32 /* get high part of &label */ - mfmsr r10 - mfspr r11,SRR0 /* save SRR0 */ - ori r12,r12,(.do_slb_miss)@l - ori r10,r10,MSR_IR|MSR_DR /* DON'T set RI for SLB miss */ - mtspr SRR0,r12 mfspr r12,SRR1 /* and SRR1 */ - mtspr SRR1,r10 - mr r3,r11 /* SRR0 is faulting address */ - rfid - b . /* prevent speculative execution */ + mfspr r3,SRR0 /* SRR0 is faulting address */ + b .do_slb_miss /* Rel. branch works in real mode */ STD_EXCEPTION_PSERIES(0x500, HardwareInterrupt) STD_EXCEPTION_PSERIES(0x600, Alignment) @@ -630,8 +615,7 @@ DataAccessSLB_Iseries: mtspr SPRG1,r13 /* save r13 */ EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) - std r3,PACASLBR3(r13) - ld r11,PACALPPACA+LPPACASRR0(r13) + std r3,PACA_EXSLB+EX_R3(r13) ld r12,PACALPPACA+LPPACASRR1(r13) mfspr r3,DAR b .do_slb_miss @@ -642,10 +626,9 @@ InstructionAccessSLB_Iseries: mtspr SPRG1,r13 /* save r13 */ EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) - std r3,PACASLBR3(r13) - ld r11,PACALPPACA+LPPACASRR0(r13) + std r3,PACA_EXSLB+EX_R3(r13) ld r12,PACALPPACA+LPPACASRR1(r13) - mr r3,r11 + ld r3,PACALPPACA+LPPACASRR0(r13) b .do_slb_miss MASKABLE_EXCEPTION_ISERIES(0x500, HardwareInterrupt) @@ -1176,7 +1159,6 @@ mflr r10 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ bl .slb_allocate /* handle it */ @@ -1184,9 +1166,11 @@ /* All done -- return from exception. 
*/ ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACASLBR3(r13) + ld r3,PACA_EXSLB+EX_R3(r13) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ mtlr r10 @@ -1199,8 +1183,10 @@ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ .machine pop +#ifdef CONFIG_PPC_ISERIES mtspr SRR0,r11 mtspr SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ ld r9,PACA_EXSLB+EX_R9(r13) ld r10,PACA_EXSLB+EX_R10(r13) ld r11,PACA_EXSLB+EX_R11(r13) Index: working-2.6/include/asm-ppc64/paca.h =================================================================== --- working-2.6.orig/include/asm-ppc64/paca.h 2004-08-09 09:52:53.000000000 +1000 +++ working-2.6/include/asm-ppc64/paca.h 2004-08-17 13:33:35.437564584 +1000 @@ -78,7 +78,6 @@ u64 exmc[8]; /* used for machine checks */ u64 exslb[8]; /* used for SLB/segment table misses * on the linear mapping */ - u64 slb_r3; /* spot to save R3 on SLB miss */ mm_context_t context; u16 slb_cache[SLB_CACHE_ENTRIES]; u16 slb_cache_ptr; Index: working-2.6/arch/ppc64/kernel/asm-offsets.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/asm-offsets.c 2004-08-09 09:51:38.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/asm-offsets.c 2004-08-17 13:33:59.597486608 +1000 @@ -93,7 +93,6 @@ DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); - DEFINE(PACASLBR3, offsetof(struct paca_struct, slb_r3)); #ifdef CONFIG_HUGETLB_PAGE DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs)); #endif /* CONFIG_HUGETLB_PAGE */ -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From rajarshi at in.ibm.com Wed Aug 18 01:51:39 2004 From: rajarshi at in.ibm.com (Rajarshi Das) Date: 17 Aug 2004 21:21:39 +0530 Subject: bootup problem on power3 Message-ID: <1092757903.1860.69.camel@darya> Hi, I am running a power 3 (type 7044-170) uniprocessor machine and have a vanilla 2.6.7 kernel built on it. However, when I boot the kernel, I get the following message and the system hangs : -------------------------------------------------------- OF stdout is : /pci at fef00000/isa at b/serial at i3f8 found display : /pci at fef00000/display at 11 Opening displays... opening display : /pci at fef00000/display at 11... done instantiating rtas at 0x000000003ffb5000... done 0000000000000000 : booting cpu /cpus/PowerPC,POWER3 at 0 opening PHB /pci at fef00000... done opening PHB /pci at fee00000... done Setting up bi display... Initializing fake screen: display width 0000000000000400 height 0000000000000300 depth 0000000000000008 linebytes0 Addr of fb: 00000000c0000000 Calling quiesce ... returning from prom_init ------------------------------------------------------------------- I am using a default config file with 'module support' not enabled. This happens also with a 2.6.7 ameslab kernel as well as a 2.6.8-rc2 vanilla kernel. Please suggest on how to work around this. Thanks and Regards, Rajarshi. ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From olof at austin.ibm.com Wed Aug 18 02:09:15 2004 From: olof at austin.ibm.com (Olof Johansson) Date: Tue, 17 Aug 2004 11:09:15 -0500 Subject: bootup problem on power3 In-Reply-To: <1092757903.1860.69.camel@darya> References: <1092757903.1860.69.camel@darya> Message-ID: <41222DAB.8060206@austin.ibm.com> Rajarshi Das wrote: > Hi, > I am running a power 3 (type 7044-170) uniprocessor machine and have a > vanilla 2.6.7 kernel built on it. However, when I boot the kernel, I get > the following message and the system hangs : It looks like you built with CONFIG_BOOTX_TEXT. Does it work if you run with the config file from arch/ppc64/configs/pSeries_defconfig? Thanks, Olof ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From olof at austin.ibm.com Wed Aug 18 04:21:04 2004 From: olof at austin.ibm.com (Olof Johansson) Date: Tue, 17 Aug 2004 13:21:04 -0500 Subject: [PATCH] [KDB] Use proper wrappers for udbg calls in KDB Message-ID: <41224C90.8080403@austin.ibm.com> Below patch uses the udbg_*() functions instead of calling ppc_md.udbg_* in KDB. This way, if udbg hasn't been setup right the machine will survive anyway. Since KDB only lives in the Ameslab tree at the moment, and that tree does not compile (hvconsole breakage), I'm not sure just what to do with this patch. I'll follow up with another one that fixes some paca_struct rewrite KDB breakage, but I have not been able to boot and test a kernel with the changes. -Olof -------------- next part -------------- A non-text attachment was scrubbed... Name: kdb-udbg.patch Type: text/x-patch Size: 1594 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040817/0b9e25f4/attachment.bin From olof at austin.ibm.com Wed Aug 18 04:24:33 2004 From: olof at austin.ibm.com (Olof Johansson) Date: Tue, 17 Aug 2004 13:24:33 -0500 Subject: [PATCH] [KDB] Fix build break in kdbasupport Message-ID: <41224D61.6000207@austin.ibm.com> The paca_struct cleanup broke KDB, here's the corresponding changes. Also remove a couple of compilation warnings for rtas calls. See previous email about ameslab breakage: Fixes are trivial but I haven't been able to build/boot a kernel with them because of hvconsole breakage. -Olof -------------- next part -------------- A non-text attachment was scrubbed... Name: kdb-paca-fix.patch Type: text/x-patch Size: 2350 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040817/0b8bcc2d/attachment.bin From nathanl at austin.ibm.com Wed Aug 18 04:30:11 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Tue, 17 Aug 2004 13:30:11 -0500 Subject: [patch] tweak schedule_timeout in __cpu_die Message-ID: <1092767410.23599.121.camel@pants.austin.ibm.com> Hi- The current code does schedule_timeout(HZ) when waiting for a cpu to die, which is a bit coarse and tends to limit the "throughput" of my stress tests :) Change the HZ timeout to HZ/5, increase the number of tries to 25 so the overall wait time is similar. In practice, I've never seen the loop need more than two iterations. 
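For illustration, the sleep-and-poll pattern involved, written as a generic sketch (hypothetical helper, not part of the patch). The total wait stays bounded at roughly tries * interval: the old code allowed 5 * HZ jiffies polled once a second, while the new code allows 25 * HZ/5 jiffies (the same roughly five-second ceiling) but rechecks every fifth of a second, so a cpu that dies quickly is noticed quickly.

#include <linux/sched.h>

/*
 * Sleep between polls rather than busy-waiting, so the waiting task
 * gives up the cpu.  Returns 0 once cond() is true, -1 on timeout.
 */
static int poll_until(int (*cond)(void *), void *arg,
                      int tries, unsigned long interval)
{
        while (tries--) {
                if (cond(arg))
                        return 0;
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(interval);
        }
        return -1;
}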
Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/smp.c~ppc64-cpu-death-sched-timeout arch/ppc64/kernel/smp.c --- 2.6.8.1-mm1/arch/ppc64/kernel/smp.c~ppc64-cpu-death-sched-timeout 2004-08-16 22:25:03.000000000 -0500 +++ 2.6.8.1-mm1-nathanl/arch/ppc64/kernel/smp.c 2004-08-16 22:25:03.000000000 -0500 @@ -272,13 +272,13 @@ void __cpu_die(unsigned int cpu) int cpu_status; unsigned int pcpu = get_hard_smp_processor_id(cpu); - for (tries = 0; tries < 5; tries++) { + for (tries = 0; tries < 25; tries++) { cpu_status = query_cpu_stopped(pcpu); if (cpu_status == 0) break; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); + schedule_timeout(HZ/5); } if (cpu_status != 0) { printk("Querying DEAD? cpu %i (%i) shows %i\n", _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From olof at austin.ibm.com Wed Aug 18 04:55:00 2004 From: olof at austin.ibm.com (Olof Johansson) Date: Tue, 17 Aug 2004 13:55:00 -0500 Subject: [PATCH] [TRIVIAL] ppc64: switch screen_info init to C99 Message-ID: <20040817185500.GA24300@4> Hi, Minor cleanup: Use C99 initializers for the screen_info struct. Signed-off-by: Olof Johansson --- linux-2.5-olof/arch/ppc64/kernel/setup.c | 15 ++++++--------- 1 files changed, 6 insertions(+), 9 deletions(-) diff -puN arch/ppc64/kernel/setup.c~c99-screen-info arch/ppc64/kernel/setup.c --- linux-2.5/arch/ppc64/kernel/setup.c~c99-screen-info 2004-08-17 11:45:51.900939936 -0500 +++ linux-2.5-olof/arch/ppc64/kernel/setup.c 2004-08-17 11:45:51.905939176 -0500 @@ -112,15 +112,12 @@ static struct notifier_block ppc64_panic * these processors use on existing boards. -- Dan */ struct screen_info screen_info = { - 0, 25, /* orig-x, orig-y */ - 0, /* unused */ - 0, /* orig-video-page */ - 0, /* orig-video-mode */ - 80, /* orig-video-cols */ - 0,0,0, /* ega_ax, ega_bx, ega_cx */ - 25, /* orig-video-lines */ - 1, /* orig-video-isVGA */ - 16 /* orig-video-points */ + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 }; /* _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Wed Aug 18 05:02:43 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Tue, 17 Aug 2004 14:02:43 -0500 Subject: [patch] tweak schedule_timeout in __cpu_die In-Reply-To: <1092767410.23599.121.camel@pants.austin.ibm.com> References: <1092767410.23599.121.camel@pants.austin.ibm.com> Message-ID: <41225653.9060309@austin.ibm.com> The patch looks fine to me. Wonder what would happen if we did away with the schedule_timeout all together and just did a tight loop, every so often checking the time? -Joel Nathan Lynch wrote: > Hi- > > The current code does schedule_timeout(HZ) when waiting for a cpu to > die, which is a bit coarse and tends to limit the "throughput" of my > stress tests :) > > Change the HZ timeout to HZ/5, increase the number of tries to 25 so the > overall wait time is similar. In practice, I've never seen the loop > need more than two iterations. 
> > Signed-off-by: Nathan Lynch > > --- > > > diff -puN arch/ppc64/kernel/smp.c~ppc64-cpu-death-sched-timeout arch/ppc64/kernel/smp.c > --- 2.6.8.1-mm1/arch/ppc64/kernel/smp.c~ppc64-cpu-death-sched-timeout 2004-08-16 22:25:03.000000000 -0500 > +++ 2.6.8.1-mm1-nathanl/arch/ppc64/kernel/smp.c 2004-08-16 22:25:03.000000000 -0500 > @@ -272,13 +272,13 @@ void __cpu_die(unsigned int cpu) > int cpu_status; > unsigned int pcpu = get_hard_smp_processor_id(cpu); > > - for (tries = 0; tries < 5; tries++) { > + for (tries = 0; tries < 25; tries++) { > cpu_status = query_cpu_stopped(pcpu); > > if (cpu_status == 0) > break; > set_current_state(TASK_UNINTERRUPTIBLE); > - schedule_timeout(HZ); > + schedule_timeout(HZ/5); > } > if (cpu_status != 0) { > printk("Querying DEAD? cpu %i (%i) shows %i\n", > > _ > > > > ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From linas at austin.ibm.com Wed Aug 18 05:52:48 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Tue, 17 Aug 2004 14:52:48 -0500 Subject: rtas_call uses kmalloc before the memory subsystem is up In-Reply-To: <16672.17307.578763.854775@cargo.ozlabs.ibm.com> References: <20040815221951.GK5637@krispykreme> <16672.17307.578763.854775@cargo.ozlabs.ibm.com> Message-ID: <20040817195248.GA14002@austin.ibm.com> Hi, Yes, Jake Moilanen reported this a month ago; a patch for this went out on 13 July; it was followed by a discussion with the email subject "RFC: Fix (another) bug in rtas logging". Is there a reason that patch was never sent upstream? (This is why I think having a bug tracker would be nice ... so we could see what the status of this stuff really is). (I just got back from vacation and have a backlog of 550 unread emails so far ...) --linas On Mon, Aug 16, 2004 at 03:18:19PM +1000, Paul Mackerras was heard to remark: > Anton Blanchard writes: > > > rtas_call is doing a kmalloc before the memory subsystem is up, but only > > when we hit an error. > > This is a quick-n-dirty hack to fix the problem. It's not completely > obvious what the proper solution looks like, unfortunately. > > Paul. > > diff -urN linux-2.5/arch/ppc64/kernel/rtas.c test25/arch/ppc64/kernel/rtas.c > --- linux-2.5/arch/ppc64/kernel/rtas.c 2004-08-03 08:07:43.000000000 +1000 > +++ test25/arch/ppc64/kernel/rtas.c 2004-08-16 14:47:22.147162600 +1000 > @@ -165,9 +165,12 @@ > > /* Log the error in the unlikely case that there was one. */ > if (unlikely(logit)) { > - buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); > - if (buff_copy) { > - memcpy(buff_copy, rtas_err_buf, RTAS_ERROR_LOG_MAX); > + buff_copy = rtas_err_buf; > + if (mem_init_done) { > + buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); > + if (buff_copy) > + memcpy(buff_copy, rtas_err_buf, > + RTAS_ERROR_LOG_MAX); > } > } > > @@ -176,7 +179,8 @@ > > if (buff_copy) { > log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); > - kfree(buff_copy); > + if (mem_init_done) > + kfree(buff_copy); > } > return ret; > } > ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 18 05:54:09 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Tue, 17 Aug 2004 14:54:09 -0500 Subject: [patch] tweak schedule_timeout in __cpu_die In-Reply-To: <41225653.9060309@austin.ibm.com> References: <1092767410.23599.121.camel@pants.austin.ibm.com> <41225653.9060309@austin.ibm.com> Message-ID: <1092772449.23599.154.camel@pants.austin.ibm.com> On Tue, 2004-08-17 at 14:02, Joel Schopp wrote: > Wonder what would happen if we did away with the schedule_timeout all > together and just did a tight loop, every so often checking the time? Gross. It's usually bad manners to have a task busy-loop when you can just put it to sleep and wake it up a short time later without penalty. There's no need to tie up the cpu in this case. Nathan ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From linas at austin.ibm.com Wed Aug 18 07:47:03 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Tue, 17 Aug 2004 16:47:03 -0500 Subject: [PATCH] 2.6 PPC64 memcpy_toio function signature Message-ID: <20040817214703.GD14002@austin.ibm.com> Hi Paul, Could you forward upstream this stunningly trivial janitorial patch for which I'd received a complaint about? Signed-off-by: Linas Vepstas --linas -------------- next part -------------- ===== eeh.h 1.13 vs edited ===== --- 1.13/include/asm-ppc64/eeh.h Thu Jul 29 23:19:48 2004 +++ edited/eeh.h Tue Aug 17 16:37:16 2004 @@ -244,7 +244,7 @@ } } -static inline void eeh_memcpy_toio(void *dest, void *src, unsigned long n) { +static inline void eeh_memcpy_toio(void *dest, const void *src, unsigned long n) { void *vdest = (void *)IO_TOKEN_TO_ADDR(dest); while(n && (!EEH_CHECK_ALIGN(vdest, 4) || !EEH_CHECK_ALIGN(src, 4))) { From linas at austin.ibm.com Wed Aug 18 08:16:52 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Tue, 17 Aug 2004 17:16:52 -0500 Subject: Unapplied patches? Message-ID: <20040817221652.GE14002@austin.ibm.com> Hi Paul, I've emailed you six or 8 patches about 2-4 weeks ago, and after going through my email box, it seems that none of these have been applied, nor have I received any answer from you or the mailing list one way or the other on these. What's the story on that? Can I get you to go through your email and start going through these? They're kind-of burning holes in my pockets & they're causing build breaks (todays bkbits kernel won't compile for ppc64 due to the unapplied patches.) --linas ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From olof at austin.ibm.com Wed Aug 18 09:57:42 2004 From: olof at austin.ibm.com (Olof Johansson) Date: Tue, 17 Aug 2004 18:57:42 -0500 Subject: rtas_call uses kmalloc before the memory subsystem is up In-Reply-To: <20040817195248.GA14002@austin.ibm.com> References: <20040815221951.GK5637@krispykreme> <16672.17307.578763.854775@cargo.ozlabs.ibm.com> <20040817195248.GA14002@austin.ibm.com> Message-ID: <41229B76.8060808@austin.ibm.com> Linas Vepstas wrote: > (This is why I think having a bug tracker would be nice ... > so we could see what the status of this stuff really is). There's a brand new one at http://ozlabs.org/ppc64-patches/, it feeds off of the mailing list. -Olof ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From haveblue at us.ibm.com Wed Aug 18 09:58:23 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Tue, 17 Aug 2004 16:58:23 -0700 Subject: Unapplied patches? 
In-Reply-To: <20040817221652.GE14002@austin.ibm.com> References: <20040817221652.GE14002@austin.ibm.com> Message-ID: <1092787103.5415.144.camel@nighthawk> On Tue, 2004-08-17 at 15:16, Linas Vepstas wrote: > I've emailed you six or 8 patches about 2-4 weeks ago, and after > going through my email box, it seems that none of these have been > applied, nor have I received any answer from you or the mailing list one > way or the other on these. What's the story on that? Can I get you to > go through your email and start going through these? They're kind-of > burning holes in my pockets & they're causing build breaks (todays > bkbits kernel won't compile for ppc64 due to the unapplied patches.) If you know what patches are missing, it never hurts to resend them. You'll also help any other people who are also having problems building a recent -bk tree. -- Dave ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Wed Aug 18 11:25:29 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Wed, 18 Aug 2004 11:25:29 +1000 Subject: SLB bolting for iSeries Message-ID: <20040818012529.GE11499@zax> Unfortunately, I don't have easy access to an SLB (i.e. Power4) iSeries machine to test the patch below. From discussions with the iSeries architecture people, I believe it should work, but... Can someone with access to a suitable machine please test this? On pSeries SLB machines we "bolt" an SLB entry for the first segment of the vmalloc() area into the SLB, to reduce the SLB miss rate. This caused problems, so was disabled, on iSeries because the bolted entry was not restored properly on shared processor switch. This patch adds information about the bolted vmalloc segment to the lpar map, which should be restored on shared processor switch. Index: working-2.6/arch/ppc64/mm/slb.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/slb.c 2004-08-09 09:51:38.000000000 +1000 +++ working-2.6/arch/ppc64/mm/slb.c 2004-08-18 11:09:00.027529840 +1000 @@ -36,7 +36,6 @@ static void slb_add_bolted(void) { -#ifndef CONFIG_PPC_ISERIES WARN_ON(!irqs_disabled()); /* If you change this make sure you change SLB_NUM_BOLTED @@ -49,7 +48,6 @@ SLB_VSID_KERNEL, 1); asm volatile("isync":::"memory"); -#endif } /* Flush all user entries from the segment table of the current processor. */ Index: working-2.6/arch/ppc64/kernel/head.S =================================================================== --- working-2.6.orig/arch/ppc64/kernel/head.S 2004-08-09 09:51:38.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/head.S 2004-08-18 11:09:00.029529536 +1000 @@ -580,7 +580,7 @@ * VSID generation algorithm. See include/asm/mmu_context.h. 
*/ - .llong 1 /* # ESIDs to be mapped by hypervisor */ + .llong 2 /* # ESIDs to be mapped by hypervisor */ .llong 1 /* # memory ranges to be mapped by hypervisor */ .llong STAB0_PAGE /* Page # of segment table within load area */ .llong 0 /* Reserved */ @@ -588,8 +588,12 @@ .llong 0 /* Reserved */ .llong 0 /* Reserved */ .llong 0 /* Reserved */ - .llong 0x0c00000000 /* ESID to map (Kernel at EA = 0xC000000000000000) */ - .llong 0x06a99b4b14 /* VSID to map (Kernel at VA = 0x6a99b4b140000000) */ + .llong 0xc00000000 /* KERNELBASE ESID */ + .llong 0x6a99b4b14 /* KERNELBASE VSID */ + /* We have to list the bolted VMALLOC segment here, too, so that it + * will be restored on shared processor switch */ + .llong 0xd00000000 /* VMALLOCBASE ESID */ + .llong 0x08d12e6ab /* VMALLOCBASE VSID */ .llong 8192 /* # pages to map (32 MB) */ .llong 0 /* Offset from start of loadarea to start of map */ .llong 0x0006a99b4b140000 /* VPN of first page to map */ -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Wed Aug 18 13:52:51 2004 From: paulus at samba.org (Paul Mackerras) Date: Wed, 18 Aug 2004 13:52:51 +1000 Subject: Unapplied patches? In-Reply-To: <20040817221652.GE14002@austin.ibm.com> References: <20040817221652.GE14002@austin.ibm.com> Message-ID: <16674.53907.366376.276451@cargo.ozlabs.ibm.com> Linas, > I've emailed you six or 8 patches about 2-4 weeks ago, and after > going through my email box, it seems that none of these have been > applied, nor have I received any answer from you or the mailing list one Some have been sent upstream but not all of those have appeared in Linus' tree yet since Linus is away at the moment. Also I started travelling about the same time that you went on vacation, which made it difficult for me to process patches since I had no good way to test patches. Then when I came back I had a paper to write which distracted me from kernel work. I did want to get the notifier list for EEH isolation events in but you still had the "only if ethernet" policy code in there. We had a short conversation about that, and despite what you said, I still don't think it is right. I think we possibly need something that counts pending EEH errors and panics if the count exceeds a threshold instead. Your "PCI Config Space reads need EEH checking" depends on the notifier list patch, so I haven't sent that upstream. > way or the other on these. What's the story on that? Can I get you to > go through your email and start going through these? They're kind-of > burning holes in my pockets & they're causing build breaks (todays > bkbits kernel won't compile for ppc64 due to the unapplied patches.) It compiles fine if you turn off hotplug PCI. :) It certainly needs to be fixed, but I want it fixed properly. Regards, Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From benh at kernel.crashing.org Wed Aug 18 15:03:01 2004 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 18 Aug 2004 15:03:01 +1000 Subject: glibc fails in tst-timer4, __SI_TIMER needs its own case In-Reply-To: <20040815215142.GA18611@suse.de> References: <20040815215142.GA18611@suse.de> Message-ID: <1092805380.9536.178.camel@gaston> On Mon, 2004-08-16 at 07:51, Olaf Hering wrote: > Ben, > > can you have a look at this one? 
glibc make check fails since the > tst-timer4 test was added in April this year. I think __ST_TIMER needs > its own case. This patch against 2.6.8.1 fixes it for me. Hi ! Can you try that one instead ? ===== arch/ppc64/kernel/signal32.c 1.54 vs edited ===== --- 1.54/arch/ppc64/kernel/signal32.c 2004-07-05 20:27:10 +10:00 +++ edited/arch/ppc64/kernel/signal32.c 2004-08-18 15:10:20 +10:00 @@ -472,9 +472,13 @@ &d->si_addr); break; case __SI_POLL >> 16: - case __SI_TIMER >> 16: err |= __put_user(s->si_band, &d->si_band); err |= __put_user(s->si_fd, &d->si_fd); + break; + case __SI_TIMER >> 16: + err |= __put_user(s->si_tid, &d->si_tid); + err |= __put_user(s->si_overrun, &d->si_overrun); + err |= __put_user((u32)(u64)s->si_ptr, &d->si_ptr); break; case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ case __SI_MESGQ >> 16: ===== include/asm-ppc64/ppc32.h 1.15 vs edited ===== --- 1.15/include/asm-ppc64/ppc32.h 2004-05-10 21:25:48 +10:00 +++ edited/include/asm-ppc64/ppc32.h 2004-08-18 15:09:51 +10:00 @@ -56,8 +56,10 @@ /* POSIX.1b timers */ struct { - unsigned int _timer1; - unsigned int _timer2; + timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ } _timer; /* POSIX.1b signals */ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Wed Aug 18 15:19:09 2004 From: paulus at samba.org (Paul Mackerras) Date: Wed, 18 Aug 2004 15:19:09 +1000 Subject: upstream pending patches Message-ID: <16674.59086.234.146247@cargo.ozlabs.ibm.com> Since Linus is away at the moment, there is a considerable backlog of patches that I have sent to Andrew Morton but which haven't appeared in Linus' tree yet. This creates problems when people send me patches against the current state of Linus' tree which collide with patches that I have already sent off. I have rolled up the patches that I have sent that aren't in Linus' tree into one patch at http://ozlabs.org/~paulus/akpm-pending.patch. (I didn't include it in this message because it is too big for the list.) Here is the diffstat: arch/ppc/kernel/align.c | 163 ++++++++++++++++---------- arch/ppc/kernel/traps.c | 51 ++++++-- arch/ppc/kernel/vector.S | 4 arch/ppc64/kernel/eeh.c | 11 + arch/ppc64/kernel/idle.c | 14 +- arch/ppc64/kernel/pSeries_iommu.c | 2 arch/ppc64/kernel/prom.c | 15 +- arch/ppc64/kernel/ras.c | 2 arch/ppc64/kernel/rtasd.c | 108 +++++++++++++---- arch/ppc64/kernel/setup.c | 11 - arch/ppc64/kernel/smp.c | 20 +-- arch/ppc64/kernel/time.c | 46 +++++++ arch/ppc64/kernel/traps.c | 233 ++++++++++++++++++++------------------ arch/ppc64/kernel/vector.S | 4 arch/ppc64/kernel/xics.c | 2 arch/ppc64/lib/locks.c | 6 include/asm-ppc64/rtas.h | 119 ++++++++++--------- include/asm-ppc64/smp.h | 18 -- 18 files changed, 510 insertions(+), 319 deletions(-) If you are working on a patch which touches any of those files, please apply the akpm-pending.patch to a copy of Linus' tree and then send me a diff against that. Paul. ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From rajarshi at in.ibm.com Wed Aug 18 15:35:58 2004 From: rajarshi at in.ibm.com (Rajarshi Das) Date: 18 Aug 2004 11:05:58 +0530 Subject: bootup problem on power3 In-Reply-To: <41222DAB.8060206@austin.ibm.com> References: <1092757903.1860.69.camel@darya> <41222DAB.8060206@austin.ibm.com> Message-ID: <1092807364.1827.72.camel@darya> Using the pSeries default config, I get the following output : ------------------------------------------------------ Opening displays... opening display : /pci at fef00000/display at 11... done instantiating rtas at 0x000000003ffb5000... done 0000000000000000 : booting cpu /cpus/PowerPC,POWER3 at 0 ------------------------------------------------------ and the system hangs. Thanks, Rajarshi. On Tue, 2004-08-17 at 21:39, Olof Johansson wrote: > Rajarshi Das wrote: > > Hi, > > I am running a power 3 (type 7044-170) uniprocessor machine and have a > > vanilla 2.6.7 kernel built on it. However, when I boot the kernel, I get > > the following message and the system hangs : > > It looks like you built with CONFIG_BOOTX_TEXT. Does it work if you run > with the config file from arch/ppc64/configs/pSeries_defconfig? > > > Thanks, > > Olof > ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Wed Aug 18 17:21:36 2004 From: paulus at samba.org (Paul Mackerras) Date: Wed, 18 Aug 2004 17:21:36 +1000 Subject: New version of EEH notifier code Message-ID: <16675.896.864595.741521@cargo.ozlabs.ibm.com> Linas, How does this look to you? This is based on your code with the ethernet check removed and a counter added. It applies on top of Linus' tree plus akpm-pending.patch. Paul. diff -urN akpm-17aug/arch/ppc64/kernel/eeh.c akpm/arch/ppc64/kernel/eeh.c --- akpm-17aug/arch/ppc64/kernel/eeh.c 2004-08-18 13:51:07.000000000 +1000 +++ akpm/arch/ppc64/kernel/eeh.c 2004-08-18 17:20:19.417971232 +1000 @@ -17,29 +17,79 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include +#include +#include +#include #include #include -#include -#include #include -#include #include -#include -#include -#include +#include +#include #include #include -#include #include +#include #include "pci.h" #undef DEBUG +/** Overview: + * EEH, or "Extended Error Handling" is a PCI bridge technology for + * dealing with PCI bus errors that can't be dealt with within the + * usual PCI framework, except by check-stopping the CPU. Systems + * that are designed for high-availability/reliability cannot afford + * to crash due to a "mere" PCI error, thus the need for EEH. + * An EEH-capable bridge operates by converting a detected error + * into a "slot freeze", taking the PCI adapter off-line, making + * the slot behave, from the OS'es point of view, as if the slot + * were "empty": all reads return 0xff's and all writes are silently + * ignored. EEH slot isolation events can be triggered by parity + * errors on the address or data busses (e.g. during posted writes), + * which in turn might be caused by dust, vibration, humidity, + * radioactivity or plain-old failed hardware. + * + * Note, however, that one of the leading causes of EEH slot + * freeze events are buggy device drivers, buggy device microcode, + * or buggy device hardware. This is because any attempt by the + * device to bus-master data to a memory address that is not + * assigned to the device will trigger a slot freeze. (The idea + * is to prevent devices-gone-wild from corrupting system memory). 
+ * Buggy hardware/drivers will have a miserable time co-existing + * with EEH. + * + * Ideally, a PCI device driver, when suspecting that an isolation + * event has occured (e.g. by reading 0xff's), will then ask EEH + * whether this is the case, and then take appropriate steps to + * reset the PCI slot, the PCI device, and then resume operations. + * However, until that day, the checking is done here, with the + * eeh_check_failure() routine embedded in the MMIO macros. If + * the slot is found to be isolated, an "EEH Event" is synthesized + * and sent out for processing. + */ + +/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ #define BUID_HI(buid) ((buid) >> 32) #define BUID_LO(buid) ((buid) & 0xffffffff) -#define CONFIG_ADDR(busno, devfn) \ - (((((busno) & 0xff) << 8) | ((devfn) & 0xf8)) << 8) + +/* EEH event workqueue setup. */ +static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED; +LIST_HEAD(eeh_eventlist); +static void eeh_event_handler(void *); +DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL); + +static struct notifier_block *eeh_notifier_chain; + +/* + * If a device driver keeps reading an MMIO register in an interrupt + * handler after a slot isolation event has occurred, we assume it + * is broken and panic. This sets the threshold for how many read + * attempts we allow before panicking. + */ +#define EEH_MAX_FAILS 1000 +static atomic_t eeh_fail_count; /* RTAS tokens */ static int ibm_set_eeh_option; @@ -61,6 +111,7 @@ static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); static DEFINE_PER_CPU(unsigned long, false_positives); static DEFINE_PER_CPU(unsigned long, ignored_failures); +static DEFINE_PER_CPU(unsigned long, slot_resets); static int eeh_check_opts_config(struct device_node *dn, int class_code, int vendor_id, int device_id, @@ -71,7 +122,8 @@ * PCI device address resources into a red-black tree, sorted * according to the address range, so that given only an i/o * address, the corresponding PCI device can be **quickly** - * found. + * found. It is safe to perform an address lookup in an interrupt + * context; this ability is an important feature. * * Currently, the only customer of this code is the EEH subsystem; * thus, this code has been somewhat tailored to suit EEH better. @@ -340,6 +392,94 @@ #endif } +/* --------------------------------------------------------------- */ +/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */ + +/** + * eeh_register_notifier - Register to find out about EEH events. + * @nb: notifier block to callback on events + */ +int eeh_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&eeh_notifier_chain, nb); +} + +/** + * eeh_unregister_notifier - Unregister to an EEH event notifier. + * @nb: notifier block to callback on events + */ +int eeh_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&eeh_notifier_chain, nb); +} + +/** + * eeh_panic - call panic() for an eeh event that cannot be handled. + * The philosophy of this routine is that it is better to panic and + * halt the OS than it is to risk possible data corruption by + * oblivious device drivers that don't know better. + * + * @dev pci device that had an eeh event + * @reset_state current reset state of the device slot + */ +static void eeh_panic(struct pci_dev *dev, int reset_state) +{ + /* + * XXX We should create a seperate sysctl for this. + * + * Since the panic_on_oops sysctl is used to halt the system + * in light of potential corruption, we can use it here. 
+ */ + if (panic_on_oops) + panic("EEH: MMIO failure (%d) on device:%s %s\n", reset_state, + pci_name(dev), pci_pretty_name(dev)); + else { + __get_cpu_var(ignored_failures)++; + printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s %s\n", + reset_state, pci_name(dev), pci_pretty_name(dev)); + } +} + +/** + * eeh_event_handler - dispatch EEH events. The detection of a frozen + * slot can occur inside an interrupt, where it can be hard to do + * anything about it. The goal of this routine is to pull these + * detection events out of the context of the interrupt handler, and + * re-dispatch them for processing at a later time in a normal context. + * + * @dummy - unused + */ +static void eeh_event_handler(void *dummy) +{ + unsigned long flags; + struct eeh_event *event; + + while (1) { + spin_lock_irqsave(&eeh_eventlist_lock, flags); + event = NULL; + if (!list_empty(&eeh_eventlist)) { + event = list_entry(eeh_eventlist.next, struct eeh_event, list); + list_del(&event->list); + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + if (event == NULL) + break; + + printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device " + "%s %s\n", event->reset_state, + pci_name(event->dev), pci_pretty_name(event->dev)); + + atomic_set(&eeh_fail_count, 0); + notifier_call_chain (&eeh_notifier_chain, + EEH_NOTIFY_FREEZE, event); + + __get_cpu_var(slot_resets)++; + + pci_dev_put(event->dev); + kfree(event); + } +} + /** * eeh_token_to_phys - convert EEH address token to phys address * @token i/o token, should be address in the form 0xA.... @@ -374,9 +514,9 @@ * 0xffffffff). * * Probe to determine if an error actually occurred. If not return val. - * Otherwise panic. + * Otherwise queue up a slot isolation event notification. * - * Note this routine might be called in an interrupt context ... + * Note this routine is safe to call in an interrupt context. */ unsigned long eeh_check_failure(void *token, unsigned long val) { @@ -386,6 +526,8 @@ int ret; int rets[2]; unsigned long flags; + int rc, reset_state; + struct eeh_event *event; __get_cpu_var(total_mmio_ffs)++; @@ -399,21 +541,33 @@ return val; dn = pci_device_to_OF_node(dev); - if (!dn) { - pci_dev_put(dev); - return val; - } + if (!dn) + goto exit_put; /* Access to IO BARs might get this far and still not want checking. */ if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) || - dn->eeh_mode & EEH_MODE_NOCHECK) { - pci_dev_put(dev); - return val; - } + dn->eeh_mode & EEH_MODE_NOCHECK) + goto exit_put; - if (!dn->eeh_config_addr) { - pci_dev_put(dev); - return val; + if (!dn->eeh_config_addr) + goto exit_put; + + /* + * If we already have a pending isolation event for this + * slot, we know it's bad already, we don't need to check... 
+ */ + if (dn->eeh_mode & EEH_MODE_ISOLATED) { + atomic_inc(&eeh_fail_count); + if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) { + /* re-read the slot reset state */ + rets[0] = -1; + rtas_call(ibm_read_slot_reset_state, 3, 3, rets, + dn->eeh_config_addr, + BUID_HI(dn->phb->buid), + BUID_LO(dn->phb->buid)); + eeh_panic(dev, rets[0]); + } + goto exit_put; } /* @@ -427,45 +581,53 @@ dn->eeh_config_addr, BUID_HI(dn->phb->buid), BUID_LO(dn->phb->buid)); - if (ret == 0 && rets[1] == 1 && rets[0] >= 2) { - int log_event; + if (!(ret == 0 && rets[1] == 1 && rets[0] >= 2)) { + __get_cpu_var(false_positives)++; + goto exit_put; + } - spin_lock_irqsave(&slot_errbuf_lock, flags); - memset(slot_errbuf, 0, eeh_error_buf_size); + /* prevent repeated reports of this failure */ + dn->eeh_mode |= EEH_MODE_ISOLATED; - log_event = rtas_call(ibm_slot_error_detail, - 8, 1, NULL, dn->eeh_config_addr, - BUID_HI(dn->phb->buid), - BUID_LO(dn->phb->buid), NULL, 0, - virt_to_phys(slot_errbuf), - eeh_error_buf_size, - 2 /* Permanent Error */); - - if (log_event == 0) - log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, - 1 /* Fatal */); - - spin_unlock_irqrestore(&slot_errbuf_lock, flags); - - /* - * XXX We should create a separate sysctl for this. - * - * Since the panic_on_oops sysctl is used to halt - * the system in light of potential corruption, we - * can use it here. - */ - if (panic_on_oops) { - panic("EEH: MMIO failure (%d) on device:%s %s\n", - rets[0], pci_name(dev), pci_pretty_name(dev)); - } else { - __get_cpu_var(ignored_failures)++; - printk(KERN_INFO "EEH: MMIO failure (%d) on device:%s %s\n", - rets[0], pci_name(dev), pci_pretty_name(dev)); - } - } else { - __get_cpu_var(false_positives)++; + reset_state = rets[0]; + + spin_lock_irqsave(&slot_errbuf_lock, flags); + memset(slot_errbuf, 0, eeh_error_buf_size); + + rc = rtas_call(ibm_slot_error_detail, + 8, 1, NULL, dn->eeh_config_addr, + BUID_HI(dn->phb->buid), + BUID_LO(dn->phb->buid), NULL, 0, + virt_to_phys(slot_errbuf), + eeh_error_buf_size, + 2 /* Permanent Error */); + + if (rc == 0) + log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); + spin_unlock_irqrestore(&slot_errbuf_lock, flags); + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (event == NULL) { + eeh_panic(dev, reset_state); + goto exit_put; } + event->dev = dev; + event->dn = dn; + event->reset_state = reset_state; + + /* We may or may not be called in an interrupt context */ + spin_lock_irqsave(&eeh_eventlist_lock, flags); + list_add(&event->list, &eeh_eventlist); + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + + /* Most EEH events are due to device driver bugs. Having + * a stack trace will help the device-driver authors figure + * out what happened. So print that out. 
*/ + dump_stack(); + schedule_work(&eeh_event_wq); + + exit_put: pci_dev_put(dev); return val; } @@ -747,11 +909,13 @@ { unsigned int cpu; unsigned long ffs = 0, positives = 0, failures = 0; + unsigned long resets = 0; for_each_cpu(cpu) { ffs += per_cpu(total_mmio_ffs, cpu); positives += per_cpu(false_positives, cpu); failures += per_cpu(ignored_failures, cpu); + resets += per_cpu(slot_resets, cpu); } if (0 == eeh_subsystem_enabled) { @@ -761,8 +925,9 @@ seq_printf(m, "EEH Subsystem is enabled\n"); seq_printf(m, "eeh_total_mmio_ffs=%ld\n" "eeh_false_positives=%ld\n" - "eeh_ignored_failures=%ld\n", - ffs, positives, failures); + "eeh_ignored_failures=%ld\n" + "eeh_slot_resets=%ld\n", + ffs, positives, failures, resets); } return 0; diff -urN akpm-17aug/include/asm-ppc64/eeh.h akpm/include/asm-ppc64/eeh.h --- akpm-17aug/include/asm-ppc64/eeh.h 2004-07-31 00:40:07.000000000 +1000 +++ akpm/include/asm-ppc64/eeh.h 2004-08-18 15:22:34.419932600 +1000 @@ -20,8 +20,10 @@ #ifndef _PPC64_EEH_H #define _PPC64_EEH_H -#include #include +#include +#include +#include struct pci_dev; struct device_node; @@ -41,6 +43,7 @@ /* Values for eeh_mode bits in device_node */ #define EEH_MODE_SUPPORTED (1<<0) #define EEH_MODE_NOCHECK (1<<1) +#define EEH_MODE_ISOLATED (1<<2) extern void __init eeh_init(void); unsigned long eeh_check_failure(void *token, unsigned long val); @@ -75,7 +78,28 @@ #define EEH_RELEASE_DMA 3 int eeh_set_option(struct pci_dev *dev, int options); -/* + +/** + * Notifier event flags. + */ +#define EEH_NOTIFY_FREEZE 1 + +/** EEH event -- structure holding pci slot data that describes + * a change in the isolation status of a PCI slot. A pointer + * to this struct is passed as the data pointer in a notify callback. + */ +struct eeh_event { + struct list_head list; + struct pci_dev *dev; + struct device_node *dn; + int reset_state; +}; + +/** Register to find out about EEH events. */ +int eeh_register_notifier(struct notifier_block *nb); +int eeh_unregister_notifier(struct notifier_block *nb); + +/** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. * * Order this macro for performance. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From olh at suse.de Thu Aug 19 02:44:36 2004 From: olh at suse.de (Olaf Hering) Date: Wed, 18 Aug 2004 18:44:36 +0200 Subject: glibc fails in tst-timer4, __SI_TIMER needs its own case In-Reply-To: <1092805380.9536.178.camel@gaston> References: <20040815215142.GA18611@suse.de> <1092805380.9536.178.camel@gaston> Message-ID: <20040818164436.GA26820@suse.de> On Wed, Aug 18, Benjamin Herrenschmidt wrote: > On Mon, 2004-08-16 at 07:51, Olaf Hering wrote: > > Ben, > > > > can you have a look at this one? glibc make check fails since the > > tst-timer4 test was added in April this year. I think __ST_TIMER needs > > its own case. This patch against 2.6.8.1 fixes it for me. > > Hi ! > > Can you try that one instead ? It fails as well. -- USB is for mice, FireWire is for men! sUse lINUX ag, n?RNBERG ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Thu Aug 19 04:36:01 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 19 Aug 2004 04:36:01 +1000 Subject: [PATCH] [KDB] Use proper wrappers for udbg calls in KDB In-Reply-To: <41224C90.8080403@austin.ibm.com> References: <41224C90.8080403@austin.ibm.com> Message-ID: <20040818183601.GD6749@krispykreme> Hi, > Below patch uses the udbg_*() functions instead of calling ppc_md.udbg_* > in KDB. 
This way, if udbg hasn't been setup right the machine will > survive anyway. > > Since KDB only lives in the Ameslab tree at the moment, and that tree > does not compile (hvconsole breakage), I'm not sure just what to do with > this patch. I'll follow up with another one that fixes some paca_struct > rewrite KDB breakage, but I have not been able to boot and test a kernel > with the changes. Keith Owens was asking about this recently. We really need to get our kdb changes into upstream. I did get an email from someone working on this but the email disappeared into a lotus notes black hole. Anton ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From linas at austin.ibm.com Thu Aug 19 06:05:56 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Wed, 18 Aug 2004 15:05:56 -0500 Subject: Unapplied patches? In-Reply-To: <16674.53907.366376.276451@cargo.ozlabs.ibm.com> References: <20040817221652.GE14002@austin.ibm.com> <16674.53907.366376.276451@cargo.ozlabs.ibm.com> Message-ID: <20040818200556.GG14002@austin.ibm.com> On Wed, Aug 18, 2004 at 01:52:51PM +1000, Paul Mackerras was heard to remark: > Linas, > > > I've emailed you six or 8 patches about 2-4 weeks ago, and after > > going through my email box, it seems that none of these have been > > applied, nor have I received any answer from you or the mailing list one > > Some have been sent upstream but not all of those have appeared in > Linus' tree yet since Linus is away at the moment. Also I started > travelling about the same time that you went on vacation, which made > it difficult for me to process patches since I had no good way to test > patches. Then when I came back I had a paper to write which > distracted me from kernel work. OK, thanks, I understand, sorry if I sounded prickly, I get that way, sometimes. Live in glass house, throw rocks, get carried away. --linas ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From linas at austin.ibm.com Thu Aug 19 06:17:54 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Wed, 18 Aug 2004 15:17:54 -0500 Subject: EEH Ethernet [was Re: Unapplied patches?] In-Reply-To: <16674.53907.366376.276451@cargo.ozlabs.ibm.com> References: <20040817221652.GE14002@austin.ibm.com> <16674.53907.366376.276451@cargo.ozlabs.ibm.com> Message-ID: <20040818201754.GH14002@austin.ibm.com> On Wed, Aug 18, 2004 at 01:52:51PM +1000, Paul Mackerras was heard to remark: > > I did want to get the notifier list for EEH isolation events in but > you still had the "only if ethernet" policy code in there. We had a > short conversation about that, and despite what you said, I still > don't think it is right. OK, I'm agnostic. Recall, the ethernet check is interim scaffolding. Others are untested/unsupported. For example, I tried a 4-port USB device once, and the kernel died a flaming death. So the current "check if ethernet and try to recover" is more of a statement about what's known to work. I'm hoping to broaden support real soon now. In the interest of keeping things rolling, can I get you to accept the patch 'with ethernet check' for now, and if nothing supersedes it in a month or two, then you can strip it out? > I think we possibly need something that > counts pending EEH errors and panics if the count exceeds a threshold > instead. Well, it won't work quite like that. The very first error can either be recovered, or it can't be. There's no way to 'ignore' eeh errors.
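To make that concrete, the sort of bookkeeping this implies looks roughly like the sketch below -- illustrative only, with made-up names (eeh_slot_record, eeh_note_failure, EEH_MAX_RECOVERY_ATTEMPTS), not code from any posted patch:

	/* Hypothetical sketch: remember repeated failures per slot, keyed by
	 * the location code, since the struct pci_dev may already be gone by
	 * the time the same slot fails again.  Locking omitted for brevity. */
	#define EEH_MAX_RECOVERY_ATTEMPTS 6

	struct eeh_slot_record {
		struct list_head list;
		char location[64];		/* ibm,loc-code of the slot */
		int fail_count;
	};

	static LIST_HEAD(eeh_slot_records);

	/* Returns nonzero if this slot has failed too often and should be
	 * left isolated (offlined) rather than recovered yet again. */
	static int eeh_note_failure(const char *loc_code)
	{
		struct eeh_slot_record *rec;

		list_for_each_entry(rec, &eeh_slot_records, list)
			if (strcmp(rec->location, loc_code) == 0)
				return ++rec->fail_count > EEH_MAX_RECOVERY_ATTEMPTS;

		rec = kmalloc(sizeof(*rec), GFP_ATOMIC);
		if (!rec)
			return 1;	/* no memory: be conservative, give up */
		strlcpy(rec->location, loc_code, sizeof(rec->location));
		rec->fail_count = 1;
		list_add(&rec->list, &eeh_slot_records);
		return 0;
	}
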
I am planning on counting the number of times that the same hardware has faulted, and offlining it if the count exceeds a threshold. This would prevent an infinite loop of going down, recovering, going down, etc. The hard part turns out to be that once a device has been removed, there aren't any kernel structures left to keep track of "that device", so it's a little tricky to figure out if it's the same device that keeps failing over and over. > It compiles fine if you turn off hotplug PCI. :) It certainly needs to > be fixed, but I want it fixed properly. Actually, that part is a patch that gregkh needs to apply. I guess I have to harangue him first. --linas ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From johnrose at austin.ibm.com Thu Aug 19 07:32:27 2004 From: johnrose at austin.ibm.com (John Rose) Date: Wed, 18 Aug 2004 16:32:27 -0500 Subject: [PATCH] create pcibios_remove_root_bus() Message-ID: <1092864747.15281.18.camel@sinatra.austin.ibm.com> The following patch creates pcibios_remove_root_bus(), which performs the ppc64-specific actions for PHB removal. This patch depends on the following patches, which haven't yet been accepted :) Patch 1: imalloc supersets - submitted 7/22/2004 http://lists.linuxppc.org/linuxppc64-dev/200407/msg00171.html Patch 2: promote hose_list to an "official" list - submitted 8/11/2004 http://lists.linuxppc.org/linuxppc64-dev/200408/msg00041.html Thanks- John Signed-off-by: John Rose diff -puN arch/ppc64/kernel/pSeries_pci.c~remove_split arch/ppc64/kernel/pSeries_pci.c --- sles9-rc5/arch/ppc64/kernel/pSeries_pci.c~remove_split 2004-08-18 14:09:57.000000000 -0500 +++ sles9-rc5-johnrose/arch/ppc64/kernel/pSeries_pci.c 2004-08-18 16:14:59.000000000 -0500 @@ -793,3 +793,50 @@ pcibios_scan_all_fns(struct pci_bus *bus return 0; } + +/* RPA-specific bits for removing PHBs */ +int pcibios_remove_root_bus(struct pci_controller *phb) +{ + struct pci_bus *b = phb->bus; + struct resource *res; + int rc, i; + + res = b->resource[0]; + if (!res->flags) { + printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__, + b->name); + return 1; + } + + rc = unmap_bus_range(b); + if (rc) { + printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", + __FUNCTION__, b->name); + return 1; + } + + if (release_resource(res)) { + printk(KERN_ERR "%s: failed to release IO on bus %s\n", + __FUNCTION__, b->name); + return 1; + } + + for (i = 1; i < 3; ++i) { + res = b->resource[i]; + if (!res->flags && i == 0) { + printk(KERN_ERR "%s: no MEM resource for PHB %s\n", + __FUNCTION__, b->name); + return 1; + } + if (res->flags && release_resource(res)) { + printk(KERN_ERR + "%s: failed to release IO %d on bus %s\n", + __FUNCTION__, i, b->name); + return 1; + } + } + + list_del(&phb->list_node); + return 0; +} +EXPORT_SYMBOL(pcibios_remove_root_bus); diff -puN include/asm-ppc64/pci-bridge.h~remove_split include/asm-ppc64/pci-bridge.h --- sles9-rc5/include/asm-ppc64/pci-bridge.h~remove_split 2004-08-18 14:09:57.000000000 -0500 +++ sles9-rc5-johnrose/include/asm-ppc64/pci-bridge.h 2004-08-18 16:14:59.000000000 -0500 @@ -77,6 +77,8 @@ int pci_device_loc(struct device_node *d struct device_node *fetch_dev_dn(struct pci_dev *dev); +extern int pcibios_remove_root_bus(struct pci_controller *phb); + /* Get a device_node from a pci_dev. This code must be fast except in the case * where the sysdata is incorrect and needs to be fixed up (hopefully just once) */ _ ** Sent via the linuxppc64-dev mail list.
See http://lists.linuxppc.org/ From olof at austin.ibm.com Thu Aug 19 08:26:12 2004 From: olof at austin.ibm.com (Olof Johansson) Date: Wed, 18 Aug 2004 17:26:12 -0500 Subject: [PATCH] Aggregate little endian bit ops and make ext2_{set,clear}_bit_atomic lockless Message-ID: <20040818222612.GA15580@4> Hi, Below patch reuses the big-endian bitops for the little endian ones, and moves the ext2_{set,clear}_bit_atomic functions to be truly atomic instead of lock based. I've given it basic testing (i.e. booted it, untarred a kernel tree and built it, booted back to an older kernel, fscked and compared file contents). Only limited stress testing has been done so far. Signed-off-by: Olof Johansson --- linux-2.5-olof/include/asm-ppc64/bitops.h | 86 ++++++++++-------------------- 1 files changed, 29 insertions(+), 57 deletions(-) diff -puN include/asm-ppc64/bitops.h~ext2-set-bit include/asm-ppc64/bitops.h --- linux-2.5/include/asm-ppc64/bitops.h~ext2-set-bit 2004-08-18 12:04:43.208963520 -0500 +++ linux-2.5-olof/include/asm-ppc64/bitops.h 2004-08-18 15:11:57.088963696 -0500 @@ -22,6 +22,15 @@ * it will be a bad memory reference since we want to store in chunks * of unsigned long (64 bits here) size. * + * There are a few little-endian macros used mostly for filesystem bitmaps, + * these work on similar bit arrays layouts, but byte-oriented: + * + * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56| + * + * The main difference is that bit 3-5 in the bit number field needs to be + * reversed compared to the big-endian bit fields. This can be achieved + * by XOR with 0b111000 (0x38). + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -306,71 +315,34 @@ static __inline__ int test_le_bit(unsign return (ADDR[nr >> 3] >> (nr & 7)) & 1; } +#define test_and_clear_le_bit(nr, addr) \ + test_and_clear_bit((nr) ^ 0x38, (addr)) +#define test_and_set_le_bit(nr, addr) \ + test_and_set_bit((nr) ^ 0x38, (addr)) + /* * non-atomic versions */ -static __inline__ void __set_le_bit(unsigned long nr, unsigned long *addr) -{ - unsigned char *ADDR = (unsigned char *)addr; - ADDR += nr >> 3; - *ADDR |= 1 << (nr & 0x07); -} - -static __inline__ void __clear_le_bit(unsigned long nr, unsigned long *addr) -{ - unsigned char *ADDR = (unsigned char *)addr; - - ADDR += nr >> 3; - *ADDR &= ~(1 << (nr & 0x07)); -} - -static __inline__ int __test_and_set_le_bit(unsigned long nr, unsigned long *addr) -{ - int mask, retval; - unsigned char *ADDR = (unsigned char *)addr; - - ADDR += nr >> 3; - mask = 1 << (nr & 0x07); - retval = (mask & *ADDR) != 0; - *ADDR |= mask; - return retval; -} - -static __inline__ int __test_and_clear_le_bit(unsigned long nr, unsigned long *addr) -{ - int mask, retval; - unsigned char *ADDR = (unsigned char *)addr; - - ADDR += nr >> 3; - mask = 1 << (nr & 0x07); - retval = (mask & *ADDR) != 0; - *ADDR &= ~mask; - return retval; -} +#define __set_le_bit(nr, addr) \ + __set_bit((nr) ^ 0x38, (addr)) +#define __clear_le_bit(nr, addr) \ + __clear_bit((nr) ^ 0x38, (addr)) +#define __test_and_clear_le_bit(nr, addr) \ + __test_and_clear_bit((nr) ^ 0x38, (addr)) +#define __test_and_set_le_bit(nr, addr) \ + __test_and_set_bit((nr) ^ 0x38, (addr)) #define ext2_set_bit(nr,addr) \ - __test_and_set_le_bit((nr),(unsigned long*)addr) + __test_and_set_le_bit((nr), (unsigned long*)addr) #define ext2_clear_bit(nr, addr) \ - __test_and_clear_le_bit((nr),(unsigned long*)addr) + 
__test_and_clear_le_bit((nr), (unsigned long*)addr) + +#define ext2_set_bit_atomic(lock, nr, addr) \ + test_and_set_le_bit((nr), (unsigned long*)addr) +#define ext2_clear_bit_atomic(lock, nr, addr) \ + test_and_clear_le_bit((nr), (unsigned long*)addr) -#define ext2_set_bit_atomic(lock, nr, addr) \ - ({ \ - int ret; \ - spin_lock(lock); \ - ret = ext2_set_bit((nr), (addr)); \ - spin_unlock(lock); \ - ret; \ - }) - -#define ext2_clear_bit_atomic(lock, nr, addr) \ - ({ \ - int ret; \ - spin_lock(lock); \ - ret = ext2_clear_bit((nr), (addr)); \ - spin_unlock(lock); \ - ret; \ - }) #define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr) #define ext2_find_first_zero_bit(addr, size) \ _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Thu Aug 19 11:26:03 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Thu, 19 Aug 2004 11:26:03 +1000 Subject: SLB bolting for iSeries In-Reply-To: References: <20040818012529.GE11499@zax> Message-ID: <20040819012603.GE13672@zax> On Wed, Aug 18, 2004 at 02:38:46PM -0500, Michael J Wolf wrote: > > I applied the patch to the Linus' latest bk tree. I ran it on a > Power 4 iseries with 8 shared processors and 4 gig of ram. It booted > and ran through the LTP testsuited just fine. Excellent! I shall push to akpm, then. Thanks for testing. With that much working, could you give the patch below a whirl (applies on top of the previous patch). It needs a kinda nasty workaround for iSeries, since that lparmap information is not per-cpu. At present, the SLB miss handler has to check the SLB slot it is about to use to ensure that it does not contain the SLBE for the current kernel stack - throwing out the SLBE for the kernel stack can trigger the megabug. This patch implements a different approach - with this patch SLB slot 2 always (well, nearly always) contains an SLBE for the stack. This slot is never cast out by the normal SLB miss path. On context switch, an SLBE for the new stack is pinned into this slot, unless the new stack is the the bolted segment. This approach shaves a few ns off the slb miss time, but more usefully makes it easier to experiment with different SLB castout aporoaches without worrying about reinstating the megabug. Index: working-2.6/arch/ppc64/kernel/asm-offsets.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/asm-offsets.c 2004-08-18 16:29:17.677543096 +1000 +++ working-2.6/arch/ppc64/kernel/asm-offsets.c 2004-08-18 16:30:58.538515992 +1000 @@ -58,6 +58,7 @@ DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); DEFINE(KSP, offsetof(struct thread_struct, ksp)); + DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); #ifdef CONFIG_ALTIVEC DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0])); Index: working-2.6/arch/ppc64/kernel/entry.S =================================================================== --- working-2.6.orig/arch/ppc64/kernel/entry.S 2004-08-09 09:51:38.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/entry.S 2004-08-18 16:30:58.539515840 +1000 @@ -393,9 +393,17 @@ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? 
*/ cror eq,4*cr1+eq,eq beq 2f /* if yes, don't slbie it */ - oris r6,r6,0x0800 /* set C (class) bit */ - slbie r6 - slbie r6 /* Workaround POWER5 < DD2.1 issue */ + oris r0,r6,0x0800 /* set C (class) bit */ + + /* Bolt in the new stack SLB entry */ + ld r7,KSP_VSID(r4) /* Get new stack's VSID */ + oris r6,r6,(SLB_ESID_V)@h + ori r6,r6,(SLB_NUM_BOLTED-1)@l + slbie r0 + slbie r0 /* Workaround POWER5 < DD2.1 issue */ + slbmte r7,r6 + isync + 2: END_FTR_SECTION_IFSET(CPU_FTR_SLB) clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ Index: working-2.6/arch/ppc64/kernel/process.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/process.c 2004-08-09 09:51:38.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/process.c 2004-08-18 16:30:58.540515688 +1000 @@ -356,6 +356,16 @@ kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; + if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { + unsigned long sp_vsid = get_kernel_vsid(sp); + + sp_vsid <<= SLB_VSID_SHIFT; + sp_vsid |= SLB_VSID_KERNEL; + if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) + sp_vsid |= SLB_VSID_L; + + p->thread.ksp_vsid = sp_vsid; + } /* * The PPC64 ABI makes use of a TOC to contain function Index: working-2.6/arch/ppc64/mm/slb.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/slb.c 2004-08-18 16:29:17.720536560 +1000 +++ working-2.6/arch/ppc64/mm/slb.c 2004-08-18 16:30:58.541515536 +1000 @@ -24,30 +24,55 @@ extern void slb_allocate(unsigned long ea); +static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) +{ + return (ea & ESID_MASK) | SLB_ESID_V | slot; +} + +static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) +{ + return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; +} + static inline void create_slbe(unsigned long ea, unsigned long vsid, unsigned long flags, unsigned long entry) { - ea = (ea & ESID_MASK) | SLB_ESID_V | entry; - vsid = (vsid << SLB_VSID_SHIFT) | flags; asm volatile("slbmte %0,%1" : - : "r" (vsid), "r" (ea) + : "r" (mk_vsid_data(ea, flags)), + "r" (mk_esid_data(ea, entry)) : "memory" ); } -static void slb_add_bolted(void) +static void slb_flush_and_rebolt(void) { - WARN_ON(!irqs_disabled()); - /* If you change this make sure you change SLB_NUM_BOLTED - * appropriately too */ + * appropriately too. */ + unsigned long ksp_flags = SLB_VSID_KERNEL; + unsigned long ksp_esid_data; - /* Slot 1 - first VMALLOC segment - * Since modules end up there it gets hit very heavily. - */ - create_slbe(VMALLOCBASE, get_kernel_vsid(VMALLOCBASE), - SLB_VSID_KERNEL, 1); + WARN_ON(!irqs_disabled()); - asm volatile("isync":::"memory"); + if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) + ksp_flags |= SLB_VSID_L; + + ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); + if ((ksp_esid_data & ESID_MASK) == KERNELBASE) + ksp_esid_data &= ~SLB_ESID_V; + + /* We need to do this all in asm, so we're sure we don't touch + * the stack between the slbia and rebolting it. */ + asm volatile("isync\n" + "slbia\n" + /* Slot 1 - first VMALLOC segment */ + "slbmte %0,%1\n" + /* Slot 2 - kernel stack */ + "slbmte %2,%3\n" + "isync" + :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), + "r"(mk_esid_data(VMALLOCBASE, 1)), + "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), + "r"(ksp_esid_data) + : "memory"); } /* Flush all user entries from the segment table of the current processor. 
*/ @@ -69,8 +94,7 @@ } asm volatile("isync" : : : "memory"); } else { - asm volatile("isync; slbia; isync" : : : "memory"); - slb_add_bolted(); + slb_flush_and_rebolt(); } /* Workaround POWER5 < DD2.1 issue */ @@ -113,22 +137,27 @@ void slb_initialize(void) { -#ifdef CONFIG_PPC_ISERIES - asm volatile("isync; slbia; isync":::"memory"); -#else + /* On iSeries the bolted entries have already been set up by + * the hypervisor from the lparMap data in head.S */ +#ifndef CONFIG_PPC_ISERIES unsigned long flags = SLB_VSID_KERNEL; - /* Invalidate the entire SLB (even slot 0) & all the ERATS */ - if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) - flags |= SLB_VSID_L; - - asm volatile("isync":::"memory"); - asm volatile("slbmte %0,%0"::"r" (0) : "memory"); + /* Invalidate the entire SLB (even slot 0) & all the ERATS */ + if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) + flags |= SLB_VSID_L; + + asm volatile("isync":::"memory"); + asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); - create_slbe(KERNELBASE, get_kernel_vsid(KERNELBASE), - flags, 0); - + create_slbe(KERNELBASE, get_kernel_vsid(KERNELBASE), flags, 0); + create_slbe(VMALLOCBASE, get_kernel_vsid(KERNELBASE), + SLB_VSID_KERNEL, 1); + /* We don't bolt the stack for the time being - we're in boot, + * so the stack is in the bolted segment. By the time it goes + * elsewhere, we'll call _switch() which will bolt in the new + * one. */ + asm volatile("isync":::"memory"); #endif - slb_add_bolted(); + get_paca()->stab_rr = SLB_NUM_BOLTED; } Index: working-2.6/arch/ppc64/mm/slb_low.S =================================================================== --- working-2.6.orig/arch/ppc64/mm/slb_low.S 2004-08-10 11:14:24.000000000 +1000 +++ working-2.6/arch/ppc64/mm/slb_low.S 2004-08-18 16:30:58.541515536 +1000 @@ -37,8 +37,21 @@ * a free slot first but that took too long. Unfortunately we * dont have any LRU information to help us choose a slot. */ +#ifdef CONFIG_PPC_ISERIES + /* + * On iSeries, the "bolted" stack segment can be cast out on + * shared processor switch so we need to check for a miss on + * it and restore it to the right slot. + */ + ld r9,PACAKSAVE(r13) + clrrdi r9,r9,28 + clrrdi r11,r3,28 + li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ + cmpld r9,r11 + beq 3f +#endif /* CONFIG_PPC_ISERIES */ + ld r10,PACASTABRR(r13) -3: addi r10,r10,1 /* use a cpu feature mask if we ever change our slb size */ cmpldi r10,SLB_NUM_ENTRIES @@ -46,36 +59,9 @@ blt+ 4f li r10,SLB_NUM_BOLTED - /* - * Never cast out the segment for our kernel stack. Since we - * dont invalidate the ERAT we could have a valid translation - * for the kernel stack during the first part of exception exit - * which gets invalidated due to a tlbie from another cpu at a - * non recoverable point (after setting srr0/1) - Anton - */ -4: slbmfee r11,r10 - srdi r11,r11,27 - /* - * Use paca->ksave as the value of the kernel stack pointer, - * because this is valid at all times. - * The >> 27 (rather than >> 28) is so that the LSB is the - * valid bit - this way we check valid and ESID in one compare. - * In order to completely close the tiny race in the context - * switch (between updating r1 and updating paca->ksave), - * we check against both r1 and paca->ksave. 
- */ - srdi r9,r1,27 - ori r9,r9,1 /* mangle SP for later compare */ - cmpd r11,r9 - beq- 3b - ld r9,PACAKSAVE(r13) - srdi r9,r9,27 - ori r9,r9,1 - cmpd r11,r9 - beq- 3b - +4: std r10,PACASTABRR(r13) - +3: /* r3 = faulting address, r10 = entry */ srdi r9,r3,60 /* get region */ Index: working-2.6/include/asm-ppc64/mmu.h =================================================================== --- working-2.6.orig/include/asm-ppc64/mmu.h 2004-08-09 09:52:53.000000000 +1000 +++ working-2.6/include/asm-ppc64/mmu.h 2004-08-18 16:30:58.542515384 +1000 @@ -224,7 +224,7 @@ #define STAB0_PHYS_ADDR (STAB0_PAGE< References: <41003EC1.5030109@austin.ibm.com> Message-ID: <16676.12649.505601.453649@cargo.ozlabs.ibm.com> Nathan Fontenot writes: > This patch updates enable_surveillance() so we do not return an error > on platforms (notably power5) that do not have a surveillance sensor. > Additionaly, the rtas_call was changed to rtas_set_indicator as to avoid > having to handle RTAS_BUSY returns. Could you just add a #define in rtas.h for the -3 value and use that instead of the literal -3 please? Looks good apart from that. Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Thu Aug 19 15:08:52 2004 From: paulus at samba.org (Paul Mackerras) Date: Thu, 19 Aug 2004 15:08:52 +1000 Subject: Patch tracking system Message-ID: <16676.13796.570123.714484@cargo.ozlabs.ibm.com> Thanks to Jeremy Kerr's good work, we now have a patch tracking website at http://ozlabs.org/ppc64-patches/. It's fed off this mailing list; any posts to the list that include a patch will create a new entry on the webpage. If you post a reply on the list, your reply will get included in the list of comments for the patch. Anton and I have admin access and can change the state of patches. It's proving useful for us to keep track of which patches still need to be looked at. I hope it will prove useful for people sending patches to see where their patches are up to as well. Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Thu Aug 19 15:29:53 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 19 Aug 2004 15:29:53 +1000 Subject: bootup problem on power3 In-Reply-To: <1092807364.1827.72.camel@darya> References: <1092757903.1860.69.camel@darya> <41222DAB.8060206@austin.ibm.com> <1092807364.1827.72.camel@darya> Message-ID: <20040819052953.GF1945@krispykreme> Hi, > Using the pSeries default config, I get the following output : > ------------------------------------------------------ > Opening displays... > opening display : /pci at fef00000/display at 11... done > instantiating rtas at 0x000000003ffb5000... done > 0000000000000000 : booting cpu /cpus/PowerPC,POWER3 at 0 > ------------------------------------------------------ > and the system hangs. Sounds a bit like the memcpy bug. Can you try 2.6.8? Anton ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Thu Aug 19 21:36:47 2004 From: paulus at samba.org (Paul Mackerras) Date: Thu, 19 Aug 2004 21:36:47 +1000 Subject: [PATCH] [KDB] Use proper wrappers for udbg calls in KDB In-Reply-To: <41224C90.8080403@austin.ibm.com> References: <41224C90.8080403@austin.ibm.com> Message-ID: <16676.37071.29182.147512@cargo.ozlabs.ibm.com> Olof Johansson writes: > Since KDB only lives in the Ameslab tree at the moment, and that tree > does not compile (hvconsole breakage), I'm not sure just what to do with > this patch. 
I'll follow up with another one that fixes some paca_struct > rewrite KDB breakage, but I have not been able to boot and test a kernel > with the changes. I get: arch/ppc64/kdb/kdba_io.c:55: error: storage size of `kdb_serial' isn't known with or without that patch. I have reverted the hvc console stuff now and the rest of it compiles, so send me a new patch when you have it sorted. Thanks, Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Thu Aug 19 21:37:46 2004 From: paulus at samba.org (Paul Mackerras) Date: Thu, 19 Aug 2004 21:37:46 +1000 Subject: [PATCH] [KDB] Fix build break in kdbasupport In-Reply-To: <41224D61.6000207@austin.ibm.com> References: <41224D61.6000207@austin.ibm.com> Message-ID: <16676.37130.805765.113435@cargo.ozlabs.ibm.com> Olof Johansson writes: > The paca_struct cleanup broke KDB, here's the corresponding changes. > > Also remove a couple of compilation warnings for rtas calls. > > See previous email about ameslab breakage: Fixes are trivial but I > haven't been able to build/boot a kernel with them because of hvconsole > breakage. I get: arch/ppc64/kdb/kdbasupport.c: In function `kdba_dump_pci_info': arch/ppc64/kdb/kdbasupport.c:1594: warning: implicit declaration of function `traverse_all_pci_devices' with that patch (I didn't try without). True it's only a warning but it would be good to fix that too while you're at it. Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From sfr at canb.auug.org.au Thu Aug 19 22:40:24 2004 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Thu, 19 Aug 2004 22:40:24 +1000 Subject: [PATCH] [KDB] Fix build break in kdbasupport In-Reply-To: <16676.37130.805765.113435@cargo.ozlabs.ibm.com> References: <41224D61.6000207@austin.ibm.com> <16676.37130.805765.113435@cargo.ozlabs.ibm.com> Message-ID: <20040819224024.77ebac73.sfr@canb.auug.org.au> On Thu, 19 Aug 2004 21:37:46 +1000 Paul Mackerras wrote: > > arch/ppc64/kdb/kdbasupport.c: In function `kdba_dump_pci_info': > arch/ppc64/kdb/kdbasupport.c:1594: warning: implicit declaration of function `traverse_all_pci_devices' > > with that patch (I didn't try without). True it's only a warning but > it would be good to fix that too while you're at it. My previous patch "PPC64 pci_dn cleanups" removed the declaration of that function from kernel/pci.h and made it static ... I was working against Linus' tree and so didn't see that kdb used it. However, the include of "../kernel/pci.h" in kdb/kdbasupport.c should be fixed up (as well as unstatic'ing traverse_all_pci_devices). -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From linas at austin.ibm.com Fri Aug 20 06:46:03 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Thu, 19 Aug 2004 15:46:03 -0500 Subject: [PATCH] 2.6 ppc64 RTAS: use dynamic buffer size Message-ID: <20040819204603.GA20195@austin.ibm.com> Hi, This simple patch was discussed on the mailing lists in mid July, but the current bkbits tree doesn't yet have this. Please forward upstream. Firmware expects error log buffers to be of a very specific size, but different versions of firmware apparently expect different sizes; using the wrong size results in a painful, hard-to-debug crash in firmware. Benh provided a patch for this some months ago, but apparently missed this code path.
This patch sets up the log buffer size dynamically; it also fixes a bug with the return code not being handled correctly. Signed-off-by: Linas Vepstas --linas -------------- next part -------------- --- 1.38/arch/ppc64/kernel/rtas.c Wed Jul 14 15:27:37 2004 +++ edited/arch/ppc64/kernel/rtas.c Mon Jul 23 14:12:23 2004 @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -73,7 +72,6 @@ return tokp ? *tokp : RTAS_UNKNOWN_SERVICE; } - /** Return a copy of the detailed error text associated with the * most recent failed call to rtas. Because the error text * might go stale if there are any other intervening rtas calls, @@ -84,28 +82,32 @@ __fetch_rtas_last_error(void) { struct rtas_args err_args, save_args; + u32 bufsz; + + bufsz = rtas_token ("rtas-error-log-max"); + if ((bufsz == RTAS_UNKNOWN_SERVICE) || + (bufsz > RTAS_ERROR_LOG_MAX)) { + printk (KERN_WARNING "RTAS: bad log buffer size %d\n", bufsz); + bufsz = RTAS_ERROR_LOG_MAX; + } err_args.token = rtas_token("rtas-last-error"); err_args.nargs = 2; err_args.nret = 1; - err_args.rets = (rtas_arg_t *)&(err_args.args[2]); err_args.args[0] = (rtas_arg_t)__pa(rtas_err_buf); - err_args.args[1] = RTAS_ERROR_LOG_MAX; + err_args.args[1] = bufsz; err_args.args[2] = 0; save_args = rtas.args; rtas.args = err_args; - PPCDBG(PPCDBG_RTAS, "\tentering rtas with 0x%lx\n", - __pa(&err_args)); enter_rtas(__pa(&rtas.args)); - PPCDBG(PPCDBG_RTAS, "\treturned from rtas ...\n"); err_args = rtas.args; rtas.args = save_args; - return err_args.rets[0]; + return err_args.args[2]; } int rtas_call(int token, int nargs, int nret, int *outputs, ...) From rsa at us.ibm.com Fri Aug 20 06:50:22 2004 From: rsa at us.ibm.com (Ryan Arnold) Date: Thu, 19 Aug 2004 15:50:22 -0500 Subject: [RFC] interrupt drive hvc_console as vio device In-Reply-To: <1092775847.3906.18.camel@localhost> References: <41224F3E.5030702@austin.ibm.com> <1092775847.3906.18.camel@localhost> Message-ID: <1092948622.28320.73.camel@localhost> Greetings, Thanks for the patience with the hvc_console build problems in Ameslab. Paulus just reverted Ameslab to the equivalent of what is in mainline to fix some arch/ppc64/hvconsole merge problems when the newest version of HVCS was pulled into Ameslab. This patch adds the items listed in the changelog to the mainline (2.6.8.1) version of hvc_console. Once accepted there it will be pulled down into Ameslab to fix Paulus's revert. http://www-124.ibm.com/linux/patches/?patch_id=1619 Please comment! For the most part this is very similar code to what was in hvc_console in Ameslab before the revert. Changelog: arch/ppc64/kernel/hvconsole.c ----------------------------------------- -Changed hvc_get_chars() and hvc_put_chars() api to take vtermno rather than index number. -Added hvc_find_vtys() function which walks the bus looking for vterm/vty devices to callback to the hvc_console driver. This is for early console init (pre mem init and pre device probe). include/asm-ppc64/hvconsole.h ------------------------------------------ -Changed hvc_get_chars() and hvc_put_chars() api to take vtermno rather than index number. -Added hvc_find_vtys() function. -Added hvc_instantiate() function which is implemented by a console driver wanting to receive a callback of and early console init. drivers/char/hvc_console.c ------------------------------------------- -Switch khvcd from kernel_threads to kthreads which got rid of deprecated daemonize(). 
-Added module exit clause to be thorough (not terribly necessary with a console driver of course) -Added early discovery of vterm/vty adapters by doing a bus walk on early console init which results in hvc_instantiate() callback and addition of the vtermno into a static array of vtermnos supported as console adapters (meaning the console api's work against these vtermnos prior to full console initialization). -This driver is now registered as a vio driver which means that vty adapters are now managed via probe/remove. This means hvc_console theoretically supports hotplug vty adapters. -Removed static hvc_struct array and replaced with a linux list that has elements (hvc_struct instances) added/removed on probe/remove AFTER early console init. This is important because kmalloc can't be done at early console init. -Driver now either runs in interrupt driven mode or in polling mode on older hardware. The khvcd is smart enough to not 'schedule()' when there are no interrupts. -kobjects are now used for ref counting on the hvc_struct instances. Thanks, Ryan S. Arnold IBM Linux Technology Center ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From sharada at in.ibm.com Fri Aug 20 18:52:01 2004 From: sharada at in.ibm.com (R Sharada) Date: Fri, 20 Aug 2004 14:22:01 +0530 Subject: cpumask move patch revised - RFC In-Reply-To: <16668.23974.392788.23268@cargo.ozlabs.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> <20040806042627.GA1288@in.ibm.com> <20040812121703.GA9760@in.ibm.com> <411BBF5F.3070901@austin.ibm.com> <20040813043000.GA1168@in.ibm.com> <16668.23974.392788.23268@cargo.ozlabs.ibm.com> Message-ID: <20040820085201.GA1290@in.ibm.com> Hello, I have made a new patch for the cpumask move, against linus 2.6.8.1 tree. I don't see Nathan's changes in there yet, so this patch still works with the old cpumasks, and not with Nathan's cleaned up ones. The patch includes: - move the cpumask_setup function from chrp_setup.c to setup.c - remove extern function declarations in setup.c - remove the of_node_put() call outside the while loop Please review, test and provide comments. Thanks and Regards, Sharada On Fri, Aug 13, 2004 at 04:20:22PM +1000, Paul Mackerras wrote: > > R Sharada writes: > > > Yes, you are correct. I did see Nathan's patch on the removal of the > > unnecessary cpu maps. And yes, I am waiting for his patch to go first > > and then have this reworked to match that change. > > I have just sent Nathan's patches on to Andrew Morton. > > > > Is this really necessary? Might it go better in a .h file somewhere? > > > > Well, yes, perhaps it could be put in some .h file. However, the idea here > > was that, I just followed the conventions for other functions in chrp_setup.c > > file > > Hmmm. Anything that is defined in one file and referenced in another > should be declared in a header, not in the individual C files. Put it > in asm-ppc64/smp.h (unless you can think of a better place). Either > that or move cpumask_setup() into setup.c. > > > As regards the of_node_put, discussing with Nathan, I realized that it isn't > > really necessary, even for the last cpu node data structure in the while > > loop. So, this of_node_put will be gone soon, in the next patch. > > Note that it is not necessary because np is NULL by the time you exit > the loop. 
> > > > I think it is about time we start making code that will deal with more > > > than 2 cpu_threads, as the processors seem inevitable and not too far off. > > > > > So, can SMT/HMT have more than 2 threads now? or planned in the near future? > > Not that I know of. :) There are diminishing returns from having more > than 2 threads. If we ever get more than 2 threads we can change the > code then, but that won't be in the next few years at least. > > > > Again I'd have the CONFIG_SMP cover more. The whole while loop and the > > > of_node_put. > > > > > However, here we still need to be able to check cpu node status and > > interrupt-server#s property, etc. for non-SMP (UP) systems as well, > > is it not? In that case, we can't really move the while loop inside the > > #ifdef SMP, can we? > > The case that you are talking about ( iterating over the cpus and not doing > > anything ) would occur only in the case of a SMP machine running a UP > > kernel, is it not? That seems unlikely? Or are there other scenarios? > > That would be an uncommon case, and performance is not critical. I > would like to see such optimizations as a second patch after we have > moved the code and tested it. > > Regards, > Paul. > > -------------- next part -------------- diff -Naur linux-2.5-org/arch/ppc64/kernel/chrp_setup.c linux-2.5-chg/arch/ppc64/kernel/chrp_setup.c --- linux-2.5-org/arch/ppc64/kernel/chrp_setup.c 2004-08-19 01:15:24.000000000 -0700 +++ linux-2.5-chg/arch/ppc64/kernel/chrp_setup.c 2004-08-19 23:59:12.000000000 -0700 @@ -77,6 +77,7 @@ void pSeries_calibrate_decr(void); void fwnmi_init(void); extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* from head.S */ + int fwnmi_active; /* TRUE if an FWNMI handler is present */ dev_t boot_dev; @@ -462,3 +463,4 @@ setup_default_decr(); } + diff -Naur linux-2.5-org/arch/ppc64/kernel/prom.c linux-2.5-chg/arch/ppc64/kernel/prom.c --- linux-2.5-org/arch/ppc64/kernel/prom.c 2004-08-19 01:15:25.000000000 -0700 +++ linux-2.5-chg/arch/ppc64/kernel/prom.c 2004-08-19 23:49:58.000000000 -0700 @@ -939,13 +939,6 @@ prom_getprop(node, "reg", ®, sizeof(reg)); lpaca[cpuid].hw_cpu_id = reg; -#ifdef CONFIG_SMP - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); - if (reg == 0) - cpu_set(cpuid, RELOC(cpu_online_map)); -#endif /* CONFIG_SMP */ cpuid++; } return; @@ -1042,9 +1035,6 @@ #ifdef CONFIG_SMP /* Set the number of active processors. */ _systemcfg->processorCount++; - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); #endif } else { prom_printf("... failed: %x\n", *acknowledge); @@ -1053,10 +1043,6 @@ #ifdef CONFIG_SMP else { prom_printf("%x : booting cpu %s\n", cpuid, path); - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_online_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); } #endif next: @@ -1069,13 +1055,6 @@ lpaca[cpuid].hw_cpu_id = interrupt_server[i]; prom_printf("%x : preparing thread ... 
", interrupt_server[i]); - if (_naca->smt_state) { - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); - prom_printf("available\n"); - } else { - prom_printf("not available\n"); - } } #endif cpuid++; @@ -1101,8 +1080,6 @@ pir & 0x3ff; } } -/* cpu_set(i+1, cpu_online_map); */ - cpu_set(i+1, RELOC(cpu_possible_map)); } _systemcfg->processorCount *= 2; } else { diff -Naur linux-2.5-org/arch/ppc64/kernel/setup.c linux-2.5-chg/arch/ppc64/kernel/setup.c --- linux-2.5-org/arch/ppc64/kernel/setup.c 2004-08-19 01:15:25.000000000 -0700 +++ linux-2.5-chg/arch/ppc64/kernel/setup.c 2004-08-20 01:34:24.037909496 -0700 @@ -89,6 +89,7 @@ void parse_cmd_line(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7); int parse_bootinfo(void); +void cpumask_setup(void); #ifdef CONFIG_MAGIC_SYSRQ unsigned long SYSRQ_KEY; @@ -229,6 +230,7 @@ register_console(&udbg_console); __irq_offset_value = NUM_ISA_INTERRUPTS; finish_device_tree(); + cpumask_setup(); chrp_init(r3, r4, r5, r6, r7); #ifdef CONFIG_SMP @@ -251,6 +253,7 @@ #ifdef CONFIG_PPC_PMAC if (systemcfg->platform == PLATFORM_POWERMAC) { finish_device_tree(); + cpumask_setup(); pmac_init(r3, r4, r5, r6, r7); } #endif /* CONFIG_PPC_PMAC */ @@ -750,6 +753,91 @@ } +void cpumask_setup() +{ + unsigned long ind; + struct device_node *np = NULL; + int cpuid = 0; + unsigned int *reg; + char *statusp; + int prop; + int *propsize = ∝ + unsigned int cpu_threads; + + printk(KERN_INFO "cpumask_setup\n"); + /* On pmac, we just fill out the various global bitmasks and + * arrays indicating our CPUs are here, they are actually started + * later on from pmac_smp + */ + if (systemcfg->platform == PLATFORM_POWERMAC) { + while ((np = of_find_node_by_type(np, "cpu"))) { + reg = (unsigned int *)get_property(np, "reg", NULL); +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (*reg == 0) + cpu_set(cpuid, cpu_online_map); +#endif /* CONFIG_SMP */ + cpuid++; + } + return; + } + + while ((np = of_find_node_by_type(np, "cpu"))) { + + statusp = (char *)get_property(np, "status", NULL); + if ((statusp == NULL) || (statusp && strcmp(statusp, "okay") != +0)) + continue; + + reg = (unsigned int *)get_property(np, "reg", NULL); + get_property(np, "ibm,ppc-interrupt-server#s", propsize); + if (*propsize < 0) { + /* no property. old hardware has no SMT */ + cpu_threads = 1; + } else { + /* We have a threaded processor */ + cpu_threads = *propsize / sizeof(u32); + if (cpu_threads > 2) + cpu_threads = 1; /* ToDo: panic? */ + } + +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (cpuid == boot_cpuid) + cpu_set(cpuid, cpu_online_map); + /* set the secondary threads into the cpuid mask */ + for (ind=1; ind < cpu_threads; ind++) { + cpuid++; + if (cpuid >= NR_CPUS) + continue; + if (naca->smt_state) { + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_present_at_boot); + } + } +#endif /* CONFIG_SMP */ + cpuid++; + } + +#ifdef CONFIG_HMT + /* Only enable HMT on processors that provide support. 
*/ + if (__is_processor(PV_PULSAR) || + __is_processor(PV_ICESTAR) || + __is_processor(PV_SSTAR)) { + for (ind = 0; ind < NR_CPUS; ind += 2) { + if (!cpu_online(ind)) + continue; + cpu_set(ind+1, cpu_possible_map); + } + } +#endif + return; +} + __setup("spread_lpevents=", set_spread_lpevents ); __setup("decr_overclock_proc0=", set_decr_overclock_proc0 ); __setup("decr_overclock=", set_decr_overclock ); From paulus at samba.org Fri Aug 20 20:16:55 2004 From: paulus at samba.org (Paul Mackerras) Date: Fri, 20 Aug 2004 20:16:55 +1000 Subject: [PATCH] [correction] promote hose_list to an "official" list In-Reply-To: <1092241340.3940.12.camel@sinatra.austin.ibm.com> References: <1092241340.3940.12.camel@sinatra.austin.ibm.com> Message-ID: <16677.53143.808693.886367@cargo.ozlabs.ibm.com> John Rose writes: > On second thought, no need to externalize the list to > include/asm-ppc64/pci.h. Here's a corrected patch. > > This patch changes hose_list from a simple linked list to a > "list.h"-style list. This is in preparation for the runtime > addition/removal of PCI Host Bridges. I get lots of errors compiling this, starting with: arch/ppc64/kernel/pmac_pci.c: In function `pmac_fixup_phb_resources': arch/ppc64/kernel/pmac_pci.c:677: error: `phb' undeclared (first use in this function) arch/ppc64/kernel/pmac_pci.c:677: error: (Each undeclared identifier is reported only once arch/ppc64/kernel/pmac_pci.c:677: error: for each function it appears in.) arch/ppc64/kernel/pmac_pci.c:677: warning: type defaults to `int' in declaration of `type name' arch/ppc64/kernel/pmac_pci.c:677: error: request for member `list_node' in something not a structure or union This is with g5_defconfig (which needs updating :-P). Send me a revised patch, please. Regards, Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Fri Aug 20 23:24:26 2004 From: paulus at samba.org (Paul Mackerras) Date: Fri, 20 Aug 2004 23:24:26 +1000 Subject: cpumask move patch revised - RFC In-Reply-To: <20040820085201.GA1290@in.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> <20040806042627.GA1288@in.ibm.com> <20040812121703.GA9760@in.ibm.com> <411BBF5F.3070901@austin.ibm.com> <20040813043000.GA1168@in.ibm.com> <16668.23974.392788.23268@cargo.ozlabs.ibm.com> <20040820085201.GA1290@in.ibm.com> Message-ID: <16677.64394.808540.867378@cargo.ozlabs.ibm.com> R Sharada writes: > I have made a new patch for the cpumask move, against linus 2.6.8.1 > tree. I don't see Nathan's changes in there yet, so this patch still works > with the old cpumasks, and not with Nathan's cleaned up ones. I have sent off a lot of patches to Andrew Morton, including Nathan's patches, which are queued up waiting for Linus to get back (he is away at the moment). You can get the roll-up of the patches I have sent at: http://ozlabs.org/~paulus/akpm-pending-040820.patch Most of those patches are also in Andrew's 2.6.8.1-mm3 patch. I would need a patch that applies on top of the patches that are already pending. Thanks, Paul. ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From olof at austin.ibm.com Sat Aug 21 02:56:13 2004 From: olof at austin.ibm.com (Olof Johansson) Date: Fri, 20 Aug 2004 11:56:13 -0500 Subject: [PATCH] [KDB] Use proper wrappers for udbg calls in KDB In-Reply-To: <16676.37071.29182.147512@cargo.ozlabs.ibm.com> References: <41224C90.8080403@austin.ibm.com> <16676.37071.29182.147512@cargo.ozlabs.ibm.com> Message-ID: <41262D2D.60502@austin.ibm.com> Paul Mackerras wrote: > Olof Johansson writes: > >>Since KDB only lives in the Ameslab tree at the moment, and that tree >>does not compile (hvconsole breakage), I'm not sure just what to do with >>this patch. I'll follow up with another one that fixes some paca_struct >>rewrite KDB breakage, but I have not been able to boot and test a kernel >>with the changes. > > I get: > > arch/ppc64/kdb/kdba_io.c:55: error: storage size of `kdb_serial' isn't known > > with or without that patch. I have reverted the hvc console stuff now > and the rest of it compiles, so send me a new patch when you have it > sorted. Ok, got it to build and boot. The previous two patches (udbg and paca fixes) are still valid, the attached patch is also needed. The kdb_serial build break on pmac is fixed by this patch too. I haven't tested KDB on G5, I'm not sure if anyone ever has. -Olof -------------- next part -------------- A non-text attachment was scrubbed... Name: kdb-fixes.patch Type: text/x-patch Size: 4897 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040820/049e56b7/attachment.bin From johnrose at austin.ibm.com Tue Aug 24 02:21:44 2004 From: johnrose at austin.ibm.com (John Rose) Date: Mon, 23 Aug 2004 11:21:44 -0500 Subject: [PATCH] [correction] promote hose_list to an "official" list In-Reply-To: References: Message-ID: <1093278104.22209.2.camel@sinatra.austin.ibm.com> Hi Paul- > I get lots of errors compiling this, starting with: ... > Send me a revised patch, please. > > Regards, > Paul. My apologies, here's a revised patch. Thanks- John Signed-off-by: John Rose diff -Nru a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Mon Aug 23 11:11:16 2004 +++ b/arch/ppc64/kernel/pSeries_iommu.c Mon Aug 23 11:11:16 2004 @@ -90,7 +90,7 @@ static void iommu_buses_init(void) { - struct pci_controller* phb; + struct pci_controller *phb, *tmp; struct device_node *dn, *first_dn; int num_slots, num_slots_ilog2; int first_phb = 1; @@ -109,7 +109,7 @@ /* XXX Should we be using pci_root_buses instead? 
-ojn */ - for (phb=hose_head; phb; phb=phb->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { first_dn = ((struct device_node *)phb->arch_data)->child; /* Carve 2GB into the largest dma_window_size possible */ diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c --- a/arch/ppc64/kernel/pSeries_pci.c Mon Aug 23 11:11:16 2004 +++ b/arch/ppc64/kernel/pSeries_pci.c Mon Aug 23 11:11:16 2004 @@ -728,9 +728,9 @@ static void phbs_fixup_io(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose=hose_head;hose;hose=hose->next) + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) remap_bus_range(hose->bus); } @@ -763,8 +763,8 @@ pci_find_hose_for_OF_device(struct device_node *node) { while (node) { - struct pci_controller *hose; - for (hose=hose_head;hose;hose=hose->next) + struct pci_controller *hose, *tmp; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) if (hose->arch_data == node) return hose; node=node->parent; diff -Nru a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c --- a/arch/ppc64/kernel/pci.c Mon Aug 23 11:11:16 2004 +++ b/arch/ppc64/kernel/pci.c Mon Aug 23 11:11:16 2004 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -63,8 +64,7 @@ void iSeries_pcibios_init(void); -struct pci_controller *hose_head; -struct pci_controller **hose_tail = &hose_head; +LIST_HEAD(hose_list); struct pci_dma_ops pci_dma_ops; EXPORT_SYMBOL(pci_dma_ops); @@ -240,8 +240,8 @@ hose->type = controller_type; hose->global_number = global_phb_number++; - *hose_tail = hose; - hose_tail = &hose->next; + list_add_tail(&hose->list_node, &hose_list); + return hose; } @@ -281,7 +281,7 @@ static int __init pcibios_init(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; struct pci_bus *bus; #ifdef CONFIG_PPC_ISERIES @@ -292,7 +292,7 @@ printk("PCI: Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. 
*/ - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { hose->last_busno = 0xff; bus = pci_scan_bus(hose->first_busno, hose->ops, hose->arch_data); diff -Nru a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h --- a/arch/ppc64/kernel/pci.h Mon Aug 23 11:11:16 2004 +++ b/arch/ppc64/kernel/pci.h Mon Aug 23 11:11:16 2004 @@ -17,9 +17,7 @@ extern struct pci_controller* pci_alloc_pci_controller(enum phb_types controller_type); extern struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node); -extern struct pci_controller* hose_head; -extern struct pci_controller** hose_tail; - +extern struct list_head hose_list; extern int global_phb_number; /******************************************************************* diff -Nru a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c --- a/arch/ppc64/kernel/pci_dn.c Mon Aug 23 11:11:16 2004 +++ b/arch/ppc64/kernel/pci_dn.c Mon Aug 23 11:11:16 2004 @@ -129,10 +129,10 @@ */ static void *traverse_all_pci_devices(traverse_func pre) { - struct pci_controller *phb; + struct pci_controller *phb, *tmp; void *ret; - for (phb = hose_head; phb; phb = phb->next) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) if ((ret = traverse_pci_devices(phb->arch_data, pre, phb)) != NULL) return ret; diff -Nru a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c --- a/arch/ppc64/kernel/pmac_pci.c Mon Aug 23 11:11:16 2004 +++ b/arch/ppc64/kernel/pmac_pci.c Mon Aug 23 11:11:16 2004 @@ -672,9 +672,9 @@ static void __init pmac_fixup_phb_resources(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; hose->io_resource.start += offset; hose->io_resource.end += offset; diff -Nru a/include/asm-ppc64/pci-bridge.h b/include/asm-ppc64/pci-bridge.h --- a/include/asm-ppc64/pci-bridge.h Mon Aug 23 11:11:16 2004 +++ b/include/asm-ppc64/pci-bridge.h Mon Aug 23 11:11:16 2004 @@ -33,9 +33,9 @@ struct pci_controller { char what[8]; /* Eye catcher */ enum phb_types type; /* Type of hardware */ - struct pci_controller *next; struct pci_bus *bus; void *arch_data; + struct list_head list_node; int first_busno; int last_busno; ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nfont at austin.ibm.com Tue Aug 24 04:11:35 2004 From: nfont at austin.ibm.com (Nathan Fontenot) Date: Mon, 23 Aug 2004 13:11:35 -0500 Subject: [PATCH] updates to surveillance for power5 In-Reply-To: <16676.12649.505601.453649@cargo.ozlabs.ibm.com> References: <41003EC1.5030109@austin.ibm.com> <16676.12649.505601.453649@cargo.ozlabs.ibm.com> Message-ID: <412A3357.5040509@austin.ibm.com> Paul Mackerras wrote: > Nathan Fontenot writes: > > >>This patch updates enable_surveillance() so we do not return an error >>on platforms (notably power5) that do not have a surveillance sensor. >>Additionaly, the rtas_call was changed to rtas_set_indicator as to avoid >>having to handle RTAS_BUSY returns. > > > Could you just add a #define in rtas.h for the -3 value and use that > instead of the literal -3 please? Looks good apart from that. > > Paul. > Here's an updated patch that adds a #define for the -3 return code. 
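In sketch form the change looks roughly like the following; the constant name, the helper shape and the surveillance token are illustrative here rather than copied from the attached surv2.patch, with rtas_set_indicator() being the call named above:

    /* give the RTAS "no such indicator" return (-3) a name; name illustrative */
    #define RTAS_NO_SUCH_INDICATOR	-3

    /* enable_surveillance(): a platform without the sensor is not an error */
    static void enable_surveillance_sketch(int timeout)
    {
    	int error;

    	error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
    	if (error == RTAS_NO_SUCH_INDICATOR)
    		return;
    	if (error)
    		printk(KERN_ERR "rtas_set_indicator: error %d\n", error);
    }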
-- Nathan Fontenot Power Linux Platform Serviceability Home: IBM Austin 908/1E-036 Phone: 512.838.3377 (T/L 678.3377) Email: nfont at austin.ibm.com -------------- next part -------------- A non-text attachment was scrubbed... Name: surv2.patch Type: text/x-patch Size: 1567 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040823/b2e4f2fb/attachment.bin From kravetz at us.ibm.com Tue Aug 24 07:24:59 2004 From: kravetz at us.ibm.com (Mike Kravetz) Date: Mon, 23 Aug 2004 14:24:59 -0700 Subject: alloc via alloc_bootmem(), free via kfree() Message-ID: <20040823212459.GA6120@w-mikek2.beaverton.ibm.com> Was just looking at the routine alloc_phb(). In this routine there is the following line: phb = pci_alloc_pci_controller(phb_type); In pci_alloc_pci_controller() allocation for the returned structure is as follows: #ifdef CONFIG_PPC_ISERIES hose = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), GFP_KERNEL); #else hose = (struct pci_controller *)alloc_bootmem(sizeof(struct pci_controller)); #endif So, on pSeries we would do the allocation via alloc_bootmem(). However, a couple of error paths in alloc_phb() contain: kfree(phb); Looks like the error paths need to be changed so that the 'type of free' corresponds to the type of allocation. I could provide a trivial patch to do this, but ideally we might want to create another routine such as pci_free_pci_controller() so that the caller doesn't need to know what underlying allocation scheme was used. It also looks like there might be an opportunity for cleanup in this area. Does someone familiar with this code want to fix this? Like I said, I could provide a trivial patch if needed. -- Mike ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 24 15:53:31 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 24 Aug 2004 15:53:31 +1000 Subject: [PATCH] updates to surveillance for power5 In-Reply-To: <412A3357.5040509@austin.ibm.com> References: <41003EC1.5030109@austin.ibm.com> <16676.12649.505601.453649@cargo.ozlabs.ibm.com> <412A3357.5040509@austin.ibm.com> Message-ID: <20040824055331.GA2306@krispykreme> Hi Nathan, > >>This patch updates enable_surveillance() so we do not return an error > >>on platforms (notably power5) that do not have a surveillance sensor. > >>Additionaly, the rtas_call was changed to rtas_set_indicator as to avoid > >>having to handle RTAS_BUSY returns. Any ideas why they got rid of surveillance on POWER5? Anton ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Tue Aug 24 16:40:01 2004 From: paulus at samba.org (Paul Mackerras) Date: Tue, 24 Aug 2004 16:40:01 +1000 Subject: [PATCH] [KDB] Use proper wrappers for udbg calls in KDB In-Reply-To: <41262D2D.60502@austin.ibm.com> References: <41224C90.8080403@austin.ibm.com> <16676.37071.29182.147512@cargo.ozlabs.ibm.com> <41262D2D.60502@austin.ibm.com> Message-ID: <16682.58049.253555.992916@cargo.ozlabs.ibm.com> Olof Johansson writes: > if (eip == (kdb_machreg_t)ret_from_except || > - eip == (kdb_machreg_t)ret_from_syscall_1 /* || > - eip == (kdb_machreg_t)do_bottom_half_ret */) { > + eip == (kdb_machreg_t)syscall_exit /* || > + eip == (kdb_machreg_t)do_bottom_half_ret */) { > /* pull exception regs from the stack */ > struct pt_regs eregs; I don't like the exporting of syscall_exit, and I'd much rather see it look for the exception frame marker on the stack instead of looking at eip. 
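Concretely, the ppc64 exception prologue already drops the "regshere" marker word (the exception_marker TOC entry, 0x7265677368657265) into every exception frame, so the unwinder can test the stack for that value instead of comparing eip against exported labels. A minimal sketch, where the helper name and the slot index are assumptions about the frame layout rather than the real constants:

    #define EXC_FRAME_MARKER	0x7265677368657265UL	/* "regshere" */
    #define MARKER_SLOT		12			/* assumed slot index in the frame */

    /* fp points at the back chain word of the candidate frame */
    static int kdba_frame_is_exception(unsigned long *fp)
    {
    	return fp[MARKER_SLOT] == EXC_FRAME_MARKER;
    }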
Could you (and/or Ananth) do a single patch that does that and fixes the build breakage please? Thanks, Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 24 23:11:15 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 24 Aug 2004 23:11:15 +1000 Subject: -mminimal-toc removal Message-ID: <20040824131115.GH2306@krispykreme> The following patches remove -mminimal-toc from the kernel compile. -mminimal-toc provides a 2 level TOC which was required back when binutils couldnt create multiple TOCs. Alan fixed this a while ago but we havent got around to fixing the kernel to match. Anton ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 24 23:16:46 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 24 Aug 2004 23:16:46 +1000 Subject: [PATCH] -mminimal-toc removal 1: Cleanups In-Reply-To: <20040824131115.GH2306@krispykreme> References: <20040824131115.GH2306@krispykreme> Message-ID: <20040824131646.GI2306@krispykreme> - Get rid of a number of . symbols that arent required. No one likes . symbols - Remove a number of unnecessary .globl definitions - Remove _STATIC(), we dont need procedure descriptors for static functions in assembly files. Signed-off-by: Anton Blanchard --- mminimal_toc_die_die_die-anton/arch/ppc64/kernel/entry.S | 8 mminimal_toc_die_die_die-anton/arch/ppc64/kernel/head.S | 141 ++++------- mminimal_toc_die_die_die-anton/include/asm-ppc64/processor.h | 12 3 files changed, 63 insertions(+), 98 deletions(-) diff -puN arch/ppc64/kernel/entry.S~mminimal_toc_die_die_die_1 arch/ppc64/kernel/entry.S --- mminimal_toc_die_die_die/arch/ppc64/kernel/entry.S~mminimal_toc_die_die_die_1 2004-08-24 18:55:08.014058241 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/entry.S 2004-08-24 19:23:05.896257693 +1000 @@ -661,7 +661,7 @@ _GLOBAL(enter_rtas) std r6,PACASAVEDMSR(r13) /* Setup our real return addr */ - SET_REG_TO_LABEL(r4,.rtas_return_loc) + SET_REG_TO_LABEL(r4,rtas_return_loc) SET_REG_TO_CONST(r9,KERNELBASE) sub r4,r4,r9 mtlr r4 @@ -687,7 +687,7 @@ _GLOBAL(enter_rtas) rfid b . /* prevent speculative execution */ -_STATIC(rtas_return_loc) +rtas_return_loc: /* relocation is off at this point */ mfspr r4,SPRG3 /* Get PACA */ SET_REG_TO_CONST(r5, KERNELBASE) @@ -700,7 +700,7 @@ _STATIC(rtas_return_loc) mtmsrd r6 ld r1,PACAR1(r4) /* Restore our SP */ - LOADADDR(r3,.rtas_restore_regs) + LOADADDR(r3,rtas_restore_regs) ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ mtspr SRR0,r3 @@ -708,7 +708,7 @@ _STATIC(rtas_return_loc) rfid b . 
/* prevent speculative execution */ -_STATIC(rtas_restore_regs) +rtas_restore_regs: /* relocation is on at this point */ REST_GPR(2, r1) /* Restore the TOC */ REST_GPR(13, r1) /* Restore paca */ diff -puN arch/ppc64/kernel/head.S~mminimal_toc_die_die_die_1 arch/ppc64/kernel/head.S --- mminimal_toc_die_die_die/arch/ppc64/kernel/head.S~mminimal_toc_die_die_die_1 2004-08-24 18:55:08.021057703 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/head.S 2004-08-24 19:32:30.672607890 +1000 @@ -84,10 +84,10 @@ .globl _stext _stext: #ifdef CONFIG_PPC_PSERIES -_STATIC(__start) +__start: /* NOP this out unconditionally */ BEGIN_FTR_SECTION - b .__start_initialization_pSeries + b __start_initialization_pSeries END_FTR_SECTION(0, 1) #endif /* Catch branch to 0 in real mode */ @@ -158,7 +158,7 @@ _GLOBAL(__secondary_hold) bne 100b #ifdef CONFIG_HMT - b .hmt_init + b hmt_init #else #ifdef CONFIG_SMP mr r3,r24 @@ -301,7 +301,6 @@ exception_marker: */ #define STD_EXCEPTION_PSERIES(n, label) \ . = n; \ - .globl label##_Pseries; \ label##_Pseries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ @@ -383,7 +382,6 @@ label##_Iseries_profile: \ #define STD_EXCEPTION_COMMON(trap, label, hdlr) \ .align 7; \ - .globl label##_common; \ label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ DISABLE_INTS; \ @@ -394,7 +392,6 @@ label##_common: \ #define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ .align 7; \ - .globl label##_common; \ label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ DISABLE_INTS; \ @@ -406,19 +403,17 @@ label##_common: \ * Start of pSeries system interrupt routines */ . = 0x100 - .globl __start_interrupts __start_interrupts: STD_EXCEPTION_PSERIES(0x100, SystemReset) . = 0x200 -_MachineCheckPseries: +MachineCheck_Pseries: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, MachineCheck_common) . = 0x300 - .globl DataAccess_Pseries DataAccess_Pseries: HMT_MEDIUM mtspr SPRG1,r13 @@ -430,14 +425,13 @@ BEGIN_FTR_SECTION rlwimi r13,r12,16,0x20 mfcr r12 cmpwi r13,0x2c - beq .do_stab_bolted_Pseries + beq do_stab_bolted_Pseries mtcrf 0x80,r12 mfspr r12,SPRG2 END_FTR_SECTION_IFCLR(CPU_FTR_SLB) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, DataAccess_common) . = 0x380 - .globl DataAccessSLB_Pseries DataAccessSLB_Pseries: HMT_MEDIUM mtspr SPRG1,r13 @@ -453,7 +447,7 @@ DataAccessSLB_Pseries: clrrdi r12,r13,32 /* get high part of &label */ mfmsr r10 mfspr r11,SRR0 /* save SRR0 */ - ori r12,r12,(.do_slb_miss)@l + ori r12,r12,(do_slb_miss)@l ori r10,r10,MSR_IR|MSR_DR /* DON'T set RI for SLB miss */ mtspr SRR0,r12 mfspr r12,SRR1 /* and SRR1 */ @@ -465,7 +459,6 @@ DataAccessSLB_Pseries: STD_EXCEPTION_PSERIES(0x400, InstructionAccess) . = 0x480 - .globl InstructionAccessSLB_Pseries InstructionAccessSLB_Pseries: HMT_MEDIUM mtspr SPRG1,r13 @@ -481,7 +474,7 @@ InstructionAccessSLB_Pseries: clrrdi r12,r13,32 /* get high part of &label */ mfmsr r10 mfspr r11,SRR0 /* save SRR0 */ - ori r12,r12,(.do_slb_miss)@l + ori r12,r12,(do_slb_miss)@l ori r10,r10,MSR_IR|MSR_DR /* DON'T set RI for SLB miss */ mtspr SRR0,r12 mfspr r12,SRR1 /* and SRR1 */ @@ -499,7 +492,6 @@ InstructionAccessSLB_Pseries: STD_EXCEPTION_PSERIES(0xb00, Trap_0b) . = 0xc00 - .globl SystemCall_Pseries SystemCall_Pseries: HMT_MEDIUM mr r9,r13 @@ -536,10 +528,10 @@ SystemCall_Pseries: STD_EXCEPTION_PSERIES(0x3000, PerformanceMonitor) . 
= 0x3100 -_GLOBAL(do_stab_bolted_Pseries) +do_stab_bolted_Pseries: mtcrf 0x80,r12 mfspr r12,SPRG2 - EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) + EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, do_stab_bolted) /* Space for the naca. Architected to be located at real address @@ -548,8 +540,6 @@ _GLOBAL(do_stab_bolted_Pseries) * point to itVpdAreas. On pSeries native, this value is not used. */ . = NACA_PHYS_ADDR - .globl __end_interrupts - .globl __start_naca __end_interrupts: __start_naca: #ifdef CONFIG_PPC_ISERIES @@ -562,12 +552,9 @@ __start_naca: .llong paca . = SYSTEMCFG_PHYS_ADDR - .globl __end_naca - .globl __start_systemcfg __end_naca: __start_systemcfg: . = (SYSTEMCFG_PHYS_ADDR + PAGE_SIZE) - .globl __end_systemcfg __end_systemcfg: #ifdef CONFIG_PPC_ISERIES @@ -615,7 +602,7 @@ BEGIN_FTR_SECTION rlwimi r13,r12,16,0x20 mfcr r12 cmpwi r13,0x2c - beq .do_stab_bolted_Iseries + beq do_stab_bolted_Iseries mtcrf 0x80,r12 mfspr r12,SPRG2 END_FTR_SECTION_IFCLR(CPU_FTR_SLB) @@ -623,14 +610,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) EXCEPTION_PROLOG_ISERIES_2 b DataAccess_common -.do_stab_bolted_Iseries: +do_stab_bolted_Iseries: mtcrf 0x80,r12 mfspr r12,SPRG2 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) EXCEPTION_PROLOG_ISERIES_2 - b .do_stab_bolted + b do_stab_bolted - .globl DataAccessSLB_Iseries + .globl DataAccessSLB_Iseries DataAccessSLB_Iseries: mtspr SPRG1,r13 /* save r13 */ EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) @@ -638,11 +625,11 @@ DataAccessSLB_Iseries: ld r11,PACALPPACA+LPPACASRR0(r13) ld r12,PACALPPACA+LPPACASRR1(r13) mfspr r3,DAR - b .do_slb_miss + b do_slb_miss STD_EXCEPTION_ISERIES(0x400, InstructionAccess, PACA_EXGEN) - .globl InstructionAccessSLB_Iseries + .globl InstructionAccessSLB_Iseries InstructionAccessSLB_Iseries: mtspr SPRG1,r13 /* save r13 */ EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) @@ -650,7 +637,7 @@ InstructionAccessSLB_Iseries: ld r11,PACALPPACA+LPPACASRR0(r13) ld r12,PACALPPACA+LPPACASRR1(r13) mr r3,r11 - b .do_slb_miss + b do_slb_miss MASKABLE_EXCEPTION_ISERIES(0x500, HardwareInterrupt) STD_EXCEPTION_ISERIES(0x600, Alignment, PACA_EXGEN) @@ -660,7 +647,7 @@ InstructionAccessSLB_Iseries: STD_EXCEPTION_ISERIES(0xa00, Trap_0a, PACA_EXGEN) STD_EXCEPTION_ISERIES(0xb00, Trap_0b, PACA_EXGEN) - .globl SystemCall_Iseries + .globl SystemCall_Iseries SystemCall_Iseries: mr r9,r13 mfspr r13,SPRG3 @@ -722,7 +709,6 @@ iseries_secondary_smp_loop: b 1b /* If SMP not configured, secondaries * loop forever */ - .globl Decrementer_Iseries_masked Decrementer_Iseries_masked: li r11,1 stb r11,PACALPPACA+LPPACADECRINT(r13) @@ -730,7 +716,6 @@ Decrementer_Iseries_masked: mtspr SPRN_DEC,r12 /* fall through */ - .globl HardwareInterrupt_Iseries_masked HardwareInterrupt_Iseries_masked: mtcrf 0x80,r9 /* Restore regs */ ld r11,PACALPPACA+LPPACASRR0(r13) @@ -750,7 +735,6 @@ HardwareInterrupt_Iseries_masked: * Data area reserved for FWNMI option. */ .= 0x7000 - .globl fwnmi_data_area fwnmi_data_area: /* @@ -774,11 +758,9 @@ MachineCheck_FWNMI: * before we get control (with relocate on) */ . = STAB0_PHYS_ADDR - .globl __start_stab __start_stab: . = (STAB0_PHYS_ADDR + PAGE_SIZE) - .globl __end_stab __end_stab: @@ -791,7 +773,6 @@ __end_stab: * save area: PACA_EXMC instead of PACA_EXGEN. */ .align 7 - .globl MachineCheck_common MachineCheck_common: EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) DISABLE_INTS @@ -900,7 +881,6 @@ unrecov_fer: * r9 - r13 are saved in paca->exgen. 
*/ .align 7 - .globl DataAccess_common DataAccess_common: mfspr r10,DAR std r10,PACA_EXGEN+EX_DAR(r13) @@ -910,19 +890,17 @@ DataAccess_common: ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) li r5,0x300 - b .do_hash_page /* Try to handle as hpte fault */ + b do_hash_page /* Try to handle as hpte fault */ .align 7 - .globl InstructionAccess_common InstructionAccess_common: EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) ld r3,_NIP(r1) andis. r4,r12,0x5820 li r5,0x400 - b .do_hash_page /* Try to handle as hpte fault */ + b do_hash_page /* Try to handle as hpte fault */ .align 7 - .globl HardwareInterrupt_common .globl HardwareInterrupt_entry HardwareInterrupt_common: EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) @@ -933,7 +911,6 @@ HardwareInterrupt_entry: b .ret_from_except_lite .align 7 - .globl Alignment_common Alignment_common: mfspr r10,DAR std r10,PACA_EXGEN+EX_DAR(r13) @@ -951,7 +928,6 @@ Alignment_common: b .ret_from_except .align 7 - .globl ProgramCheck_common ProgramCheck_common: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl .save_nvgprs @@ -961,10 +937,9 @@ ProgramCheck_common: b .ret_from_except .align 7 - .globl FPUnavailable_common FPUnavailable_common: EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) - bne .load_up_fpu /* if from user, just load it up */ + bne load_up_fpu /* if from user, just load it up */ bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD ENABLE_INTS @@ -972,11 +947,10 @@ FPUnavailable_common: BUG_OPCODE .align 7 - .globl AltivecUnavailable_common AltivecUnavailable_common: EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) #ifdef CONFIG_ALTIVEC - bne .load_up_altivec /* if from user, just load it up */ + bne load_up_altivec /* if from user, just load it up */ #endif bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD @@ -988,15 +962,15 @@ AltivecUnavailable_common: * Hash table stuff */ .align 7 -_GLOBAL(do_hash_page) +do_hash_page: std r3,_DAR(r1) std r4,_DSISR(r1) andis. r0,r4,0xa450 /* weird error? */ - bne- .handle_page_fault /* if not, try to insert a HPTE */ + bne- handle_page_fault /* if not, try to insert a HPTE */ BEGIN_FTR_SECTION andis. r0,r4,0x0020 /* Is it a segment table fault? */ - bne- .do_ste_alloc /* If so handle it */ + bne- do_ste_alloc /* If so handle it */ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) /* @@ -1051,7 +1025,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) #endif /* Here we have a page fault that hash_page can't handle. */ -_GLOBAL(handle_page_fault) +handle_page_fault: ENABLE_INTS 11: ld r4,_DAR(r1) ld r5,_DSISR(r1) @@ -1067,11 +1041,11 @@ _GLOBAL(handle_page_fault) b .ret_from_except /* here we have a segment miss */ -_GLOBAL(do_ste_alloc) +do_ste_alloc: bl .ste_allocate /* try to insert stab entry */ cmpdi r3,0 beq+ fast_exception_return - b .handle_page_fault + b handle_page_fault /* * r13 points to the PACA, r9 contains the saved CR, @@ -1081,7 +1055,7 @@ _GLOBAL(do_ste_alloc) * We assume (DAR >> 60) == 0xc. */ .align 7 -_GLOBAL(do_stab_bolted) +do_stab_bolted: stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ @@ -1176,7 +1150,7 @@ _GLOBAL(do_stab_bolted) * r3 is saved in paca->slb_r3 * We assume we aren't going to take any exceptions during this procedure. */ -_GLOBAL(do_slb_miss) +do_slb_miss: mflr r10 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ @@ -1228,7 +1202,7 @@ unrecov_slb: */ _GLOBAL(pseries_secondary_smp_init) /* turn on 64-bit mode */ - bl .enable_64b_mode + bl enable_64b_mode isync /* Set up a paca value for this processor. 
*/ @@ -1303,12 +1277,12 @@ _GLOBAL(__start_initialization_iSeries) /* relocation is on at this point */ - b .start_here_common + b start_here_common #endif #ifdef CONFIG_PPC_PSERIES -_STATIC(mmu_off) +mmu_off: mfmsr r3 andi. r0,r3,MSR_IR|MSR_DR beqlr @@ -1318,14 +1292,15 @@ _STATIC(mmu_off) sync rfid b . /* prevent speculative execution */ -_GLOBAL(__start_initialization_pSeries) + +__start_initialization_pSeries: mr r31,r3 /* save parameters */ mr r30,r4 mr r29,r5 mr r28,r6 mr r27,r7 - bl .enable_64b_mode + bl enable_64b_mode /* put a relocation offset into r3 */ bl .reloc_offset @@ -1354,15 +1329,15 @@ _GLOBAL(__start_initialization_pSeries) li r24,0 /* cpu # */ /* Switch off MMU if not already */ - LOADADDR(r4, .__after_prom_start - KERNELBASE) + LOADADDR(r4, after_prom_start - KERNELBASE) add r4,r4,r23 - bl .mmu_off + bl mmu_off /* * At this point, r3 contains the physical address we are running at, * returned by prom_init() */ -_STATIC(__after_prom_start) +after_prom_start: /* * We need to run with __start at physical address 0. @@ -1407,7 +1382,7 @@ _STATIC(__after_prom_start) ld r5,0(r5) /* get the value of klimit */ sub r5,r5,r27 bl .copy_and_flush /* copy the rest */ - b .start_here_pSeries + b start_here_pSeries #endif /* @@ -1457,7 +1432,7 @@ copy_to_here: * switch (ie, no lazy save of the FP registers). * On entry: r13 == 'current' && last_task_used_math != 'current' */ -_STATIC(load_up_fpu) +load_up_fpu: mfmsr r5 /* grab the current MSR */ ori r5,r5,MSR_FP mtmsrd r5 /* enable use of fpu now */ @@ -1560,7 +1535,7 @@ _GLOBAL(giveup_fpu) * switch (ie, no lazy save of the vector registers). * On entry: r13 == 'current' && last_task_used_altivec != 'current' */ -_STATIC(load_up_altivec) +load_up_altivec: mfmsr r5 /* grab the current MSR */ oris r5,r5,MSR_VEC at h mtmsrd r5 /* enable use of VMX now */ @@ -1681,21 +1656,21 @@ _GLOBAL(giveup_altivec) .globl pmac_secondary_start_1 pmac_secondary_start_1: li r24, 1 - b .pmac_secondary_start + b pmac_secondary_start .globl pmac_secondary_start_2 pmac_secondary_start_2: li r24, 2 - b .pmac_secondary_start + b pmac_secondary_start .globl pmac_secondary_start_3 pmac_secondary_start_3: li r24, 3 - b .pmac_secondary_start + b pmac_secondary_start -_GLOBAL(pmac_secondary_start) +pmac_secondary_start: /* turn on 64-bit mode */ - bl .enable_64b_mode + bl enable_64b_mode isync /* Copy some CPU settings from CPU 0 */ @@ -1792,7 +1767,7 @@ _GLOBAL(__secondary_start) mtlr r7 /* enable MMU and jump to start_secondary */ - LOADADDR(r3,.start_secondary_prolog) + LOADADDR(r3,start_secondary_prolog) SET_REG_TO_CONST(r4, MSR_KERNEL) #ifdef DO_SOFT_DISABLE ori r4,r4,MSR_EE @@ -1806,7 +1781,8 @@ _GLOBAL(__secondary_start) * Running with relocation on at this point. All we want to do is * zero the stack back-chain pointer before going into C code. */ -_GLOBAL(start_secondary_prolog) + .global start_secondary_prolog +start_secondary_prolog: li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ bl .start_secondary @@ -1815,7 +1791,7 @@ _GLOBAL(start_secondary_prolog) /* * This subroutine clobbers r11 and r12 */ -_GLOBAL(enable_64b_mode) +enable_64b_mode: mfmsr r11 /* grab the current MSR */ li r12,1 rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) @@ -1831,7 +1807,7 @@ _GLOBAL(enable_64b_mode) /* * This is where the main kernel code starts. */ -_STATIC(start_here_pSeries) +start_here_pSeries: /* get a new offset, now that the kernel has moved. 
*/ bl .reloc_offset mr r26,r3 @@ -1864,7 +1840,7 @@ _STATIC(start_here_pSeries) beq 90f b 91f /* HMT not supported */ 90: li r3,0 - bl .hmt_start_secondary + bl hmt_start_secondary 91: #endif @@ -1895,7 +1871,7 @@ _STATIC(start_here_pSeries) li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) - /* set up the TOC (physical address) */ + /* set up the TOC (physical address) */ LOADADDR(r2,__toc_start) addi r2,r2,0x4000 addi r2,r2,0x4000 @@ -1959,7 +1935,7 @@ _STATIC(start_here_pSeries) ld r6,0(r6) /* get the value of _SDR1 */ mtspr SDR1,r6 /* set the htab location */ 98: - LOADADDR(r3,.start_here_common) + LOADADDR(r3,start_here_common) SET_REG_TO_CONST(r4, MSR_KERNEL) mtspr SRR0,r3 mtspr SRR1,r4 @@ -1968,7 +1944,7 @@ _STATIC(start_here_pSeries) #endif /* CONFIG_PPC_PSERIES */ /* This is where all platforms converge execution */ -_STATIC(start_here_common) +start_here_common: /* relocation is on at this point */ /* The following code sets up the SP and TOC now that we are */ @@ -2037,7 +2013,8 @@ _STATIC(start_here_common) _GLOBAL(__setup_cpu_power3) blr -_GLOBAL(hmt_init) + .global hmt_init +hmt_init: #ifdef CONFIG_HMT LOADADDR(r5, hmt_thread_data) mfspr r7,PVR @@ -2056,7 +2033,7 @@ _GLOBAL(hmt_init) andi. r6,r6,0x3ff 92: sldi r4,r24,3 stwx r6,r5,r4 - bl .hmt_start_secondary + bl hmt_start_secondary b 101f __hmt_secondary_hold: @@ -2087,7 +2064,7 @@ __hmt_secondary_hold: b .pseries_secondary_smp_init #ifdef CONFIG_HMT -_GLOBAL(hmt_start_secondary) +hmt_start_secondary: LOADADDR(r4,__hmt_secondary_hold) clrldi r4,r4,4 mtspr NIADORM, r4 diff -puN include/asm-ppc64/processor.h~mminimal_toc_die_die_die_1 include/asm-ppc64/processor.h --- mminimal_toc_die_die_die/include/asm-ppc64/processor.h~mminimal_toc_die_die_die_1 2004-08-24 19:34:49.849838193 +1000 +++ mminimal_toc_die_die_die-anton/include/asm-ppc64/processor.h 2004-08-24 19:34:58.122518472 +1000 @@ -426,18 +426,6 @@ name: \ .type GLUE(.,name), at function; \ GLUE(.,name): -#define _STATIC(name) \ - .section ".text"; \ - .align 2 ; \ - .section ".opd","aw"; \ -name: \ - .quad GLUE(.,name); \ - .quad .TOC. at tocbase; \ - .quad 0; \ - .previous; \ - .type GLUE(.,name), at function; \ -GLUE(.,name): - #endif /* __ASSEMBLY__ */ _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 24 23:18:50 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 24 Aug 2004 23:18:50 +1000 Subject: [PATCH] -mminimal-toc removal 2: Add required nops In-Reply-To: <20040824131646.GI2306@krispykreme> References: <20040824131115.GH2306@krispykreme> <20040824131646.GI2306@krispykreme> Message-ID: <20040824131850.GJ2306@krispykreme> Add nops after function calls where required. The linker may need to restore r2. 
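For background: when the linker resolves a bl through a stub into a different TOC group, it rewrites the instruction after the call to reload the caller's r2 from its save slot, so a nop has to be left there for it to overwrite. Roughly, with an illustrative callee name and 40(r1) being the 64-bit ELF ABI TOC save area:

    bl	.some_external_function	/* may be redirected through a linker stub */
    nop				/* linker rewrites this to: ld r2,40(r1) */

That is all the nops added below are for; without them there is no slot for the TOC restore.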
Signed-off-by: Anton Blanchard --- mminimal_toc_die_die_die-anton/arch/ppc64/kernel/entry.S | 25 ++++++- mminimal_toc_die_die_die-anton/arch/ppc64/kernel/head.S | 52 ++++++++++++++- mminimal_toc_die_die_die-anton/arch/ppc64/kernel/misc.S | 2 3 files changed, 76 insertions(+), 3 deletions(-) diff -puN arch/ppc64/kernel/entry.S~mminimal_toc_die_die_die_2 arch/ppc64/kernel/entry.S --- mminimal_toc_die_die_die/arch/ppc64/kernel/entry.S~mminimal_toc_die_die_die_2 2004-08-24 19:50:08.942397929 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/entry.S 2004-08-24 19:50:08.976395316 +1000 @@ -110,6 +110,7 @@ SystemCall_common: #ifdef SHOW_SYSCALLS bl .do_show_syscall + nop REST_GPR(0,r1) REST_4GPRS(3,r1) REST_2GPRS(7,r1) @@ -150,6 +151,7 @@ syscall_exit: #ifdef SHOW_SYSCALLS std r3,GPR3(r1) bl .do_show_syscall_exit + nop ld r3,GPR3(r1) #endif std r3,RESULT(r1) @@ -216,6 +218,7 @@ syscall_dotrace: bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .do_syscall_trace_enter + nop ld r0,GPR0(r1) /* Restore original registers */ ld r3,GPR3(r1) ld r4,GPR4(r1) @@ -232,6 +235,7 @@ syscall_exit_trace: std r3,GPR3(r1) bl .save_nvgprs bl .do_syscall_trace_leave + nop REST_NVGPRS(r1) ld r3,GPR3(r1) ld r5,_CCR(r1) @@ -265,53 +269,64 @@ _GLOBAL(save_nvgprs) _GLOBAL(ppc32_sigsuspend) bl .save_nvgprs bl .sys32_sigsuspend + nop b syscall_exit _GLOBAL(ppc64_rt_sigsuspend) bl .save_nvgprs bl .sys_rt_sigsuspend + nop b syscall_exit _GLOBAL(ppc32_rt_sigsuspend) bl .save_nvgprs bl .sys32_rt_sigsuspend + nop b syscall_exit _GLOBAL(ppc_fork) bl .save_nvgprs bl .sys_fork + nop b syscall_exit _GLOBAL(ppc_vfork) bl .save_nvgprs bl .sys_vfork + nop b syscall_exit _GLOBAL(ppc_clone) bl .save_nvgprs bl .sys_clone + nop b syscall_exit _GLOBAL(ppc32_swapcontext) bl .save_nvgprs bl .sys32_swapcontext + nop b 80f _GLOBAL(ppc64_swapcontext) bl .save_nvgprs bl .sys_swapcontext + nop b 80f _GLOBAL(ppc32_sigreturn) bl .sys32_sigreturn + nop b 80f _GLOBAL(ppc32_rt_sigreturn) bl .sys32_rt_sigreturn + nop b 80f _GLOBAL(ppc64_rt_sigreturn) bl .sys_rt_sigreturn + nop 80: cmpdi 0,r3,0 blt syscall_exit @@ -320,10 +335,12 @@ _GLOBAL(ppc64_rt_sigreturn) andi. r4,r4,_TIF_SYSCALL_T_OR_A beq+ 81f bl .do_syscall_trace_leave + nop 81: b .ret_from_except _GLOBAL(ret_from_fork) bl .schedule_tail + nop REST_NVGPRS(r1) li r3,0 b syscall_exit @@ -492,6 +509,7 @@ restore: mtmsrd r10 /* hard-enable again */ addi r3,r1,STACK_FRAME_OVERHEAD bl .do_IRQ + nop b .ret_from_except /* loop back and handle more */ 4: stb r5,PACAPROCENABLED(r13) @@ -568,6 +586,7 @@ do_work: #endif mtmsrd r10,1 /* reenable interrupts */ bl .schedule + nop mfmsr r10 clrrdi r9,r1,THREAD_SHIFT rldicl r10,r10,48,1 /* disable interrupts again */ @@ -588,17 +607,20 @@ user_work: andi. 
r0,r4,_TIF_NEED_RESCHED beq 1f bl .schedule + nop b .ret_from_except_lite 1: bl .save_nvgprs li r3,0 addi r4,r1,STACK_FRAME_OVERHEAD bl .do_signal + nop b .ret_from_except unrecov_restore: addi r3,r1,STACK_FRAME_OVERHEAD bl .unrecoverable_exception + nop b unrecov_restore #ifdef CONFIG_PPC_PSERIES @@ -769,7 +791,8 @@ _GLOBAL(enter_prom) std r11,_MSR(r1) /* Get the PROM entrypoint */ - bl .reloc_offset + bl .reloc_offset + nop LOADADDR(r12,prom) sub r12,r12,r3 ld r12,PROMENTRY(r12) diff -puN arch/ppc64/kernel/head.S~mminimal_toc_die_die_die_2 arch/ppc64/kernel/head.S --- mminimal_toc_die_die_die/arch/ppc64/kernel/head.S~mminimal_toc_die_die_die_2 2004-08-24 19:50:08.948397468 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/head.S 2004-08-24 19:50:08.985394624 +1000 @@ -386,9 +386,12 @@ label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ DISABLE_INTS; \ bl .save_nvgprs; \ + nop; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ - b .ret_from_except + nop; \ + b .ret_from_except; \ + nop #define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ .align 7; \ @@ -397,7 +400,9 @@ label##_common: \ DISABLE_INTS; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ - b .ret_from_except_lite + nop; \ + b .ret_from_except_lite; \ + nop /* * Start of pSeries system interrupt routines @@ -777,9 +782,12 @@ MachineCheck_common: EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) DISABLE_INTS bl .save_nvgprs + nop addi r3,r1,STACK_FRAME_OVERHEAD bl .MachineCheckException + nop b .ret_from_except + nop STD_EXCEPTION_COMMON_LITE(0x900, Decrementer, .timer_interrupt) STD_EXCEPTION_COMMON(0xa00, Trap_0a, .UnknownException) @@ -832,6 +840,7 @@ bad_stack: ld r2,PACATOC(r13) 1: addi r3,r1,STACK_FRAME_OVERHEAD bl .kernel_bad_stack + nop b 1b /* @@ -871,8 +880,10 @@ fast_exception_return: unrecov_fer: bl .save_nvgprs + nop 1: addi r3,r1,STACK_FRAME_OVERHEAD bl .unrecoverable_exception + nop b 1b /* @@ -908,7 +919,9 @@ HardwareInterrupt_entry: DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD bl .do_IRQ + nop b .ret_from_except_lite + nop .align 7 Alignment_common: @@ -922,28 +935,36 @@ Alignment_common: std r3,_DAR(r1) std r4,_DSISR(r1) bl .save_nvgprs + nop addi r3,r1,STACK_FRAME_OVERHEAD ENABLE_INTS bl .AlignmentException + nop b .ret_from_except + nop .align 7 ProgramCheck_common: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl .save_nvgprs + nop addi r3,r1,STACK_FRAME_OVERHEAD ENABLE_INTS bl .ProgramCheckException + nop b .ret_from_except + nop .align 7 FPUnavailable_common: EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) bne load_up_fpu /* if from user, just load it up */ bl .save_nvgprs + nop addi r3,r1,STACK_FRAME_OVERHEAD ENABLE_INTS bl .KernelFPUnavailableException + nop BUG_OPCODE .align 7 @@ -953,10 +974,13 @@ AltivecUnavailable_common: bne load_up_altivec /* if from user, just load it up */ #endif bl .save_nvgprs + nop addi r3,r1,STACK_FRAME_OVERHEAD ENABLE_INTS bl .AltivecUnavailableException + nop b .ret_from_except + nop /* * Hash table stuff @@ -999,6 +1023,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) * at return r3 = 0 for success */ bl .hash_page /* build HPTE if possible */ + nop cmpdi r3,0 /* see if hash_page succeeded */ #ifdef DO_SOFT_DISABLE @@ -1011,6 +1036,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) * interrupts if necessary. */ beq .ret_from_except_lite + nop /* * hash_page couldn't handle it, set soft interrupt enable back * to what it was before the trap. 
Note that .local_irq_restore @@ -1031,14 +1057,19 @@ handle_page_fault: ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl .do_page_fault + nop cmpdi r3,0 beq+ .ret_from_except_lite + nop bl .save_nvgprs + nop mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz r4,_DAR(r1) bl .bad_page_fault + nop b .ret_from_except + nop /* here we have a segment miss */ do_ste_alloc: @@ -1158,6 +1189,7 @@ do_slb_miss: std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ bl .slb_allocate /* handle it */ + nop /* All done -- return from exception. */ @@ -1191,8 +1223,10 @@ unrecov_slb: EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) DISABLE_INTS bl .save_nvgprs + nop 1: addi r3,r1,STACK_FRAME_OVERHEAD bl .unrecoverable_exception + nop b 1b @@ -1256,6 +1290,7 @@ _GLOBAL(__start_initialization_iSeries) LOADADDR(r4,cur_cpu_spec) li r5,0 bl .identify_cpu + nop LOADADDR(r2,__toc_start) addi r2,r2,0x4000 @@ -1304,6 +1339,7 @@ __start_initialization_pSeries: /* put a relocation offset into r3 */ bl .reloc_offset + nop LOADADDR(r2,__toc_start) addi r2,r2,0x4000 @@ -1321,6 +1357,7 @@ __start_initialization_pSeries: /* Do all of the interaction with OF client interface */ bl .prom_init + nop mr r23,r3 /* Save phys address we are running at */ /* Setup some critical 970 SPRs before switching MMU off */ @@ -1352,6 +1389,7 @@ after_prom_start: * r27 == KERNELBASE */ bl .reloc_offset + nop mr r26,r3 SET_REG_TO_CONST(r27,KERNELBASE) @@ -1725,6 +1763,7 @@ _GLOBAL(__secondary_start) /* Initialize the first segment table (or SLB) entry */ ld r3,PACASTABVIRT(r13) /* get addr of segment table */ bl .stab_initialize + nop /* Initialize the kernel stack. Just a repeat for iSeries. */ LOADADDR(r3,current_set) @@ -1786,6 +1825,7 @@ start_secondary_prolog: li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ bl .start_secondary + nop #endif /* @@ -1810,6 +1850,7 @@ enable_64b_mode: start_here_pSeries: /* get a new offset, now that the kernel has moved. */ bl .reloc_offset + nop mr r26,r3 mfmsr r6 @@ -1883,6 +1924,7 @@ start_here_pSeries: sub r4,r4,r26 mr r5,r26 bl .identify_cpu + nop /* Get the pointer to the segment table which is used by */ /* stab_initialize */ @@ -1923,7 +1965,9 @@ start_here_pSeries: /* Initialize an initial memory mapping and turn on relocation. */ bl .stab_initialize + nop bl .htab_initialize + nop li r3,SYSTEMCFG_PHYS_ADDR /* r3 = ptr to systemcfg */ lwz r3,PLATFORM(r3) /* r3 = platform flags */ @@ -1962,6 +2006,7 @@ start_here_common: */ li r3,0 bl .do_cpu_ftr_fixups + nop /* setup the systemcfg pointer */ LOADADDR(r9,systemcfg) @@ -1997,6 +2042,7 @@ start_here_common: mr r7,r27 bl .setup_system + nop /* Load up the kernel context */ 5: @@ -2009,6 +2055,7 @@ start_here_common: #endif bl .start_kernel + nop _GLOBAL(__setup_cpu_power3) blr @@ -2062,6 +2109,7 @@ __hmt_secondary_hold: #endif mr r3,r24 b .pseries_secondary_smp_init + nop #ifdef CONFIG_HMT hmt_start_secondary: diff -puN arch/ppc64/kernel/misc.S~mminimal_toc_die_die_die_2 arch/ppc64/kernel/misc.S --- mminimal_toc_die_die_die/arch/ppc64/kernel/misc.S~mminimal_toc_die_die_die_2 2004-08-24 19:50:08.954397007 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/misc.S 2004-08-24 19:50:08.988394393 +1000 @@ -110,6 +110,7 @@ _GLOBAL(call_do_softirq) stdu r1,THREAD_SIZE-112(r3) mr r1,r3 bl .__do_softirq + nop ld r1,0(r1) ld r0,16(r1) mtlr r0 @@ -121,6 +122,7 @@ _GLOBAL(call_handle_irq_event) stdu r1,THREAD_SIZE-112(r6) mr r1,r6 bl .handle_irq_event + nop ld r1,0(r1) ld r0,16(r1) mtlr r0 _ ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From anton at samba.org Tue Aug 24 23:22:30 2004 From: anton at samba.org (Anton Blanchard) Date: Tue, 24 Aug 2004 23:22:30 +1000 Subject: [PATCH] -mminimal-toc removal 3: Remove it In-Reply-To: <20040824131850.GJ2306@krispykreme> References: <20040824131115.GH2306@krispykreme> <20040824131646.GI2306@krispykreme> <20040824131850.GJ2306@krispykreme> Message-ID: <20040824132230.GK2306@krispykreme> Remove -mminimal-toc. We now store the address of the procedure descriptor in the syscall table. Fix the syscall path to load and restore r2. Fix cond_syscall to match. BTW with a working tocof() we could stash r2 + the PC in the syscall table and save ourselves the load of the TOC. Signed-off-by: Anton Blanchard --- mminimal_toc_die_die_die-anton/arch/ppc64/Makefile | 3 mminimal_toc_die_die_die-anton/arch/ppc64/kernel/Makefile | 1 mminimal_toc_die_die_die-anton/arch/ppc64/kernel/entry.S | 5 mminimal_toc_die_die_die-anton/arch/ppc64/kernel/misc.S | 1076 +++++++------- mminimal_toc_die_die_die-anton/arch/ppc64/mm/Makefile | 2 mminimal_toc_die_die_die-anton/arch/ppc64/xmon/Makefile | 2 mminimal_toc_die_die_die-anton/include/asm-ppc64/unistd.h | 3 7 files changed, 545 insertions(+), 547 deletions(-) diff -puN arch/ppc64/Makefile~mminimal_toc_die_die_die_3 arch/ppc64/Makefile --- mminimal_toc_die_die_die/arch/ppc64/Makefile~mminimal_toc_die_die_die_3 2004-08-24 19:50:11.553591159 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/Makefile 2004-08-24 19:50:11.609586854 +1000 @@ -26,8 +26,7 @@ CHECKFLAGS += -m64 -D__powerpc__=1 LDFLAGS := -m elf64ppc LDFLAGS_vmlinux := -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD) -CFLAGS += -msoft-float -pipe -Wno-uninitialized -mminimal-toc \ - -mtraceback=none +CFLAGS += -msoft-float -pipe -Wno-uninitialized -mtraceback=none ifeq ($(CONFIG_POWER4_ONLY),y) CFLAGS += $(call cc-option,-mcpu=power4) diff -puN arch/ppc64/kernel/Makefile~mminimal_toc_die_die_die_3 arch/ppc64/kernel/Makefile --- mminimal_toc_die_die_die/arch/ppc64/kernel/Makefile~mminimal_toc_die_die_die_3 2004-08-24 19:50:11.560590621 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/Makefile 2004-08-24 19:50:11.610586778 +1000 @@ -2,7 +2,6 @@ # Makefile for the linux ppc64 kernel. # -EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ diff -puN arch/ppc64/kernel/entry.S~mminimal_toc_die_die_die_3 arch/ppc64/kernel/entry.S --- mminimal_toc_die_die_die/arch/ppc64/kernel/entry.S~mminimal_toc_die_die_die_3 2004-08-24 19:50:11.566590160 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/entry.S 2004-08-24 19:50:11.613586547 +1000 @@ -143,9 +143,12 @@ system_call: /* label this so stack tr clrldi r8,r8,32 15: slwi r0,r0,3 - ldx r10,r11,r0 /* Fetch system call handler [ptr] */ + ldx r10,r11,r0 /* Fetch OPD */ + ld r2,8(r10) /* load r2 */ + ld r10,0(r10) /* load pc */ mtctr r10 bctrl /* Call handler */ + ld r2,PACATOC(r13) /* get kernel TOC into r2 */ syscall_exit: #ifdef SHOW_SYSCALLS diff -puN arch/ppc64/kernel/misc.S~mminimal_toc_die_die_die_3 arch/ppc64/kernel/misc.S --- mminimal_toc_die_die_die/arch/ppc64/kernel/misc.S~mminimal_toc_die_die_die_3 2004-08-24 19:50:11.572589698 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/misc.S 2004-08-24 19:50:11.626585548 +1000 @@ -609,544 +609,544 @@ _GLOBAL(kernel_thread) /* Why isn't this a) automatic, b) written in 'C'? 
*/ .balign 8 _GLOBAL(sys_call_table32) - .llong .sys_restart_syscall /* 0 */ - .llong .sys_exit - .llong .ppc_fork - .llong .sys_read - .llong .sys_write - .llong .sys32_open /* 5 */ - .llong .sys_close - .llong .sys32_waitpid - .llong .sys32_creat - .llong .sys_link - .llong .sys_unlink /* 10 */ - .llong .sys32_execve - .llong .sys_chdir - .llong .sys32_time - .llong .sys_mknod - .llong .sys_chmod /* 15 */ - .llong .sys_lchown - .llong .sys_ni_syscall /* old break syscall */ - .llong .sys_ni_syscall /* old stat syscall */ - .llong .ppc32_lseek - .llong .sys_getpid /* 20 */ - .llong .compat_sys_mount - .llong .sys_oldumount - .llong .sys_setuid - .llong .sys_getuid - .llong .ppc64_sys32_stime /* 25 */ - .llong .sys32_ptrace - .llong .sys_alarm - .llong .sys_ni_syscall /* old fstat syscall */ - .llong .sys32_pause - .llong .compat_sys_utime /* 30 */ - .llong .sys_ni_syscall /* old stty syscall */ - .llong .sys_ni_syscall /* old gtty syscall */ - .llong .sys32_access - .llong .sys32_nice - .llong .sys_ni_syscall /* 35 - old ftime syscall */ - .llong .sys_sync - .llong .sys32_kill - .llong .sys_rename - .llong .sys32_mkdir - .llong .sys_rmdir /* 40 */ - .llong .sys_dup - .llong .sys_pipe - .llong .compat_sys_times - .llong .sys_ni_syscall /* old prof syscall */ - .llong .sys_brk /* 45 */ - .llong .sys_setgid - .llong .sys_getgid - .llong .sys_signal - .llong .sys_geteuid - .llong .sys_getegid /* 50 */ - .llong .sys_acct - .llong .sys_umount - .llong .sys_ni_syscall /* old lock syscall */ - .llong .compat_sys_ioctl - .llong .compat_sys_fcntl /* 55 */ - .llong .sys_ni_syscall /* old mpx syscall */ - .llong .sys32_setpgid - .llong .sys_ni_syscall /* old ulimit syscall */ - .llong .sys32_olduname - .llong .sys32_umask /* 60 */ - .llong .sys_chroot - .llong .sys_ustat - .llong .sys_dup2 - .llong .sys_getppid - .llong .sys_getpgrp /* 65 */ - .llong .sys_setsid - .llong .sys32_sigaction - .llong .sys_sgetmask - .llong .sys32_ssetmask - .llong .sys_setreuid /* 70 */ - .llong .sys_setregid - .llong .ppc32_sigsuspend - .llong .compat_sys_sigpending - .llong .sys32_sethostname - .llong .compat_sys_setrlimit /* 75 */ - .llong .compat_sys_old_getrlimit - .llong .compat_sys_getrusage - .llong .sys32_gettimeofday - .llong .sys32_settimeofday - .llong .sys32_getgroups /* 80 */ - .llong .sys32_setgroups - .llong .sys_ni_syscall /* old select syscall */ - .llong .sys_symlink - .llong .sys_ni_syscall /* old lstat syscall */ - .llong .sys32_readlink /* 85 */ - .llong .sys_uselib - .llong .sys_swapon - .llong .sys_reboot - .llong .old32_readdir - .llong .sys_mmap /* 90 */ - .llong .sys_munmap - .llong .sys_truncate - .llong .sys_ftruncate - .llong .sys_fchmod - .llong .sys_fchown /* 95 */ - .llong .sys32_getpriority - .llong .sys32_setpriority - .llong .sys_ni_syscall /* old profil syscall */ - .llong .compat_sys_statfs - .llong .compat_sys_fstatfs /* 100 */ - .llong .sys_ni_syscall /* old ioperm syscall */ - .llong .compat_sys_socketcall - .llong .sys32_syslog - .llong .compat_sys_setitimer - .llong .compat_sys_getitimer /* 105 */ - .llong .compat_sys_newstat - .llong .compat_sys_newlstat - .llong .compat_sys_newfstat - .llong .sys_uname - .llong .sys_ni_syscall /* 110 old iopl syscall */ - .llong .sys_vhangup - .llong .sys_ni_syscall /* old idle syscall */ - .llong .sys_ni_syscall /* old vm86 syscall */ - .llong .compat_sys_wait4 - .llong .sys_swapoff /* 115 */ - .llong .sys32_sysinfo - .llong .sys32_ipc - .llong .sys_fsync - .llong .ppc32_sigreturn - .llong .ppc_clone /* 120 */ - .llong .sys32_setdomainname - 
.llong .ppc64_newuname - .llong .sys_ni_syscall /* old modify_ldt syscall */ - .llong .sys32_adjtimex - .llong .sys_mprotect /* 125 */ - .llong .compat_sys_sigprocmask - .llong .sys_ni_syscall /* old create_module syscall */ - .llong .sys_init_module - .llong .sys_delete_module - .llong .sys_ni_syscall /* 130 old get_kernel_syms syscall */ - .llong .sys_quotactl - .llong .sys32_getpgid - .llong .sys_fchdir - .llong .sys_bdflush - .llong .sys32_sysfs /* 135 */ - .llong .ppc64_personality - .llong .sys_ni_syscall /* for afs_syscall */ - .llong .sys_setfsuid - .llong .sys_setfsgid - .llong .sys_llseek /* 140 */ - .llong .sys32_getdents - .llong .ppc32_select - .llong .sys_flock - .llong .sys_msync - .llong .compat_sys_readv /* 145 */ - .llong .compat_sys_writev - .llong .sys32_getsid - .llong .sys_fdatasync - .llong .sys32_sysctl - .llong .sys_mlock /* 150 */ - .llong .sys_munlock - .llong .sys_mlockall - .llong .sys_munlockall - .llong .sys32_sched_setparam - .llong .sys32_sched_getparam /* 155 */ - .llong .sys32_sched_setscheduler - .llong .sys32_sched_getscheduler - .llong .sys_sched_yield - .llong .sys32_sched_get_priority_max - .llong .sys32_sched_get_priority_min /* 160 */ - .llong .sys32_sched_rr_get_interval - .llong .compat_sys_nanosleep - .llong .sys_mremap - .llong .sys_setresuid - .llong .sys_getresuid /* 165 */ - .llong .sys_ni_syscall /* old query_module syscall */ - .llong .sys_poll - .llong .compat_sys_nfsservctl - .llong .sys_setresgid - .llong .sys_getresgid /* 170 */ - .llong .sys32_prctl - .llong .ppc32_rt_sigreturn - .llong .sys32_rt_sigaction - .llong .sys32_rt_sigprocmask - .llong .sys32_rt_sigpending /* 175 */ - .llong .sys32_rt_sigtimedwait - .llong .sys32_rt_sigqueueinfo - .llong .ppc32_rt_sigsuspend - .llong .sys32_pread64 - .llong .sys32_pwrite64 /* 180 */ - .llong .sys_chown - .llong .sys_getcwd - .llong .sys_capget - .llong .sys_capset - .llong .sys32_sigaltstack /* 185 */ - .llong .sys32_sendfile - .llong .sys_ni_syscall /* reserved for streams1 */ - .llong .sys_ni_syscall /* reserved for streams2 */ - .llong .ppc_vfork - .llong .compat_sys_getrlimit /* 190 */ - .llong .sys32_readahead - .llong .sys32_mmap2 - .llong .sys32_truncate64 - .llong .sys32_ftruncate64 - .llong .sys_stat64 /* 195 */ - .llong .sys_lstat64 - .llong .sys_fstat64 - .llong .sys32_pciconfig_read - .llong .sys32_pciconfig_write - .llong .sys32_pciconfig_iobase /* 200 - pciconfig_iobase */ - .llong .sys_ni_syscall /* reserved for MacOnLinux */ - .llong .sys_getdents64 - .llong .sys_pivot_root - .llong .compat_sys_fcntl64 - .llong .sys_madvise /* 205 */ - .llong .sys_mincore - .llong .sys_gettid - .llong .sys_tkill - .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr - .llong .sys_getxattr - .llong .sys_lgetxattr - .llong .sys_fgetxattr - .llong .sys_listxattr /* 215 */ - .llong .sys_llistxattr - .llong .sys_flistxattr - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ - .llong .compat_sys_futex - .llong .compat_sys_sched_setaffinity - .llong .compat_sys_sched_getaffinity - .llong .sys_ni_syscall - .llong .sys_ni_syscall /* 225 - reserved for tux */ - .llong .sys32_sendfile64 - .llong .compat_sys_io_setup - .llong .sys_io_destroy - .llong .compat_sys_io_getevents - .llong .compat_sys_io_submit - .llong .sys_io_cancel - .llong .sys_set_tid_address - .llong .ppc32_fadvise64 - .llong .sys_exit_group - .llong .ppc32_lookup_dcookie /* 235 */ - .llong .sys_epoll_create - .llong .sys_epoll_ctl - .llong .sys_epoll_wait - .llong 
.sys_remap_file_pages - .llong .ppc32_timer_create /* 240 */ - .llong .compat_timer_settime - .llong .compat_timer_gettime - .llong .sys_timer_getoverrun - .llong .sys_timer_delete - .llong .compat_clock_settime /* 245 */ - .llong .compat_clock_gettime - .llong .compat_clock_getres - .llong .compat_clock_nanosleep - .llong .ppc32_swapcontext - .llong .sys32_tgkill /* 250 */ - .llong .sys32_utimes - .llong .compat_statfs64 - .llong .compat_fstatfs64 - .llong .ppc32_fadvise64_64 /* 32bit only fadvise64_64 */ - .llong .ppc_rtas /* 255 */ - .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong .sys_ni_syscall /* 257 reserved for vserver */ - .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ - .llong .sys_ni_syscall /* 259 reserved for new sys_mbind */ - .llong .sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ - .llong .sys_ni_syscall /* 261 reserved for new sys_set_mempolicy */ - .llong .compat_sys_mq_open - .llong .sys_mq_unlink - .llong .compat_sys_mq_timedsend - .llong .compat_sys_mq_timedreceive /* 265 */ - .llong .compat_sys_mq_notify - .llong .compat_sys_mq_getsetattr - .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong sys_restart_syscall /* 0 */ + .llong sys_exit + .llong ppc_fork + .llong sys_read + .llong sys_write + .llong sys32_open /* 5 */ + .llong sys_close + .llong sys32_waitpid + .llong sys32_creat + .llong sys_link + .llong sys_unlink /* 10 */ + .llong sys32_execve + .llong sys_chdir + .llong sys32_time + .llong sys_mknod + .llong sys_chmod /* 15 */ + .llong sys_lchown + .llong sys_ni_syscall /* old break syscall */ + .llong sys_ni_syscall /* old stat syscall */ + .llong ppc32_lseek + .llong sys_getpid /* 20 */ + .llong compat_sys_mount + .llong sys_oldumount + .llong sys_setuid + .llong sys_getuid + .llong ppc64_sys32_stime /* 25 */ + .llong sys32_ptrace + .llong sys_alarm + .llong sys_ni_syscall /* old fstat syscall */ + .llong sys32_pause + .llong compat_sys_utime /* 30 */ + .llong sys_ni_syscall /* old stty syscall */ + .llong sys_ni_syscall /* old gtty syscall */ + .llong sys32_access + .llong sys32_nice + .llong sys_ni_syscall /* 35 - old ftime syscall */ + .llong sys_sync + .llong sys32_kill + .llong sys_rename + .llong sys32_mkdir + .llong sys_rmdir /* 40 */ + .llong sys_dup + .llong sys_pipe + .llong compat_sys_times + .llong sys_ni_syscall /* old prof syscall */ + .llong sys_brk /* 45 */ + .llong sys_setgid + .llong sys_getgid + .llong sys_signal + .llong sys_geteuid + .llong sys_getegid /* 50 */ + .llong sys_acct + .llong sys_umount + .llong sys_ni_syscall /* old lock syscall */ + .llong compat_sys_ioctl + .llong compat_sys_fcntl /* 55 */ + .llong sys_ni_syscall /* old mpx syscall */ + .llong sys32_setpgid + .llong sys_ni_syscall /* old ulimit syscall */ + .llong sys32_olduname + .llong sys32_umask /* 60 */ + .llong sys_chroot + .llong sys_ustat + .llong sys_dup2 + .llong sys_getppid + .llong sys_getpgrp /* 65 */ + .llong sys_setsid + .llong sys32_sigaction + .llong sys_sgetmask + .llong sys32_ssetmask + .llong sys_setreuid /* 70 */ + .llong sys_setregid + .llong ppc32_sigsuspend + .llong compat_sys_sigpending + .llong sys32_sethostname + .llong compat_sys_setrlimit /* 75 */ + .llong compat_sys_old_getrlimit + .llong compat_sys_getrusage + .llong sys32_gettimeofday + .llong sys32_settimeofday + .llong sys32_getgroups /* 80 */ + .llong sys32_setgroups + .llong sys_ni_syscall /* old select syscall */ + .llong sys_symlink + .llong sys_ni_syscall /* old lstat syscall */ + .llong sys32_readlink /* 
85 */ + .llong sys_uselib + .llong sys_swapon + .llong sys_reboot + .llong old32_readdir + .llong sys_mmap /* 90 */ + .llong sys_munmap + .llong sys_truncate + .llong sys_ftruncate + .llong sys_fchmod + .llong sys_fchown /* 95 */ + .llong sys32_getpriority + .llong sys32_setpriority + .llong sys_ni_syscall /* old profil syscall */ + .llong compat_sys_statfs + .llong compat_sys_fstatfs /* 100 */ + .llong sys_ni_syscall /* old ioperm syscall */ + .llong compat_sys_socketcall + .llong sys32_syslog + .llong compat_sys_setitimer + .llong compat_sys_getitimer /* 105 */ + .llong compat_sys_newstat + .llong compat_sys_newlstat + .llong compat_sys_newfstat + .llong sys_uname + .llong sys_ni_syscall /* 110 old iopl syscall */ + .llong sys_vhangup + .llong sys_ni_syscall /* old idle syscall */ + .llong sys_ni_syscall /* old vm86 syscall */ + .llong compat_sys_wait4 + .llong sys_swapoff /* 115 */ + .llong sys32_sysinfo + .llong sys32_ipc + .llong sys_fsync + .llong ppc32_sigreturn + .llong ppc_clone /* 120 */ + .llong sys32_setdomainname + .llong ppc64_newuname + .llong sys_ni_syscall /* old modify_ldt syscall */ + .llong sys32_adjtimex + .llong sys_mprotect /* 125 */ + .llong compat_sys_sigprocmask + .llong sys_ni_syscall /* old create_module syscall */ + .llong sys_init_module + .llong sys_delete_module + .llong sys_ni_syscall /* 130 old get_kernel_syms syscall */ + .llong sys_quotactl + .llong sys32_getpgid + .llong sys_fchdir + .llong sys_bdflush + .llong sys32_sysfs /* 135 */ + .llong ppc64_personality + .llong sys_ni_syscall /* for afs_syscall */ + .llong sys_setfsuid + .llong sys_setfsgid + .llong sys_llseek /* 140 */ + .llong sys32_getdents + .llong ppc32_select + .llong sys_flock + .llong sys_msync + .llong compat_sys_readv /* 145 */ + .llong compat_sys_writev + .llong sys32_getsid + .llong sys_fdatasync + .llong sys32_sysctl + .llong sys_mlock /* 150 */ + .llong sys_munlock + .llong sys_mlockall + .llong sys_munlockall + .llong sys32_sched_setparam + .llong sys32_sched_getparam /* 155 */ + .llong sys32_sched_setscheduler + .llong sys32_sched_getscheduler + .llong sys_sched_yield + .llong sys32_sched_get_priority_max + .llong sys32_sched_get_priority_min /* 160 */ + .llong sys32_sched_rr_get_interval + .llong compat_sys_nanosleep + .llong sys_mremap + .llong sys_setresuid + .llong sys_getresuid /* 165 */ + .llong sys_ni_syscall /* old query_module syscall */ + .llong sys_poll + .llong compat_sys_nfsservctl + .llong sys_setresgid + .llong sys_getresgid /* 170 */ + .llong sys32_prctl + .llong ppc32_rt_sigreturn + .llong sys32_rt_sigaction + .llong sys32_rt_sigprocmask + .llong sys32_rt_sigpending /* 175 */ + .llong sys32_rt_sigtimedwait + .llong sys32_rt_sigqueueinfo + .llong ppc32_rt_sigsuspend + .llong sys32_pread64 + .llong sys32_pwrite64 /* 180 */ + .llong sys_chown + .llong sys_getcwd + .llong sys_capget + .llong sys_capset + .llong sys32_sigaltstack /* 185 */ + .llong sys32_sendfile + .llong sys_ni_syscall /* reserved for streams1 */ + .llong sys_ni_syscall /* reserved for streams2 */ + .llong ppc_vfork + .llong compat_sys_getrlimit /* 190 */ + .llong sys32_readahead + .llong sys32_mmap2 + .llong sys32_truncate64 + .llong sys32_ftruncate64 + .llong sys_stat64 /* 195 */ + .llong sys_lstat64 + .llong sys_fstat64 + .llong sys32_pciconfig_read + .llong sys32_pciconfig_write + .llong sys32_pciconfig_iobase /* 200 - pciconfig_iobase */ + .llong sys_ni_syscall /* reserved for MacOnLinux */ + .llong sys_getdents64 + .llong sys_pivot_root + .llong compat_sys_fcntl64 + .llong sys_madvise /* 205 */ 
+ .llong sys_mincore + .llong sys_gettid + .llong sys_tkill + .llong sys_setxattr + .llong sys_lsetxattr /* 210 */ + .llong sys_fsetxattr + .llong sys_getxattr + .llong sys_lgetxattr + .llong sys_fgetxattr + .llong sys_listxattr /* 215 */ + .llong sys_llistxattr + .llong sys_flistxattr + .llong sys_removexattr + .llong sys_lremovexattr + .llong sys_fremovexattr /* 220 */ + .llong compat_sys_futex + .llong compat_sys_sched_setaffinity + .llong compat_sys_sched_getaffinity + .llong sys_ni_syscall + .llong sys_ni_syscall /* 225 - reserved for tux */ + .llong sys32_sendfile64 + .llong compat_sys_io_setup + .llong sys_io_destroy + .llong compat_sys_io_getevents + .llong compat_sys_io_submit + .llong sys_io_cancel + .llong sys_set_tid_address + .llong ppc32_fadvise64 + .llong sys_exit_group + .llong ppc32_lookup_dcookie /* 235 */ + .llong sys_epoll_create + .llong sys_epoll_ctl + .llong sys_epoll_wait + .llong sys_remap_file_pages + .llong ppc32_timer_create /* 240 */ + .llong compat_timer_settime + .llong compat_timer_gettime + .llong sys_timer_getoverrun + .llong sys_timer_delete + .llong compat_clock_settime /* 245 */ + .llong compat_clock_gettime + .llong compat_clock_getres + .llong compat_clock_nanosleep + .llong ppc32_swapcontext + .llong sys32_tgkill /* 250 */ + .llong sys32_utimes + .llong compat_statfs64 + .llong compat_fstatfs64 + .llong ppc32_fadvise64_64 /* 32bit only fadvise64_64 */ + .llong ppc_rtas /* 255 */ + .llong sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ + .llong sys_ni_syscall /* 257 reserved for vserver */ + .llong sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ + .llong sys_ni_syscall /* 259 reserved for new sys_mbind */ + .llong sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ + .llong sys_ni_syscall /* 261 reserved for new sys_set_mempolicy */ + .llong compat_sys_mq_open + .llong sys_mq_unlink + .llong compat_sys_mq_timedsend + .llong compat_sys_mq_timedreceive /* 265 */ + .llong compat_sys_mq_notify + .llong compat_sys_mq_getsetattr + .llong sys_ni_syscall /* 268 reserved for sys_kexec_load */ .balign 8 _GLOBAL(sys_call_table) - .llong .sys_restart_syscall /* 0 */ - .llong .sys_exit - .llong .ppc_fork - .llong .sys_read - .llong .sys_write - .llong .sys_open /* 5 */ - .llong .sys_close - .llong .sys_waitpid - .llong .sys_creat - .llong .sys_link - .llong .sys_unlink /* 10 */ - .llong .sys_execve - .llong .sys_chdir - .llong .sys64_time - .llong .sys_mknod - .llong .sys_chmod /* 15 */ - .llong .sys_lchown - .llong .sys_ni_syscall /* old break syscall */ - .llong .sys_ni_syscall /* old stat syscall */ - .llong .sys_lseek - .llong .sys_getpid /* 20 */ - .llong .sys_mount - .llong .sys_ni_syscall /* old umount syscall */ - .llong .sys_setuid - .llong .sys_getuid - .llong .ppc64_sys_stime /* 25 */ - .llong .sys_ptrace - .llong .sys_alarm - .llong .sys_ni_syscall /* old fstat syscall */ - .llong .sys_pause - .llong .sys_utime /* 30 */ - .llong .sys_ni_syscall /* old stty syscall */ - .llong .sys_ni_syscall /* old gtty syscall */ - .llong .sys_access - .llong .sys_nice - .llong .sys_ni_syscall /* 35 - old ftime syscall */ - .llong .sys_sync - .llong .sys_kill - .llong .sys_rename - .llong .sys_mkdir - .llong .sys_rmdir /* 40 */ - .llong .sys_dup - .llong .sys_pipe - .llong .sys_times - .llong .sys_ni_syscall /* old prof syscall */ - .llong .sys_brk /* 45 */ - .llong .sys_setgid - .llong .sys_getgid - .llong .sys_signal - .llong .sys_geteuid - .llong .sys_getegid /* 50 */ - .llong .sys_acct - .llong .sys_umount - .llong .sys_ni_syscall 
/* old lock syscall */ - .llong .sys_ioctl - .llong .sys_fcntl /* 55 */ - .llong .sys_ni_syscall /* old mpx syscall */ - .llong .sys_setpgid - .llong .sys_ni_syscall /* old ulimit syscall */ - .llong .sys_ni_syscall /* old uname syscall */ - .llong .sys_umask /* 60 */ - .llong .sys_chroot - .llong .sys_ustat - .llong .sys_dup2 - .llong .sys_getppid - .llong .sys_getpgrp /* 65 */ - .llong .sys_setsid - .llong .sys_ni_syscall - .llong .sys_sgetmask - .llong .sys_ssetmask - .llong .sys_setreuid /* 70 */ - .llong .sys_setregid - .llong .sys_ni_syscall - .llong .sys_ni_syscall - .llong .sys_sethostname - .llong .sys_setrlimit /* 75 */ - .llong .sys_ni_syscall /* old getrlimit syscall */ - .llong .sys_getrusage - .llong .sys_gettimeofday - .llong .sys_settimeofday - .llong .sys_getgroups /* 80 */ - .llong .sys_setgroups - .llong .sys_ni_syscall /* old select syscall */ - .llong .sys_symlink - .llong .sys_ni_syscall /* old lstat syscall */ - .llong .sys_readlink /* 85 */ - .llong .sys_uselib - .llong .sys_swapon - .llong .sys_reboot - .llong .sys_ni_syscall /* old readdir syscall */ - .llong .sys_mmap /* 90 */ - .llong .sys_munmap - .llong .sys_truncate - .llong .sys_ftruncate - .llong .sys_fchmod - .llong .sys_fchown /* 95 */ - .llong .sys_getpriority - .llong .sys_setpriority - .llong .sys_ni_syscall /* old profil syscall holder */ - .llong .sys_statfs - .llong .sys_fstatfs /* 100 */ - .llong .sys_ni_syscall /* old ioperm syscall */ - .llong .sys_socketcall - .llong .sys_syslog - .llong .sys_setitimer - .llong .sys_getitimer /* 105 */ - .llong .sys_newstat - .llong .sys_newlstat - .llong .sys_newfstat - .llong .sys_ni_syscall /* old uname syscall */ - .llong .sys_ni_syscall /* 110 old iopl syscall */ - .llong .sys_vhangup - .llong .sys_ni_syscall /* old idle syscall */ - .llong .sys_ni_syscall /* old vm86 syscall */ - .llong .sys_wait4 - .llong .sys_swapoff /* 115 */ - .llong .sys_sysinfo - .llong .sys_ipc - .llong .sys_fsync - .llong .sys_ni_syscall - .llong .ppc_clone /* 120 */ - .llong .sys_setdomainname - .llong .ppc64_newuname - .llong .sys_ni_syscall /* old modify_ldt syscall */ - .llong .sys_adjtimex - .llong .sys_mprotect /* 125 */ - .llong .sys_ni_syscall - .llong .sys_ni_syscall /* old create_module syscall */ - .llong .sys_init_module - .llong .sys_delete_module - .llong .sys_ni_syscall /* 130 old get_kernel_syms syscall */ - .llong .sys_quotactl - .llong .sys_getpgid - .llong .sys_fchdir - .llong .sys_bdflush - .llong .sys_sysfs /* 135 */ - .llong .ppc64_personality - .llong .sys_ni_syscall /* for afs_syscall */ - .llong .sys_setfsuid - .llong .sys_setfsgid - .llong .sys_llseek /* 140 */ - .llong .sys_getdents - .llong .sys_select - .llong .sys_flock - .llong .sys_msync - .llong .sys_readv /* 145 */ - .llong .sys_writev - .llong .sys_getsid - .llong .sys_fdatasync - .llong .sys_sysctl - .llong .sys_mlock /* 150 */ - .llong .sys_munlock - .llong .sys_mlockall - .llong .sys_munlockall - .llong .sys_sched_setparam - .llong .sys_sched_getparam /* 155 */ - .llong .sys_sched_setscheduler - .llong .sys_sched_getscheduler - .llong .sys_sched_yield - .llong .sys_sched_get_priority_max - .llong .sys_sched_get_priority_min /* 160 */ - .llong .sys_sched_rr_get_interval - .llong .sys_nanosleep - .llong .sys_mremap - .llong .sys_setresuid - .llong .sys_getresuid /* 165 */ - .llong .sys_ni_syscall /* old query_module syscall */ - .llong .sys_poll - .llong .sys_nfsservctl - .llong .sys_setresgid - .llong .sys_getresgid /* 170 */ - .llong .sys_prctl - .llong .ppc64_rt_sigreturn - .llong 
.sys_rt_sigaction - .llong .sys_rt_sigprocmask - .llong .sys_rt_sigpending /* 175 */ - .llong .sys_rt_sigtimedwait - .llong .sys_rt_sigqueueinfo - .llong .ppc64_rt_sigsuspend - .llong .sys_pread64 - .llong .sys_pwrite64 /* 180 */ - .llong .sys_chown - .llong .sys_getcwd - .llong .sys_capget - .llong .sys_capset - .llong .sys_sigaltstack /* 185 */ - .llong .sys_sendfile64 - .llong .sys_ni_syscall /* reserved for streams1 */ - .llong .sys_ni_syscall /* reserved for streams2 */ - .llong .ppc_vfork - .llong .sys_getrlimit /* 190 */ - .llong .sys_readahead - .llong .sys_ni_syscall /* 32bit only mmap2 */ - .llong .sys_ni_syscall /* 32bit only truncate64 */ - .llong .sys_ni_syscall /* 32bit only ftruncate64 */ - .llong .sys_ni_syscall /* 195 - 32bit only stat64 */ - .llong .sys_ni_syscall /* 32bit only lstat64 */ - .llong .sys_ni_syscall /* 32bit only fstat64 */ - .llong .sys_ni_syscall /* 32bit only pciconfig_read */ - .llong .sys_ni_syscall /* 32bit only pciconfig_write */ - .llong .sys_ni_syscall /* 32bit only pciconfig_iobase */ - .llong .sys_ni_syscall /* reserved for MacOnLinux */ - .llong .sys_getdents64 - .llong .sys_pivot_root - .llong .sys_ni_syscall /* 32bit only fcntl64 */ - .llong .sys_madvise /* 205 */ - .llong .sys_mincore - .llong .sys_gettid - .llong .sys_tkill - .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr - .llong .sys_getxattr - .llong .sys_lgetxattr - .llong .sys_fgetxattr - .llong .sys_listxattr /* 215 */ - .llong .sys_llistxattr - .llong .sys_flistxattr - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ - .llong .sys_futex - .llong .sys_sched_setaffinity - .llong .sys_sched_getaffinity - .llong .sys_ni_syscall - .llong .sys_ni_syscall /* 225 - reserved for tux */ - .llong .sys_ni_syscall /* 32bit only sendfile64 */ - .llong .sys_io_setup - .llong .sys_io_destroy - .llong .sys_io_getevents - .llong .sys_io_submit /* 230 */ - .llong .sys_io_cancel - .llong .sys_set_tid_address - .llong .sys_fadvise64 - .llong .sys_exit_group - .llong .sys_lookup_dcookie /* 235 */ - .llong .sys_epoll_create - .llong .sys_epoll_ctl - .llong .sys_epoll_wait - .llong .sys_remap_file_pages - .llong .sys_timer_create /* 240 */ - .llong .sys_timer_settime - .llong .sys_timer_gettime - .llong .sys_timer_getoverrun - .llong .sys_timer_delete - .llong .sys_clock_settime /* 245 */ - .llong .sys_clock_gettime - .llong .sys_clock_getres - .llong .sys_clock_nanosleep - .llong .ppc64_swapcontext - .llong .sys_tgkill /* 250 */ - .llong .sys_utimes - .llong .sys_statfs64 - .llong .sys_fstatfs64 - .llong .sys_ni_syscall /* 32bit only fadvise64_64 */ - .llong .ppc_rtas /* 255 */ - .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong .sys_ni_syscall /* 257 reserved for vserver */ - .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ - .llong .sys_ni_syscall /* 259 reserved for new sys_mbind */ - .llong .sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ - .llong .sys_ni_syscall /* 261 reserved for new sys_set_mempolicy */ - .llong .sys_mq_open - .llong .sys_mq_unlink - .llong .sys_mq_timedsend - .llong .sys_mq_timedreceive /* 265 */ - .llong .sys_mq_notify - .llong .sys_mq_getsetattr - .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong sys_restart_syscall /* 0 */ + .llong sys_exit + .llong ppc_fork + .llong sys_read + .llong sys_write + .llong sys_open /* 5 */ + .llong sys_close + .llong sys_waitpid + .llong sys_creat + .llong sys_link + .llong sys_unlink /* 10 */ + 
.llong sys_execve + .llong sys_chdir + .llong sys64_time + .llong sys_mknod + .llong sys_chmod /* 15 */ + .llong sys_lchown + .llong sys_ni_syscall /* old break syscall */ + .llong sys_ni_syscall /* old stat syscall */ + .llong sys_lseek + .llong sys_getpid /* 20 */ + .llong sys_mount + .llong sys_ni_syscall /* old umount syscall */ + .llong sys_setuid + .llong sys_getuid + .llong ppc64_sys_stime /* 25 */ + .llong sys_ptrace + .llong sys_alarm + .llong sys_ni_syscall /* old fstat syscall */ + .llong sys_pause + .llong sys_utime /* 30 */ + .llong sys_ni_syscall /* old stty syscall */ + .llong sys_ni_syscall /* old gtty syscall */ + .llong sys_access + .llong sys_nice + .llong sys_ni_syscall /* 35 - old ftime syscall */ + .llong sys_sync + .llong sys_kill + .llong sys_rename + .llong sys_mkdir + .llong sys_rmdir /* 40 */ + .llong sys_dup + .llong sys_pipe + .llong sys_times + .llong sys_ni_syscall /* old prof syscall */ + .llong sys_brk /* 45 */ + .llong sys_setgid + .llong sys_getgid + .llong sys_signal + .llong sys_geteuid + .llong sys_getegid /* 50 */ + .llong sys_acct + .llong sys_umount + .llong sys_ni_syscall /* old lock syscall */ + .llong sys_ioctl + .llong sys_fcntl /* 55 */ + .llong sys_ni_syscall /* old mpx syscall */ + .llong sys_setpgid + .llong sys_ni_syscall /* old ulimit syscall */ + .llong sys_ni_syscall /* old uname syscall */ + .llong sys_umask /* 60 */ + .llong sys_chroot + .llong sys_ustat + .llong sys_dup2 + .llong sys_getppid + .llong sys_getpgrp /* 65 */ + .llong sys_setsid + .llong sys_ni_syscall + .llong sys_sgetmask + .llong sys_ssetmask + .llong sys_setreuid /* 70 */ + .llong sys_setregid + .llong sys_ni_syscall + .llong sys_ni_syscall + .llong sys_sethostname + .llong sys_setrlimit /* 75 */ + .llong sys_ni_syscall /* old getrlimit syscall */ + .llong sys_getrusage + .llong sys_gettimeofday + .llong sys_settimeofday + .llong sys_getgroups /* 80 */ + .llong sys_setgroups + .llong sys_ni_syscall /* old select syscall */ + .llong sys_symlink + .llong sys_ni_syscall /* old lstat syscall */ + .llong sys_readlink /* 85 */ + .llong sys_uselib + .llong sys_swapon + .llong sys_reboot + .llong sys_ni_syscall /* old readdir syscall */ + .llong sys_mmap /* 90 */ + .llong sys_munmap + .llong sys_truncate + .llong sys_ftruncate + .llong sys_fchmod + .llong sys_fchown /* 95 */ + .llong sys_getpriority + .llong sys_setpriority + .llong sys_ni_syscall /* old profil syscall holder */ + .llong sys_statfs + .llong sys_fstatfs /* 100 */ + .llong sys_ni_syscall /* old ioperm syscall */ + .llong sys_socketcall + .llong sys_syslog + .llong sys_setitimer + .llong sys_getitimer /* 105 */ + .llong sys_newstat + .llong sys_newlstat + .llong sys_newfstat + .llong sys_ni_syscall /* old uname syscall */ + .llong sys_ni_syscall /* 110 old iopl syscall */ + .llong sys_vhangup + .llong sys_ni_syscall /* old idle syscall */ + .llong sys_ni_syscall /* old vm86 syscall */ + .llong sys_wait4 + .llong sys_swapoff /* 115 */ + .llong sys_sysinfo + .llong sys_ipc + .llong sys_fsync + .llong sys_ni_syscall + .llong ppc_clone /* 120 */ + .llong sys_setdomainname + .llong ppc64_newuname + .llong sys_ni_syscall /* old modify_ldt syscall */ + .llong sys_adjtimex + .llong sys_mprotect /* 125 */ + .llong sys_ni_syscall + .llong sys_ni_syscall /* old create_module syscall */ + .llong sys_init_module + .llong sys_delete_module + .llong sys_ni_syscall /* 130 old get_kernel_syms syscall */ + .llong sys_quotactl + .llong sys_getpgid + .llong sys_fchdir + .llong sys_bdflush + .llong sys_sysfs /* 135 */ + .llong 
ppc64_personality + .llong sys_ni_syscall /* for afs_syscall */ + .llong sys_setfsuid + .llong sys_setfsgid + .llong sys_llseek /* 140 */ + .llong sys_getdents + .llong sys_select + .llong sys_flock + .llong sys_msync + .llong sys_readv /* 145 */ + .llong sys_writev + .llong sys_getsid + .llong sys_fdatasync + .llong sys_sysctl + .llong sys_mlock /* 150 */ + .llong sys_munlock + .llong sys_mlockall + .llong sys_munlockall + .llong sys_sched_setparam + .llong sys_sched_getparam /* 155 */ + .llong sys_sched_setscheduler + .llong sys_sched_getscheduler + .llong sys_sched_yield + .llong sys_sched_get_priority_max + .llong sys_sched_get_priority_min /* 160 */ + .llong sys_sched_rr_get_interval + .llong sys_nanosleep + .llong sys_mremap + .llong sys_setresuid + .llong sys_getresuid /* 165 */ + .llong sys_ni_syscall /* old query_module syscall */ + .llong sys_poll + .llong sys_nfsservctl + .llong sys_setresgid + .llong sys_getresgid /* 170 */ + .llong sys_prctl + .llong ppc64_rt_sigreturn + .llong sys_rt_sigaction + .llong sys_rt_sigprocmask + .llong sys_rt_sigpending /* 175 */ + .llong sys_rt_sigtimedwait + .llong sys_rt_sigqueueinfo + .llong ppc64_rt_sigsuspend + .llong sys_pread64 + .llong sys_pwrite64 /* 180 */ + .llong sys_chown + .llong sys_getcwd + .llong sys_capget + .llong sys_capset + .llong sys_sigaltstack /* 185 */ + .llong sys_sendfile64 + .llong sys_ni_syscall /* reserved for streams1 */ + .llong sys_ni_syscall /* reserved for streams2 */ + .llong ppc_vfork + .llong sys_getrlimit /* 190 */ + .llong sys_readahead + .llong sys_ni_syscall /* 32bit only mmap2 */ + .llong sys_ni_syscall /* 32bit only truncate64 */ + .llong sys_ni_syscall /* 32bit only ftruncate64 */ + .llong sys_ni_syscall /* 195 - 32bit only stat64 */ + .llong sys_ni_syscall /* 32bit only lstat64 */ + .llong sys_ni_syscall /* 32bit only fstat64 */ + .llong sys_ni_syscall /* 32bit only pciconfig_read */ + .llong sys_ni_syscall /* 32bit only pciconfig_write */ + .llong sys_ni_syscall /* 32bit only pciconfig_iobase */ + .llong sys_ni_syscall /* reserved for MacOnLinux */ + .llong sys_getdents64 + .llong sys_pivot_root + .llong sys_ni_syscall /* 32bit only fcntl64 */ + .llong sys_madvise /* 205 */ + .llong sys_mincore + .llong sys_gettid + .llong sys_tkill + .llong sys_setxattr + .llong sys_lsetxattr /* 210 */ + .llong sys_fsetxattr + .llong sys_getxattr + .llong sys_lgetxattr + .llong sys_fgetxattr + .llong sys_listxattr /* 215 */ + .llong sys_llistxattr + .llong sys_flistxattr + .llong sys_removexattr + .llong sys_lremovexattr + .llong sys_fremovexattr /* 220 */ + .llong sys_futex + .llong sys_sched_setaffinity + .llong sys_sched_getaffinity + .llong sys_ni_syscall + .llong sys_ni_syscall /* 225 - reserved for tux */ + .llong sys_ni_syscall /* 32bit only sendfile64 */ + .llong sys_io_setup + .llong sys_io_destroy + .llong sys_io_getevents + .llong sys_io_submit /* 230 */ + .llong sys_io_cancel + .llong sys_set_tid_address + .llong sys_fadvise64 + .llong sys_exit_group + .llong sys_lookup_dcookie /* 235 */ + .llong sys_epoll_create + .llong sys_epoll_ctl + .llong sys_epoll_wait + .llong sys_remap_file_pages + .llong sys_timer_create /* 240 */ + .llong sys_timer_settime + .llong sys_timer_gettime + .llong sys_timer_getoverrun + .llong sys_timer_delete + .llong sys_clock_settime /* 245 */ + .llong sys_clock_gettime + .llong sys_clock_getres + .llong sys_clock_nanosleep + .llong ppc64_swapcontext + .llong sys_tgkill /* 250 */ + .llong sys_utimes + .llong sys_statfs64 + .llong sys_fstatfs64 + .llong sys_ni_syscall /* 32bit 
only fadvise64_64 */ + .llong ppc_rtas /* 255 */ + .llong sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ + .llong sys_ni_syscall /* 257 reserved for vserver */ + .llong sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ + .llong sys_ni_syscall /* 259 reserved for new sys_mbind */ + .llong sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ + .llong sys_ni_syscall /* 261 reserved for new sys_set_mempolicy */ + .llong sys_mq_open + .llong sys_mq_unlink + .llong sys_mq_timedsend + .llong sys_mq_timedreceive /* 265 */ + .llong sys_mq_notify + .llong sys_mq_getsetattr + .llong sys_ni_syscall /* 268 reserved for sys_kexec_load */ diff -puN arch/ppc64/mm/Makefile~mminimal_toc_die_die_die_3 arch/ppc64/mm/Makefile --- mminimal_toc_die_die_die/arch/ppc64/mm/Makefile~mminimal_toc_die_die_die_3 2004-08-24 19:50:11.578589237 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/mm/Makefile 2004-08-24 19:50:11.627585471 +1000 @@ -2,8 +2,6 @@ # Makefile for the linux ppc-specific parts of the memory manager. # -EXTRA_CFLAGS += -mno-minimal-toc - obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ slb_low.o slb.o stab.o obj-$(CONFIG_DISCONTIGMEM) += numa.o diff -puN arch/ppc64/xmon/Makefile~mminimal_toc_die_die_die_3 arch/ppc64/xmon/Makefile --- mminimal_toc_die_die_die/arch/ppc64/xmon/Makefile~mminimal_toc_die_die_die_3 2004-08-24 19:50:11.584588776 +1000 +++ mminimal_toc_die_die_die-anton/arch/ppc64/xmon/Makefile 2004-08-24 19:50:11.628585394 +1000 @@ -1,5 +1,3 @@ # Makefile for xmon -EXTRA_CFLAGS += -mno-minimal-toc - obj-y := start.o xmon.o ppc-dis.o ppc-opc.o subr_prf.o setjmp.o diff -puN include/asm-ppc64/unistd.h~mminimal_toc_die_die_die_3 include/asm-ppc64/unistd.h --- mminimal_toc_die_die_die/include/asm-ppc64/unistd.h~mminimal_toc_die_die_die_3 2004-08-24 19:50:11.590588315 +1000 +++ mminimal_toc_die_die_die-anton/include/asm-ppc64/unistd.h 2004-08-24 19:50:11.629585317 +1000 @@ -468,7 +468,8 @@ long sys_rt_sigaction(int sig, const str * What we want is __attribute__((weak,alias("sys_ni_syscall"))), * but it doesn't work on all toolchains, so we just do it by hand */ -#define cond_syscall(x) asm(".weak\t." #x "\n\t.set\t." #x ",.sys_ni_syscall"); +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall"); + #endif /* __KERNEL__ */ _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Wed Aug 25 00:26:42 2004 From: anton at samba.org (Anton Blanchard) Date: Wed, 25 Aug 2004 00:26:42 +1000 Subject: [PATCH] -mminimal-toc removal 4: Use --emit-stub-syms In-Reply-To: <20040824132230.GK2306@krispykreme> References: <20040824131115.GH2306@krispykreme> <20040824131646.GI2306@krispykreme> <20040824131850.GJ2306@krispykreme> <20040824132230.GK2306@krispykreme> Message-ID: <20040824142642.GL2306@krispykreme> Use the --emit-stub-syms linker option. Without this the trampolines end up inside whatever label happened to be before them. 
An example of a trampoline with --emit-stub-syms used:

c000000000046e74 <00000075..device_shutdown+0>:
c000000000046e74:	f8 41 00 28	std     r2,40(r1)
c000000000046e78:	3c 42 00 01	addis   r2,r2,1
c000000000046e7c:	38 42 ef f0	addi    r2,r2,-4112
c000000000046e80:	48 1f 74 7c	b       c00000000023e2fc <.device_shutdown>

Signed-off-by: Anton Blanchard
---

 mminimal_toc_die_die_die-anton/arch/ppc64/Makefile |    2

diff -puN arch/ppc64/Makefile~mminimal_toc_die_die_die_4 arch/ppc64/Makefile
--- mminimal_toc_die_die_die/arch/ppc64/Makefile~mminimal_toc_die_die_die_4	2004-08-24 23:30:06.929220191 +1000
+++ mminimal_toc_die_die_die-anton/arch/ppc64/Makefile	2004-08-24 23:40:24.579542815 +1000
@@ -28,6 +28,8 @@
 LDFLAGS		:= -m elf64ppc
 LDFLAGS_vmlinux	:= -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD)
 CFLAGS		+= -msoft-float -pipe -Wno-uninitialized -mtraceback=none
+LDFLAGS_vmlinux += --emit-stub-syms
+
 ifeq ($(CONFIG_POWER4_ONLY),y)
 	CFLAGS += $(call cc-option,-mcpu=power4)
 else
_
** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/

From anton at samba.org  Wed Aug 25 00:29:33 2004
From: anton at samba.org (Anton Blanchard)
Date: Wed, 25 Aug 2004 00:29:33 +1000
Subject: [PATCH] -mminimal-toc removal 5: Embed r2 in syscall table
In-Reply-To: <20040824142642.GL2306@krispykreme>
References: <20040824131115.GH2306@krispykreme> <20040824131646.GI2306@krispykreme>
	<20040824131850.GJ2306@krispykreme> <20040824132230.GK2306@krispykreme>
	<20040824142642.GL2306@krispykreme>
Message-ID: <20040824142933.GM2306@krispykreme>

Alan tells me foo@tocbase has been around since last year, so let's save
ourselves a load and store the pc and r2 directly in the syscall table.

Signed-off-by: Anton Blanchard
---

 mminimal_toc_die_die_die-anton/arch/ppc64/kernel/entry.S  |    8
 mminimal_toc_die_die_die-anton/arch/ppc64/kernel/misc.S   | 1080 +++++++-------
 mminimal_toc_die_die_die-anton/include/asm-ppc64/unistd.h |    2
 3 files changed, 547 insertions(+), 543 deletions(-)

diff -puN arch/ppc64/kernel/misc.S~mminimal_toc_die_die_die_5 arch/ppc64/kernel/misc.S
--- mminimal_toc_die_die_die/arch/ppc64/kernel/misc.S~mminimal_toc_die_die_die_5	2004-08-24 23:40:58.783048356 +1000
+++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/misc.S	2004-08-25 00:08:14.888151052 +1000
@@ -606,547 +606,551 @@ _GLOBAL(kernel_thread)
 #define ppc_rtas	sys_ni_syscall
 #endif
 
+#define SYSCALL_ENTRY(ENTRY)	\
+	.llong . ## ENTRY;	\
+	.llong . ## ENTRY@tocbase
+
 /* Why isn't this a) automatic, b) written in 'C'?
*/ .balign 8 _GLOBAL(sys_call_table32) - .llong sys_restart_syscall /* 0 */ - .llong sys_exit - .llong ppc_fork - .llong sys_read - .llong sys_write - .llong sys32_open /* 5 */ - .llong sys_close - .llong sys32_waitpid - .llong sys32_creat - .llong sys_link - .llong sys_unlink /* 10 */ - .llong sys32_execve - .llong sys_chdir - .llong sys32_time - .llong sys_mknod - .llong sys_chmod /* 15 */ - .llong sys_lchown - .llong sys_ni_syscall /* old break syscall */ - .llong sys_ni_syscall /* old stat syscall */ - .llong ppc32_lseek - .llong sys_getpid /* 20 */ - .llong compat_sys_mount - .llong sys_oldumount - .llong sys_setuid - .llong sys_getuid - .llong ppc64_sys32_stime /* 25 */ - .llong sys32_ptrace - .llong sys_alarm - .llong sys_ni_syscall /* old fstat syscall */ - .llong sys32_pause - .llong compat_sys_utime /* 30 */ - .llong sys_ni_syscall /* old stty syscall */ - .llong sys_ni_syscall /* old gtty syscall */ - .llong sys32_access - .llong sys32_nice - .llong sys_ni_syscall /* 35 - old ftime syscall */ - .llong sys_sync - .llong sys32_kill - .llong sys_rename - .llong sys32_mkdir - .llong sys_rmdir /* 40 */ - .llong sys_dup - .llong sys_pipe - .llong compat_sys_times - .llong sys_ni_syscall /* old prof syscall */ - .llong sys_brk /* 45 */ - .llong sys_setgid - .llong sys_getgid - .llong sys_signal - .llong sys_geteuid - .llong sys_getegid /* 50 */ - .llong sys_acct - .llong sys_umount - .llong sys_ni_syscall /* old lock syscall */ - .llong compat_sys_ioctl - .llong compat_sys_fcntl /* 55 */ - .llong sys_ni_syscall /* old mpx syscall */ - .llong sys32_setpgid - .llong sys_ni_syscall /* old ulimit syscall */ - .llong sys32_olduname - .llong sys32_umask /* 60 */ - .llong sys_chroot - .llong sys_ustat - .llong sys_dup2 - .llong sys_getppid - .llong sys_getpgrp /* 65 */ - .llong sys_setsid - .llong sys32_sigaction - .llong sys_sgetmask - .llong sys32_ssetmask - .llong sys_setreuid /* 70 */ - .llong sys_setregid - .llong ppc32_sigsuspend - .llong compat_sys_sigpending - .llong sys32_sethostname - .llong compat_sys_setrlimit /* 75 */ - .llong compat_sys_old_getrlimit - .llong compat_sys_getrusage - .llong sys32_gettimeofday - .llong sys32_settimeofday - .llong sys32_getgroups /* 80 */ - .llong sys32_setgroups - .llong sys_ni_syscall /* old select syscall */ - .llong sys_symlink - .llong sys_ni_syscall /* old lstat syscall */ - .llong sys32_readlink /* 85 */ - .llong sys_uselib - .llong sys_swapon - .llong sys_reboot - .llong old32_readdir - .llong sys_mmap /* 90 */ - .llong sys_munmap - .llong sys_truncate - .llong sys_ftruncate - .llong sys_fchmod - .llong sys_fchown /* 95 */ - .llong sys32_getpriority - .llong sys32_setpriority - .llong sys_ni_syscall /* old profil syscall */ - .llong compat_sys_statfs - .llong compat_sys_fstatfs /* 100 */ - .llong sys_ni_syscall /* old ioperm syscall */ - .llong compat_sys_socketcall - .llong sys32_syslog - .llong compat_sys_setitimer - .llong compat_sys_getitimer /* 105 */ - .llong compat_sys_newstat - .llong compat_sys_newlstat - .llong compat_sys_newfstat - .llong sys_uname - .llong sys_ni_syscall /* 110 old iopl syscall */ - .llong sys_vhangup - .llong sys_ni_syscall /* old idle syscall */ - .llong sys_ni_syscall /* old vm86 syscall */ - .llong compat_sys_wait4 - .llong sys_swapoff /* 115 */ - .llong sys32_sysinfo - .llong sys32_ipc - .llong sys_fsync - .llong ppc32_sigreturn - .llong ppc_clone /* 120 */ - .llong sys32_setdomainname - .llong ppc64_newuname - .llong sys_ni_syscall /* old modify_ldt syscall */ - .llong sys32_adjtimex - .llong sys_mprotect 
/* 125 */ - .llong compat_sys_sigprocmask - .llong sys_ni_syscall /* old create_module syscall */ - .llong sys_init_module - .llong sys_delete_module - .llong sys_ni_syscall /* 130 old get_kernel_syms syscall */ - .llong sys_quotactl - .llong sys32_getpgid - .llong sys_fchdir - .llong sys_bdflush - .llong sys32_sysfs /* 135 */ - .llong ppc64_personality - .llong sys_ni_syscall /* for afs_syscall */ - .llong sys_setfsuid - .llong sys_setfsgid - .llong sys_llseek /* 140 */ - .llong sys32_getdents - .llong ppc32_select - .llong sys_flock - .llong sys_msync - .llong compat_sys_readv /* 145 */ - .llong compat_sys_writev - .llong sys32_getsid - .llong sys_fdatasync - .llong sys32_sysctl - .llong sys_mlock /* 150 */ - .llong sys_munlock - .llong sys_mlockall - .llong sys_munlockall - .llong sys32_sched_setparam - .llong sys32_sched_getparam /* 155 */ - .llong sys32_sched_setscheduler - .llong sys32_sched_getscheduler - .llong sys_sched_yield - .llong sys32_sched_get_priority_max - .llong sys32_sched_get_priority_min /* 160 */ - .llong sys32_sched_rr_get_interval - .llong compat_sys_nanosleep - .llong sys_mremap - .llong sys_setresuid - .llong sys_getresuid /* 165 */ - .llong sys_ni_syscall /* old query_module syscall */ - .llong sys_poll - .llong compat_sys_nfsservctl - .llong sys_setresgid - .llong sys_getresgid /* 170 */ - .llong sys32_prctl - .llong ppc32_rt_sigreturn - .llong sys32_rt_sigaction - .llong sys32_rt_sigprocmask - .llong sys32_rt_sigpending /* 175 */ - .llong sys32_rt_sigtimedwait - .llong sys32_rt_sigqueueinfo - .llong ppc32_rt_sigsuspend - .llong sys32_pread64 - .llong sys32_pwrite64 /* 180 */ - .llong sys_chown - .llong sys_getcwd - .llong sys_capget - .llong sys_capset - .llong sys32_sigaltstack /* 185 */ - .llong sys32_sendfile - .llong sys_ni_syscall /* reserved for streams1 */ - .llong sys_ni_syscall /* reserved for streams2 */ - .llong ppc_vfork - .llong compat_sys_getrlimit /* 190 */ - .llong sys32_readahead - .llong sys32_mmap2 - .llong sys32_truncate64 - .llong sys32_ftruncate64 - .llong sys_stat64 /* 195 */ - .llong sys_lstat64 - .llong sys_fstat64 - .llong sys32_pciconfig_read - .llong sys32_pciconfig_write - .llong sys32_pciconfig_iobase /* 200 - pciconfig_iobase */ - .llong sys_ni_syscall /* reserved for MacOnLinux */ - .llong sys_getdents64 - .llong sys_pivot_root - .llong compat_sys_fcntl64 - .llong sys_madvise /* 205 */ - .llong sys_mincore - .llong sys_gettid - .llong sys_tkill - .llong sys_setxattr - .llong sys_lsetxattr /* 210 */ - .llong sys_fsetxattr - .llong sys_getxattr - .llong sys_lgetxattr - .llong sys_fgetxattr - .llong sys_listxattr /* 215 */ - .llong sys_llistxattr - .llong sys_flistxattr - .llong sys_removexattr - .llong sys_lremovexattr - .llong sys_fremovexattr /* 220 */ - .llong compat_sys_futex - .llong compat_sys_sched_setaffinity - .llong compat_sys_sched_getaffinity - .llong sys_ni_syscall - .llong sys_ni_syscall /* 225 - reserved for tux */ - .llong sys32_sendfile64 - .llong compat_sys_io_setup - .llong sys_io_destroy - .llong compat_sys_io_getevents - .llong compat_sys_io_submit - .llong sys_io_cancel - .llong sys_set_tid_address - .llong ppc32_fadvise64 - .llong sys_exit_group - .llong ppc32_lookup_dcookie /* 235 */ - .llong sys_epoll_create - .llong sys_epoll_ctl - .llong sys_epoll_wait - .llong sys_remap_file_pages - .llong ppc32_timer_create /* 240 */ - .llong compat_timer_settime - .llong compat_timer_gettime - .llong sys_timer_getoverrun - .llong sys_timer_delete - .llong compat_clock_settime /* 245 */ - .llong compat_clock_gettime - 
.llong compat_clock_getres - .llong compat_clock_nanosleep - .llong ppc32_swapcontext - .llong sys32_tgkill /* 250 */ - .llong sys32_utimes - .llong compat_statfs64 - .llong compat_fstatfs64 - .llong ppc32_fadvise64_64 /* 32bit only fadvise64_64 */ - .llong ppc_rtas /* 255 */ - .llong sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong sys_ni_syscall /* 257 reserved for vserver */ - .llong sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ - .llong sys_ni_syscall /* 259 reserved for new sys_mbind */ - .llong sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ - .llong sys_ni_syscall /* 261 reserved for new sys_set_mempolicy */ - .llong compat_sys_mq_open - .llong sys_mq_unlink - .llong compat_sys_mq_timedsend - .llong compat_sys_mq_timedreceive /* 265 */ - .llong compat_sys_mq_notify - .llong compat_sys_mq_getsetattr - .llong sys_ni_syscall /* 268 reserved for sys_kexec_load */ + SYSCALL_ENTRY(sys_restart_syscall) /* 0 */ + SYSCALL_ENTRY(sys_exit) + SYSCALL_ENTRY(ppc_fork) + SYSCALL_ENTRY(sys_read) + SYSCALL_ENTRY(sys_write) + SYSCALL_ENTRY(sys32_open) /* 5 */ + SYSCALL_ENTRY(sys_close) + SYSCALL_ENTRY(sys32_waitpid) + SYSCALL_ENTRY(sys32_creat) + SYSCALL_ENTRY(sys_link) + SYSCALL_ENTRY(sys_unlink) /* 10 */ + SYSCALL_ENTRY(sys32_execve) + SYSCALL_ENTRY(sys_chdir) + SYSCALL_ENTRY(sys32_time) + SYSCALL_ENTRY(sys_mknod) + SYSCALL_ENTRY(sys_chmod) /* 15 */ + SYSCALL_ENTRY(sys_lchown) + SYSCALL_ENTRY(sys_ni_syscall) /* old break syscall */ + SYSCALL_ENTRY(sys_ni_syscall) /* old stat syscall */ + SYSCALL_ENTRY(ppc32_lseek) + SYSCALL_ENTRY(sys_getpid) /* 20 */ + SYSCALL_ENTRY(compat_sys_mount) + SYSCALL_ENTRY(sys_oldumount) + SYSCALL_ENTRY(sys_setuid) + SYSCALL_ENTRY(sys_getuid) + SYSCALL_ENTRY(ppc64_sys32_stime) /* 25 */ + SYSCALL_ENTRY(sys32_ptrace) + SYSCALL_ENTRY(sys_alarm) + SYSCALL_ENTRY(sys_ni_syscall) /* old fstat syscall */ + SYSCALL_ENTRY(sys32_pause) + SYSCALL_ENTRY(compat_sys_utime) /* 30 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old stty syscall */ + SYSCALL_ENTRY(sys_ni_syscall) /* old gtty syscall */ + SYSCALL_ENTRY(sys32_access) + SYSCALL_ENTRY(sys32_nice) + SYSCALL_ENTRY(sys_ni_syscall) /* 35 - old ftime syscall */ + SYSCALL_ENTRY(sys_sync) + SYSCALL_ENTRY(sys32_kill) + SYSCALL_ENTRY(sys_rename) + SYSCALL_ENTRY(sys32_mkdir) + SYSCALL_ENTRY(sys_rmdir) /* 40 */ + SYSCALL_ENTRY(sys_dup) + SYSCALL_ENTRY(sys_pipe) + SYSCALL_ENTRY(compat_sys_times) + SYSCALL_ENTRY(sys_ni_syscall) /* old prof syscall */ + SYSCALL_ENTRY(sys_brk) /* 45 */ + SYSCALL_ENTRY(sys_setgid) + SYSCALL_ENTRY(sys_getgid) + SYSCALL_ENTRY(sys_signal) + SYSCALL_ENTRY(sys_geteuid) + SYSCALL_ENTRY(sys_getegid) /* 50 */ + SYSCALL_ENTRY(sys_acct) + SYSCALL_ENTRY(sys_umount) + SYSCALL_ENTRY(sys_ni_syscall) /* old lock syscall */ + SYSCALL_ENTRY(compat_sys_ioctl) + SYSCALL_ENTRY(compat_sys_fcntl) /* 55 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old mpx syscall */ + SYSCALL_ENTRY(sys32_setpgid) + SYSCALL_ENTRY(sys_ni_syscall) /* old ulimit syscall */ + SYSCALL_ENTRY(sys32_olduname) + SYSCALL_ENTRY(sys32_umask) /* 60 */ + SYSCALL_ENTRY(sys_chroot) + SYSCALL_ENTRY(sys_ustat) + SYSCALL_ENTRY(sys_dup2) + SYSCALL_ENTRY(sys_getppid) + SYSCALL_ENTRY(sys_getpgrp) /* 65 */ + SYSCALL_ENTRY(sys_setsid) + SYSCALL_ENTRY(sys32_sigaction) + SYSCALL_ENTRY(sys_sgetmask) + SYSCALL_ENTRY(sys32_ssetmask) + SYSCALL_ENTRY(sys_setreuid) /* 70 */ + SYSCALL_ENTRY(sys_setregid) + SYSCALL_ENTRY(ppc32_sigsuspend) + SYSCALL_ENTRY(compat_sys_sigpending) + SYSCALL_ENTRY(sys32_sethostname) + SYSCALL_ENTRY(compat_sys_setrlimit) /* 
75 */ + SYSCALL_ENTRY(compat_sys_old_getrlimit) + SYSCALL_ENTRY(compat_sys_getrusage) + SYSCALL_ENTRY(sys32_gettimeofday) + SYSCALL_ENTRY(sys32_settimeofday) + SYSCALL_ENTRY(sys32_getgroups) /* 80 */ + SYSCALL_ENTRY(sys32_setgroups) + SYSCALL_ENTRY(sys_ni_syscall) /* old select syscall */ + SYSCALL_ENTRY(sys_symlink) + SYSCALL_ENTRY(sys_ni_syscall) /* old lstat syscall */ + SYSCALL_ENTRY(sys32_readlink) /* 85 */ + SYSCALL_ENTRY(sys_uselib) + SYSCALL_ENTRY(sys_swapon) + SYSCALL_ENTRY(sys_reboot) + SYSCALL_ENTRY(old32_readdir) + SYSCALL_ENTRY(sys_mmap) /* 90 */ + SYSCALL_ENTRY(sys_munmap) + SYSCALL_ENTRY(sys_truncate) + SYSCALL_ENTRY(sys_ftruncate) + SYSCALL_ENTRY(sys_fchmod) + SYSCALL_ENTRY(sys_fchown) /* 95 */ + SYSCALL_ENTRY(sys32_getpriority) + SYSCALL_ENTRY(sys32_setpriority) + SYSCALL_ENTRY(sys_ni_syscall) /* old profil syscall */ + SYSCALL_ENTRY(compat_sys_statfs) + SYSCALL_ENTRY(compat_sys_fstatfs) /* 100 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old ioperm syscall */ + SYSCALL_ENTRY(compat_sys_socketcall) + SYSCALL_ENTRY(sys32_syslog) + SYSCALL_ENTRY(compat_sys_setitimer) + SYSCALL_ENTRY(compat_sys_getitimer) /* 105 */ + SYSCALL_ENTRY(compat_sys_newstat) + SYSCALL_ENTRY(compat_sys_newlstat) + SYSCALL_ENTRY(compat_sys_newfstat) + SYSCALL_ENTRY(sys_uname) + SYSCALL_ENTRY(sys_ni_syscall) /* 110 old iopl syscall */ + SYSCALL_ENTRY(sys_vhangup) + SYSCALL_ENTRY(sys_ni_syscall) /* old idle syscall */ + SYSCALL_ENTRY(sys_ni_syscall) /* old vm86 syscall */ + SYSCALL_ENTRY(compat_sys_wait4) + SYSCALL_ENTRY(sys_swapoff) /* 115 */ + SYSCALL_ENTRY(sys32_sysinfo) + SYSCALL_ENTRY(sys32_ipc) + SYSCALL_ENTRY(sys_fsync) + SYSCALL_ENTRY(ppc32_sigreturn) + SYSCALL_ENTRY(ppc_clone) /* 120 */ + SYSCALL_ENTRY(sys32_setdomainname) + SYSCALL_ENTRY(ppc64_newuname) + SYSCALL_ENTRY(sys_ni_syscall) /* old modify_ldt syscall */ + SYSCALL_ENTRY(sys32_adjtimex) + SYSCALL_ENTRY(sys_mprotect) /* 125 */ + SYSCALL_ENTRY(compat_sys_sigprocmask) + SYSCALL_ENTRY(sys_ni_syscall) /* old create_module syscall */ + SYSCALL_ENTRY(sys_init_module) + SYSCALL_ENTRY(sys_delete_module) + SYSCALL_ENTRY(sys_ni_syscall) /* 130 old get_kernel_syms syscall */ + SYSCALL_ENTRY(sys_quotactl) + SYSCALL_ENTRY(sys32_getpgid) + SYSCALL_ENTRY(sys_fchdir) + SYSCALL_ENTRY(sys_bdflush) + SYSCALL_ENTRY(sys32_sysfs) /* 135 */ + SYSCALL_ENTRY(ppc64_personality) + SYSCALL_ENTRY(sys_ni_syscall) /* for afs_syscall */ + SYSCALL_ENTRY(sys_setfsuid) + SYSCALL_ENTRY(sys_setfsgid) + SYSCALL_ENTRY(sys_llseek) /* 140 */ + SYSCALL_ENTRY(sys32_getdents) + SYSCALL_ENTRY(ppc32_select) + SYSCALL_ENTRY(sys_flock) + SYSCALL_ENTRY(sys_msync) + SYSCALL_ENTRY(compat_sys_readv) /* 145 */ + SYSCALL_ENTRY(compat_sys_writev) + SYSCALL_ENTRY(sys32_getsid) + SYSCALL_ENTRY(sys_fdatasync) + SYSCALL_ENTRY(sys32_sysctl) + SYSCALL_ENTRY(sys_mlock) /* 150 */ + SYSCALL_ENTRY(sys_munlock) + SYSCALL_ENTRY(sys_mlockall) + SYSCALL_ENTRY(sys_munlockall) + SYSCALL_ENTRY(sys32_sched_setparam) + SYSCALL_ENTRY(sys32_sched_getparam) /* 155 */ + SYSCALL_ENTRY(sys32_sched_setscheduler) + SYSCALL_ENTRY(sys32_sched_getscheduler) + SYSCALL_ENTRY(sys_sched_yield) + SYSCALL_ENTRY(sys32_sched_get_priority_max) + SYSCALL_ENTRY(sys32_sched_get_priority_min) /* 160 */ + SYSCALL_ENTRY(sys32_sched_rr_get_interval) + SYSCALL_ENTRY(compat_sys_nanosleep) + SYSCALL_ENTRY(sys_mremap) + SYSCALL_ENTRY(sys_setresuid) + SYSCALL_ENTRY(sys_getresuid) /* 165 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old query_module syscall */ + SYSCALL_ENTRY(sys_poll) + SYSCALL_ENTRY(compat_sys_nfsservctl) + SYSCALL_ENTRY(sys_setresgid) + 
SYSCALL_ENTRY(sys_getresgid) /* 170 */ + SYSCALL_ENTRY(sys32_prctl) + SYSCALL_ENTRY(ppc32_rt_sigreturn) + SYSCALL_ENTRY(sys32_rt_sigaction) + SYSCALL_ENTRY(sys32_rt_sigprocmask) + SYSCALL_ENTRY(sys32_rt_sigpending) /* 175 */ + SYSCALL_ENTRY(sys32_rt_sigtimedwait) + SYSCALL_ENTRY(sys32_rt_sigqueueinfo) + SYSCALL_ENTRY(ppc32_rt_sigsuspend) + SYSCALL_ENTRY(sys32_pread64) + SYSCALL_ENTRY(sys32_pwrite64) /* 180 */ + SYSCALL_ENTRY(sys_chown) + SYSCALL_ENTRY(sys_getcwd) + SYSCALL_ENTRY(sys_capget) + SYSCALL_ENTRY(sys_capset) + SYSCALL_ENTRY(sys32_sigaltstack) /* 185 */ + SYSCALL_ENTRY(sys32_sendfile) + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for streams1 */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for streams2 */ + SYSCALL_ENTRY(ppc_vfork) + SYSCALL_ENTRY(compat_sys_getrlimit) /* 190 */ + SYSCALL_ENTRY(sys32_readahead) + SYSCALL_ENTRY(sys32_mmap2) + SYSCALL_ENTRY(sys32_truncate64) + SYSCALL_ENTRY(sys32_ftruncate64) + SYSCALL_ENTRY(sys_stat64) /* 195 */ + SYSCALL_ENTRY(sys_lstat64) + SYSCALL_ENTRY(sys_fstat64) + SYSCALL_ENTRY(sys32_pciconfig_read) + SYSCALL_ENTRY(sys32_pciconfig_write) + SYSCALL_ENTRY(sys32_pciconfig_iobase) /* 200 - pciconfig_iobase */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for MacOnLinux */ + SYSCALL_ENTRY(sys_getdents64) + SYSCALL_ENTRY(sys_pivot_root) + SYSCALL_ENTRY(compat_sys_fcntl64) + SYSCALL_ENTRY(sys_madvise) /* 205 */ + SYSCALL_ENTRY(sys_mincore) + SYSCALL_ENTRY(sys_gettid) + SYSCALL_ENTRY(sys_tkill) + SYSCALL_ENTRY(sys_setxattr) + SYSCALL_ENTRY(sys_lsetxattr) /* 210 */ + SYSCALL_ENTRY(sys_fsetxattr) + SYSCALL_ENTRY(sys_getxattr) + SYSCALL_ENTRY(sys_lgetxattr) + SYSCALL_ENTRY(sys_fgetxattr) + SYSCALL_ENTRY(sys_listxattr) /* 215 */ + SYSCALL_ENTRY(sys_llistxattr) + SYSCALL_ENTRY(sys_flistxattr) + SYSCALL_ENTRY(sys_removexattr) + SYSCALL_ENTRY(sys_lremovexattr) + SYSCALL_ENTRY(sys_fremovexattr) /* 220 */ + SYSCALL_ENTRY(compat_sys_futex) + SYSCALL_ENTRY(compat_sys_sched_setaffinity) + SYSCALL_ENTRY(compat_sys_sched_getaffinity) + SYSCALL_ENTRY(sys_ni_syscall) + SYSCALL_ENTRY(sys_ni_syscall) /* 225 - reserved for tux */ + SYSCALL_ENTRY(sys32_sendfile64) + SYSCALL_ENTRY(compat_sys_io_setup) + SYSCALL_ENTRY(sys_io_destroy) + SYSCALL_ENTRY(compat_sys_io_getevents) + SYSCALL_ENTRY(compat_sys_io_submit) + SYSCALL_ENTRY(sys_io_cancel) + SYSCALL_ENTRY(sys_set_tid_address) + SYSCALL_ENTRY(ppc32_fadvise64) + SYSCALL_ENTRY(sys_exit_group) + SYSCALL_ENTRY(ppc32_lookup_dcookie) /* 235 */ + SYSCALL_ENTRY(sys_epoll_create) + SYSCALL_ENTRY(sys_epoll_ctl) + SYSCALL_ENTRY(sys_epoll_wait) + SYSCALL_ENTRY(sys_remap_file_pages) + SYSCALL_ENTRY(ppc32_timer_create) /* 240 */ + SYSCALL_ENTRY(compat_timer_settime) + SYSCALL_ENTRY(compat_timer_gettime) + SYSCALL_ENTRY(sys_timer_getoverrun) + SYSCALL_ENTRY(sys_timer_delete) + SYSCALL_ENTRY(compat_clock_settime) /* 245 */ + SYSCALL_ENTRY(compat_clock_gettime) + SYSCALL_ENTRY(compat_clock_getres) + SYSCALL_ENTRY(compat_clock_nanosleep) + SYSCALL_ENTRY(ppc32_swapcontext) + SYSCALL_ENTRY(sys32_tgkill) /* 250 */ + SYSCALL_ENTRY(sys32_utimes) + SYSCALL_ENTRY(compat_statfs64) + SYSCALL_ENTRY(compat_fstatfs64) + SYSCALL_ENTRY(ppc32_fadvise64_64) /* 32bit only fadvise64_64 */ + SYSCALL_ENTRY(ppc_rtas) /* 255 */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for sys_debug_setcontext */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for vserver */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for new sys_remap_file_pages */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for new sys_mbind */ + SYSCALL_ENTRY(sys_ni_syscall) /* 260 - reserved for new 
sys_get_mempolicy */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for new sys_set_mempolicy */ + SYSCALL_ENTRY(compat_sys_mq_open) + SYSCALL_ENTRY(sys_mq_unlink) + SYSCALL_ENTRY(compat_sys_mq_timedsend) + SYSCALL_ENTRY(compat_sys_mq_timedreceive) /* 265 */ + SYSCALL_ENTRY(compat_sys_mq_notify) + SYSCALL_ENTRY(compat_sys_mq_getsetattr) + SYSCALL_ENTRY(sys_ni_syscall) /* 268 reserved for sys_kexec_load */ .balign 8 _GLOBAL(sys_call_table) - .llong sys_restart_syscall /* 0 */ - .llong sys_exit - .llong ppc_fork - .llong sys_read - .llong sys_write - .llong sys_open /* 5 */ - .llong sys_close - .llong sys_waitpid - .llong sys_creat - .llong sys_link - .llong sys_unlink /* 10 */ - .llong sys_execve - .llong sys_chdir - .llong sys64_time - .llong sys_mknod - .llong sys_chmod /* 15 */ - .llong sys_lchown - .llong sys_ni_syscall /* old break syscall */ - .llong sys_ni_syscall /* old stat syscall */ - .llong sys_lseek - .llong sys_getpid /* 20 */ - .llong sys_mount - .llong sys_ni_syscall /* old umount syscall */ - .llong sys_setuid - .llong sys_getuid - .llong ppc64_sys_stime /* 25 */ - .llong sys_ptrace - .llong sys_alarm - .llong sys_ni_syscall /* old fstat syscall */ - .llong sys_pause - .llong sys_utime /* 30 */ - .llong sys_ni_syscall /* old stty syscall */ - .llong sys_ni_syscall /* old gtty syscall */ - .llong sys_access - .llong sys_nice - .llong sys_ni_syscall /* 35 - old ftime syscall */ - .llong sys_sync - .llong sys_kill - .llong sys_rename - .llong sys_mkdir - .llong sys_rmdir /* 40 */ - .llong sys_dup - .llong sys_pipe - .llong sys_times - .llong sys_ni_syscall /* old prof syscall */ - .llong sys_brk /* 45 */ - .llong sys_setgid - .llong sys_getgid - .llong sys_signal - .llong sys_geteuid - .llong sys_getegid /* 50 */ - .llong sys_acct - .llong sys_umount - .llong sys_ni_syscall /* old lock syscall */ - .llong sys_ioctl - .llong sys_fcntl /* 55 */ - .llong sys_ni_syscall /* old mpx syscall */ - .llong sys_setpgid - .llong sys_ni_syscall /* old ulimit syscall */ - .llong sys_ni_syscall /* old uname syscall */ - .llong sys_umask /* 60 */ - .llong sys_chroot - .llong sys_ustat - .llong sys_dup2 - .llong sys_getppid - .llong sys_getpgrp /* 65 */ - .llong sys_setsid - .llong sys_ni_syscall - .llong sys_sgetmask - .llong sys_ssetmask - .llong sys_setreuid /* 70 */ - .llong sys_setregid - .llong sys_ni_syscall - .llong sys_ni_syscall - .llong sys_sethostname - .llong sys_setrlimit /* 75 */ - .llong sys_ni_syscall /* old getrlimit syscall */ - .llong sys_getrusage - .llong sys_gettimeofday - .llong sys_settimeofday - .llong sys_getgroups /* 80 */ - .llong sys_setgroups - .llong sys_ni_syscall /* old select syscall */ - .llong sys_symlink - .llong sys_ni_syscall /* old lstat syscall */ - .llong sys_readlink /* 85 */ - .llong sys_uselib - .llong sys_swapon - .llong sys_reboot - .llong sys_ni_syscall /* old readdir syscall */ - .llong sys_mmap /* 90 */ - .llong sys_munmap - .llong sys_truncate - .llong sys_ftruncate - .llong sys_fchmod - .llong sys_fchown /* 95 */ - .llong sys_getpriority - .llong sys_setpriority - .llong sys_ni_syscall /* old profil syscall holder */ - .llong sys_statfs - .llong sys_fstatfs /* 100 */ - .llong sys_ni_syscall /* old ioperm syscall */ - .llong sys_socketcall - .llong sys_syslog - .llong sys_setitimer - .llong sys_getitimer /* 105 */ - .llong sys_newstat - .llong sys_newlstat - .llong sys_newfstat - .llong sys_ni_syscall /* old uname syscall */ - .llong sys_ni_syscall /* 110 old iopl syscall */ - .llong sys_vhangup - .llong sys_ni_syscall /* old idle syscall */ - 
.llong sys_ni_syscall /* old vm86 syscall */ - .llong sys_wait4 - .llong sys_swapoff /* 115 */ - .llong sys_sysinfo - .llong sys_ipc - .llong sys_fsync - .llong sys_ni_syscall - .llong ppc_clone /* 120 */ - .llong sys_setdomainname - .llong ppc64_newuname - .llong sys_ni_syscall /* old modify_ldt syscall */ - .llong sys_adjtimex - .llong sys_mprotect /* 125 */ - .llong sys_ni_syscall - .llong sys_ni_syscall /* old create_module syscall */ - .llong sys_init_module - .llong sys_delete_module - .llong sys_ni_syscall /* 130 old get_kernel_syms syscall */ - .llong sys_quotactl - .llong sys_getpgid - .llong sys_fchdir - .llong sys_bdflush - .llong sys_sysfs /* 135 */ - .llong ppc64_personality - .llong sys_ni_syscall /* for afs_syscall */ - .llong sys_setfsuid - .llong sys_setfsgid - .llong sys_llseek /* 140 */ - .llong sys_getdents - .llong sys_select - .llong sys_flock - .llong sys_msync - .llong sys_readv /* 145 */ - .llong sys_writev - .llong sys_getsid - .llong sys_fdatasync - .llong sys_sysctl - .llong sys_mlock /* 150 */ - .llong sys_munlock - .llong sys_mlockall - .llong sys_munlockall - .llong sys_sched_setparam - .llong sys_sched_getparam /* 155 */ - .llong sys_sched_setscheduler - .llong sys_sched_getscheduler - .llong sys_sched_yield - .llong sys_sched_get_priority_max - .llong sys_sched_get_priority_min /* 160 */ - .llong sys_sched_rr_get_interval - .llong sys_nanosleep - .llong sys_mremap - .llong sys_setresuid - .llong sys_getresuid /* 165 */ - .llong sys_ni_syscall /* old query_module syscall */ - .llong sys_poll - .llong sys_nfsservctl - .llong sys_setresgid - .llong sys_getresgid /* 170 */ - .llong sys_prctl - .llong ppc64_rt_sigreturn - .llong sys_rt_sigaction - .llong sys_rt_sigprocmask - .llong sys_rt_sigpending /* 175 */ - .llong sys_rt_sigtimedwait - .llong sys_rt_sigqueueinfo - .llong ppc64_rt_sigsuspend - .llong sys_pread64 - .llong sys_pwrite64 /* 180 */ - .llong sys_chown - .llong sys_getcwd - .llong sys_capget - .llong sys_capset - .llong sys_sigaltstack /* 185 */ - .llong sys_sendfile64 - .llong sys_ni_syscall /* reserved for streams1 */ - .llong sys_ni_syscall /* reserved for streams2 */ - .llong ppc_vfork - .llong sys_getrlimit /* 190 */ - .llong sys_readahead - .llong sys_ni_syscall /* 32bit only mmap2 */ - .llong sys_ni_syscall /* 32bit only truncate64 */ - .llong sys_ni_syscall /* 32bit only ftruncate64 */ - .llong sys_ni_syscall /* 195 - 32bit only stat64 */ - .llong sys_ni_syscall /* 32bit only lstat64 */ - .llong sys_ni_syscall /* 32bit only fstat64 */ - .llong sys_ni_syscall /* 32bit only pciconfig_read */ - .llong sys_ni_syscall /* 32bit only pciconfig_write */ - .llong sys_ni_syscall /* 32bit only pciconfig_iobase */ - .llong sys_ni_syscall /* reserved for MacOnLinux */ - .llong sys_getdents64 - .llong sys_pivot_root - .llong sys_ni_syscall /* 32bit only fcntl64 */ - .llong sys_madvise /* 205 */ - .llong sys_mincore - .llong sys_gettid - .llong sys_tkill - .llong sys_setxattr - .llong sys_lsetxattr /* 210 */ - .llong sys_fsetxattr - .llong sys_getxattr - .llong sys_lgetxattr - .llong sys_fgetxattr - .llong sys_listxattr /* 215 */ - .llong sys_llistxattr - .llong sys_flistxattr - .llong sys_removexattr - .llong sys_lremovexattr - .llong sys_fremovexattr /* 220 */ - .llong sys_futex - .llong sys_sched_setaffinity - .llong sys_sched_getaffinity - .llong sys_ni_syscall - .llong sys_ni_syscall /* 225 - reserved for tux */ - .llong sys_ni_syscall /* 32bit only sendfile64 */ - .llong sys_io_setup - .llong sys_io_destroy - .llong sys_io_getevents - .llong 
sys_io_submit /* 230 */ - .llong sys_io_cancel - .llong sys_set_tid_address - .llong sys_fadvise64 - .llong sys_exit_group - .llong sys_lookup_dcookie /* 235 */ - .llong sys_epoll_create - .llong sys_epoll_ctl - .llong sys_epoll_wait - .llong sys_remap_file_pages - .llong sys_timer_create /* 240 */ - .llong sys_timer_settime - .llong sys_timer_gettime - .llong sys_timer_getoverrun - .llong sys_timer_delete - .llong sys_clock_settime /* 245 */ - .llong sys_clock_gettime - .llong sys_clock_getres - .llong sys_clock_nanosleep - .llong ppc64_swapcontext - .llong sys_tgkill /* 250 */ - .llong sys_utimes - .llong sys_statfs64 - .llong sys_fstatfs64 - .llong sys_ni_syscall /* 32bit only fadvise64_64 */ - .llong ppc_rtas /* 255 */ - .llong sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong sys_ni_syscall /* 257 reserved for vserver */ - .llong sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ - .llong sys_ni_syscall /* 259 reserved for new sys_mbind */ - .llong sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ - .llong sys_ni_syscall /* 261 reserved for new sys_set_mempolicy */ - .llong sys_mq_open - .llong sys_mq_unlink - .llong sys_mq_timedsend - .llong sys_mq_timedreceive /* 265 */ - .llong sys_mq_notify - .llong sys_mq_getsetattr - .llong sys_ni_syscall /* 268 reserved for sys_kexec_load */ + SYSCALL_ENTRY(sys_restart_syscall) /* 0 */ + SYSCALL_ENTRY(sys_exit) + SYSCALL_ENTRY(ppc_fork) + SYSCALL_ENTRY(sys_read) + SYSCALL_ENTRY(sys_write) + SYSCALL_ENTRY(sys_open) /* 5 */ + SYSCALL_ENTRY(sys_close) + SYSCALL_ENTRY(sys_waitpid) + SYSCALL_ENTRY(sys_creat) + SYSCALL_ENTRY(sys_link) + SYSCALL_ENTRY(sys_unlink) /* 10 */ + SYSCALL_ENTRY(sys_execve) + SYSCALL_ENTRY(sys_chdir) + SYSCALL_ENTRY(sys64_time) + SYSCALL_ENTRY(sys_mknod) + SYSCALL_ENTRY(sys_chmod) /* 15 */ + SYSCALL_ENTRY(sys_lchown) + SYSCALL_ENTRY(sys_ni_syscall) /* old break syscall */ + SYSCALL_ENTRY(sys_ni_syscall) /* old stat syscall */ + SYSCALL_ENTRY(sys_lseek) + SYSCALL_ENTRY(sys_getpid) /* 20 */ + SYSCALL_ENTRY(sys_mount) + SYSCALL_ENTRY(sys_ni_syscall) /* old umount syscall */ + SYSCALL_ENTRY(sys_setuid) + SYSCALL_ENTRY(sys_getuid) + SYSCALL_ENTRY(ppc64_sys_stime) /* 25 */ + SYSCALL_ENTRY(sys_ptrace) + SYSCALL_ENTRY(sys_alarm) + SYSCALL_ENTRY(sys_ni_syscall) /* old fstat syscall */ + SYSCALL_ENTRY(sys_pause) + SYSCALL_ENTRY(sys_utime) /* 30 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old stty syscall */ + SYSCALL_ENTRY(sys_ni_syscall) /* old gtty syscall */ + SYSCALL_ENTRY(sys_access) + SYSCALL_ENTRY(sys_nice) + SYSCALL_ENTRY(sys_ni_syscall) /* 35 - old ftime syscall */ + SYSCALL_ENTRY(sys_sync) + SYSCALL_ENTRY(sys_kill) + SYSCALL_ENTRY(sys_rename) + SYSCALL_ENTRY(sys_mkdir) + SYSCALL_ENTRY(sys_rmdir) /* 40 */ + SYSCALL_ENTRY(sys_dup) + SYSCALL_ENTRY(sys_pipe) + SYSCALL_ENTRY(sys_times) + SYSCALL_ENTRY(sys_ni_syscall) /* old prof syscall */ + SYSCALL_ENTRY(sys_brk) /* 45 */ + SYSCALL_ENTRY(sys_setgid) + SYSCALL_ENTRY(sys_getgid) + SYSCALL_ENTRY(sys_signal) + SYSCALL_ENTRY(sys_geteuid) + SYSCALL_ENTRY(sys_getegid) /* 50 */ + SYSCALL_ENTRY(sys_acct) + SYSCALL_ENTRY(sys_umount) + SYSCALL_ENTRY(sys_ni_syscall) /* old lock syscall */ + SYSCALL_ENTRY(sys_ioctl) + SYSCALL_ENTRY(sys_fcntl) /* 55 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old mpx syscall */ + SYSCALL_ENTRY(sys_setpgid) + SYSCALL_ENTRY(sys_ni_syscall) /* old ulimit syscall */ + SYSCALL_ENTRY(sys_ni_syscall) /* old uname syscall */ + SYSCALL_ENTRY(sys_umask) /* 60 */ + SYSCALL_ENTRY(sys_chroot) + SYSCALL_ENTRY(sys_ustat) + SYSCALL_ENTRY(sys_dup2) 
+ SYSCALL_ENTRY(sys_getppid) + SYSCALL_ENTRY(sys_getpgrp) /* 65 */ + SYSCALL_ENTRY(sys_setsid) + SYSCALL_ENTRY(sys_ni_syscall) + SYSCALL_ENTRY(sys_sgetmask) + SYSCALL_ENTRY(sys_ssetmask) + SYSCALL_ENTRY(sys_setreuid) /* 70 */ + SYSCALL_ENTRY(sys_setregid) + SYSCALL_ENTRY(sys_ni_syscall) + SYSCALL_ENTRY(sys_ni_syscall) + SYSCALL_ENTRY(sys_sethostname) + SYSCALL_ENTRY(sys_setrlimit) /* 75 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old getrlimit syscall */ + SYSCALL_ENTRY(sys_getrusage) + SYSCALL_ENTRY(sys_gettimeofday) + SYSCALL_ENTRY(sys_settimeofday) + SYSCALL_ENTRY(sys_getgroups) /* 80 */ + SYSCALL_ENTRY(sys_setgroups) + SYSCALL_ENTRY(sys_ni_syscall) /* old select syscall */ + SYSCALL_ENTRY(sys_symlink) + SYSCALL_ENTRY(sys_ni_syscall) /* old lstat syscall */ + SYSCALL_ENTRY(sys_readlink) /* 85 */ + SYSCALL_ENTRY(sys_uselib) + SYSCALL_ENTRY(sys_swapon) + SYSCALL_ENTRY(sys_reboot) + SYSCALL_ENTRY(sys_ni_syscall) /* old readdir syscall */ + SYSCALL_ENTRY(sys_mmap) /* 90 */ + SYSCALL_ENTRY(sys_munmap) + SYSCALL_ENTRY(sys_truncate) + SYSCALL_ENTRY(sys_ftruncate) + SYSCALL_ENTRY(sys_fchmod) + SYSCALL_ENTRY(sys_fchown) /* 95 */ + SYSCALL_ENTRY(sys_getpriority) + SYSCALL_ENTRY(sys_setpriority) + SYSCALL_ENTRY(sys_ni_syscall) /* old profil syscall holder */ + SYSCALL_ENTRY(sys_statfs) + SYSCALL_ENTRY(sys_fstatfs) /* 100 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old ioperm syscall */ + SYSCALL_ENTRY(sys_socketcall) + SYSCALL_ENTRY(sys_syslog) + SYSCALL_ENTRY(sys_setitimer) + SYSCALL_ENTRY(sys_getitimer) /* 105 */ + SYSCALL_ENTRY(sys_newstat) + SYSCALL_ENTRY(sys_newlstat) + SYSCALL_ENTRY(sys_newfstat) + SYSCALL_ENTRY(sys_ni_syscall) /* old uname syscall */ + SYSCALL_ENTRY(sys_ni_syscall) /* 110 old iopl syscall */ + SYSCALL_ENTRY(sys_vhangup) + SYSCALL_ENTRY(sys_ni_syscall) /* old idle syscall */ + SYSCALL_ENTRY(sys_ni_syscall) /* old vm86 syscall */ + SYSCALL_ENTRY(sys_wait4) + SYSCALL_ENTRY(sys_swapoff) /* 115 */ + SYSCALL_ENTRY(sys_sysinfo) + SYSCALL_ENTRY(sys_ipc) + SYSCALL_ENTRY(sys_fsync) + SYSCALL_ENTRY(sys_ni_syscall) + SYSCALL_ENTRY(ppc_clone) /* 120 */ + SYSCALL_ENTRY(sys_setdomainname) + SYSCALL_ENTRY(ppc64_newuname) + SYSCALL_ENTRY(sys_ni_syscall) /* old modify_ldt syscall */ + SYSCALL_ENTRY(sys_adjtimex) + SYSCALL_ENTRY(sys_mprotect) /* 125 */ + SYSCALL_ENTRY(sys_ni_syscall) + SYSCALL_ENTRY(sys_ni_syscall) /* old create_module syscall */ + SYSCALL_ENTRY(sys_init_module) + SYSCALL_ENTRY(sys_delete_module) + SYSCALL_ENTRY(sys_ni_syscall) /* 130 old get_kernel_syms syscall */ + SYSCALL_ENTRY(sys_quotactl) + SYSCALL_ENTRY(sys_getpgid) + SYSCALL_ENTRY(sys_fchdir) + SYSCALL_ENTRY(sys_bdflush) + SYSCALL_ENTRY(sys_sysfs) /* 135 */ + SYSCALL_ENTRY(ppc64_personality) + SYSCALL_ENTRY(sys_ni_syscall) /* for afs_syscall */ + SYSCALL_ENTRY(sys_setfsuid) + SYSCALL_ENTRY(sys_setfsgid) + SYSCALL_ENTRY(sys_llseek) /* 140 */ + SYSCALL_ENTRY(sys_getdents) + SYSCALL_ENTRY(sys_select) + SYSCALL_ENTRY(sys_flock) + SYSCALL_ENTRY(sys_msync) + SYSCALL_ENTRY(sys_readv) /* 145 */ + SYSCALL_ENTRY(sys_writev) + SYSCALL_ENTRY(sys_getsid) + SYSCALL_ENTRY(sys_fdatasync) + SYSCALL_ENTRY(sys_sysctl) + SYSCALL_ENTRY(sys_mlock) /* 150 */ + SYSCALL_ENTRY(sys_munlock) + SYSCALL_ENTRY(sys_mlockall) + SYSCALL_ENTRY(sys_munlockall) + SYSCALL_ENTRY(sys_sched_setparam) + SYSCALL_ENTRY(sys_sched_getparam) /* 155 */ + SYSCALL_ENTRY(sys_sched_setscheduler) + SYSCALL_ENTRY(sys_sched_getscheduler) + SYSCALL_ENTRY(sys_sched_yield) + SYSCALL_ENTRY(sys_sched_get_priority_max) + SYSCALL_ENTRY(sys_sched_get_priority_min) /* 160 */ + 
SYSCALL_ENTRY(sys_sched_rr_get_interval) + SYSCALL_ENTRY(sys_nanosleep) + SYSCALL_ENTRY(sys_mremap) + SYSCALL_ENTRY(sys_setresuid) + SYSCALL_ENTRY(sys_getresuid) /* 165 */ + SYSCALL_ENTRY(sys_ni_syscall) /* old query_module syscall */ + SYSCALL_ENTRY(sys_poll) + SYSCALL_ENTRY(sys_nfsservctl) + SYSCALL_ENTRY(sys_setresgid) + SYSCALL_ENTRY(sys_getresgid) /* 170 */ + SYSCALL_ENTRY(sys_prctl) + SYSCALL_ENTRY(ppc64_rt_sigreturn) + SYSCALL_ENTRY(sys_rt_sigaction) + SYSCALL_ENTRY(sys_rt_sigprocmask) + SYSCALL_ENTRY(sys_rt_sigpending) /* 175 */ + SYSCALL_ENTRY(sys_rt_sigtimedwait) + SYSCALL_ENTRY(sys_rt_sigqueueinfo) + SYSCALL_ENTRY(ppc64_rt_sigsuspend) + SYSCALL_ENTRY(sys_pread64) + SYSCALL_ENTRY(sys_pwrite64) /* 180 */ + SYSCALL_ENTRY(sys_chown) + SYSCALL_ENTRY(sys_getcwd) + SYSCALL_ENTRY(sys_capget) + SYSCALL_ENTRY(sys_capset) + SYSCALL_ENTRY(sys_sigaltstack) /* 185 */ + SYSCALL_ENTRY(sys_sendfile64) + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for streams1 */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for streams2 */ + SYSCALL_ENTRY(ppc_vfork) + SYSCALL_ENTRY(sys_getrlimit) /* 190 */ + SYSCALL_ENTRY(sys_readahead) + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only mmap2 */ + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only truncate64 */ + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only ftruncate64 */ + SYSCALL_ENTRY(sys_ni_syscall) /* 195 - 32bit only stat64 */ + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only lstat64 */ + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only fstat64 */ + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only pciconfig_read */ + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only pciconfig_write */ + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only pciconfig_iobase */ + SYSCALL_ENTRY(sys_ni_syscall) /* reserved for MacOnLinux */ + SYSCALL_ENTRY(sys_getdents64) + SYSCALL_ENTRY(sys_pivot_root) + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only fcntl64 */ + SYSCALL_ENTRY(sys_madvise) /* 205 */ + SYSCALL_ENTRY(sys_mincore) + SYSCALL_ENTRY(sys_gettid) + SYSCALL_ENTRY(sys_tkill) + SYSCALL_ENTRY(sys_setxattr) + SYSCALL_ENTRY(sys_lsetxattr) /* 210 */ + SYSCALL_ENTRY(sys_fsetxattr) + SYSCALL_ENTRY(sys_getxattr) + SYSCALL_ENTRY(sys_lgetxattr) + SYSCALL_ENTRY(sys_fgetxattr) + SYSCALL_ENTRY(sys_listxattr) /* 215 */ + SYSCALL_ENTRY(sys_llistxattr) + SYSCALL_ENTRY(sys_flistxattr) + SYSCALL_ENTRY(sys_removexattr) + SYSCALL_ENTRY(sys_lremovexattr) + SYSCALL_ENTRY(sys_fremovexattr) /* 220 */ + SYSCALL_ENTRY(sys_futex) + SYSCALL_ENTRY(sys_sched_setaffinity) + SYSCALL_ENTRY(sys_sched_getaffinity) + SYSCALL_ENTRY(sys_ni_syscall) + SYSCALL_ENTRY(sys_ni_syscall) /* 225 - reserved for tux */ + SYSCALL_ENTRY(sys_ni_syscall) /* 32bit only sendfile64 */ + SYSCALL_ENTRY(sys_io_setup) + SYSCALL_ENTRY(sys_io_destroy) + SYSCALL_ENTRY(sys_io_getevents) + SYSCALL_ENTRY(sys_io_submit) /* 230 */ + SYSCALL_ENTRY(sys_io_cancel) + SYSCALL_ENTRY(sys_set_tid_address) + SYSCALL_ENTRY(sys_fadvise64) + SYSCALL_ENTRY(sys_exit_group) + SYSCALL_ENTRY(sys_lookup_dcookie) /* 235 */ + SYSCALL_ENTRY(sys_epoll_create) + SYSCALL_ENTRY(sys_epoll_ctl) + SYSCALL_ENTRY(sys_epoll_wait) + SYSCALL_ENTRY(sys_remap_file_pages) + SYSCALL_ENTRY(sys_timer_create) /* 240 */ + SYSCALL_ENTRY(sys_timer_settime) + SYSCALL_ENTRY(sys_timer_gettime) + SYSCALL_ENTRY(sys_timer_getoverrun) + SYSCALL_ENTRY(sys_timer_delete) + SYSCALL_ENTRY(sys_clock_settime) /* 245 */ + SYSCALL_ENTRY(sys_clock_gettime) + SYSCALL_ENTRY(sys_clock_getres) + SYSCALL_ENTRY(sys_clock_nanosleep) + SYSCALL_ENTRY(ppc64_swapcontext) + SYSCALL_ENTRY(sys_tgkill) /* 250 */ + SYSCALL_ENTRY(sys_utimes) + 
	SYSCALL_ENTRY(sys_statfs64)
+	SYSCALL_ENTRY(sys_fstatfs64)
+	SYSCALL_ENTRY(sys_ni_syscall)	/* 32bit only fadvise64_64 */
+	SYSCALL_ENTRY(ppc_rtas)		/* 255 */
+	SYSCALL_ENTRY(sys_ni_syscall)	/* reserved for sys_debug_setcontext */
+	SYSCALL_ENTRY(sys_ni_syscall)	/* reserved for vserver */
+	SYSCALL_ENTRY(sys_ni_syscall)	/* reserved for new sys_remap_file_pages */
+	SYSCALL_ENTRY(sys_ni_syscall)	/* reserved for new sys_mbind */
+	SYSCALL_ENTRY(sys_ni_syscall)	/* 260 - reserved for new sys_get_mempolicy */
+	SYSCALL_ENTRY(sys_ni_syscall)	/* reserved for new sys_set_mempolicy */
+	SYSCALL_ENTRY(sys_mq_open)
+	SYSCALL_ENTRY(sys_mq_unlink)
+	SYSCALL_ENTRY(sys_mq_timedsend)
+	SYSCALL_ENTRY(sys_mq_timedreceive)	/* 265 */
+	SYSCALL_ENTRY(sys_mq_notify)
+	SYSCALL_ENTRY(sys_mq_getsetattr)
+	SYSCALL_ENTRY(sys_ni_syscall)	/* 268 reserved for sys_kexec_load */
diff -puN include/asm-ppc64/unistd.h~mminimal_toc_die_die_die_5 include/asm-ppc64/unistd.h
--- mminimal_toc_die_die_die/include/asm-ppc64/unistd.h~mminimal_toc_die_die_die_5	2004-08-25 00:09:23.984236354 +1000
+++ mminimal_toc_die_die_die-anton/include/asm-ppc64/unistd.h	2004-08-25 00:09:44.110715892 +1000
@@ -468,7 +468,7 @@ long sys_rt_sigaction(int sig, const str
  * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
  * but it doesn't work on all toolchains, so we just do it by hand
  */
-#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall");
+#define cond_syscall(x) asm(".weak\t." #x "\n\t.set\t." #x ",.sys_ni_syscall");
 
 #endif /* __KERNEL__ */
diff -puN arch/ppc64/kernel/entry.S~mminimal_toc_die_die_die_5 arch/ppc64/kernel/entry.S
--- mminimal_toc_die_die_die/arch/ppc64/kernel/entry.S~mminimal_toc_die_die_die_5	2004-08-25 00:11:02.793512345 +1000
+++ mminimal_toc_die_die_die-anton/arch/ppc64/kernel/entry.S	2004-08-25 00:17:41.562880771 +1000
@@ -142,10 +142,10 @@ system_call:	/* label this so stack tr
 	clrldi	r7,r7,32
 	clrldi	r8,r8,32
 15:
-	slwi	r0,r0,3
-	ldx	r10,r11,r0	/* Fetch OPD */
-	ld	r2,8(r10)	/* load r2 */
-	ld	r10,0(r10)	/* load pc */
+	slwi	r0,r0,4
+	ldx	r10,r11,r0	/* load pc */
+	addi	r11,r11,8
+	ldx	r2,r11,r0	/* load r2 */
 	mtctr	r10
 	bctrl			/* Call handler */
 	ld	r2,PACATOC(r13)	/* get kernel TOC into r2 */
_
** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/

From haveblue at us.ibm.com  Wed Aug 25 03:29:11 2004
From: haveblue at us.ibm.com (Dave Hansen)
Date: Tue, 24 Aug 2004 10:29:11 -0700
Subject: [PATCH] NUMA compile fix on top of -mm4
Message-ID: <1093368551.1009.118.camel@nighthawk>

This is my fault from a patch that's been in -mm for a couple of weeks.
Didn't get reported until this morning.

I think Andrew sent my patch that causes this breakage on to Linus this
morning, so please give it a test boot, and forward if it is OK.

-- Dave

-------------- next part --------------
A non-text attachment was scrubbed...
Name: ppc64-free_area_init_node-no_mem_map.patch
Type: text/x-patch
Size: 573 bytes
Desc: not available
Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040824/6ba9c139/attachment.bin

From paulus at samba.org  Wed Aug 25 15:30:07 2004
From: paulus at samba.org (Paul Mackerras)
Date: Wed, 25 Aug 2004 15:30:07 +1000
Subject: [PATCH] create pcibios_remove_root_bus()
In-Reply-To: <1092864747.15281.18.camel@sinatra.austin.ibm.com>
References: <1092864747.15281.18.camel@sinatra.austin.ibm.com>
Message-ID: <16684.9183.894300.6099@cargo.ozlabs.ibm.com>

John Rose writes:

> The following patch creates pcibios_remove_root_bus(), which performs the
> ppc64-specific actions for PHB removal.

Where would this be called from?

Paul.

** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/

From johnrose at austin.ibm.com  Thu Aug 26 01:28:16 2004
From: johnrose at austin.ibm.com (John Rose)
Date: Wed, 25 Aug 2004 10:28:16 -0500
Subject: [PATCH] create pcibios_remove_root_bus()
In-Reply-To: <16684.9183.894300.6099@cargo.ozlabs.ibm.com>
References: <1092864747.15281.18.camel@sinatra.austin.ibm.com>
	<16684.9183.894300.6099@cargo.ozlabs.ibm.com>
Message-ID: <1093447696.22473.7.camel@sinatra.austin.ibm.com>

This function will be called from the RPA DLPAR module as part of PHB
removal.  The driver will also call a generic counterpart to this
function.  I posted this generic patch to the hotplug list last week:

http://sourceforge.net/mailarchive/forum.php?thread_id=5361243&forum_id=765

My plan was to get the implementation functions into mainline before the
driver changes, for two reasons: first, to prevent a build break should
the driver changes go in first, and second, because the sysfs interface
story is still in flux.

Thanks-
John

On Wed, 2004-08-25 at 00:30, Paul Mackerras wrote:
> John Rose writes:
> 
> > The following patch creates pcibios_remove_root_bus(), which performs the
> > ppc64-specific actions for PHB removal.
> 
> Where would this be called from?
> 
> Paul.
> 

** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/

From benh at kernel.crashing.org  Thu Aug 26 15:03:14 2004
From: benh at kernel.crashing.org (Benjamin Herrenschmidt)
Date: Thu, 26 Aug 2004 15:03:14 +1000
Subject: vDSO preliminary implementation
Message-ID: <1093496594.2172.80.camel@gaston>

Hi !

Here's a first shot at implementing a vDSO for ppc32/ppc64. This is
definitely not final; as you can see, the enclosed implementation doesn't
provide anything useful for userland to link against, so I didn't pass
down any ELF AT_* entry yet telling ld.so about the vDSO at this point.

What this implementation does contain, however, is the signal trampoline
being moved to the vDSO area, thus no longer on the stack, so people
working on non-executable stacks can toy with it.

The kernel side should be complete +/- bugs (for example, I'm pretty sure
the vDSO Makefiles are broken for split src/obj directories, help fixing
that welcome, see comments in there). The vDSO has copy-on-write
semantics, so you should be able to put breakpoints in there (untested;
well, I tested that COW worked but didn't try putting breakpoints).

I also only tested signals with 32-bit processes, though both are
implemented. What remains is implementing the various functions for use
by userland and the actual symbol table patching, for which I already
have some code, it's just not in there yet.

The patch is against a slightly old Linus bk snapshot. Comments are
welcome.

Ben.
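For anyone who wants to poke at the mapping from userland: the convention
other ports use is to advertise the vDSO through an AT_SYSINFO_EHDR entry
in the ELF aux vector, holding the user address of the vDSO's ELF header.
The patch below does not export any such entry yet, so the probe sketched
here is purely illustrative of the interface userland would eventually
consume (run against this patch it simply finds nothing). AT_SYSINFO_EHDR
and Elf64_auxv_t come from <elf.h>; a 32-bit process would use
Elf32_auxv_t instead.

/* Hypothetical userland sketch: look up the vDSO base in the aux vector.
 * This assumes an AT_SYSINFO_EHDR entry eventually gets passed down,
 * which the patch below does not do yet. */
#include <elf.h>
#include <stdio.h>

int main(void)
{
	Elf64_auxv_t aux;
	FILE *f = fopen("/proc/self/auxv", "rb");

	if (f == NULL)
		return 1;

	while (fread(&aux, sizeof(aux), 1, f) == 1) {
		if (aux.a_type == AT_SYSINFO_EHDR) {
			/* a_val is the user address the kernel mapped the
			 * vDSO at; it is a normal ELF image, so it can be
			 * dumped with objdump for debugging. */
			printf("vDSO mapped at 0x%lx\n",
			       (unsigned long) aux.a_un.a_val);
			break;
		}
	}
	fclose(f);
	return 0;
}
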
diff -Nru a/arch/ppc64/Makefile b/arch/ppc64/Makefile --- a/arch/ppc64/Makefile 2004-08-26 14:55:32 +10:00 +++ b/arch/ppc64/Makefile 2004-08-26 14:55:32 +10:00 @@ -43,6 +43,8 @@ libs-y += arch/ppc64/lib/ core-y += arch/ppc64/kernel/ +core-y += arch/ppc64/kernel/vdso32/ +core-y += arch/ppc64/kernel/vdso64/ core-y += arch/ppc64/mm/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ drivers-$(CONFIG_OPROFILE) += arch/ppc64/oprofile/ diff -Nru a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile 2004-08-26 14:55:32 +10:00 +++ b/arch/ppc64/kernel/Makefile 2004-08-26 14:55:32 +10:00 @@ -11,7 +11,7 @@ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ - iommu.o sysfs.o vio.o + iommu.o sysfs.o vio.o vdso.o obj-$(CONFIG_PPC_OF) += of_device.o diff -Nru a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c --- a/arch/ppc64/kernel/setup.c 2004-08-26 14:55:32 +10:00 +++ b/arch/ppc64/kernel/setup.c 2004-08-26 14:55:32 +10:00 @@ -47,6 +47,7 @@ #include #include #include +#include extern unsigned long klimit; /* extern void *stab; */ @@ -646,6 +647,7 @@ ppc_md.setup_arch(); paging_init(); + ppc64_boot_msg(0x15, "Setup Done"); } diff -Nru a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c --- a/arch/ppc64/kernel/signal.c 2004-08-26 14:55:32 +10:00 +++ b/arch/ppc64/kernel/signal.c 2004-08-26 14:55:32 +10:00 @@ -34,6 +34,7 @@ #include #include #include +#include #define DEBUG_SIG 0 @@ -412,10 +413,14 @@ goto badframe; /* Set up to return from userspace. */ - err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); - if (err) - goto badframe; - + if (vdso64_rt_sigtramp) { + regs->link = vdso64_rt_sigtramp; + } else { + err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); + if (err) + goto badframe; + regs->link = (unsigned long) &frame->tramp[0]; + } funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; /* Allocate a dummy caller frame for the signal handler. */ @@ -424,7 +429,6 @@ /* Set up "regs" so we "return" to the signal handler. 
*/ err |= get_user(regs->nip, &funct_desc_ptr->entry); - regs->link = (unsigned long) &frame->tramp[0]; regs->gpr[1] = newsp; err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); regs->gpr[3] = signr; diff -Nru a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c --- a/arch/ppc64/kernel/signal32.c 2004-08-26 14:55:32 +10:00 +++ b/arch/ppc64/kernel/signal32.c 2004-08-26 14:55:32 +10:00 @@ -30,6 +30,7 @@ #include #include #include +#include #define DEBUG_SIG 0 @@ -677,18 +678,24 @@ /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; - if (save_user_regs(regs, frame, __NR_rt_sigreturn)) - goto badframe; - if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) goto badframe; + + if (vdso32_rt_sigtramp) { + if (save_user_regs(regs, frame, 0)) + goto badframe; + regs->link = vdso32_rt_sigtramp; + } else { + if (save_user_regs(regs, frame, __NR_rt_sigreturn)) + goto badframe; + regs->link = (unsigned long) frame->tramp; + } regs->gpr[1] = (unsigned long) newsp; regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) &rt_sf->info; regs->gpr[5] = (unsigned long) &rt_sf->uc; regs->gpr[6] = (unsigned long) rt_sf; regs->nip = (unsigned long) ka->sa.sa_handler; - regs->link = (unsigned long) frame->tramp; regs->trap = 0; regs->result = 0; @@ -844,8 +851,15 @@ || __put_user(sig, &sc->signal)) goto badframe; - if (save_user_regs(regs, &frame->mctx, __NR_sigreturn)) - goto badframe; + if (vdso32_sigtramp) { + if (save_user_regs(regs, &frame->mctx, 0)) + goto badframe; + regs->link = vdso32_sigtramp; + } else { + if (save_user_regs(regs, &frame->mctx, __NR_sigreturn)) + goto badframe; + regs->link = (unsigned long) frame->mctx.tramp; + } if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) goto badframe; @@ -853,7 +867,6 @@ regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) sc; regs->nip = (unsigned long) ka->sa.sa_handler; - regs->link = (unsigned long) frame->mctx.tramp; regs->trap = 0; regs->result = 0; diff -Nru a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso.c 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,405 @@ + +/* + * linux/arch/ppc64/kernel/vdso.c + * + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + + +/* + * The vDSOs themselves are here + */ +extern char vdso64_start, vdso64_end; +extern char vdso32_start, vdso32_end; + +static void *vdso64_kbase = &vdso64_start; +static void *vdso32_kbase = &vdso32_start; +unsigned long vdso64_ubase; +unsigned long vdso32_ubase; + +unsigned int vdso64_pages; +unsigned int vdso32_pages; + +/* Signal trampolines user addresses */ + +unsigned long vdso64_sigtramp; +unsigned long vdso64_rt_sigtramp; +unsigned long vdso32_sigtramp; +unsigned long vdso32_rt_sigtramp; + +/* + * Some infos carried around for each of them during parsing at + * boot time. 
+ */ +struct lib32_elfinfo +{ + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ + unsigned long text; /* offset of .text section in .so */ +}; + +struct lib64_elfinfo +{ + Elf64_Ehdr *hdr; + Elf64_Sym *dynsym; + unsigned long dynsymsize; + char *dynstr; + unsigned long text; +}; + + +#ifdef __DEBUG +static void dump_one_vdso_page(struct page *pg, struct page *upg) +{ + printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), + page_count(pg), + pg->flags); + if (upg/* && pg != upg*/) { + printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT), + page_count(upg), + upg->flags); + } + printk("\n"); +} + +static void dump_vdso_pages(struct vm_area_struct * vma) +{ + int i; + + if (!vma || test_thread_flag(TIF_32BIT)) { + printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); + for (i=0; ivm_mm) ? + follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } + if (!vma || !test_thread_flag(TIF_32BIT)) { + printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); + for (i=0; ivm_mm) ? + follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } +} +#endif /* DEBUG */ + +/* + * Keep a dummy vma_close for now, it will prevent VMA merging, though + * I wouldn't expect the stack beeing mergeable with out VMA due to flag + * differences, better be safe than sorry + */ +static void vdso_vma_close(struct vm_area_struct * vma) +{ +} + +/* + * Our nopage() function, maps in the actual vDSO kernel pages, they will + * be mapped read-only by do_no_page(), and eventually COW'ed, either + * right away for an initial write access, or by do_wp_page(). + */ +static struct page * vdso_vma_nopage(struct vm_area_struct * vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start;; + struct page *pg; + void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase; + + DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n", + current->comm, address, offset); + + if (address < vma->vm_start || address > vma->vm_end) + return NOPAGE_SIGBUS; + + pg = virt_to_page(vbase + offset); + get_page(pg); + DBG(" ->page count: %d\n", page_count(pg)); + + return pg; +} + +static struct vm_operations_struct vdso_vmops = { + .close = vdso_vma_close, + .nopage = vdso_vma_nopage, +}; + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long vdso_base = STACK_TOP; + unsigned long vdso_pages = test_thread_flag(TIF_32BIT) ? + vdso32_pages : vdso64_pages; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma == NULL) + return -ENOMEM; + if (security_vm_enough_memory(vdso_pages)) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + memset(vma, 0, sizeof(*vma)); + + vma->vm_mm = mm; + vma->vm_start = vdso_base; + vma->vm_end = TASK_SIZE; + /* + * our vma flags don't have VM_WRITE so by default, the process isn't allowed + * to write those pages. 
+ * gdb can break that with ptrace interface, and thus trigger COW on those + * pages but it's then your responsibility to never do that on the "data" page + * of the vDSO or you'll stop getting kernel updates and your nice userland + * gettimeofday will be totally dead. It's fine to use that for setting + * breakpoints in the vDSO code pages though + */ + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; + vma->vm_ops = &vdso_vmops; + + down_write(&mm->mmap_sem); + insert_vm_struct(mm, vma); + mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + up_write(&mm->mmap_sem); + + return 0; +} + +static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + *size = 0; + return NULL; +} + +static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf64_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + if (size) + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname) +{ + unsigned int i; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + if (strcmp(symname, lib->dynstr + lib->dynsym[i].st_name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname) +{ + unsigned int i; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + if (strcmp(symname, lib->dynstr + lib->dynsym[i].st_name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname, + unsigned long offset) +{ + Elf32_Sym *sym = find_symbol32(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO32: function %s not found !\n", symname); + return 0; + } + return offset /*+ (unsigned long)lib->text*/ + sym->st_value; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname, + unsigned long offset) +{ + Elf64_Sym *sym = find_symbol64(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO64: function %s not found !\n", symname); + return 0; + } + return offset /*+ (unsigned long)lib->text*/ + sym->st_value; +} + + +static __init int vdso_do_fixups(void) 
+{ + struct lib32_elfinfo v32; + struct lib64_elfinfo v64; + void *sect; + + v32.hdr = vdso32_kbase; + v64.hdr = vdso64_kbase; + + /* + * Locate symbol tables & text section + */ + + v32.dynsym = find_section32(v32.hdr, ".dynsym", &v32.dynsymsize); + v32.dynstr = find_section32(v32.hdr, ".dynstr", NULL); + if (v32.dynsym == NULL || v32.dynstr == NULL) { + printk(KERN_ERR "vDSO32: a required symbol section was not found\n"); + return -1; + } + sect = find_section32(v32.hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO32: the .text section was not found\n"); + return -1; + } + v32.text = sect - vdso32_kbase; + + v64.dynsym = find_section64(v64.hdr, ".dynsym", &v64.dynsymsize); + v64.dynstr = find_section64(v64.hdr, ".dynstr", NULL); + if (v64.dynsym == NULL || v64.dynstr == NULL) { + printk(KERN_ERR "vDSO64: a required symbol section was not found\n"); + return -1; + } + sect = find_section64(v64.hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO64: the .text section was not found\n"); + return -1; + } + v64.text = sect - vdso64_kbase; + + /* + * Find signal trampolines + */ + + vdso64_sigtramp = find_function64(&v64, "_v_sigtramp64", vdso64_ubase); + vdso64_rt_sigtramp = find_function64(&v64, "_v_sigtramp_rt64", vdso64_ubase); + vdso32_sigtramp = find_function32(&v32, "_v_sigtramp32", vdso32_ubase); + vdso32_rt_sigtramp = find_function32(&v32, "_v_sigtramp_rt32", vdso32_ubase); + + return 0; +} + +void __init vdso_init(void) +{ + int i; + + vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; + vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; + vdso64_ubase = TASK_SIZE_USER64 - (vdso64_pages << PAGE_SHIFT); + vdso32_ubase = TASK_SIZE_USER32 - (vdso32_pages << PAGE_SHIFT); + + DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n", + vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages); + + /* Do necessary fixups of vDSO symbols */ + if (vdso_do_fixups()) { + printk(KERN_ERR "vDSO setup failure, not enabled !\n"); + /* XXX should free pages here ? 
*/ + vdso64_pages = vdso32_pages = 0; + return; + } + + /* Make sure pages are in the correct state */ + for (i = 0; i < vdso64_pages; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + } + for (i = 0; i < vdso32_pages; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + } +} diff -Nru a/arch/ppc64/kernel/vdso32/Makefile b/arch/ppc64/kernel/vdso32/Makefile --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso32/Makefile 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,43 @@ +# Choose compiler + +CROSS32_COMPILE ?= + +CROSS32CC := $(CROSS32_COMPILE)gcc +CROSS32AS := $(CROSS32_COMPILE)as + +# List of files in the vdso, has to be asm only for now + +src-vdso32 = sigtramp.S testfunc.S + +# Build rules + +obj-vdso32 := $(addsuffix .o, $(basename $(src-vdso32))) +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) +src-vdso32 := $(addprefix $(src)/, $(src-vdso32)) + +VDSO32_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib +VDSO32_CFLAGS += -Wl,-soname=linux-vdso32.so.1 +VDSO32_AFLAGS := -D__ASSEMBLY__ -s + +obj-y += vdso32_wrapper.o + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) + $(call if_changed,vdso32ld) + +# assembly rules for the .S files +# This is probably wrong with split src & obj trees +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CROSS32CC) -Wp,-MD,$(depfile) $(VDSO32_CFLAGS) \ + -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CROSS32CC) -Wp,-MD,$(depfile) $(VDSO32_AFLAGS) -c -o $@ $^ + +targets += vdso32.so diff -Nru a/arch/ppc64/kernel/vdso32/sigtramp.S b/arch/ppc64/kernel/vdso32/sigtramp.S --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso32/sigtramp.S 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,15 @@ +#include +#include +#include +#include + + .globl _v_sigtramp32 +_v_sigtramp32: + li r0,__NR_sigreturn + sc + + .globl _v_sigtramp_rt32 +_v_sigtramp_rt32: + li r0,__NR_rt_sigreturn + sc + diff -Nru a/arch/ppc64/kernel/vdso32/testfunc.S b/arch/ppc64/kernel/vdso32/testfunc.S --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso32/testfunc.S 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,15 @@ +#include +#include +#include + + + .globl __v_myfunc_1 +__v_myfunc_1: + blr + + .globl __v_myfunc_2 +__v_myfunc_2: + blr + + .globl _v_func +_v_func: diff -Nru a/arch/ppc64/kernel/vdso32/vdso32.lds b/arch/ppc64/kernel/vdso32/vdso32.lds --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso32/vdso32.lds 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,98 @@ +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", + "elf32-powerpc") +OUTPUT_ARCH(powerpc:common) +ENTRY(_start) + +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + + . 
= 0 + SIZEOF_HEADERS; + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + } =0 + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) } + .eh_frame : { KEEP (*(.eh_frame)) } + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + .dynamic : { *(.dynamic) } + + /* Stabs debugging sections are here too, away from the + * data page. Not much in there at the moment + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /* Adjust the address for the data segment. On the vdso, we need it to + * be page aligned after the text. The data segment contains ONLY the + * .data section here, which is special in the case of the vdso as it's + * really read only and is kernel updated. The got stays there too + */ + . 
= ALIGN (0x1000); + + .data : + { + *(.data .data.* .gnu.linkonce.d.*) + } + .got : { *(.got.plt) *(.got) } + _edata = .; + PROVIDE (edata = .); + _end = .; + __end = .; + PROVIDE (end = .); + + /DISCARD/ : { *(.note.GNU-stack) } + /* gas insist on generating these, bin them in here, they should be + * empty anyways + */ + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} diff -Nru a/arch/ppc64/kernel/vdso32/vdso32_wrapper.S b/arch/ppc64/kernel/vdso32/vdso32_wrapper.S --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso32/vdso32_wrapper.S 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,12 @@ +#include + + .section ".data" + + .globl vdso32_start, vdso32_end + .balign 4096 +vdso32_start: + .incbin "arch/ppc64/kernel/vdso32/vdso32.so" + .balign 4096 +vdso32_end: + + .previous diff -Nru a/arch/ppc64/kernel/vdso64/Makefile b/arch/ppc64/kernel/vdso64/Makefile --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso64/Makefile 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,36 @@ +# List of files in the vdso, has to be asm only for now + +src-vdso64 = sigtramp.S testfunc.S + +# Build rules + +obj-vdso64 := $(addsuffix .o, $(basename $(src-vdso64))) +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) +src-vdso64 := $(addprefix $(src)/, $(src-vdso64)) + +VDSO64_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib +VDSO64_CFLAGS += -Wl,-soname=linux-vdso64.so.1 +VDSO64_AFLAGS := -D__ASSEMBLY__ -s + +obj-y += vdso64_wrapper.o + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# assembly rules for the .S files +# This is probably wrong with split src & obj trees +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) -Wp,-MD,$(depfile) $(VDSO64_CFLAGS) \ + -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) -Wp,-MD,$(depfile) $(VDSO64_AFLAGS) -c -o $@ $^ + +targets += vdso64.so diff -Nru a/arch/ppc64/kernel/vdso64/sigtramp.S b/arch/ppc64/kernel/vdso64/sigtramp.S --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso64/sigtramp.S 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,17 @@ +#include +#include +#include +#include + + .globl _v_sigtramp64 +_v_sigtramp64: + addi r1, r1, __SIGNAL_FRAMESIZE + li r0,__NR_sigreturn + sc + + .globl _v_sigtramp_rt64 +_v_sigtramp_rt64: + addi r1, r1, __SIGNAL_FRAMESIZE + li r0,__NR_rt_sigreturn + sc + diff -Nru a/arch/ppc64/kernel/vdso64/testfunc.S b/arch/ppc64/kernel/vdso64/testfunc.S --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso64/testfunc.S 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,15 @@ +#include +#include +#include + + + .globl __v_myfunc_1 +__v_myfunc_1: + blr + + .globl __v_myfunc_2 +__v_myfunc_2: + blr + + .globl _v_func +_v_func: diff -Nru a/arch/ppc64/kernel/vdso64/vdso64.lds b/arch/ppc64/kernel/vdso64/vdso64.lds --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso64/vdso64.lds 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,92 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", + "elf64-powerpc") +OUTPUT_ARCH(powerpc:common64) +ENTRY(_start) + +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . 
= 0 + SIZEOF_HEADERS; + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + *(.sfpr .glink) + } =0x60000000 + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) } + .eh_frame : { KEEP (*(.eh_frame)) } + .gcc_except_table : { *(.gcc_except_table) } + .dynamic : { *(.dynamic) } + + /* Stabs debugging sections are here too, away from the + * data page. Not much in there at the moment + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /* Adjust the address for the data segment. On the vdso, we need it to + * be page aligned after the text. The data segment contains ONLY the + * .data section here, which is special in the case of the vdso as it's + * really read only and is kernel updated. The got stays there too + */ + . 
= ALIGN (0x1000); + + .data : + { + *(.data .data.* .gnu.linkonce.d.*) + } + .got ALIGN(8) : { *(.got .toc) } + _edata = .; + PROVIDE (edata = .); + _end = .; + PROVIDE (end = .); + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} diff -Nru a/arch/ppc64/kernel/vdso64/vdso64_wrapper.S b/arch/ppc64/kernel/vdso64/vdso64_wrapper.S --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ppc64/kernel/vdso64/vdso64_wrapper.S 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,12 @@ +#include + + .section ".data" + + .globl vdso64_start, vdso64_end + .balign 4096 +vdso64_start: + .incbin "arch/ppc64/kernel/vdso64/vdso64.so" + .balign 4096 +vdso64_end: + + .previous diff -Nru a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c --- a/arch/ppc64/mm/init.c 2004-08-26 14:55:32 +10:00 +++ b/arch/ppc64/mm/init.c 2004-08-26 14:55:32 +10:00 @@ -61,6 +61,7 @@ #include #include #include +#include struct mmu_context_queue_t mmu_context_queue; @@ -680,6 +681,8 @@ #ifdef CONFIG_PPC_ISERIES iommu_vio_init(); #endif + /* Initialize the vDSO */ + vdso_init(); } /* diff -Nru a/fs/binfmt_elf.c b/fs/binfmt_elf.c --- a/fs/binfmt_elf.c 2004-08-26 14:55:32 +10:00 +++ b/fs/binfmt_elf.c 2004-08-26 14:55:32 +10:00 @@ -713,6 +713,14 @@ goto out_free_dentry; } +#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES + retval = arch_setup_additional_pages(bprm, executable_stack); + if (retval < 0) { + send_sig(SIGKILL, current, 0); + goto out_free_dentry; + } +#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ + current->mm->start_stack = bprm->p; /* Now we do a little grungy work by mmaping the ELF image into diff -Nru a/include/asm-ppc64/a.out.h b/include/asm-ppc64/a.out.h --- a/include/asm-ppc64/a.out.h 2004-08-26 14:55:32 +10:00 +++ b/include/asm-ppc64/a.out.h 2004-08-26 14:55:32 +10:00 @@ -2,6 +2,7 @@ #define __PPC64_A_OUT_H__ #include +#include /* * c 2001 PPC 64 Team, IBM Corp @@ -30,14 +31,11 @@ #ifdef __KERNEL__ -#define STACK_TOP_USER64 (TASK_SIZE_USER64) +#define STACK_TOP_USER64 (vdso64_ubase) +#define STACK_TOP_USER32 (vdso32_ubase) -/* Give 32-bit user space a full 4G address space to live in. */ -#define STACK_TOP_USER32 (TASK_SIZE_USER32) - -#define STACK_TOP ((test_thread_flag(TIF_32BIT) || \ - (ppcdebugset(PPCDBG_BINFMT_32ADDR))) ? \ - STACK_TOP_USER32 : STACK_TOP_USER64) +#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ + STACK_TOP_USER32 : STACK_TOP_USER64) #endif /* __KERNEL__ */ diff -Nru a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h --- a/include/asm-ppc64/processor.h 2004-08-26 14:55:32 +10:00 +++ b/include/asm-ppc64/processor.h 2004-08-26 14:55:32 +10:00 @@ -526,8 +526,8 @@ /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(STACK_TOP_USER32 / 4)) -#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(STACK_TOP_USER64 / 4)) +#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) +#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4)) #define TASK_UNMAPPED_BASE ((test_thread_flag(TIF_32BIT)||(ppcdebugset(PPCDBG_BINFMT_32ADDR))) ? 
\ TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 ) diff -Nru a/include/asm-ppc64/vdso.h b/include/asm-ppc64/vdso.h --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/include/asm-ppc64/vdso.h 2004-08-26 14:55:32 +10:00 @@ -0,0 +1,25 @@ +#ifndef __PPC64_VDSO_H__ +#define __PPC64_VDSO_H__ + +#ifdef __KERNEL__ + +extern unsigned int vdso64_pages; +extern unsigned int vdso32_pages; + +extern unsigned long vdso64_ubase; +extern unsigned long vdso32_ubase; + +extern unsigned long vdso64_sigtramp; +extern unsigned long vdso64_rt_sigtramp; +extern unsigned long vdso32_sigtramp; +extern unsigned long vdso32_rt_sigtramp; + +extern void vdso_init(void); + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); + +#endif /* __KERNEL__ */ + +#endif /* __PPC64_VDSO_H__ */ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Thu Aug 26 15:57:20 2004 From: paulus at samba.org (Paul Mackerras) Date: Thu, 26 Aug 2004 15:57:20 +1000 Subject: [PATCH] log machine check errors In-Reply-To: <20040813130640.08ccff25@localhost> References: <20040813130640.08ccff25@localhost> Message-ID: <16685.31680.900521.278413@cargo.ozlabs.ibm.com> Jake Moilanen writes: > Somewhere along the line it looks like logging machine check errors > never got put in 2.6. Machine check error logs were one of the main > reasons for storing logs to nvram. > > Here's a forward port of the 2.4 code the Dave Altobelli originally > wrote. [snip] > +char mce_data_buf[RTAS_ERROR_LOG_MAX]__page_aligned; Why is this page-aligned? Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From moilanen at austin.ibm.com Thu Aug 26 22:36:14 2004 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Thu, 26 Aug 2004 07:36:14 -0500 Subject: [PATCH] log machine check errors In-Reply-To: <16685.31680.900521.278413@cargo.ozlabs.ibm.com> References: <20040813130640.08ccff25@localhost> <16685.31680.900521.278413@cargo.ozlabs.ibm.com> Message-ID: <20040826073614.7ab5f94d@localhost> On Thu, 26 Aug 2004 15:57:20 +1000 Paul Mackerras wrote: > Jake Moilanen writes: > > > Somewhere along the line it looks like logging machine check errors > > never got put in 2.6. Machine check error logs were one of the main > > reasons for storing logs to nvram. > > > > Here's a forward port of the 2.4 code the Dave Altobelli originally > > wrote. > [snip] > > > +char mce_data_buf[RTAS_ERROR_LOG_MAX]__page_aligned; > > Why is this page-aligned? The page alignment can probably be taken out. The reason it was originally aligned in 2.4, was that this buffer was also used for check-exception (which we don't have in 2.6 or at least aren't supporting). IIRC check-exception expects that the buffer not cross an LMB. One way we can guarantee it is to make sure it also doesn't cross a page boundary, hence the page alignment. Thanks, Jake ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From linas at austin.ibm.com Fri Aug 27 01:52:43 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Thu, 26 Aug 2004 10:52:43 -0500 Subject: [PATCH] log machine check errors In-Reply-To: <20040826073614.7ab5f94d@localhost> References: <20040813130640.08ccff25@localhost> <16685.31680.900521.278413@cargo.ozlabs.ibm.com> <20040826073614.7ab5f94d@localhost> Message-ID: <20040826155243.GR14002@austin.ibm.com> On Thu, Aug 26, 2004 at 07:36:14AM -0500, Jake Moilanen was heard to remark: > > On Thu, 26 Aug 2004 15:57:20 +1000 > Paul Mackerras wrote: > > > Jake Moilanen writes: > > > > > Somewhere along the line it looks like logging machine check errors > > > never got put in 2.6. Machine check error logs were one of the main > > > reasons for storing logs to nvram. Huh. I was wondering what happened to this. Note also, the 2.4 code used a hard-coded buffer size, which, when set to 2K will crash in firmware on power4. The fix is to do the same rtas-error-log-max value as in the patch I posted yesterday. (I chatted with firmware to confirm that this is what firmware expects). > > > +char mce_data_buf[RTAS_ERROR_LOG_MAX]__page_aligned; > > > > Why is this page-aligned? > > The page alignment can probably be taken out. The reason it was > originally aligned in 2.4, was that this buffer was also used for > check-exception (which we don't have in 2.6 or at least aren't > supporting). IIRC check-exception expects that the buffer not cross an > LMB. One way we can guarantee it is to make sure it also doesn't cross > a page boundary, hence the page alignment. In that case, this restriction would propbably apply to allthe other buffers that use an rtas error log (rtas.c rtasd.c eeh.c ras.c). I've asked firmware (vikram sethi) about this, no response yet. --linas ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From johnrose at austin.ibm.com Fri Aug 27 05:34:02 2004 From: johnrose at austin.ibm.com (John Rose) Date: Thu, 26 Aug 2004 14:34:02 -0500 Subject: dynamic addition of OF property to /proc/device-tree Message-ID: <1093548841.32115.16.camel@sinatra.austin.ibm.com> In /proc/device-tree, i need to add a property (as opposed to a node) dynamically. Should I use the existing remove/add_node_dynamic functions to remove and re-add the parent node of the new property, or write up a new dynamic property add/remove? This is for the creation of the linux,pci-domain property for newly added PHBs. Thoughts? Thanks- John ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From haveblue at us.ibm.com Fri Aug 27 06:39:02 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Thu, 26 Aug 2004 13:39:02 -0700 Subject: [PATCH] log machine check errors In-Reply-To: <20040826073614.7ab5f94d@localhost> References: <20040813130640.08ccff25@localhost> <16685.31680.900521.278413@cargo.ozlabs.ibm.com> <20040826073614.7ab5f94d@localhost> Message-ID: <1093552741.2984.347.camel@nighthawk> On Thu, 2004-08-26 at 05:36, Jake Moilanen wrote: > IIRC check-exception expects that the buffer not cross an > LMB. One way we can guarantee it is to make sure it also doesn't cross > a page boundary, hence the page alignment. Is the buffer guaranteed to never be larger than a page? (I know it's only 2048 bytes now) -- Dave ** Sent via the linuxppc64-dev mail list. 
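The alignment argument is easy to sanity-check in isolation: as long as the buffer is page aligned and no larger than a page, both ends fall in the same page, so it cannot straddle a page (and hence an LMB) boundary. A throwaway userspace check of that invariant, with a 4K PAGE_SIZE hard-coded purely as an assumption, might look like:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE          4096UL
#define RTAS_ERROR_LOG_MAX 2048

/* stand-in for the kernel's statically allocated, page-aligned buffer */
static char mce_data_buf[RTAS_ERROR_LOG_MAX]
	__attribute__((aligned(4096)));

int main(void)
{
	unsigned long start = (unsigned long)mce_data_buf;
	unsigned long end = start + RTAS_ERROR_LOG_MAX - 1;

	/* same page at both ends => no page crossing, so no LMB crossing */
	assert(start / PAGE_SIZE == end / PAGE_SIZE);
	printf("buffer stays within a single page\n");
	return 0;
}

The check obviously only holds while RTAS_ERROR_LOG_MAX stays at or below the page size, which is exactly the question raised above.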
See http://lists.linuxppc.org/ From moilanen at austin.ibm.com Fri Aug 27 06:44:52 2004 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Thu, 26 Aug 2004 15:44:52 -0500 Subject: [PATCH] log machine check errors In-Reply-To: <1093552741.2984.347.camel@nighthawk> References: <20040813130640.08ccff25@localhost> <16685.31680.900521.278413@cargo.ozlabs.ibm.com> <20040826073614.7ab5f94d@localhost> <1093552741.2984.347.camel@nighthawk> Message-ID: <20040826154452.59b07cdd@localhost> On Thu, 26 Aug 2004 13:39:02 -0700 Dave Hansen wrote: > On Thu, 2004-08-26 at 05:36, Jake Moilanen wrote: > > IIRC check-exception expects that the buffer not cross an > > LMB. One way we can guarantee it is to make sure it also doesn't cross > > a page boundary, hence the page alignment. > > Is the buffer guaranteed to never be larger than a page? (I know it's > only 2048 bytes now) It will depend on the RAS architects if they ever decide to move error logs greater then 2k. It may happen someday, but we'll have warning. Thanks, Jake ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From moilanen at austin.ibm.com Fri Aug 27 07:27:43 2004 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Thu, 26 Aug 2004 16:27:43 -0500 Subject: vDSO preliminary implementation In-Reply-To: <1093496594.2172.80.camel@gaston> References: <1093496594.2172.80.camel@gaston> Message-ID: <20040826162743.1e8e7b12@localhost> > What this implementation contains however is the signal trampoline beeing moved > to the vDSO area, thus no longer on the stack, so people working on > non-executable stacks can toy with it. Hey I can boot w/ distros that compile w/ pt_gnu_stacks now! > > +VDSO32_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib > +VDSO32_CFLAGS += -Wl,-soname=linux-vdso32.so.1 > +VDSO32_AFLAGS := -D__ASSEMBLY__ -s Don't you need -Iinclude on your VDSO32_AFLAGS line? I had to add it to compile both sigtramp.S for vdso32 and vdso64. Thanks, Jake ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From linas at austin.ibm.com Fri Aug 27 07:52:26 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Thu, 26 Aug 2004 16:52:26 -0500 Subject: [PATCH] log machine check errors In-Reply-To: <1093552741.2984.347.camel@nighthawk> References: <20040813130640.08ccff25@localhost> <16685.31680.900521.278413@cargo.ozlabs.ibm.com> <20040826073614.7ab5f94d@localhost> <1093552741.2984.347.camel@nighthawk> Message-ID: <20040826215226.GW14002@austin.ibm.com> On Thu, Aug 26, 2004 at 01:39:02PM -0700, Dave Hansen was heard to remark: > > On Thu, 2004-08-26 at 05:36, Jake Moilanen wrote: > > IIRC check-exception expects that the buffer not cross an > > LMB. One way we can guarantee it is to make sure it also doesn't cross > > a page boundary, hence the page alignment. > > Is the buffer guaranteed to never be larger than a page? (I know it's > only 2048 bytes now) Actually, there is one thing we probably should do, and that's to change the buffer to 4K now, so that if/when firmware moves to 4K, older kernels (i.e. the ones we are creating now) will continue to work correctly. We already hit this with the rhel3 2.4 kernels on power5 ... we forgot to upgrade from 1K to 2K and now all the logs are being truncated, and redhat doesn't want to patch this late in the game ... So, with that argument in hand, will the maintainers accept the following patch? 
Signed-off-by: Linas Vepstas ===== include/asm-ppc64/rtas.h 1.21 vs edited ===== --- 1.21/include/asm-ppc64/rtas.h Wed Aug 25 15:09:24 2004 +++ edited/include/asm-ppc64/rtas.h Thu Aug 26 16:48:33 2004 @@ -199,7 +199,7 @@ #define RTAS_DEBUG KERN_DEBUG "RTAS: " -#define RTAS_ERROR_LOG_MAX 2048 +#define RTAS_ERROR_LOG_MAX 4096 /** Return the firmware-specified size of the error log buffer * for all rtas calls that require an error buffer argument. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From moilanen at austin.ibm.com Fri Aug 27 08:19:48 2004 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Thu, 26 Aug 2004 17:19:48 -0500 Subject: [PATCH] log machine check errors In-Reply-To: <20040826215226.GW14002@austin.ibm.com> References: <20040813130640.08ccff25@localhost> <16685.31680.900521.278413@cargo.ozlabs.ibm.com> <20040826073614.7ab5f94d@localhost> <1093552741.2984.347.camel@nighthawk> <20040826215226.GW14002@austin.ibm.com> Message-ID: <20040826171948.44ba0387@localhost> > Actually, there is one thing we probably should do, and that's to > change the buffer to 4K now, so that if/when firmware moves to 4K, > older kernels (i.e. the ones we are creating now) will continue > to work correctly. I personally disagree w/ doing this. Memory is cheap, but RTAS_ERROR_LOG_MAX is used to define at least 4 static buffers that I can think of off the top of my head. So that is an extra 8K for something that _may_ happen in the future. Plus we've seen bugs in the Firmware where they do not handle us passing the error log max in correctly. We may regress boxes. Jake ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From linas at austin.ibm.com Fri Aug 27 08:37:36 2004 From: linas at austin.ibm.com (Linas Vepstas) Date: Thu, 26 Aug 2004 17:37:36 -0500 Subject: [PATCH] log machine check errors In-Reply-To: <20040826171948.44ba0387@localhost> References: <20040813130640.08ccff25@localhost> <16685.31680.900521.278413@cargo.ozlabs.ibm.com> <20040826073614.7ab5f94d@localhost> <1093552741.2984.347.camel@nighthawk> <20040826215226.GW14002@austin.ibm.com> <20040826171948.44ba0387@localhost> Message-ID: <20040826223736.GY14002@austin.ibm.com> On Thu, Aug 26, 2004 at 05:19:48PM -0500, Jake Moilanen was heard to remark: > > > Actually, there is one thing we probably should do, and that's to > > change the buffer to 4K now, so that if/when firmware moves to 4K, > > older kernels (i.e. the ones we are creating now) will continue > > to work correctly. > > I personally disagree w/ doing this. Memory is cheap, but > RTAS_ERROR_LOG_MAX is used to define at least 4 static buffers that I > can think of off the top of my head. So that is an extra 8K for > something that _may_ happen in the future. yes. BTW, they're statically allocated so that they end up in RMO so firmware can get at them. > Plus we've seen bugs in the > Firmware where they do not handle us passing the error log max in > correctly. We may regress boxes. I think I've fixed all of those now, pending yesterday's patch. --linas ** Sent via the linuxppc64-dev mail list. 
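One way to get the best of both would be to keep the static buffers at a fixed size but ask firmware how big its logs actually are and clamp to the smaller of the two. A sketch of that query is below; the property name comes from the discussion above, while the function name and placement are just for illustration, not a claim about what the posted patch does.

#include <asm/prom.h>
#include <asm/rtas.h>

static int rtas_error_log_max;

/* sketch: clamp the firmware-reported log size to our static buffers */
int rtas_get_error_log_max(void)
{
	struct device_node *rtas;
	int *size;

	if (rtas_error_log_max)
		return rtas_error_log_max;

	rtas_error_log_max = RTAS_ERROR_LOG_MAX;

	rtas = of_find_node_by_path("/rtas");
	if (rtas != NULL) {
		size = (int *)get_property(rtas, "rtas-error-log-max", NULL);
		if (size && *size < RTAS_ERROR_LOG_MAX)
			rtas_error_log_max = *size;
		of_node_put(rtas);
	}

	return rtas_error_log_max;
}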
See http://lists.linuxppc.org/ From benh at kernel.crashing.org Fri Aug 27 09:29:19 2004 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Fri, 27 Aug 2004 09:29:19 +1000 Subject: vDSO preliminary implementation In-Reply-To: <20040826162743.1e8e7b12@localhost> References: <1093496594.2172.80.camel@gaston> <20040826162743.1e8e7b12@localhost> Message-ID: <1093562959.2637.164.camel@gaston> On Fri, 2004-08-27 at 07:27, Jake Moilanen wrote: > > What this implementation contains however is the signal trampoline beeing moved > > to the vDSO area, thus no longer on the stack, so people working on > > non-executable stacks can toy with it. > > Hey I can boot w/ distros that compile w/ pt_gnu_stacks now! > > > > > +VDSO32_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib > > +VDSO32_CFLAGS += -Wl,-soname=linux-vdso32.so.1 > > +VDSO32_AFLAGS := -D__ASSEMBLY__ -s > > Don't you need -Iinclude on your VDSO32_AFLAGS line? I had to add it to > compile both sigtramp.S for vdso32 and vdso64. Weird... worked for me without... I'll have a look, those Makefiles are really horrible, anybody is welcome to improve them. Ben. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From strosake at austin.ibm.com Fri Aug 27 10:09:59 2004 From: strosake at austin.ibm.com (Mike Strosaker) Date: Thu, 26 Aug 2004 19:09:59 -0500 Subject: [PATCH] updates to surveillance for power5 In-Reply-To: References: Message-ID: <412E7BD7.5080302@austin.ibm.com> Hi, Anton: > > Any ideas why they got rid of surveillance on POWER5? > Because of the virtualization layer and partitioning, the surveillance requirement was moved to PHYP<->SP. Apparently, this was a hotly contested issue among the platform design folks (especially considering that partitioned power4 systems still have OS<->SP surveillance). I think the logic is: If an OS goes down, its not likely a server problem, hence no requirement to monitor from the server side. At least the platform gets notified of panics via os-term. I gather that some user space tools are expected to monitor for deadlocks/hangs (maybe clustering tools). Thanks, Mike Michael Strosaker IBM Linux Technology Center ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Fri Aug 27 13:07:41 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Thu, 26 Aug 2004 22:07:41 -0500 Subject: [patch 0/3] consolidate cpu map initialisation Message-ID: <1093576061.5926.468.camel@biclops.private.network> Hi- Following is a series of patches which move all cpu map manipulations out of the OF client boot code and into a single function which is intended to work for both pSeries and pmac (iSeries has its own mechanism). The possible map is also finalized in this function so we can key off of that instead of NR_CPUS for allocating irqstacks. Boot-tested on a 4-way Power4 partition and 20-way SMT Power5 partition; build-tested with iSeries and pmac defconfigs, and with a pSeries UP config. I'd appreciate it if anyone could give these a test boot on iSeries or pmac. Patches are against 2.6.9-rc1-bk2. Nathan ** Sent via the linuxppc64-dev mail list. 
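For a quick sanity check after such a test boot, a small userspace program along these lines (the sysfs path and layout are assumptions about 2.6-era kernels) compares the cpu count the C library reports with the cpuN entries visible in sysfs:

#include <ctype.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	DIR *d = opendir("/sys/devices/system/cpu");
	struct dirent *de;
	int sysfs_cpus = 0;

	if (d) {
		/* count entries named cpu0, cpu1, ... */
		while ((de = readdir(d)) != NULL)
			if (strncmp(de->d_name, "cpu", 3) == 0 &&
			    isdigit((unsigned char)de->d_name[3]))
				sysfs_cpus++;
		closedir(d);
	}

	printf("sysfs cpus: %d\n", sysfs_cpus);
	printf("configured: %ld\n", sysconf(_SC_NPROCESSORS_CONF));
	printf("online:     %ld\n", sysconf(_SC_NPROCESSORS_ONLN));
	return 0;
}

If the counts disagree after the rework, that would be a good hint the maps are being set up differently than before.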
See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Fri Aug 27 13:16:48 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Thu, 26 Aug 2004 22:16:48 -0500 Subject: [patch 1/3] rework ppc64 cpu map setup In-Reply-To: <1093576061.5926.468.camel@biclops.private.network> References: <1093576061.5926.468.camel@biclops.private.network> Message-ID: <1093576452.5926.479.camel@biclops.private.network> Move all cpu map initializations to one place (except for the online map -- cpus mark themselves online as they come up). This sets up cpu_possible_map early enough that we can use num_possible_cpus for allocating irqstacks instead of NR_CPUS. Hopefully this should also help set the stage for kexec. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/setup.c~ppc64-rework-cpumap-setup arch/ppc64/kernel/setup.c --- 2.6.9-rc1-bk2/arch/ppc64/kernel/setup.c~ppc64-rework-cpumap-setup 2004-08-26 15:45:45.000000000 -0500 +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/setup.c 2004-08-26 21:52:43.000000000 -0500 @@ -155,6 +155,94 @@ void __init disable_early_printk(void) early_console_initialized = 0; } +#if !defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP) +/** + * setup_cpu_maps - initialize the following cpu maps: + * cpu_possible_map + * cpu_present_map + * cpu_sibling_map + * + * Having the possible map set up early allows us to restrict allocations + * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. + * + * We do not initialize the online map here; cpus set their own bits in + * cpu_online_map as they come up. + * + * This function is valid only for Open Firmware systems. finish_device_tree + * must be called before using this. + */ +static void __init setup_cpu_maps(void) +{ + struct device_node *dn = NULL; + int cpu = 0; + + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { + u32 *intserv; + int j, len = sizeof(u32), nthreads; + + intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + nthreads = len / sizeof(u32); + + for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); + cpu++; + } + } + + /* + * On pSeries LPAR, we need to know how many cpus + * could possibly be added to this partition. + */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR && + (dn = of_find_node_by_path("/rtas"))) { + int num_addr_cell, num_size_cell, maxcpus; + unsigned int *ireg; + + num_addr_cell = prom_n_addr_cells(dn); + num_size_cell = prom_n_size_cells(dn); + + ireg = (unsigned int *) + get_property(dn, "ibm,lrdr-capacity", NULL); + + if (!ireg) + goto out; + + maxcpus = ireg[num_addr_cell + num_size_cell]; + + /* Double maxcpus for processors which have SMT capability */ + if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) + maxcpus *= 2; + + if (maxcpus > NR_CPUS) { + printk(KERN_WARNING + "Partition configured for %d cpus, " + "operating system maximum is %d.\n", + maxcpus, NR_CPUS); + maxcpus = NR_CPUS; + } else + printk(KERN_INFO "Partition configured for %d cpus.\n", + maxcpus); + + for (cpu = 0; cpu < maxcpus; cpu++) + cpu_set(cpu, cpu_possible_map); + out: + of_node_put(dn); + } + + /* + * Do the sibling map; assume only two threads per processor. + */ + for_each_cpu(cpu) { + cpu_set(cpu, cpu_sibling_map[cpu]); + if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) + cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); + } + + systemcfg->processorCount = num_present_cpus(); +} +#endif /* !defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP) */ /* * Do some initial setup of the system. 
The parameters are those which * were passed in from the bootloader. @@ -220,6 +308,13 @@ void setup_system(unsigned long r3, unsi } #endif /* CONFIG_BOOTX_TEXT */ +#ifdef CONFIG_PPC_PMAC + if (systemcfg->platform == PLATFORM_POWERMAC) { + finish_device_tree(); + pmac_init(r3, r4, r5, r6, r7); + } +#endif /* CONFIG_PPC_PMAC */ + #ifdef CONFIG_PPC_PSERIES if (systemcfg->platform & PLATFORM_PSERIES) { early_console_initialized = 1; @@ -227,31 +322,32 @@ void setup_system(unsigned long r3, unsi __irq_offset_value = NUM_ISA_INTERRUPTS; finish_device_tree(); chrp_init(r3, r4, r5, r6, r7); + } +#endif /* CONFIG_PPC_PSERIES */ #ifdef CONFIG_SMP - /* Start secondary threads on SMT systems; primary threads - * are already in the running state. - */ - for_each_present_cpu(i) { - if (query_cpu_stopped - (get_hard_smp_processor_id(i)) == 0) { - printk("%16.16x : starting thread\n", i); - rtas_call(rtas_token("start-cpu"), 3, 1, &ret, - get_hard_smp_processor_id(i), - (u32)*((unsigned long *)pseries_secondary_smp_init), - i); - } +#ifndef CONFIG_PPC_ISERIES + /* + * iSeries has already initialized the cpu maps at this point. + */ + setup_cpu_maps(); +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_PSERIES + /* Start secondary threads on SMT systems; primary threads + * are already in the running state. + */ + for_each_present_cpu(i) { + if (query_cpu_stopped(get_hard_smp_processor_id(i)) == 0) { + printk("%16.16x : starting thread\n", i); + rtas_call(rtas_token("start-cpu"), 3, 1, &ret, + get_hard_smp_processor_id(i), + (u32)*((unsigned long *)pseries_secondary_smp_init), + i); } -#endif /* CONFIG_SMP */ } #endif /* CONFIG_PPC_PSERIES */ - -#ifdef CONFIG_PPC_PMAC - if (systemcfg->platform == PLATFORM_POWERMAC) { - finish_device_tree(); - pmac_init(r3, r4, r5, r6, r7); - } -#endif /* CONFIG_PPC_PMAC */ +#endif /* CONFIG_SMP */ #if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_PPC_PMAC) rtas_stop_self_args.token = rtas_token("stop-self"); diff -puN arch/ppc64/kernel/prom.c~ppc64-rework-cpumap-setup arch/ppc64/kernel/prom.c --- 2.6.9-rc1-bk2/arch/ppc64/kernel/prom.c~ppc64-rework-cpumap-setup 2004-08-26 15:45:57.000000000 -0500 +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/prom.c 2004-08-26 21:51:56.000000000 -0500 @@ -939,20 +939,11 @@ static void __init prom_hold_cpus(unsign prom_getprop(node, "reg", ®, sizeof(reg)); lpaca[cpuid].hw_cpu_id = reg; -#ifdef CONFIG_SMP - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_map)); - if (reg == 0) - cpu_set(cpuid, RELOC(cpu_online_map)); -#endif /* CONFIG_SMP */ cpuid++; } return; } - /* Initially, we must have one active CPU. */ - _systemcfg->processorCount = 1; - prom_debug("prom_hold_cpus: start...\n"); prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); @@ -1038,23 +1029,13 @@ static void __init prom_hold_cpus(unsign * even if we never start it. */ if (cpuid >= NR_CPUS) goto next; -#ifdef CONFIG_SMP - /* Set the number of active processors. */ - _systemcfg->processorCount++; - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_map)); -#endif } else { prom_printf("... failed: %x\n", *acknowledge); } } #ifdef CONFIG_SMP - else { + else prom_printf("%x : booting cpu %s\n", cpuid, path); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_online_map)); - cpu_set(cpuid, RELOC(cpu_present_map)); - } #endif next: #ifdef CONFIG_SMP @@ -1067,9 +1048,6 @@ next: prom_printf("%x : preparing thread ... 
", interrupt_server[i]); if (_naca->smt_state) { - cpu_set(cpuid, RELOC(cpu_present_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - _systemcfg->processorCount++; prom_printf("available\n"); } else { prom_printf("not available\n"); @@ -1099,11 +1077,7 @@ next: pir & 0x3ff; } } -/* cpu_set(i+1, cpu_online_map); */ - cpu_set(i+1, RELOC(cpu_possible_map)); - cpu_set(i+1, RELOC(cpu_present_map)); } - _systemcfg->processorCount *= 2; } else { prom_printf("Processor is not HMT capable\n"); } diff -puN arch/ppc64/kernel/smp.c~ppc64-rework-cpumap-setup arch/ppc64/kernel/smp.c --- 2.6.9-rc1-bk2/arch/ppc64/kernel/smp.c~ppc64-rework-cpumap-setup 2004-08-26 17:21:29.000000000 -0500 +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/smp.c 2004-08-26 17:48:56.000000000 -0500 @@ -401,56 +401,11 @@ static inline int __devinit smp_startup_ } return 1; } - -static inline void look_for_more_cpus(void) -{ - int num_addr_cell, num_size_cell, len, i, maxcpus; - struct device_node *np; - unsigned int *ireg; - - /* Find the property which will tell us about how many CPUs - * we're allowed to have. */ - if ((np = find_path_device("/rtas")) == NULL) { - printk(KERN_ERR "Could not find /rtas in device tree!"); - return; - } - num_addr_cell = prom_n_addr_cells(np); - num_size_cell = prom_n_size_cells(np); - - ireg = (unsigned int *)get_property(np, "ibm,lrdr-capacity", &len); - if (ireg == NULL) { - /* FIXME: make sure not marked as lrdr_capable() */ - return; - } - - maxcpus = ireg[num_addr_cell + num_size_cell]; - - /* Double maxcpus for processors which have SMT capability */ - if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) - maxcpus *= 2; - - - if (maxcpus > NR_CPUS) { - printk(KERN_WARNING - "Partition configured for %d cpus, " - "operating system maximum is %d.\n", maxcpus, NR_CPUS); - maxcpus = NR_CPUS; - } else - printk(KERN_INFO "Partition configured for %d cpus.\n", - maxcpus); - - /* Make those cpus (which might appear later) possible too. */ - for (i = 0; i < maxcpus; i++) - cpu_set(i, cpu_possible_map); -} #else /* ... CONFIG_HOTPLUG_CPU */ static inline int __devinit smp_startup_cpu(unsigned int lcpu) { return 1; } -static inline void look_for_more_cpus(void) -{ -} #endif /* CONFIG_HOTPLUG_CPU */ static void smp_pSeries_kick_cpu(int nr) @@ -837,8 +792,6 @@ void __init smp_prepare_cpus(unsigned in */ do_gtod.tb_orig_stamp = tb_last_stamp; systemcfg->tb_orig_stamp = tb_last_stamp; - - look_for_more_cpus(); #endif max_cpus = smp_ops->probe(); @@ -851,19 +804,12 @@ void __init smp_prepare_cpus(unsigned in for_each_cpu(cpu) if (cpu != boot_cpuid) smp_create_idle(cpu); - - for_each_cpu(cpu) { - cpu_set(cpu, cpu_sibling_map[cpu]); - if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) - cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); - } } void __devinit smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); - /* cpu_possible is set up in prom.c */ cpu_set(boot_cpuid, cpu_online_map); paca[boot_cpuid].__current = current; _ ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Fri Aug 27 13:16:51 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Thu, 26 Aug 2004 22:16:51 -0500 Subject: patch [2/3] set platform cpuids later in boot In-Reply-To: <1093576061.5926.468.camel@biclops.private.network> References: <1093576061.5926.468.camel@biclops.private.network> Message-ID: <1093576457.5926.481.camel@biclops.private.network> Move the initialization of the per-cpu paca->hw_cpu_id out of the Open Firmware client boot code and into a common location which is executed later. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/setup.c~ppc64-later-cpuid-setup arch/ppc64/kernel/setup.c --- 2.6.9-rc1-bk2/arch/ppc64/kernel/setup.c~ppc64-later-cpuid-setup 2004-08-26 21:56:37.000000000 -0500 +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/setup.c 2004-08-26 21:56:37.000000000 -0500 @@ -170,6 +170,8 @@ void __init disable_early_printk(void) * * This function is valid only for Open Firmware systems. finish_device_tree * must be called before using this. + * + * While we're here, we may as well set the "physical" cpu ids in the paca. */ static void __init setup_cpu_maps(void) { @@ -182,11 +184,15 @@ static void __init setup_cpu_maps(void) intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + intserv = (u32 *)get_property(dn, "reg", NULL); + nthreads = len / sizeof(u32); for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, intserv[j]); cpu++; } } diff -puN arch/ppc64/kernel/prom.c~ppc64-later-cpuid-setup arch/ppc64/kernel/prom.c --- 2.6.9-rc1-bk2/arch/ppc64/kernel/prom.c~ppc64-later-cpuid-setup 2004-08-26 21:56:37.000000000 -0500 +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/prom.c 2004-08-26 21:56:37.000000000 -0500 @@ -919,31 +919,11 @@ static void __init prom_hold_cpus(unsign unsigned long secondary_hold = virt_to_abs(*PTRRELOC((unsigned long *)__secondary_hold)); struct systemcfg *_systemcfg = RELOC(systemcfg); - struct paca_struct *lpaca = PTRRELOC(&paca[0]); struct prom_t *_prom = PTRRELOC(&prom); #ifdef CONFIG_SMP struct naca_struct *_naca = RELOC(naca); #endif - /* On pmac, we just fill out the various global bitmasks and - * arrays indicating our CPUs are here, they are actually started - * later on from pmac_smp - */ - if (_systemcfg->platform == PLATFORM_POWERMAC) { - for (node = 0; prom_next_node(&node); ) { - type[0] = 0; - prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("cpu")) != 0) - continue; - reg = -1; - prom_getprop(node, "reg", ®, sizeof(reg)); - lpaca[cpuid].hw_cpu_id = reg; - - cpuid++; - } - return; - } - prom_debug("prom_hold_cpus: start...\n"); prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); @@ -987,7 +967,6 @@ static void __init prom_hold_cpus(unsign prom_debug("\ncpuid = 0x%x\n", cpuid); prom_debug("cpu hw idx = 0x%x\n", reg); - lpaca[cpuid].hw_cpu_id = reg; /* Init the acknowledge var which will be reset by * the secondary cpu when it awakens from its OF @@ -1044,7 +1023,6 @@ next: cpuid++; if (cpuid >= NR_CPUS) continue; - lpaca[cpuid].hw_cpu_id = interrupt_server[i]; prom_printf("%x : preparing thread ... 
", interrupt_server[i]); if (_naca->smt_state) { diff -puN include/asm-ppc64/smp.h~ppc64-later-cpuid-setup include/asm-ppc64/smp.h --- 2.6.9-rc1-bk2/include/asm-ppc64/smp.h~ppc64-later-cpuid-setup 2004-08-26 21:56:37.000000000 -0500 +++ 2.6.9-rc1-bk2-nathanl/include/asm-ppc64/smp.h 2004-08-26 21:56:37.000000000 -0500 @@ -63,7 +63,7 @@ extern int query_cpu_stopped(unsigned in #define get_hard_smp_processor_id(CPU) (paca[(CPU)].hw_cpu_id) #define set_hard_smp_processor_id(CPU, VAL) \ - do { (paca[(CPU)].hw_proc_num = (VAL)); } while (0) + do { (paca[(CPU)].hw_cpu_id = (VAL)); } while (0) #endif /* __ASSEMBLY__ */ _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Fri Aug 27 13:16:54 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Thu, 26 Aug 2004 22:16:54 -0500 Subject: [patch 3/3] allocate irqstacks only for possible cpus In-Reply-To: <1093576061.5926.468.camel@biclops.private.network> References: <1093576061.5926.468.camel@biclops.private.network> Message-ID: <1093576465.5926.484.camel@biclops.private.network> With earlier setup of cpu_possible_map the number of irqstacks shrinks from NR_CPUS to the number of possible cpus. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/setup.c~ppc64-trim-irqstack-allocation arch/ppc64/kernel/setup.c --- 2.6.9-rc1-bk2/arch/ppc64/kernel/setup.c~ppc64-trim-irqstack-allocation 2004-08-26 21:56:42.000000000 -0500 +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/setup.c 2004-08-26 21:56:42.000000000 -0500 @@ -701,7 +701,7 @@ static void __init irqstack_early_init(v int i; /* interrupt stacks must be under 256MB, we cannot afford to take SLB misses on them */ - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, 0x10000000)); hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, diff -puN arch/ppc64/kernel/irq.c~ppc64-trim-irqstack-allocation arch/ppc64/kernel/irq.c --- 2.6.9-rc1-bk2/arch/ppc64/kernel/irq.c~ppc64-trim-irqstack-allocation 2004-08-26 21:56:42.000000000 -0500 +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/irq.c 2004-08-26 21:56:42.000000000 -0500 @@ -978,7 +978,7 @@ void irq_ctx_init(void) struct thread_info *tp; int i; - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Sat Aug 28 01:40:58 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Fri, 27 Aug 2004 10:40:58 -0500 Subject: [patch 1/3] rework ppc64 cpu map setup In-Reply-To: <1093576452.5926.479.camel@biclops.private.network> References: <1093576061.5926.468.camel@biclops.private.network> <1093576452.5926.479.camel@biclops.private.network> Message-ID: <412F560A.9020901@austin.ibm.com> Patch looks great. Now that this is done I think we could remove systemcfg->processorCount and replace it with num_present_cpus() everywhere. Simplify the code a bit. Nathan Lynch wrote: > Move all cpu map initializations to one place (except for the online > map -- cpus mark themselves online as they come up). This sets up > cpu_possible_map early enough that we can use num_possible_cpus for > allocating irqstacks instead of NR_CPUS. Hopefully this should also > help set the stage for kexec. 
> > Signed-off-by: Nathan Lynch > > > --- > > > diff -puN arch/ppc64/kernel/setup.c~ppc64-rework-cpumap-setup arch/ppc64/kernel/setup.c > --- 2.6.9-rc1-bk2/arch/ppc64/kernel/setup.c~ppc64-rework-cpumap-setup 2004-08-26 15:45:45.000000000 -0500 > +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/setup.c 2004-08-26 21:52:43.000000000 -0500 > @@ -155,6 +155,94 @@ void __init disable_early_printk(void) > early_console_initialized = 0; > } > > +#if !defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP) > +/** > + * setup_cpu_maps - initialize the following cpu maps: > + * cpu_possible_map > + * cpu_present_map > + * cpu_sibling_map > + * > + * Having the possible map set up early allows us to restrict allocations > + * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. > + * > + * We do not initialize the online map here; cpus set their own bits in > + * cpu_online_map as they come up. > + * > + * This function is valid only for Open Firmware systems. finish_device_tree > + * must be called before using this. > + */ > +static void __init setup_cpu_maps(void) > +{ > + struct device_node *dn = NULL; > + int cpu = 0; > + > + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { > + u32 *intserv; > + int j, len = sizeof(u32), nthreads; > + > + intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s", > + &len); > + nthreads = len / sizeof(u32); > + > + for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { > + cpu_set(cpu, cpu_possible_map); > + cpu_set(cpu, cpu_present_map); > + cpu++; > + } > + } > + > + /* > + * On pSeries LPAR, we need to know how many cpus > + * could possibly be added to this partition. > + */ > + if (systemcfg->platform == PLATFORM_PSERIES_LPAR && > + (dn = of_find_node_by_path("/rtas"))) { > + int num_addr_cell, num_size_cell, maxcpus; > + unsigned int *ireg; > + > + num_addr_cell = prom_n_addr_cells(dn); > + num_size_cell = prom_n_size_cells(dn); > + > + ireg = (unsigned int *) > + get_property(dn, "ibm,lrdr-capacity", NULL); > + > + if (!ireg) > + goto out; > + > + maxcpus = ireg[num_addr_cell + num_size_cell]; > + > + /* Double maxcpus for processors which have SMT capability */ > + if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) > + maxcpus *= 2; > + > + if (maxcpus > NR_CPUS) { > + printk(KERN_WARNING > + "Partition configured for %d cpus, " > + "operating system maximum is %d.\n", > + maxcpus, NR_CPUS); > + maxcpus = NR_CPUS; > + } else > + printk(KERN_INFO "Partition configured for %d cpus.\n", > + maxcpus); > + > + for (cpu = 0; cpu < maxcpus; cpu++) > + cpu_set(cpu, cpu_possible_map); > + out: > + of_node_put(dn); > + } > + > + /* > + * Do the sibling map; assume only two threads per processor. > + */ > + for_each_cpu(cpu) { > + cpu_set(cpu, cpu_sibling_map[cpu]); > + if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) > + cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); > + } > + > + systemcfg->processorCount = num_present_cpus(); > +} > +#endif /* !defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP) */ > /* > * Do some initial setup of the system. The parameters are those which > * were passed in from the bootloader. 
> @@ -220,6 +308,13 @@ void setup_system(unsigned long r3, unsi > } > #endif /* CONFIG_BOOTX_TEXT */ > > +#ifdef CONFIG_PPC_PMAC > + if (systemcfg->platform == PLATFORM_POWERMAC) { > + finish_device_tree(); > + pmac_init(r3, r4, r5, r6, r7); > + } > +#endif /* CONFIG_PPC_PMAC */ > + > #ifdef CONFIG_PPC_PSERIES > if (systemcfg->platform & PLATFORM_PSERIES) { > early_console_initialized = 1; > @@ -227,31 +322,32 @@ void setup_system(unsigned long r3, unsi > __irq_offset_value = NUM_ISA_INTERRUPTS; > finish_device_tree(); > chrp_init(r3, r4, r5, r6, r7); > + } > +#endif /* CONFIG_PPC_PSERIES */ > > #ifdef CONFIG_SMP > - /* Start secondary threads on SMT systems; primary threads > - * are already in the running state. > - */ > - for_each_present_cpu(i) { > - if (query_cpu_stopped > - (get_hard_smp_processor_id(i)) == 0) { > - printk("%16.16x : starting thread\n", i); > - rtas_call(rtas_token("start-cpu"), 3, 1, &ret, > - get_hard_smp_processor_id(i), > - (u32)*((unsigned long *)pseries_secondary_smp_init), > - i); > - } > +#ifndef CONFIG_PPC_ISERIES > + /* > + * iSeries has already initialized the cpu maps at this point. > + */ > + setup_cpu_maps(); > +#endif /* CONFIG_PPC_ISERIES */ > + > +#ifdef CONFIG_PPC_PSERIES > + /* Start secondary threads on SMT systems; primary threads > + * are already in the running state. > + */ > + for_each_present_cpu(i) { > + if (query_cpu_stopped(get_hard_smp_processor_id(i)) == 0) { > + printk("%16.16x : starting thread\n", i); > + rtas_call(rtas_token("start-cpu"), 3, 1, &ret, > + get_hard_smp_processor_id(i), > + (u32)*((unsigned long *)pseries_secondary_smp_init), > + i); > } > -#endif /* CONFIG_SMP */ > } > #endif /* CONFIG_PPC_PSERIES */ > - > -#ifdef CONFIG_PPC_PMAC > - if (systemcfg->platform == PLATFORM_POWERMAC) { > - finish_device_tree(); > - pmac_init(r3, r4, r5, r6, r7); > - } > -#endif /* CONFIG_PPC_PMAC */ > +#endif /* CONFIG_SMP */ > > #if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_PPC_PMAC) > rtas_stop_self_args.token = rtas_token("stop-self"); > diff -puN arch/ppc64/kernel/prom.c~ppc64-rework-cpumap-setup arch/ppc64/kernel/prom.c > --- 2.6.9-rc1-bk2/arch/ppc64/kernel/prom.c~ppc64-rework-cpumap-setup 2004-08-26 15:45:57.000000000 -0500 > +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/prom.c 2004-08-26 21:51:56.000000000 -0500 > @@ -939,20 +939,11 @@ static void __init prom_hold_cpus(unsign > prom_getprop(node, "reg", ®, sizeof(reg)); > lpaca[cpuid].hw_cpu_id = reg; > > -#ifdef CONFIG_SMP > - cpu_set(cpuid, RELOC(cpu_possible_map)); > - cpu_set(cpuid, RELOC(cpu_present_map)); > - if (reg == 0) > - cpu_set(cpuid, RELOC(cpu_online_map)); > -#endif /* CONFIG_SMP */ > cpuid++; > } > return; > } > > - /* Initially, we must have one active CPU. */ > - _systemcfg->processorCount = 1; > - > prom_debug("prom_hold_cpus: start...\n"); > prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); > prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); > @@ -1038,23 +1029,13 @@ static void __init prom_hold_cpus(unsign > * even if we never start it. */ > if (cpuid >= NR_CPUS) > goto next; > -#ifdef CONFIG_SMP > - /* Set the number of active processors. */ > - _systemcfg->processorCount++; > - cpu_set(cpuid, RELOC(cpu_possible_map)); > - cpu_set(cpuid, RELOC(cpu_present_map)); > -#endif > } else { > prom_printf("... 
failed: %x\n", *acknowledge); > } > } > #ifdef CONFIG_SMP > - else { > + else > prom_printf("%x : booting cpu %s\n", cpuid, path); > - cpu_set(cpuid, RELOC(cpu_possible_map)); > - cpu_set(cpuid, RELOC(cpu_online_map)); > - cpu_set(cpuid, RELOC(cpu_present_map)); > - } > #endif > next: > #ifdef CONFIG_SMP > @@ -1067,9 +1048,6 @@ next: > prom_printf("%x : preparing thread ... ", > interrupt_server[i]); > if (_naca->smt_state) { > - cpu_set(cpuid, RELOC(cpu_present_map)); > - cpu_set(cpuid, RELOC(cpu_possible_map)); > - _systemcfg->processorCount++; > prom_printf("available\n"); > } else { > prom_printf("not available\n"); > @@ -1099,11 +1077,7 @@ next: > pir & 0x3ff; > } > } > -/* cpu_set(i+1, cpu_online_map); */ > - cpu_set(i+1, RELOC(cpu_possible_map)); > - cpu_set(i+1, RELOC(cpu_present_map)); > } > - _systemcfg->processorCount *= 2; > } else { > prom_printf("Processor is not HMT capable\n"); > } > diff -puN arch/ppc64/kernel/smp.c~ppc64-rework-cpumap-setup arch/ppc64/kernel/smp.c > --- 2.6.9-rc1-bk2/arch/ppc64/kernel/smp.c~ppc64-rework-cpumap-setup 2004-08-26 17:21:29.000000000 -0500 > +++ 2.6.9-rc1-bk2-nathanl/arch/ppc64/kernel/smp.c 2004-08-26 17:48:56.000000000 -0500 > @@ -401,56 +401,11 @@ static inline int __devinit smp_startup_ > } > return 1; > } > - > -static inline void look_for_more_cpus(void) > -{ > - int num_addr_cell, num_size_cell, len, i, maxcpus; > - struct device_node *np; > - unsigned int *ireg; > - > - /* Find the property which will tell us about how many CPUs > - * we're allowed to have. */ > - if ((np = find_path_device("/rtas")) == NULL) { > - printk(KERN_ERR "Could not find /rtas in device tree!"); > - return; > - } > - num_addr_cell = prom_n_addr_cells(np); > - num_size_cell = prom_n_size_cells(np); > - > - ireg = (unsigned int *)get_property(np, "ibm,lrdr-capacity", &len); > - if (ireg == NULL) { > - /* FIXME: make sure not marked as lrdr_capable() */ > - return; > - } > - > - maxcpus = ireg[num_addr_cell + num_size_cell]; > - > - /* Double maxcpus for processors which have SMT capability */ > - if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) > - maxcpus *= 2; > - > - > - if (maxcpus > NR_CPUS) { > - printk(KERN_WARNING > - "Partition configured for %d cpus, " > - "operating system maximum is %d.\n", maxcpus, NR_CPUS); > - maxcpus = NR_CPUS; > - } else > - printk(KERN_INFO "Partition configured for %d cpus.\n", > - maxcpus); > - > - /* Make those cpus (which might appear later) possible too. */ > - for (i = 0; i < maxcpus; i++) > - cpu_set(i, cpu_possible_map); > -} > #else /* ... 
CONFIG_HOTPLUG_CPU */ > static inline int __devinit smp_startup_cpu(unsigned int lcpu) > { > return 1; > } > -static inline void look_for_more_cpus(void) > -{ > -} > #endif /* CONFIG_HOTPLUG_CPU */ > > static void smp_pSeries_kick_cpu(int nr) > @@ -837,8 +792,6 @@ void __init smp_prepare_cpus(unsigned in > */ > do_gtod.tb_orig_stamp = tb_last_stamp; > systemcfg->tb_orig_stamp = tb_last_stamp; > - > - look_for_more_cpus(); > #endif > > max_cpus = smp_ops->probe(); > @@ -851,19 +804,12 @@ void __init smp_prepare_cpus(unsigned in > for_each_cpu(cpu) > if (cpu != boot_cpuid) > smp_create_idle(cpu); > - > - for_each_cpu(cpu) { > - cpu_set(cpu, cpu_sibling_map[cpu]); > - if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) > - cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); > - } > } > > void __devinit smp_prepare_boot_cpu(void) > { > BUG_ON(smp_processor_id() != boot_cpuid); > > - /* cpu_possible is set up in prom.c */ > cpu_set(boot_cpuid, cpu_online_map); > > paca[boot_cpuid].__current = current; > > _ > > > > ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Sat Aug 28 01:46:21 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Fri, 27 Aug 2004 10:46:21 -0500 Subject: dynamic addition of OF property to /proc/device-tree In-Reply-To: <1093548841.32115.16.camel@sinatra.austin.ibm.com> References: <1093548841.32115.16.camel@sinatra.austin.ibm.com> Message-ID: <412F574D.1060004@austin.ibm.com> Sometime last year Nathan Lynch had written an implementation of dynamic property add/remove code. He'd be a great person to ask. We ended up deciding it was cleaner/nicer to use sysfs instead, you might consider this as well. -Joel John Rose wrote: > In /proc/device-tree, i need to add a property (as opposed to a node) > dynamically. Should I use the existing remove/add_node_dynamic > functions to remove and re-add the parent node of the new property, or > write up a new dynamic property add/remove? This is for the creation of > the linux,pci-domain property for newly added PHBs. Thoughts? > > Thanks- > John > > > > ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Sat Aug 28 02:04:49 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Fri, 27 Aug 2004 11:04:49 -0500 Subject: dynamic addition of OF property to /proc/device-tree In-Reply-To: <412F574D.1060004@austin.ibm.com> References: <1093548841.32115.16.camel@sinatra.austin.ibm.com> <412F574D.1060004@austin.ibm.com> Message-ID: <1093622689.16473.14.camel@biclops.private.network> On Fri, 2004-08-27 at 10:46, Joel Schopp wrote: > Sometime last year Nathan Lynch had written an implementation of dynamic > property add/remove code. Which was half-baked and terminated with extreme prejudice ;) I don't see a safe way to allow removal of node properties without adding a per-node lock and changing a lot of code. If we're concerned only with newly added PHB nodes, I think we can safely attach the new property right before the node is plugged into the device tree, before anything else can get a reference to the new PHB node. Nathan ** Sent via the linuxppc64-dev mail list. 
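
[Editorial aside] Nathan's point above amounts to chaining a struct property onto the new PHB's device_node while the node is still private to the code that allocated it, and only then linking the node into the tree. A rough kernel-side sketch of that ordering, using the property/device_node field names from this era's asm-ppc64/prom.h; the helper name and the final "publish" step are placeholders, not existing interfaces:

	static int phb_attach_domain_prop(struct device_node *phb, u32 domain)
	{
		struct property *pp;
		u32 *val;

		/* One allocation for the property header plus its value. */
		pp = kmalloc(sizeof(*pp) + sizeof(*val), GFP_KERNEL);
		if (!pp)
			return -ENOMEM;
		memset(pp, 0, sizeof(*pp));

		val = (u32 *)(pp + 1);
		*val = domain;

		pp->name = "linux,pci-domain";
		pp->length = sizeof(*val);
		pp->value = (unsigned char *)val;

		/* No locking is needed only because phb is not yet linked
		 * into the device tree, so nothing else can be walking its
		 * property list yet. */
		pp->next = phb->properties;
		phb->properties = pp;

		/* Only after this would the node be handed to the existing
		 * dynamic-add path (hypothetical publish step). */
		return 0;
	}

The whole argument rests on the ordering: the property list is only touched while the node is unreachable, which is exactly why no per-node lock is required.
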
See http://lists.linuxppc.org/ From johnrose at austin.ibm.com Sat Aug 28 02:12:45 2004 From: johnrose at austin.ibm.com (John Rose) Date: Fri, 27 Aug 2004 11:12:45 -0500 Subject: dynamic addition of OF property to /proc/device-tree In-Reply-To: <1093622689.16473.14.camel@biclops.private.network> References: <1093548841.32115.16.camel@sinatra.austin.ibm.com> <412F574D.1060004@austin.ibm.com> <1093622689.16473.14.camel@biclops.private.network> Message-ID: <1093623165.9389.5.camel@sinatra.austin.ibm.com> > If we're concerned only with newly added PHB nodes, I think we can > safely attach the new property right before the node is plugged into the > device tree, before anything else can get a reference to the new PHB > node. I was thinking the same thing, but it's a problem that our device tree addition happens in a separate step than the rest of the kernel work for adding a PHB. Seems like our two options are to 1) add the property in userspace before handing new nodes to kernel or 2) check explicitly for PHBs in the kernel dynamic node addition code. I don't like either of these much :) What's the worst that could happen if I remove the node entirely and add it back with the new property? :) Thanks- John ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From haveblue at us.ibm.com Sat Aug 28 02:43:47 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Fri, 27 Aug 2004 09:43:47 -0700 Subject: dynamic addition of OF property to /proc/device-tree In-Reply-To: <412F574D.1060004@austin.ibm.com> References: <1093548841.32115.16.camel@sinatra.austin.ibm.com> <412F574D.1060004@austin.ibm.com> Message-ID: <1093625027.2984.500.camel@nighthawk> On Fri, 2004-08-27 at 08:46, Joel Schopp wrote: > John Rose wrote: > > In /proc/device-tree, i need to add a property (as opposed to a node) > > dynamically. Should I use the existing remove/add_node_dynamic > > functions to remove and re-add the parent node of the new property, or > > write up a new dynamic property add/remove? This is for the creation of > > the linux,pci-domain property for newly added PHBs. Thoughts? > > > >Sometime last year Nathan Lynch had written an implementation of dynamic > property add/remove code. He'd be a great person to ask. > > We ended up deciding it was cleaner/nicer to use sysfs instead, you > might consider this as well. Is this a good time to talk Martin into updating his tools to lookup the pci domain somewhere other than /proc/device-tree? (not sure which Martin I'm referring to see: http://ozlabs.org/ppc64-patches/patch.pl?id=184) -- Dave ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From johnrose at austin.ibm.com Sat Aug 28 03:45:37 2004 From: johnrose at austin.ibm.com (John Rose) Date: Fri, 27 Aug 2004 12:45:37 -0500 Subject: dynamic addition of OF property to /proc/device-tree In-Reply-To: <1093625027.2984.500.camel@nighthawk> References: <1093548841.32115.16.camel@sinatra.austin.ibm.com> <412F574D.1060004@austin.ibm.com> <1093625027.2984.500.camel@nighthawk> Message-ID: <1093628737.9389.8.camel@sinatra.austin.ibm.com> On Fri, 2004-08-27 at 11:43, Dave Hansen wrote: > On Fri, 2004-08-27 at 08:46, Joel Schopp wrote: > > John Rose wrote: > > > In /proc/device-tree, i need to add a property (as opposed to a node) > > > dynamically. Should I use the existing remove/add_node_dynamic > > > functions to remove and re-add the parent node of the new property, or > > > write up a new dynamic property add/remove? 
This is for the creation of > > > the linux,pci-domain property for newly added PHBs. Thoughts? > > > > > >Sometime last year Nathan Lynch had written an implementation of dynamic > > property add/remove code. He'd be a great person to ask. > > > > We ended up deciding it was cleaner/nicer to use sysfs instead, you > > might consider this as well. > > Is this a good time to talk Martin into updating his tools to lookup the > pci domain somewhere other than /proc/device-tree? Yes, Martin, how about that? :) John ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From paulus at samba.org Sat Aug 28 09:50:44 2004 From: paulus at samba.org (Paul Mackerras) Date: Sat, 28 Aug 2004 09:50:44 +1000 Subject: dynamic addition of OF property to /proc/device-tree In-Reply-To: <1093628737.9389.8.camel@sinatra.austin.ibm.com> References: <1093548841.32115.16.camel@sinatra.austin.ibm.com> <412F574D.1060004@austin.ibm.com> <1093625027.2984.500.camel@nighthawk> <1093628737.9389.8.camel@sinatra.austin.ibm.com> Message-ID: <16687.51412.172055.583108@cargo.ozlabs.ibm.com> John Rose writes: > > Is this a good time to talk Martin into updating his tools to lookup the > > pci domain somewhere other than /proc/device-tree? > > Yes, Martin, how about that? :) What precisely do you mean? What are you suggesting he does instead of what? Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From haveblue at us.ibm.com Sat Aug 28 11:10:20 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Fri, 27 Aug 2004 18:10:20 -0700 Subject: dynamic addition of OF property to /proc/device-tree In-Reply-To: <16687.51412.172055.583108@cargo.ozlabs.ibm.com> References: <1093548841.32115.16.camel@sinatra.austin.ibm.com> <412F574D.1060004@austin.ibm.com> <1093625027.2984.500.camel@nighthawk> <1093628737.9389.8.camel@sinatra.austin.ibm.com> <16687.51412.172055.583108@cargo.ozlabs.ibm.com> Message-ID: <1093655420.26660.53.camel@nighthawk> On Fri, 2004-08-27 at 16:50, Paul Mackerras wrote: > John Rose writes: > > > > Is this a good time to talk Martin into updating his tools to lookup the > > > pci domain somewhere other than /proc/device-tree? > > > > Yes, Martin, how about that? :) > > What precisely do you mean? What are you suggesting he does instead > of what? It think it was established (from an earlier patch) that Martin's tools are currently the only user of the linux,pci-domain property in the OpenFirmware tree. From what I understand, this is how Martin's tool found out which PCI domain a particular adapter is in. So, John Rose noted that he couldn't create the necessary properties at runtime with /proc/ppc64/ofdt because it lacked the capacity "add a property (as opposed to a node) dynamically" and asked for suggestions. So, I thought that, instead of adding more functionality to the kernel to support exporting the linux,pci-domain (which someone was already trying to remove) in a hotplug situation, this might be an opportunity to modify the tools instead. The tools could, instead, use the driver model and sysfs to determine the PCI domain of a particular adapter. If that happened, not only would we be able to avoid addming more functionality to the OpenFirmware /proc code, but we'd also make John Rose happy because he could completely remove linux,pci-domain, like he tried here: http://ozlabs.org/ppc64-patches/patch.pl?id=184 I just wondered if it was feasible, and if this was a good time to update those tools. -- Dave ** Sent via the linuxppc64-dev mail list. 
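
[Editorial aside] Dave's suggestion boils down to deriving the domain from sysfs instead of from a linux,pci-domain property: each PCI device directory under /sys/bus/pci/devices is named domain:bus:slot.function, and on ppc64 it carries the devspec attribute (the Open Firmware path) that Paul mentions in his follow-up. A userland sketch of the reverse lookup a tool could do; the directory layout and attribute name are assumptions about the 2.6 sysfs of this era, not a statement about what Martin's tools actually do:

	/* Illustration only: given an OF path, scan the PCI devices in
	 * sysfs, match their devspec attribute, and report the domain
	 * taken from the sysfs name (domain:bus:slot.function). */
	#include <stdio.h>
	#include <string.h>
	#include <dirent.h>

	int pci_domain_for_of_path(const char *of_path)
	{
		DIR *d = opendir("/sys/bus/pci/devices");
		struct dirent *de;
		char path[512], spec[256];
		FILE *f;
		int domain = -1;

		if (!d)
			return -1;
		while ((de = readdir(d)) != NULL) {
			if (de->d_name[0] == '.')
				continue;
			snprintf(path, sizeof(path),
				 "/sys/bus/pci/devices/%s/devspec", de->d_name);
			f = fopen(path, "r");
			if (!f)
				continue;
			if (fgets(spec, sizeof(spec), f) &&
			    strncmp(spec, of_path, strlen(of_path)) == 0)
				sscanf(de->d_name, "%x", &domain);	/* leading hex field is the domain */
			fclose(f);
			if (domain >= 0)
				break;
		}
		closedir(d);
		return domain;
	}
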
See http://lists.linuxppc.org/ From paulus at samba.org Sat Aug 28 11:55:59 2004 From: paulus at samba.org (Paul Mackerras) Date: Sat, 28 Aug 2004 11:55:59 +1000 Subject: dynamic addition of OF property to /proc/device-tree In-Reply-To: <1093655420.26660.53.camel@nighthawk> References: <1093548841.32115.16.camel@sinatra.austin.ibm.com> <412F574D.1060004@austin.ibm.com> <1093625027.2984.500.camel@nighthawk> <1093628737.9389.8.camel@sinatra.austin.ibm.com> <16687.51412.172055.583108@cargo.ozlabs.ibm.com> <1093655420.26660.53.camel@nighthawk> Message-ID: <16687.58927.45498.510202@cargo.ozlabs.ibm.com> Dave Hansen writes: > It think it was established (from an earlier patch) that Martin's tools > are currently the only user of the linux,pci-domain property in the > OpenFirmware tree. From what I understand, this is how Martin's tool > found out which PCI domain a particular adapter is in. Yes, a particular adaptor as identified by an OF pathname. The linux,pci-domain property was added because without it, it wasn't possible to determine reliably the correspondence between OF pathnames and Linux device names. Now that we have the devspec entry in sysfs it's possible that we don't need the linux,pci-domain property. I'll talk to Martin about it on Monday. Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From benh at kernel.crashing.org Mon Aug 30 14:10:14 2004 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Mon, 30 Aug 2004 14:10:14 +1000 Subject: vDSO : Second shot & ready for glibc help Message-ID: <1093839013.2638.203.camel@gaston> Hi ! Here's a patch against Linus bk from last friday that implements the vDSO basic mecanism for ppc64 (for both 32 and 64 bits apps). Currently, there is no useful function exposed to userland, that will come next, only a pair of bogus ones are exposed (along with the signal trampoline, so at least you get the immediate benefit of getting that one out of the stack). The address of the vDSO is passed in an elf aux table entry I defined in include/asm-ppc64/elf.h At this point, I'm a bit lost in glibc code, I would appreciate your help getting a glibc patch (32 bits only at first would be fine for me to move forward) that allows glibc to detect that vDSO and link it in with applications. From that point, I'll start working on the fully userland implementation of gettimeofday and start working on the various cpu-optimized routines we want to put in there. This version, unlike the previously posted one, does not put the vDSO above the stack anymore, but rather down at +1Mb + random offset. The random offset thing is more a proof-of-concept thing at this point than anything else, and the +1Mb address was chosen so that the linker can later be tweaked to use "ba" instruction to get there if we want to, but none of this is burned in stone. We have been thinking about defining an optinal program header for apps to tell where they want the vDSO to be (in case apps like emulators need that space at 1Mb to be available for something else) or that they don't want one at all. Any comment of course is welcome, Regards, Ben. 
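
[Editorial aside] The only channel the mail above describes for telling a process where its vDSO landed is the new aux table entry, so any consumer (glibc or a test program) has to pick it up from the auxiliary vector the kernel places after the environment. A minimal 64-bit test program along those lines, assuming the AT_VDSO_BASE value of 23 from the elf.h hunk in the patch below; it is only an illustration of the lookup, not the requested glibc change:

	#include <elf.h>
	#include <stdio.h>

	#define AT_VDSO_BASE	23	/* from the patch; not a standard AT_* value */

	int main(int argc, char *argv[], char *envp[])
	{
		char **p = envp;
		Elf64_auxv_t *av;

		/* The auxv starts right after the NULL that terminates envp. */
		while (*p)
			p++;
		for (av = (Elf64_auxv_t *)(p + 1); av->a_type != AT_NULL; av++) {
			if (av->a_type == AT_VDSO_BASE) {
				printf("vDSO mapped at 0x%lx\n",
				       (unsigned long)av->a_un.a_val);
				return 0;
			}
		}
		printf("no vDSO aux entry\n");
		return 1;
	}

A 32-bit build would use Elf32_auxv_t instead; glibc itself already parses the auxv at startup and would read the same entry from there.
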
diff -urN linux-2.5/arch/ppc64/Makefile linux-vdso/arch/ppc64/Makefile --- linux-2.5/arch/ppc64/Makefile 2004-08-26 15:46:30.000000000 +1000 +++ linux-vdso/arch/ppc64/Makefile 2004-08-27 13:07:59.000000000 +1000 @@ -43,6 +43,8 @@ libs-y += arch/ppc64/lib/ core-y += arch/ppc64/kernel/ +core-y += arch/ppc64/kernel/vdso32/ +core-y += arch/ppc64/kernel/vdso64/ core-y += arch/ppc64/mm/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ drivers-$(CONFIG_OPROFILE) += arch/ppc64/oprofile/ diff -urN linux-2.5/arch/ppc64/kernel/Makefile linux-vdso/arch/ppc64/kernel/Makefile --- linux-2.5/arch/ppc64/kernel/Makefile 2004-08-26 15:46:30.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/Makefile 2004-08-27 13:07:59.000000000 +1000 @@ -11,7 +11,7 @@ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ - iommu.o sysfs.o vio.o + iommu.o sysfs.o vio.o vdso.o obj-$(CONFIG_PPC_OF) += of_device.o diff -urN linux-2.5/arch/ppc64/kernel/signal.c linux-vdso/arch/ppc64/kernel/signal.c --- linux-2.5/arch/ppc64/kernel/signal.c 2004-08-26 15:46:30.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/signal.c 2004-08-27 15:46:07.000000000 +1000 @@ -34,6 +34,7 @@ #include #include #include +#include #define DEBUG_SIG 0 @@ -412,10 +413,14 @@ goto badframe; /* Set up to return from userspace. */ - err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); - if (err) - goto badframe; - + if (vdso64_rt_sigtramp && current->thread.vdso_base) { + regs->link = current->thread.vdso_base + vdso64_rt_sigtramp; + } else { + err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); + if (err) + goto badframe; + regs->link = (unsigned long) &frame->tramp[0]; + } funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; /* Allocate a dummy caller frame for the signal handler. */ @@ -424,7 +429,6 @@ /* Set up "regs" so we "return" to the signal handler. 
*/ err |= get_user(regs->nip, &funct_desc_ptr->entry); - regs->link = (unsigned long) &frame->tramp[0]; regs->gpr[1] = newsp; err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); regs->gpr[3] = signr; diff -urN linux-2.5/arch/ppc64/kernel/signal32.c linux-vdso/arch/ppc64/kernel/signal32.c --- linux-2.5/arch/ppc64/kernel/signal32.c 2004-08-26 15:46:30.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/signal32.c 2004-08-27 15:46:07.000000000 +1000 @@ -30,6 +30,7 @@ #include #include #include +#include #define DEBUG_SIG 0 @@ -677,18 +678,24 @@ /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; - if (save_user_regs(regs, frame, __NR_rt_sigreturn)) - goto badframe; - if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) goto badframe; + + if (vdso32_rt_sigtramp && current->thread.vdso_base) { + if (save_user_regs(regs, frame, 0)) + goto badframe; + regs->link = current->thread.vdso_base + vdso32_rt_sigtramp; + } else { + if (save_user_regs(regs, frame, __NR_rt_sigreturn)) + goto badframe; + regs->link = (unsigned long) frame->tramp; + } regs->gpr[1] = (unsigned long) newsp; regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) &rt_sf->info; regs->gpr[5] = (unsigned long) &rt_sf->uc; regs->gpr[6] = (unsigned long) rt_sf; regs->nip = (unsigned long) ka->sa.sa_handler; - regs->link = (unsigned long) frame->tramp; regs->trap = 0; regs->result = 0; @@ -842,8 +849,15 @@ || __put_user(sig, &sc->signal)) goto badframe; - if (save_user_regs(regs, &frame->mctx, __NR_sigreturn)) - goto badframe; + if (vdso32_sigtramp && current->thread.vdso_base) { + if (save_user_regs(regs, &frame->mctx, 0)) + goto badframe; + regs->link = current->thread.vdso_base + vdso32_sigtramp; + } else { + if (save_user_regs(regs, &frame->mctx, __NR_sigreturn)) + goto badframe; + regs->link = (unsigned long) frame->mctx.tramp; + } if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) goto badframe; @@ -851,7 +865,6 @@ regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) sc; regs->nip = (unsigned long) ka->sa.sa_handler; - regs->link = (unsigned long) frame->mctx.tramp; regs->trap = 0; regs->result = 0; diff -urN linux-2.5/arch/ppc64/kernel/vdso.c linux-vdso/arch/ppc64/kernel/vdso.c --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso.c 2004-08-27 15:46:07.000000000 +1000 @@ -0,0 +1,412 @@ + +/* + * linux/arch/ppc64/kernel/vdso.c + * + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) 
+#endif + + +/* + * The vDSOs themselves are here + */ +extern char vdso64_start, vdso64_end; +extern char vdso32_start, vdso32_end; + +static void *vdso64_kbase = &vdso64_start; +static void *vdso32_kbase = &vdso32_start; + +unsigned int vdso64_pages; +unsigned int vdso32_pages; + +/* Signal trampolines user addresses */ + +unsigned long vdso64_sigtramp; +unsigned long vdso64_rt_sigtramp; +unsigned long vdso32_sigtramp; +unsigned long vdso32_rt_sigtramp; + +/* + * Some infos carried around for each of them during parsing at + * boot time. + */ +struct lib32_elfinfo +{ + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ + unsigned long text; /* offset of .text section in .so */ +}; + +struct lib64_elfinfo +{ + Elf64_Ehdr *hdr; + Elf64_Sym *dynsym; + unsigned long dynsymsize; + char *dynstr; + unsigned long text; +}; + + +#ifdef __DEBUG +static void dump_one_vdso_page(struct page *pg, struct page *upg) +{ + printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), + page_count(pg), + pg->flags); + if (upg/* && pg != upg*/) { + printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT), + page_count(upg), + upg->flags); + } + printk("\n"); +} + +static void dump_vdso_pages(struct vm_area_struct * vma) +{ + int i; + + if (!vma || test_thread_flag(TIF_32BIT)) { + printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); + for (i=0; ivm_mm) ? + follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } + if (!vma || !test_thread_flag(TIF_32BIT)) { + printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); + for (i=0; ivm_mm) ? + follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } +} +#endif /* DEBUG */ + +/* + * Keep a dummy vma_close for now, it will prevent VMA merging. + */ +static void vdso_vma_close(struct vm_area_struct * vma) +{ +} + +/* + * Our nopage() function, maps in the actual vDSO kernel pages, they will + * be mapped read-only by do_no_page(), and eventually COW'ed, either + * right away for an initial write access, or by do_wp_page(). + */ +static struct page * vdso_vma_nopage(struct vm_area_struct * vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *pg; + void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase; + + DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n", + current->comm, address, offset); + + if (address < vma->vm_start || address > vma->vm_end) + return NOPAGE_SIGBUS; + + pg = virt_to_page(vbase + offset); + get_page(pg); + DBG(" ->page count: %d\n", page_count(pg)); + + return pg; +} + +static struct vm_operations_struct vdso_vmops = { + .close = vdso_vma_close, + .nopage = vdso_vma_nopage, +}; + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long vdso_pages = test_thread_flag(TIF_32BIT) ? 
+ vdso32_pages : vdso64_pages; + + /* vDSO has a problem and was disabled, just don't "enable" it for the + * process + */ + if (vdso_pages == 0) { + current->thread.vdso_base = 0; + return 0; + } + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma == NULL) + return -ENOMEM; + if (security_vm_enough_memory(vdso_pages)) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + memset(vma, 0, sizeof(*vma)); + + /* + * pick a base address for the vDSO in process space. We have a default + * base of 1Mb on which we had a random offset up to 1Mb. + * XXX: Add possibility for a program header to specify that location + */ + current->thread.vdso_base = 0x00100000 + + ((unsigned long)vma & 0x000ff000); + + vma->vm_mm = mm; + vma->vm_start = current->thread.vdso_base; + vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT); + + /* + * our vma flags don't have VM_WRITE so by default, the process isn't allowed + * to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW on those + * pages but it's then your responsibility to never do that on the "data" page + * of the vDSO or you'll stop getting kernel updates and your nice userland + * gettimeofday will be totally dead. It's fine to use that for setting + * breakpoints in the vDSO code pages though + */ + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; + vma->vm_ops = &vdso_vmops; + + down_write(&mm->mmap_sem); + insert_vm_struct(mm, vma); + mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + up_write(&mm->mmap_sem); + + return 0; +} + +static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + *size = 0; + return NULL; +} + +static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf64_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + if (size) + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname) +{ + unsigned int i; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + if (strcmp(symname, lib->dynstr + lib->dynsym[i].st_name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname) +{ + unsigned int i; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + if (strcmp(symname, lib->dynstr + lib->dynsym[i].st_name) == 0) + return 
&lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname) +{ + Elf32_Sym *sym = find_symbol32(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO32: function %s not found !\n", symname); + return 0; + } + return sym->st_value; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname) +{ + Elf64_Sym *sym = find_symbol64(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO64: function %s not found !\n", symname); + return 0; + } + return sym->st_value; +} + + +static __init int vdso_do_fixups(void) +{ + struct lib32_elfinfo v32; + struct lib64_elfinfo v64; + void *sect; + + v32.hdr = vdso32_kbase; + v64.hdr = vdso64_kbase; + + /* + * Locate symbol tables & text section + */ + + v32.dynsym = find_section32(v32.hdr, ".dynsym", &v32.dynsymsize); + v32.dynstr = find_section32(v32.hdr, ".dynstr", NULL); + if (v32.dynsym == NULL || v32.dynstr == NULL) { + printk(KERN_ERR "vDSO32: a required symbol section was not found\n"); + return -1; + } + sect = find_section32(v32.hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO32: the .text section was not found\n"); + return -1; + } + v32.text = sect - vdso32_kbase; + + v64.dynsym = find_section64(v64.hdr, ".dynsym", &v64.dynsymsize); + v64.dynstr = find_section64(v64.hdr, ".dynstr", NULL); + if (v64.dynsym == NULL || v64.dynstr == NULL) { + printk(KERN_ERR "vDSO64: a required symbol section was not found\n"); + return -1; + } + sect = find_section64(v64.hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO64: the .text section was not found\n"); + return -1; + } + v64.text = sect - vdso64_kbase; + + /* + * Find signal trampolines + */ + + vdso64_sigtramp = find_function64(&v64, "_v_sigtramp64"); + vdso64_rt_sigtramp = find_function64(&v64, "_v_sigtramp_rt64"); + vdso32_sigtramp = find_function32(&v32, "_v_sigtramp32"); + vdso32_rt_sigtramp = find_function32(&v32, "_v_sigtramp_rt32"); + + return 0; +} + +void __init vdso_init(void) +{ + int i; + + vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; + vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; + + DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n", + vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages); + + /* Do necessary fixups of vDSO symbols */ + if (vdso_do_fixups()) { + printk(KERN_ERR "vDSO setup failure, not enabled !\n"); + /* XXX should free pages here ? 
*/ + vdso64_pages = vdso32_pages = 0; + return; + } + + /* Make sure pages are in the correct state */ + for (i = 0; i < vdso64_pages; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + } + for (i = 0; i < vdso32_pages; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + } +} diff -urN linux-2.5/arch/ppc64/kernel/vdso32/Makefile linux-vdso/arch/ppc64/kernel/vdso32/Makefile --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso32/Makefile 2004-08-27 13:07:59.000000000 +1000 @@ -0,0 +1,43 @@ +# Choose compiler + +CROSS32_COMPILE ?= + +CROSS32CC := $(CROSS32_COMPILE)gcc +CROSS32AS := $(CROSS32_COMPILE)as + +# List of files in the vdso, has to be asm only for now + +src-vdso32 = sigtramp.S testfunc.S + +# Build rules + +obj-vdso32 := $(addsuffix .o, $(basename $(src-vdso32))) +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) +src-vdso32 := $(addprefix $(src)/, $(src-vdso32)) + +VDSO32_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib +VDSO32_CFLAGS += -Wl,-soname=linux-vdso32.so.1 +VDSO32_AFLAGS := -D__ASSEMBLY__ -s + +obj-y += vdso32_wrapper.o + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) + $(call if_changed,vdso32ld) + +# assembly rules for the .S files +# This is probably wrong with split src & obj trees +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CROSS32CC) -Wp,-MD,$(depfile) $(VDSO32_CFLAGS) \ + -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CROSS32CC) -Wp,-MD,$(depfile) $(VDSO32_AFLAGS) -c -o $@ $^ + +targets += vdso32.so diff -urN linux-2.5/arch/ppc64/kernel/vdso32/sigtramp.S linux-vdso/arch/ppc64/kernel/vdso32/sigtramp.S --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso32/sigtramp.S 2004-08-27 13:07:59.000000000 +1000 @@ -0,0 +1,15 @@ +#include +#include +#include +#include + + .globl _v_sigtramp32 +_v_sigtramp32: + li r0,__NR_sigreturn + sc + + .globl _v_sigtramp_rt32 +_v_sigtramp_rt32: + li r0,__NR_rt_sigreturn + sc + diff -urN linux-2.5/arch/ppc64/kernel/vdso32/testfunc.S linux-vdso/arch/ppc64/kernel/vdso32/testfunc.S --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso32/testfunc.S 2004-08-27 15:46:07.000000000 +1000 @@ -0,0 +1,12 @@ +#include +#include +#include + + + .globl __v_myfunc_1 +__v_myfunc_1: + blr + + .globl __v_myfunc_2 +__v_myfunc_2: + blr diff -urN linux-2.5/arch/ppc64/kernel/vdso32/vdso32.lds linux-vdso/arch/ppc64/kernel/vdso32/vdso32.lds --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso32/vdso32.lds 2004-08-27 13:07:59.000000000 +1000 @@ -0,0 +1,98 @@ +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", + "elf32-powerpc") +OUTPUT_ARCH(powerpc:common) +ENTRY(_start) + +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + + . 
= 0 + SIZEOF_HEADERS; + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + } =0 + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) } + .eh_frame : { KEEP (*(.eh_frame)) } + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + .dynamic : { *(.dynamic) } + + /* Stabs debugging sections are here too, away from the + * data page. Not much in there at the moment + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /* Adjust the address for the data segment. On the vdso, we need it to + * be page aligned after the text. The data segment contains ONLY the + * .data section here, which is special in the case of the vdso as it's + * really read only and is kernel updated. The got stays there too + */ + . 
= ALIGN (0x1000); + + .data : + { + *(.data .data.* .gnu.linkonce.d.*) + } + .got : { *(.got.plt) *(.got) } + _edata = .; + PROVIDE (edata = .); + _end = .; + __end = .; + PROVIDE (end = .); + + /DISCARD/ : { *(.note.GNU-stack) } + /* gas insist on generating these, bin them in here, they should be + * empty anyways + */ + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} diff -urN linux-2.5/arch/ppc64/kernel/vdso32/vdso32_wrapper.S linux-vdso/arch/ppc64/kernel/vdso32/vdso32_wrapper.S --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso32/vdso32_wrapper.S 2004-08-27 13:07:59.000000000 +1000 @@ -0,0 +1,12 @@ +#include + + .section ".data" + + .globl vdso32_start, vdso32_end + .balign 4096 +vdso32_start: + .incbin "arch/ppc64/kernel/vdso32/vdso32.so" + .balign 4096 +vdso32_end: + + .previous diff -urN linux-2.5/arch/ppc64/kernel/vdso64/Makefile linux-vdso/arch/ppc64/kernel/vdso64/Makefile --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso64/Makefile 2004-08-27 13:07:59.000000000 +1000 @@ -0,0 +1,36 @@ +# List of files in the vdso, has to be asm only for now + +src-vdso64 = sigtramp.S testfunc.S + +# Build rules + +obj-vdso64 := $(addsuffix .o, $(basename $(src-vdso64))) +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) +src-vdso64 := $(addprefix $(src)/, $(src-vdso64)) + +VDSO64_CFLAGS := -shared -s -fno-common -Iinclude -fno-builtin -nostdlib +VDSO64_CFLAGS += -Wl,-soname=linux-vdso64.so.1 +VDSO64_AFLAGS := -D__ASSEMBLY__ -s + +obj-y += vdso64_wrapper.o + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# assembly rules for the .S files +# This is probably wrong with split src & obj trees +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) -Wp,-MD,$(depfile) $(VDSO64_CFLAGS) \ + -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) -Wp,-MD,$(depfile) $(VDSO64_AFLAGS) -c -o $@ $^ + +targets += vdso64.so diff -urN linux-2.5/arch/ppc64/kernel/vdso64/sigtramp.S linux-vdso/arch/ppc64/kernel/vdso64/sigtramp.S --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso64/sigtramp.S 2004-08-27 13:07:59.000000000 +1000 @@ -0,0 +1,17 @@ +#include +#include +#include +#include + + .globl _v_sigtramp64 +_v_sigtramp64: + addi r1, r1, __SIGNAL_FRAMESIZE + li r0,__NR_sigreturn + sc + + .globl _v_sigtramp_rt64 +_v_sigtramp_rt64: + addi r1, r1, __SIGNAL_FRAMESIZE + li r0,__NR_rt_sigreturn + sc + diff -urN linux-2.5/arch/ppc64/kernel/vdso64/testfunc.S linux-vdso/arch/ppc64/kernel/vdso64/testfunc.S --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso64/testfunc.S 2004-08-27 15:46:07.000000000 +1000 @@ -0,0 +1,12 @@ +#include +#include +#include + + + .globl __v_myfunc_1 +__v_myfunc_1: + blr + + .globl __v_myfunc_2 +__v_myfunc_2: + blr diff -urN linux-2.5/arch/ppc64/kernel/vdso64/vdso64.lds linux-vdso/arch/ppc64/kernel/vdso64/vdso64.lds --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso64/vdso64.lds 2004-08-27 13:07:59.000000000 +1000 @@ -0,0 +1,92 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", + "elf64-powerpc") +OUTPUT_ARCH(powerpc:common64) +ENTRY(_start) + +SECTIONS +{ + /* Read-only 
sections, merged into text segment: */ + . = 0 + SIZEOF_HEADERS; + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + *(.sfpr .glink) + } =0x60000000 + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) } + .eh_frame : { KEEP (*(.eh_frame)) } + .gcc_except_table : { *(.gcc_except_table) } + .dynamic : { *(.dynamic) } + + /* Stabs debugging sections are here too, away from the + * data page. Not much in there at the moment + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /* Adjust the address for the data segment. On the vdso, we need it to + * be page aligned after the text. The data segment contains ONLY the + * .data section here, which is special in the case of the vdso as it's + * really read only and is kernel updated. The got stays there too + */ + . 
= ALIGN (0x1000); + + .data : + { + *(.data .data.* .gnu.linkonce.d.*) + } + .got ALIGN(8) : { *(.got .toc) } + _edata = .; + PROVIDE (edata = .); + _end = .; + PROVIDE (end = .); + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} diff -urN linux-2.5/arch/ppc64/kernel/vdso64/vdso64_wrapper.S linux-vdso/arch/ppc64/kernel/vdso64/vdso64_wrapper.S --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/arch/ppc64/kernel/vdso64/vdso64_wrapper.S 2004-08-27 13:07:59.000000000 +1000 @@ -0,0 +1,12 @@ +#include + + .section ".data" + + .globl vdso64_start, vdso64_end + .balign 4096 +vdso64_start: + .incbin "arch/ppc64/kernel/vdso64/vdso64.so" + .balign 4096 +vdso64_end: + + .previous diff -urN linux-2.5/arch/ppc64/mm/init.c linux-vdso/arch/ppc64/mm/init.c --- linux-2.5/arch/ppc64/mm/init.c 2004-08-26 15:46:30.000000000 +1000 +++ linux-vdso/arch/ppc64/mm/init.c 2004-08-27 13:07:59.000000000 +1000 @@ -61,6 +61,7 @@ #include #include #include +#include struct mmu_context_queue_t mmu_context_queue; @@ -706,6 +707,8 @@ #ifdef CONFIG_PPC_ISERIES iommu_vio_init(); #endif + /* Initialize the vDSO */ + vdso_init(); } /* diff -urN linux-2.5/fs/binfmt_elf.c linux-vdso/fs/binfmt_elf.c --- linux-2.5/fs/binfmt_elf.c 2004-08-26 15:46:35.000000000 +1000 +++ linux-vdso/fs/binfmt_elf.c 2004-08-27 13:08:01.000000000 +1000 @@ -715,6 +715,14 @@ goto out_free_dentry; } +#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES + retval = arch_setup_additional_pages(bprm, executable_stack); + if (retval < 0) { + send_sig(SIGKILL, current, 0); + goto out_free_dentry; + } +#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ + current->mm->start_stack = bprm->p; /* Now we do a little grungy work by mmaping the ELF image into diff -urN linux-2.5/include/asm-ppc64/a.out.h linux-vdso/include/asm-ppc64/a.out.h --- linux-2.5/include/asm-ppc64/a.out.h 2004-08-10 10:22:36.000000000 +1000 +++ linux-vdso/include/asm-ppc64/a.out.h 2004-08-27 15:46:34.000000000 +1000 @@ -2,6 +2,7 @@ #define __PPC64_A_OUT_H__ #include +#include /* * c 2001 PPC 64 Team, IBM Corp @@ -30,14 +31,11 @@ #ifdef __KERNEL__ -#define STACK_TOP_USER64 (TASK_SIZE_USER64) +#define STACK_TOP_USER64 TASK_SIZE_USER64 +#define STACK_TOP_USER32 TASK_SIZE_USER32 -/* Give 32-bit user space a full 4G address space to live in. */ -#define STACK_TOP_USER32 (TASK_SIZE_USER32) - -#define STACK_TOP ((test_thread_flag(TIF_32BIT) || \ - (ppcdebugset(PPCDBG_BINFMT_32ADDR))) ? \ - STACK_TOP_USER32 : STACK_TOP_USER64) +#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ + STACK_TOP_USER32 : STACK_TOP_USER64) #endif /* __KERNEL__ */ diff -urN linux-2.5/include/asm-ppc64/elf.h linux-vdso/include/asm-ppc64/elf.h --- linux-2.5/include/asm-ppc64/elf.h 2004-08-10 10:22:37.000000000 +1000 +++ linux-vdso/include/asm-ppc64/elf.h 2004-08-27 15:46:34.000000000 +1000 @@ -237,6 +237,8 @@ #define AT_UCACHEBSIZE 21 /* A special ignored type value for PPC, for glibc compatibility. 
*/ #define AT_IGNOREPPC 22 +/* The vDSO location */ +#define AT_VDSO_BASE 23 extern int dcache_bsize; extern int icache_bsize; @@ -260,6 +262,8 @@ NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \ NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \ NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \ + /* vDSO base */ \ + NEW_AUX_ENT(AT_VDSO_BASE, current->thread.vdso_base); \ } while (0) /* PowerPC64 relocations defined by the ABIs */ diff -urN linux-2.5/include/asm-ppc64/processor.h linux-vdso/include/asm-ppc64/processor.h --- linux-2.5/include/asm-ppc64/processor.h 2004-08-26 15:46:40.000000000 +1000 +++ linux-vdso/include/asm-ppc64/processor.h 2004-08-27 15:46:34.000000000 +1000 @@ -526,8 +526,8 @@ /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(STACK_TOP_USER32 / 4)) -#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(STACK_TOP_USER64 / 4)) +#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) +#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4)) #define TASK_UNMAPPED_BASE ((test_thread_flag(TIF_32BIT)||(ppcdebugset(PPCDBG_BINFMT_32ADDR))) ? \ TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 ) @@ -543,7 +543,8 @@ double fpr[32]; /* Complete floating point set */ unsigned long fpscr; /* Floating point status (plus pad) */ unsigned long fpexc_mode; /* Floating-point exception mode */ - unsigned long pad[3]; /* was saved_msr, saved_softe */ + unsigned long pad[2]; /* was saved_msr, saved_softe */ + unsigned long vdso_base; /* base of the vDSO library */ #ifdef CONFIG_ALTIVEC /* Complete AltiVec register set */ vector128 vr[32] __attribute((aligned(16))); diff -urN linux-2.5/include/asm-ppc64/vdso.h linux-vdso/include/asm-ppc64/vdso.h --- /dev/null 2004-07-28 14:31:22.000000000 +1000 +++ linux-vdso/include/asm-ppc64/vdso.h 2004-08-27 15:46:34.000000000 +1000 @@ -0,0 +1,23 @@ +#ifndef __PPC64_VDSO_H__ +#define __PPC64_VDSO_H__ + +#ifdef __KERNEL__ + +extern unsigned int vdso64_pages; +extern unsigned int vdso32_pages; + +/* Offsets relative to thread->vdso_base */ +extern unsigned long vdso64_sigtramp; +extern unsigned long vdso64_rt_sigtramp; +extern unsigned long vdso32_sigtramp; +extern unsigned long vdso32_rt_sigtramp; + +extern void vdso_init(void); + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); + +#endif /* __KERNEL__ */ + +#endif /* __PPC64_VDSO_H__ */ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Mon Aug 30 15:44:32 2004 From: anton at samba.org (Anton Blanchard) Date: Mon, 30 Aug 2004 15:44:32 +1000 Subject: [patch 1/2] clean up tce build functions In-Reply-To: References: Message-ID: <20040830054432.GD26072@krispykreme> Hi Dave, > The tce_build* functions currently take a virtual address as an > unsigned long. Each implementation also does its own page alignment > of that address. > > Create a global tce_build() for all platforms, do the page alignment > there. Also, change the type of uaddr to be an 'unsigned char*' so > that we can do arithmetic on it, but it is still a pointer to > differentiate it from the 'unsigned long' usually used for physical > addresses. > > Create sglist_vaddr() and sglist_size_pages() to hide some of the > nasty casting that goes on. Note that this will perform the > sglist_vaddr() operation an extra time. 
We could get around it by > computing it once and passing it into sglist_size_pages() but, unless > this is a hot path, it's probably not worth it. > > The patch does add more lines of code than it removes, but that's due > to adding some helper functions. The common path has a bunch of > casts, masks, and shifts removed. Olof has some stuff pending in this area (to support the multiple TCE entry interface in the hypervisor). Id prefer to hold off until that is merged. Anton > Tested on pSeries, but not iSeries or pmac. > > Signed-off-by: Dave Hansen > --- > > memhotplug-dave/arch/ppc64/kernel/iSeries_iommu.c | 2 - > memhotplug-dave/arch/ppc64/kernel/iommu.c | 30 +++++++++++++++------- > memhotplug-dave/arch/ppc64/kernel/pSeries_iommu.c | 2 - > memhotplug-dave/arch/ppc64/kernel/pSeries_lpar.c | 4 +- > memhotplug-dave/arch/ppc64/kernel/pmac_iommu.c | 2 - > memhotplug-dave/include/asm-ppc64/machdep.h | 10 ++++++- > 6 files changed, 35 insertions(+), 15 deletions(-) > > diff -puN arch/ppc64/kernel/iommu.c~A7-ppc64-tce-build-vaddr arch/ppc64/kernel/iommu.c > --- memhotplug/arch/ppc64/kernel/iommu.c~A7-ppc64-tce-build-vaddr 2004-08-23 11:25:19.000000000 -0700 > +++ memhotplug-dave/arch/ppc64/kernel/iommu.c 2004-08-23 11:25:19.000000000 -0700 > @@ -159,8 +159,7 @@ static dma_addr_t iommu_alloc(struct iom > ret = entry << PAGE_SHIFT; /* Set the return dma address */ > > /* Put the TCEs in the HW table */ > - ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK, > - direction); > + tce_build(tbl, entry, npages, page, direction); > > > /* Flush/invalidate TLB caches if necessary */ > @@ -225,6 +224,21 @@ static void iommu_free(struct iommu_tabl > spin_unlock_irqrestore(&(tbl->it_lock), flags); > } > > +static inline void *sglist_vaddr(struct scatterlist *s) > +{ > + return (void *)((unsigned long)page_address(s->page) + s->offset); > +} > + > +static inline unsigned long sglist_size_pages(struct scatterlist *s) > +{ > + unsigned long vaddr, len; > + > + vaddr = (unsigned long)sglist_vaddr(s); > + len = PAGE_ALIGN(vaddr + s->length) - (vaddr & PAGE_MASK); > + > + return len >> PAGE_SHIFT; > +} > + > int iommu_map_sg(struct device *dev, struct iommu_table *tbl, > struct scatterlist *sglist, int nelems, > enum dma_data_direction direction) > @@ -252,7 +266,7 @@ int iommu_map_sg(struct device *dev, str > spin_lock_irqsave(&(tbl->it_lock), flags); > > for (s = outs; nelems; nelems--, s++) { > - unsigned long vaddr, npages, entry, slen; > + unsigned long npages, entry, slen; > > slen = s->length; > /* Sanity check */ > @@ -261,9 +275,7 @@ int iommu_map_sg(struct device *dev, str > continue; > } > /* Allocate iommu entries for that segment */ > - vaddr = (unsigned long)page_address(s->page) + s->offset; > - npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK); > - npages >>= PAGE_SHIFT; > + npages = sglist_size_pages(s); > entry = iommu_range_alloc(tbl, npages, &handle); > > DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); > @@ -271,8 +283,8 @@ int iommu_map_sg(struct device *dev, str > /* Handle failure */ > if (unlikely(entry == DMA_ERROR_CODE)) { > if (printk_ratelimit()) > - printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx" > - " npages %lx\n", tbl, vaddr, npages); > + printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %p" > + " npages %lx\n", tbl, sglist_vaddr(s), npages); > goto failure; > } > > @@ -285,7 +297,7 @@ int iommu_map_sg(struct device *dev, str > npages, entry, dma_addr); > > /* Insert into HW table */ > - ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, 
direction); > + tce_build(tbl, entry, npages, sglist_vaddr(s), direction); > > /* If we are in an open segment, try merging */ > if (segstart != s) { > diff -puN arch/ppc64/kernel/iSeries_iommu.c~A7-ppc64-tce-build-vaddr arch/ppc64/kernel/iSeries_iommu.c > --- memhotplug/arch/ppc64/kernel/iSeries_iommu.c~A7-ppc64-tce-build-vaddr 2004-08-23 11:25:19.000000000 -0700 > +++ memhotplug-dave/arch/ppc64/kernel/iSeries_iommu.c 2004-08-23 11:25:19.000000000 -0700 > @@ -54,7 +54,7 @@ extern struct list_head iSeries_Global_D > > > static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, > - unsigned long uaddr, enum dma_data_direction direction) > + unsigned char *uaddr, enum dma_data_direction direction) > { > u64 rc; > union tce_entry tce; > diff -puN arch/ppc64/kernel/pSeries_iommu.c~A7-ppc64-tce-build-vaddr arch/ppc64/kernel/pSeries_iommu.c > --- memhotplug/arch/ppc64/kernel/pSeries_iommu.c~A7-ppc64-tce-build-vaddr 2004-08-23 11:25:19.000000000 -0700 > +++ memhotplug-dave/arch/ppc64/kernel/pSeries_iommu.c 2004-08-23 11:25:19.000000000 -0700 > @@ -46,7 +46,7 @@ > > > static void tce_build_pSeries(struct iommu_table *tbl, long index, > - long npages, unsigned long uaddr, > + long npages, unsigned char *uaddr, > enum dma_data_direction direction) > { > union tce_entry t; > diff -puN arch/ppc64/kernel/pmac_iommu.c~A7-ppc64-tce-build-vaddr arch/ppc64/kernel/pmac_iommu.c > --- memhotplug/arch/ppc64/kernel/pmac_iommu.c~A7-ppc64-tce-build-vaddr 2004-08-23 11:25:19.000000000 -0700 > +++ memhotplug-dave/arch/ppc64/kernel/pmac_iommu.c 2004-08-23 11:25:19.000000000 -0700 > @@ -141,7 +141,7 @@ static void dart_flush(struct iommu_tabl > } > > static void dart_build_pmac(struct iommu_table *tbl, long index, > - long npages, unsigned long uaddr, > + long npages, unsigned char *uaddr, > enum dma_data_direction direction) > { > unsigned int *dp; > diff -puN arch/ppc64/kernel/pSeries_lpar.c~A7-ppc64-tce-build-vaddr arch/ppc64/kernel/pSeries_lpar.c > --- memhotplug/arch/ppc64/kernel/pSeries_lpar.c~A7-ppc64-tce-build-vaddr 2004-08-23 11:25:19.000000000 -0700 > +++ memhotplug-dave/arch/ppc64/kernel/pSeries_lpar.c 2004-08-23 11:25:19.000000000 -0700 > @@ -131,8 +131,8 @@ long plpar_put_term_char(unsigned long t > } > > static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, > - long npages, unsigned long uaddr, > - enum dma_data_direction direction) > + long npages, unsigned char *uaddr, > + enum dma_data_direction direction) > { > u64 rc; > union tce_entry tce; > diff -puN include/asm-ppc64/machdep.h~A7-ppc64-tce-build-vaddr include/asm-ppc64/machdep.h > --- memhotplug/include/asm-ppc64/machdep.h~A7-ppc64-tce-build-vaddr 2004-08-23 11:25:19.000000000 -0700 > +++ memhotplug-dave/include/asm-ppc64/machdep.h 2004-08-23 11:25:19.000000000 -0700 > @@ -60,7 +60,7 @@ struct machdep_calls { > void (*tce_build)(struct iommu_table * tbl, > long index, > long npages, > - unsigned long uaddr, > + unsigned char *uaddr, > enum dma_data_direction direction); > void (*tce_free)(struct iommu_table *tbl, > long index, > @@ -132,6 +132,14 @@ void ppc64_attention_msg(unsigned int sr > /* Print a dump progress message. 
*/ > void ppc64_dump_msg(unsigned int src, const char *msg); > > +static inline void tce_build(struct iommu_table * tbl, long index, long npages, > + void *uaddr, enum dma_data_direction direction) > +{ > + unsigned char *page_addr = (unsigned char *)((unsigned long)uaddr & PAGE_MASK); > + > + ppc_md.tce_build(tbl, index, npages, page_addr, direction); > +} > + > static inline void log_error(char *buf, unsigned int err_type, int fatal) > { > if (ppc_md.log_error) > diff -L A8-ppc64-pfn_to_kaddr -puN /dev/null /dev/null > _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/
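For reference, below is a minimal, self-contained illustration of the arithmetic the patch above centralises: sglist_size_pages() counts how many pages a scatterlist segment spans, and the tce_build() wrapper masks the virtual address down to a page boundary before calling ppc_md.tce_build(). This is a userspace sketch only, not kernel code; PAGE_SHIFT = 12 (4 KB pages) and the buffer_pages() helper are local stand-ins for the kernel macros and helpers of the same purpose.

/*
 * Standalone sketch of the page-count and page-alignment arithmetic
 * performed by sglist_size_pages() and the tce_build() wrapper in the
 * patch quoted above.  Illustrative userspace code; the macros below
 * mimic the kernel ones under an assumed 4 KB page size.
 */
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & PAGE_MASK)

/* Number of pages the buffer [vaddr, vaddr + len) touches. */
static unsigned long buffer_pages(unsigned long vaddr, unsigned long len)
{
        return (PAGE_ALIGN(vaddr + len) - (vaddr & PAGE_MASK)) >> PAGE_SHIFT;
}

int main(void)
{
        /* A 100-byte buffer straddling a page boundary needs 2 TCEs. */
        unsigned long vaddr = 0x10000FD0UL;

        printf("pages = %lu\n", buffer_pages(vaddr, 100));        /* prints 2 */
        /* tce_build() hands the page-aligned address to ppc_md.tce_build() */
        printf("aligned addr = %#lx\n", vaddr & PAGE_MASK);       /* 0x10000000 */
        return 0;
}

The point of the helpers is that this align-and-shift logic now lives in one place instead of being repeated (with casts) in every platform's tce_build_* implementation.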