From sfr at canb.auug.org.au Sat Oct 1 00:05:16 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 00:05:16 +1000 Subject: [PATCH 7/9] ppc64: simplify the build a little In-Reply-To: <20050930233602.138b6e27.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> Message-ID: <20051001000516.1d444d51.sfr@canb.auug.org.au> This adds arch/powerpc/kernel/ to core-y in arch/ppc64/Makefile so that we don't have to put in a special line in arch/ppc64/kernel/Makefile for each file we merge. We should be able to use a similar technique for other directories as we get to them. Signed-off-by: Stephen Rothwell --- arch/powerpc/Makefile | 1 - arch/powerpc/kernel/Makefile | 13 +++++++++---- arch/ppc64/Makefile | 2 +- arch/ppc64/kernel/Makefile | 11 ++--------- 4 files changed, 12 insertions(+), 15 deletions(-) -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ 28163804fe2135701522671bd8c3828e1aa0ce62 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -121,7 +121,6 @@ head-$(CONFIG_FSL_BOOKE) := arch/powerpc ifeq ($(CONFIG_PPC32),y) head-$(CONFIG_6xx) += arch/powerpc/kernel/idle_6xx.o -head-$(CONFIG_POWER4) += arch/powerpc/kernel/idle_power4.o head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,6 +2,10 @@ # Makefile for the linux kernel. 
# +ifeq ($(CONFIG_PPC64),y) +EXTRA_CFLAGS += -mno-minimal-toc +endif + extra-$(CONFIG_PPC_STD_MMU) := head.o extra_$(CONFIG_PPC64) := head_64.o extra-$(CONFIG_40x) := head_4xx.o @@ -9,12 +13,13 @@ extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-$(CONFIG_6xx) += idle_6xx.o -extra-$(CONFIG_POWER4) += idle_power4.o extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds -obj-y := semaphore.o process.o -obj-$(CONFIG_PPC32) += traps32.c -obj-$(CONFIG_PPC64) += traps64.c +obj-$(CONFIG_PPC32) := semaphore.o process.o +obj-$(CONFIG_PPC32) += traps32.o +obj-$(CONFIG_PPC64) += traps64.o idle_power4.o +ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_MODULES) += ppc_ksyms.o +endif obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -82,7 +82,7 @@ CFLAGS += $(call cc-option,-funit-at-a-t head-y := arch/ppc64/kernel/head.o libs-y += arch/ppc64/lib/ -core-y += arch/ppc64/kernel/ +core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ core-y += arch/ppc64/mm/ core-y += arch/powerpc/platforms/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -7,12 +7,12 @@ ifneq ($(CONFIG_PPC_MERGE),y) EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds -obj-y := setup.o entry.o traps64.o irq.o idle.o dma.o \ +obj-y := setup.o entry.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ align.o semaphore.o bitops.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ - lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ + lmb.o cputable.o cpu_setup_power4.o \ iommu.o sysfs.o vdso.o pmc.o firmware.o prom.o obj-y += vdso32/ vdso64/ @@ -66,7 +66,6 @@ obj-$(CONFIG_PPC_BPA) += pSeries_smp.o obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o endif 
-obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_KPROBES) += kprobes.o CFLAGS_ioctl32.o += -Ifs/ @@ -76,12 +75,6 @@ arch/ppc64/kernel/head.o: arch/powerpc/p AFLAGS_head.o += -Iarch/powerpc/platforms/iseries endif -# These are here while we do the architecture merge -vecemu-y += ../../powerpc/kernel/vecemu.o -vector-y += ../../powerpc/kernel/vector.o -idle_power4-y += ../../powerpc/kernel/idle_power4.o -traps64-y += ../../powerpc/kernel/traps64.o - else endif From sfr at canb.auug.org.au Sat Oct 1 00:10:05 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 00:10:05 +1000 Subject: [PATCH 8/9] powerpc: make iSeries build In-Reply-To: <20050930233602.138b6e27.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> Message-ID: <20051001001005.348d7798.sfr@canb.auug.org.au> Also Merge vmlinux.lds.S and remove arch/powerpc/kernel/vmlinux.lds which is a generated file. The merge of vmlinux.lds.S would be much cleaner if it is clear that putting the ..start/end symbols inside the section definitions is OK on ppc32. 
Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 6 + arch/powerpc/Makefile | 27 ++-- arch/powerpc/kernel/Makefile | 9 + arch/powerpc/kernel/vmlinux.lds | 174 --------------------------- arch/powerpc/kernel/vmlinux.lds.S | 190 +++++++++++++++++++++++++++-- arch/powerpc/platforms/iseries/lpevents.c | 2 arch/ppc64/kernel/Makefile | 8 - include/asm-powerpc/system.h | 4 - 8 files changed, 214 insertions(+), 206 deletions(-) delete mode 100644 arch/powerpc/kernel/vmlinux.lds -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ f0c094f719829a7a15cbbea72a33093b9a7dec5d diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -833,6 +833,12 @@ config PIN_TLB depends on ADVANCED_OPTIONS && 8xx endmenu +if PPC64 +config KERNEL_START + hex + default "0xc0000000" +endif + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -124,12 +124,14 @@ head-$(CONFIG_6xx) += arch/powerpc/kern head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o endif -core-y += arch/powerpc/kernel/ \ - arch/$(OLDARCH)/kernel/ \ - arch/powerpc/mm/ \ - arch/powerpc/lib/ \ - arch/powerpc/sysdev/ \ - arch/powerpc/platforms/ +core-y += arch/powerpc/kernel/ +core-y += arch/$(OLDARCH)/kernel/ +core-$(CONFIG_PPC32) += arch/powerpc/mm/ +core-$(CONFIG_PPC64) += arch/$(OLDARCH)/mm/ +core-$(CONFIG_PPC32) += arch/powerpc/lib/ +libs-$(CONFIG_PPC64) += arch/$(OLDARCH)/lib/ +core-y += arch/powerpc/sysdev/ +core-y += arch/powerpc/platforms/ core-$(CONFIG_PPC32) += arch/ppc/syslib/ core-$(CONFIG_MATH_EMULATION) += arch/ppc/math-emu/ core-$(CONFIG_XMON) += arch/powerpc/xmon/ @@ -140,17 +142,20 @@ drivers-$(CONFIG_CPM2) += arch/ppc/8260 drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ -BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm - -.PHONY: $(BOOT_TARGETS) - -all: uImage zImage 
+defaultimage-$(CONFIG_PPC32) := uImage zImage +defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux +KBUILD_IMAGE := $(defaultimage-y) +all: $(KBUILD_IMAGE) CPPFLAGS_vmlinux.lds := -Upowerpc # All the instructions talk about "make bzImage". bzImage: zImage +BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm + +.PHONY: $(BOOT_TARGETS) + boot := arch/$(OLDARCH)/boot $(BOOT_TARGETS): vmlinux diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -6,8 +6,10 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif +ifeq ($(CONFIG_PPC32),y) extra-$(CONFIG_PPC_STD_MMU) := head.o -extra_$(CONFIG_PPC64) := head_64.o +endif +extra-$(CONFIG_PPC64) := head_64.o extra-$(CONFIG_40x) := head_4xx.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o @@ -23,3 +25,8 @@ ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_MODULES) += ppc_ksyms.o endif obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o + +ifeq ($(CONFIG_PPC_ISERIES),y) +arch/powerpc/kernel/head_64.o: arch/powerpc/platforms/iseries/lparmap.s +AFLAGS_head_64.o += -Iarch/powerpc/platforms/iseries +endif diff --git a/arch/powerpc/kernel/vmlinux.lds b/arch/powerpc/kernel/vmlinux.lds deleted file mode 100644 --- a/arch/powerpc/kernel/vmlinux.lds +++ /dev/null @@ -1,174 +0,0 @@ -/* Align . to a 8 byte boundary equals to maximum function alignment. */ -/* sched.text is aling to function alignment to secure we have same - * address even at second ld pass when generating System.map */ -/* spinlock.text is aling to function alignment to secure we have same - * address even at second ld pass when generating System.map */ - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to - the beginning of the section so we begin them at 0. */ - /* Stabs debugging sections. 
*/ -OUTPUT_ARCH(powerpc:common) -jiffies = jiffies_64 + 4; -SECTIONS -{ - /* Read-only sections, merged into text segment: */ - . = + SIZEOF_HEADERS; - .interp : { *(.interp) } - .hash : { *(.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .rel.text : { *(.rel.text) } - .rela.text : { *(.rela.text) } - .rel.data : { *(.rel.data) } - .rela.data : { *(.rela.data) } - .rel.rodata : { *(.rel.rodata) } - .rela.rodata : { *(.rela.rodata) } - .rel.got : { *(.rel.got) } - .rela.got : { *(.rela.got) } - .rel.ctors : { *(.rel.ctors) } - .rela.ctors : { *(.rela.ctors) } - .rel.dtors : { *(.rel.dtors) } - .rela.dtors : { *(.rela.dtors) } - .rel.bss : { *(.rel.bss) } - .rela.bss : { *(.rela.bss) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } -/* .init : { *(.init) } =0*/ - .plt : { *(.plt) } - .text : - { - *(.text) - . = ALIGN(8); __sched_text_start = .; *(.sched.text) __sched_text_end = .; - . = ALIGN(8); __lock_text_start = .; *(.spinlock.text) __lock_text_end = .; - *(.fixup) - *(.got1) - __got2_start = .; - *(.got2) - __got2_end = .; - } - _etext = .; - PROVIDE (etext = .); - .rodata : AT(ADDR(.rodata) - 0) { *(.rodata) *(.rodata.*) *(__vermagic) } .rodata1 : AT(ADDR(.rodata1) - 0) { *(.rodata1) } .pci_fixup : AT(ADDR(.pci_fixup) - 0) { __start_pci_fixups_early = .; *(.pci_fixup_early) __end_pci_fixups_early = .; __start_pci_fixups_header = .; *(.pci_fixup_header) __end_pci_fixups_header = .; __start_pci_fixups_final = .; *(.pci_fixup_final) __end_pci_fixups_final = .; __start_pci_fixups_enable = .; *(.pci_fixup_enable) __end_pci_fixups_enable = .; } __ksymtab : AT(ADDR(__ksymtab) - 0) { __start___ksymtab = .; *(__ksymtab) __stop___ksymtab = .; } __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - 0) { __start___ksymtab_gpl = .; *(__ksymtab_gpl) __stop___ksymtab_gpl = .; } __kcrctab : AT(ADDR(__kcrctab) - 0) { __start___kcrctab = .; *(__kcrctab) __stop___kcrctab = .; } __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - 0) { __start___kcrctab_gpl = .; 
*(__kcrctab_gpl) __stop___kcrctab_gpl = .; } __ksymtab_strings : AT(ADDR(__ksymtab_strings) - 0) { *(__ksymtab_strings) } __param : AT(ADDR(__param) - 0) { __start___param = .; *(__param) __stop___param = .; } - .fini : { *(.fini) } =0 - .ctors : { *(.ctors) } - .dtors : { *(.dtors) } - .fixup : { *(.fixup) } - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - __bug_table : { - __start___bug_table = .; - *(__bug_table) - __stop___bug_table = .; - } - /* Read-write section, merged into data segment: */ - . = ALIGN(4096); - .data : - { - *(.data) - *(.data1) - *(.sdata) - *(.sdata2) - *(.got.plt) *(.got) - *(.dynamic) - CONSTRUCTORS - } - - . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - _edata = .; - PROVIDE (edata = .); - - . = ALIGN(8192); - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); - __init_begin = .; - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } - /* .exit.text is discarded at runtime, not link time, - to deal with references from __bug_table */ - .exit.text : { *(.exit.text) } - .init.data : { - *(.init.data); - __vtop_table_begin = .; - *(.vtop_fixup); - __vtop_table_end = .; - __ptov_table_begin = .; - *(.ptov_fixup); - __ptov_table_end = .; - } - . 
= ALIGN(16); - __setup_start = .; - .init.setup : { *(.init.setup) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) - } - __initcall_end = .; - - __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } - __con_initcall_end = .; - - .security_initcall.init : AT(ADDR(.security_initcall.init) - 0) { __security_initcall_start = .; *(.security_initcall.init) __security_initcall_end = .; } - - __start___ftr_fixup = .; - __ftr_fixup : { *(__ftr_fixup) } - __stop___ftr_fixup = .; - - . = ALIGN(32); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; - - . = ALIGN(4096); - __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } - __initramfs_end = .; - - . = ALIGN(4096); - __init_end = .; - - . = ALIGN(4096); - _sextratext = .; - _eextratext = .; - - __bss_start = .; - .bss : - { - *(.sbss) *(.scommon) - *(.dynbss) - *(.bss) - *(COMMON) - } - __bss_stop = .; - - _end = . ; - PROVIDE (end = .); - - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.exit.data) - } -} diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -1,10 +1,29 @@ +#include +#ifdef CONFIG_PPC64 +#include +#endif #include +#ifdef CONFIG_PPC64 +OUTPUT_ARCH(powerpc:common64) +jiffies = jiffies_64; +#else OUTPUT_ARCH(powerpc:common) jiffies = jiffies_64 + 4; +#endif SECTIONS { + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) +#ifdef CONFIG_PPC32 + *(.exit.data) +#endif + } + + /* Read-only sections, merged into text segment: */ +#ifdef CONFIG_PPC32 . 
= + SIZEOF_HEADERS; .interp : { *(.interp) } .hash : { *(.hash) } @@ -28,17 +47,30 @@ SECTIONS .rela.plt : { *(.rela.plt) } /* .init : { *(.init) } =0*/ .plt : { *(.plt) } - .text : - { +#endif + .text : { +#ifdef CONFIG_PPC64 + *(.text .text.*) +#else *(.text) +#endif SCHED_TEXT LOCK_TEXT +#ifdef CONFIG_PPC64 + KPROBES_TEXT +#endif *(.fixup) +#ifdef CONFIG_PPC32 *(.got1) __got2_start = .; *(.got2) __got2_end = .; +#else + . = ALIGN(PAGE_SIZE); + _etext = .; +#endif } +#ifdef CONFIG_PPC32 _etext = .; PROVIDE (etext = .); @@ -48,6 +80,7 @@ SECTIONS .dtors : { *(.dtors) } .fixup : { *(.fixup) } +#endif __ex_table : { __start___ex_table = .; @@ -61,6 +94,17 @@ SECTIONS __stop___bug_table = .; } +#ifdef CONFIG_PPC64 + __ftr_fixup : { + __start___ftr_fixup = .; + *(__ftr_fixup) + __stop___ftr_fixup = .; + } + + RODATA +#endif + +#ifdef CONFIG_PPC32 /* Read-write section, merged into data segment: */ . = ALIGN(4096); .data : @@ -90,16 +134,25 @@ SECTIONS .data.init_task : { *(.data.init_task) } . = ALIGN(4096); +#else + /* will be freed after init */ + . = ALIGN(PAGE_SIZE); +#endif __init_begin = .; .init.text : { _sinittext = .; *(.init.text) _einittext = .; } +#ifdef CONFIG_PPC32 /* .exit.text is discarded at runtime, not link time, to deal with references from __bug_table */ .exit.text : { *(.exit.text) } +#endif .init.data : { +#ifdef CONFIG_PPC64 + *(.init.data) +#else *(.init.data); __vtop_table_begin = .; *(.vtop_fixup); @@ -107,13 +160,31 @@ SECTIONS __ptov_table_begin = .; *(.ptov_fixup); __ptov_table_end = .; +#endif } + . 
= ALIGN(16); +#ifdef CONFIG_PPC32 __setup_start = .; - .init.setup : { *(.init.setup) } +#endif + .init.setup : { +#ifdef CONFIG_PPC64 + __setup_start = .; +#endif + *(.init.setup) +#ifdef CONFIG_PPC64 + __setup_end = .; +#endif + } +#ifdef CONFIG_PPC32 __setup_end = .; + __initcall_start = .; +#endif .initcall.init : { +#ifdef CONFIG_PPC64 + __initcall_start = .; +#endif *(.initcall1.init) *(.initcall2.init) *(.initcall3.init) @@ -121,27 +192,109 @@ SECTIONS *(.initcall5.init) *(.initcall6.init) *(.initcall7.init) +#ifdef CONFIG_PPC64 + __initcall_end = .; +#endif } +#ifdef CONFIG_PPC32 __initcall_end = .; __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } +#endif + .con_initcall.init : { +#ifdef CONFIG_PPC64 + __con_initcall_start = .; +#endif + *(.con_initcall.init) +#ifdef CONFIG_PPC64 + __con_initcall_end = .; +#endif + } +#ifdef CONFIG_PPC32 __con_initcall_end = .; +#endif SECURITY_INIT +#ifdef CONFIG_PPC32 __start___ftr_fixup = .; __ftr_fixup : { *(__ftr_fixup) } __stop___ftr_fixup = .; +#else + . = ALIGN(PAGE_SIZE); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } +#endif +#ifdef CONFIG_PPC32 . = ALIGN(32); __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } +#endif + .data.percpu : { +#ifdef CONFIG_PPC64 + __per_cpu_start = .; +#endif + *(.data.percpu) +#ifdef CONFIG_PPC64 + __per_cpu_end = .; +#endif + } +#ifdef CONFIG_PPC32 __per_cpu_end = .; +#endif + +#ifdef CONFIG_PPC64 + . = ALIGN(PAGE_SIZE); + . = ALIGN(16384); + __init_end = .; + /* freed after init ends here */ + + + /* Read/write sections */ + . = ALIGN(PAGE_SIZE); + . = ALIGN(16384); + /* The initial task and kernel stack */ + .data.init_task : { + *(.data.init_task) + } + + . 
= ALIGN(PAGE_SIZE); + .data.page_aligned : { + *(.data.page_aligned) + } + + .data.cacheline_aligned : { + *(.data.cacheline_aligned) + } + + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) + } + + .opd : { + *(.opd) + } + + .got : { + __toc_start = .; + *(.got) + *(.toc) + . = ALIGN(PAGE_SIZE); + _edata = .; + } + + . = ALIGN(PAGE_SIZE); +#else . = ALIGN(4096); __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } + .init.ramfs : { + *(.init.ramfs) + } __initramfs_end = .; . = ALIGN(4096); @@ -152,21 +305,30 @@ SECTIONS _eextratext = .; __bss_start = .; - .bss : - { +#endif + .bss : { +#ifdef CONFIG_PPC64 + __bss_start = .; +#else *(.sbss) *(.scommon) *(.dynbss) +#endif *(.bss) +#ifdef CONFIG_PPC32 *(COMMON) +#else + __bss_stop = .; +#endif } +#ifdef CONFIG_PPC32 __bss_stop = .; +#endif +#ifdef CONFIG_PPC64 + . = ALIGN(PAGE_SIZE); +#endif _end = . ; +#ifdef CONFIG_PPC32 PROVIDE (end = .); - - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.exit.data) - } +#endif } diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c --- a/arch/powerpc/platforms/iseries/lpevents.c +++ b/arch/powerpc/platforms/iseries/lpevents.c @@ -13,6 +13,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -2,10 +2,10 @@ # Makefile for the linux ppc64 kernel. 
# -ifneq ($(CONFIG_PPC_MERGE),y) - EXTRA_CFLAGS += -mno-minimal-toc +ifneq ($(CONFIG_PPC_MERGE),y) extra-y := head.o vmlinux.lds +endif obj-y := setup.o entry.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ @@ -70,11 +70,9 @@ obj-$(CONFIG_KPROBES) += kprobes.o CFLAGS_ioctl32.o += -Ifs/ +ifneq ($(CONFIG_PPC_MERGE),y) ifeq ($(CONFIG_PPC_ISERIES),y) arch/ppc64/kernel/head.o: arch/powerpc/platforms/iseries/lparmap.s AFLAGS_head.o += -Iarch/powerpc/platforms/iseries endif - -else - endif diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -118,10 +118,10 @@ extern void _set_L3CR(unsigned long); #endif extern void via_cuda_init(void); -extern void pmac_nvram_init(void); extern void read_rtc_time(void); extern void pmac_find_display(void); extern void giveup_fpu(struct task_struct *); +extern void disable_kernel_fp(void); extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); extern void enable_kernel_altivec(void); @@ -346,5 +346,7 @@ __cmpxchg(volatile void *ptr, unsigned l #define arch_align_stack(x) (x) +extern unsigned long reloc_offset(void); + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ From sfr at canb.auug.org.au Sat Oct 1 00:00:01 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 00:00:01 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and fixup traps.c In-Reply-To: <20050930233602.138b6e27.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> Message-ID: <20051001000001.1f1d8c48.sfr@canb.auug.org.au> Use idle_power4.S from ppc64 as we are not going to support 32 bit power4 in the merged tree. create traps{32,64}.c as these are hard to merge. 
Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 4 arch/powerpc/kernel/Makefile | 4 arch/powerpc/kernel/idle_power4.S | 78 +++ arch/powerpc/kernel/traps.c | 1047 ------------------------------------- arch/powerpc/kernel/traps32.c | 1047 +++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/traps64.c | 568 ++++++++++++++++++++ arch/ppc64/kernel/Makefile | 10 arch/ppc64/kernel/idle_power4.S | 79 --- arch/ppc64/kernel/traps.c | 568 -------------------- 9 files changed, 1707 insertions(+), 1698 deletions(-) create mode 100644 arch/powerpc/kernel/idle_power4.S delete mode 100644 arch/powerpc/kernel/traps.c create mode 100644 arch/powerpc/kernel/traps32.c create mode 100644 arch/powerpc/kernel/traps64.c delete mode 100644 arch/ppc64/kernel/idle_power4.S delete mode 100644 arch/ppc64/kernel/traps.c -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ bbc83a78c1c417cc6bb44e5a1bdcd5a56e625bc5 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -142,8 +142,8 @@ config POWER4 def_bool y config PPC_FPU - bool - default y if PPC64 + depends on PPC32 + def_bool y config BOOKE bool diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -13,6 +13,8 @@ extra-$(CONFIG_POWER4) += idle_power4.o extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds -obj-y := semaphore.o traps.o process.o +obj-y := semaphore.o process.o +obj-$(CONFIG_PPC32) += traps32.c +obj-$(CONFIG_PPC64) += traps64.c obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S new file mode 100644 --- /dev/null +++ b/arch/powerpc/kernel/idle_power4.S @@ -0,0 +1,78 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! 
This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + + .text + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(power4_idle) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + LOADBASE(r3,cur_cpu_spec) + ld r4,cur_cpu_spec at l(r3) + ld r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beqlr + /* Now check if user or arch enabled NAP mode */ + LOADBASE(r3,powersave_nap) + lwz r4,powersave_nap at l(r3) + cmpwi 0,r4,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + li r4,0 + ori r4,r4,MSR_EE + andc r0,r7,r4 + mtmsrd r0 + + /* Check current_thread_info()->flags */ + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsrd r7 /* out of line this ? 
*/ + blr +1: + /* Go to NAP now */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + oris r7,r7,MSR_POW at h + sync + isync + mtmsrd r7 + isync + sync + blr diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c deleted file mode 100644 --- a/arch/powerpc/kernel/traps.c +++ /dev/null @@ -1,1047 +0,0 @@ -/* - * arch/powerpc/kernel/traps.c - * - * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Modified by Cort Dougan (cort at cs.nmt.edu) - * and Paul Mackerras (paulus at samba.org) - */ - -/* - * This file handles the architecture-dependent parts of hardware exceptions - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_PMAC_BACKLIGHT -#include -#endif -#include - -#ifdef CONFIG_DEBUGGER -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); - -EXPORT_SYMBOL(__debugger); -EXPORT_SYMBOL(__debugger_ipi); -EXPORT_SYMBOL(__debugger_bpt); -EXPORT_SYMBOL(__debugger_sstep); -EXPORT_SYMBOL(__debugger_iabr_match); -EXPORT_SYMBOL(__debugger_dabr_match); -EXPORT_SYMBOL(__debugger_fault_handler); -#endif - -struct notifier_block *powerpc_die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); - -int register_die_notifier(struct notifier_block *nb) -{ - int err = 
0; - unsigned long flags; - - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&powerpc_die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; -} - -/* - * Trap & Exception support - */ - -static DEFINE_SPINLOCK(die_lock); - -int die(const char *str, struct pt_regs *regs, long err) -{ - static int die_counter; - int nl = 0; - - if (debugger(regs)) - return 1; - - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); -#ifdef CONFIG_PMAC_BACKLIGHT - if (_machine == _MACH_Pmac) { - set_backlight_enable(1); - set_backlight_level(BACKLIGHT_MAX); - } -#endif - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); - nl = 1; -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); - nl = 1; -#endif -#ifdef CONFIG_PPC64 - switch (systemcfg->platform) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_BPA: - printk("BPA "); - nl = 1; - break; - } -#endif - if (nl) - printk("\n"); - print_modules(); - show_regs(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - - if (in_interrupt()) - panic("Fatal exception in interrupt"); - - if (panic_on_oops) { - panic("Fatal exception"); - } - do_exit(err); - - return 0; -} - -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) -{ - siginfo_t info; - - if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } - - memset(&info, 0, sizeof(info)); - info.si_signo = signr; - info.si_code = code; - info.si_addr = (void __user *) addr; - force_sig_info(signr, &info, 
current); - - /* - * Init gets no signals that it doesn't have a handler for. - * That's all very well, but if it has caused a synchronous - * exception and we ignore the resulting signal, it will just - * generate the same exception over and over again and we get - * nowhere. Better to kill it and let the kernel panic. - */ - if (current->pid == 1) { - __sighandler_t handler; - - spin_lock_irq(¤t->sighand->siglock); - handler = current->sighand->action[signr-1].sa.sa_handler; - spin_unlock_irq(¤t->sighand->siglock); - if (handler == SIG_DFL) { - /* init has generated a synchronous exception - and it doesn't have a handler for the signal */ - printk(KERN_CRIT "init has generated signal %d " - "but has no handler for it\n", signr); - do_exit(signr); - } - } -} - -#ifdef CONFIG_PPC64 -void system_reset_exception(struct pt_regs *regs) -{ - /* See if any machine dependent calls */ - if (ppc_md.system_reset_exception) - ppc_md.system_reset_exception(regs); - - die("System Reset", regs, SIGABRT); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable System Reset"); - - /* What should we do here? We could issue a shutdown or hard reset. */ -} -#endif - -/* - * I/O accesses can cause machine checks on powermacs. - * Check if the NIP corresponds to the address of a sync - * instruction for which there is an entry in the exception - * table. - * Note that the 601 only takes a machine check on TEA - * (transfer error ack) signal assertion, and does not - * set any of the top 16 bits of SRR1. - * -- paulus. 
- */ -static inline int check_io_access(struct pt_regs *regs) -{ -#ifdef CONFIG_PPC_PMAC - unsigned long msr = regs->msr; - const struct exception_table_entry *entry; - unsigned int *nip = (unsigned int *)regs->nip; - - if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000))) - && (entry = search_exception_tables(regs->nip)) != NULL) { - /* - * Check that it's a sync instruction, or somewhere - * in the twi; isync; nop sequence that inb/inw/inl uses. - * As the address is in the exception table - * we should be able to read the instr there. - * For the debug message, we look at the preceding - * load or store. - */ - if (*nip == 0x60000000) /* nop */ - nip -= 2; - else if (*nip == 0x4c00012c) /* isync */ - --nip; - if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { - /* sync or twi */ - unsigned int rb; - - --nip; - rb = (*nip >> 11) & 0x1f; - printk(KERN_DEBUG "%s bad port %lx at %p\n", - (*nip & 0x100)? "OUT to": "IN from", - regs->gpr[rb] - _IO_BASE, nip); - regs->msr |= MSR_RI; - regs->nip = entry->fixup; - return 1; - } - } -#endif /* CONFIG_PPC_PMAC */ - return 0; -} - -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) -/* On 4xx, the reason for the machine check or program exception - is in the ESR. */ -#define get_reason(regs) ((regs)->dsisr) -#ifndef CONFIG_FSL_BOOKE -#define get_mc_reason(regs) ((regs)->dsisr) -#else -#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) -#endif -#define REASON_FP ESR_FP -#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) -#define REASON_PRIVILEGED ESR_PPR -#define REASON_TRAP ESR_PTR - -/* single-step stuff */ -#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) -#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) - -#else -/* On non-4xx, the reason for the machine check or program - exception is in the MSR. 
*/ -#define get_reason(regs) ((regs)->msr) -#define get_mc_reason(regs) ((regs)->msr) -#define REASON_FP 0x100000 -#define REASON_ILLEGAL 0x80000 -#define REASON_PRIVILEGED 0x40000 -#define REASON_TRAP 0x20000 - -#define single_stepping(regs) ((regs)->msr & MSR_SE) -#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) -#endif - -/* - * This is "fall-back" implementation for configurations - * which don't provide platform-specific machine check info - */ -void __attribute__ ((weak)) -platform_machine_check(struct pt_regs *regs) -{ -} - -void MachineCheckException(struct pt_regs *regs) -{ -#ifdef CONFIG_PPC64 - int recover = 0; - - /* See if any machine dependent calls */ - if (ppc_md.machine_check_exception) - recover = ppc_md.machine_check_exception(regs); - - if (recover) - return; -#else - unsigned long reason = get_mc_reason(regs); - - if (user_mode(regs)) { - regs->msr |= MSR_RI; - _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); - return; - } - -#if defined(CONFIG_8xx) && defined(CONFIG_PCI) - /* the qspan pci read routines can cause machine checks -- Cort */ - bad_page_fault(regs, regs->dar, SIGBUS); - return; -#endif - - if (debugger_fault_handler(regs)) { - regs->msr |= MSR_RI; - return; - } - - if (check_io_access(regs)) - return; - -#if defined(CONFIG_4xx) && !defined(CONFIG_440A) - if (reason & ESR_IMCP) { - printk("Instruction"); - mtspr(SPRN_ESR, reason & ~ESR_IMCP); - } else - printk("Data"); - printk(" machine check in kernel mode.\n"); -#elif defined(CONFIG_440A) - printk("Machine check in kernel mode.\n"); - if (reason & ESR_IMCP){ - printk("Instruction Synchronous Machine Check exception\n"); - mtspr(SPRN_ESR, reason & ~ESR_IMCP); - } - else { - u32 mcsr = mfspr(SPRN_MCSR); - if (mcsr & MCSR_IB) - printk("Instruction Read PLB Error\n"); - if (mcsr & MCSR_DRB) - printk("Data Read PLB Error\n"); - if (mcsr & MCSR_DWB) - printk("Data Write PLB Error\n"); - if (mcsr & MCSR_TLBP) - printk("TLB Parity Error\n"); - if (mcsr & MCSR_ICP){ - 
flush_instruction_cache(); - printk("I-Cache Parity Error\n"); - } - if (mcsr & MCSR_DCSP) - printk("D-Cache Search Parity Error\n"); - if (mcsr & MCSR_DCFP) - printk("D-Cache Flush Parity Error\n"); - if (mcsr & MCSR_IMPE) - printk("Machine Check exception is imprecise\n"); - - /* Clear MCSR */ - mtspr(SPRN_MCSR, mcsr); - } -#elif defined (CONFIG_E500) - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - printk("Machine Check Signal\n"); - if (reason & MCSR_ICPERR) - printk("Instruction Cache Parity Error\n"); - if (reason & MCSR_DCP_PERR) - printk("Data Cache Push Parity Error\n"); - if (reason & MCSR_DCPERR) - printk("Data Cache Parity Error\n"); - if (reason & MCSR_GL_CI) - printk("Guarded Load or Cache-Inhibited stwcx.\n"); - if (reason & MCSR_BUS_IAERR) - printk("Bus - Instruction Address Error\n"); - if (reason & MCSR_BUS_RAERR) - printk("Bus - Read Address Error\n"); - if (reason & MCSR_BUS_WAERR) - printk("Bus - Write Address Error\n"); - if (reason & MCSR_BUS_IBERR) - printk("Bus - Instruction Data Error\n"); - if (reason & MCSR_BUS_RBERR) - printk("Bus - Read Data Bus Error\n"); - if (reason & MCSR_BUS_WBERR) - printk("Bus - Read Data Bus Error\n"); - if (reason & MCSR_BUS_IPERR) - printk("Bus - Instruction Parity Error\n"); - if (reason & MCSR_BUS_RPERR) - printk("Bus - Read Parity Error\n"); -#elif defined (CONFIG_E200) - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - printk("Machine Check Signal\n"); - if (reason & MCSR_CP_PERR) - printk("Cache Push Parity Error\n"); - if (reason & MCSR_CPERR) - printk("Cache Parity Error\n"); - if (reason & MCSR_EXCP_ERR) - printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); - if (reason & MCSR_BUS_IRERR) - printk("Bus - Read Bus Error on instruction fetch\n"); - if (reason & MCSR_BUS_DRERR) - printk("Bus - Read Bus Error on data load\n"); 
- if (reason & MCSR_BUS_WRERR) - printk("Bus - Write Bus Error on buffered store or cache line push\n"); -#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */ - printk("Machine check in kernel mode.\n"); - printk("Caused by (from SRR1=%lx): ", reason); - switch (reason & 0x601F0000) { - case 0x80000: - printk("Machine check signal\n"); - break; - case 0: /* for 601 */ - case 0x40000: - case 0x140000: /* 7450 MSS error and TEA */ - printk("Transfer error ack signal\n"); - break; - case 0x20000: - printk("Data parity error signal\n"); - break; - case 0x10000: - printk("Address parity error signal\n"); - break; - case 0x20000000: - printk("L1 Data Cache error\n"); - break; - case 0x40000000: - printk("L1 Instruction Cache error\n"); - break; - case 0x00100000: - printk("L2 data cache parity error\n"); - break; - default: - printk("Unknown values in msr\n"); - } -#endif /* CONFIG_4xx */ - - /* - * Optional platform-provided routine to print out - * additional info, e.g. bus error registers. 
- */ - platform_machine_check(regs); -#endif /* CONFIG_PPC64 */ - - if (debugger_fault_handler(regs)) - return; - die("Machine check", regs, SIGBUS); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable Machine check"); -} - -void SMIException(struct pt_regs *regs) -{ - die("System Management Interrupt", regs, SIGABRT); -} - -void UnknownException(struct pt_regs *regs) -{ - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", - regs->nip, regs->msr, regs->trap); - - _exception(SIGTRAP, regs, 0, 0); -} - -void InstructionBreakpoint(struct pt_regs *regs) -{ - if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_iabr_match(regs)) - return; - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); -} - -void RunModeException(struct pt_regs *regs) -{ - _exception(SIGTRAP, regs, 0, 0); -} - -void SingleStepException(struct pt_regs *regs) -{ - regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ - - if (notify_die(DIE_SSTEP, "single_step", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_sstep(regs)) - return; - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); -} - -/* - * After we have successfully emulated an instruction, we have to - * check if the instruction was being single-stepped, and if so, - * pretend we got a single-step exception. This was pointed out - * by Kumar Gala. -- paulus - */ -static void emulate_single_step(struct pt_regs *regs) -{ - if (single_stepping(regs)) { - clear_single_step(regs); - _exception(SIGTRAP, regs, TRAP_TRACE, 0); - } -} - -/* Illegal instruction emulation support. Originally written to - * provide the PVR to user applications using the mfspr rd, PVR. - * Return non-zero if we can't emulate, or -EFAULT if the associated - * memory access caused an access fault. Return zero on success. - * - * There are a couple of ways to do this, either "decode" the instruction - * or directly match lots of bits. 
In this case, matching lots of - * bits is faster and easier. - * - */ -#define INST_MFSPR_PVR 0x7c1f42a6 -#define INST_MFSPR_PVR_MASK 0xfc1fffff - -#define INST_DCBA 0x7c0005ec -#define INST_DCBA_MASK 0x7c0007fe - -#define INST_MCRXR 0x7c000400 -#define INST_MCRXR_MASK 0x7c0007fe - -#define INST_STRING 0x7c00042a -#define INST_STRING_MASK 0x7c0007fe -#define INST_STRING_GEN_MASK 0x7c00067e -#define INST_LSWI 0x7c0004aa -#define INST_LSWX 0x7c00042a -#define INST_STSWI 0x7c0005aa -#define INST_STSWX 0x7c00052a - -static int emulate_string_inst(struct pt_regs *regs, u32 instword) -{ - u8 rT = (instword >> 21) & 0x1f; - u8 rA = (instword >> 16) & 0x1f; - u8 NB_RB = (instword >> 11) & 0x1f; - u32 num_bytes; - unsigned long EA; - int pos = 0; - - /* Early out if we are an invalid form of lswx */ - if ((instword & INST_STRING_MASK) == INST_LSWX) - if ((rT == rA) || (rT == NB_RB)) - return -EINVAL; - - EA = (rA == 0) ? 0 : regs->gpr[rA]; - - switch (instword & INST_STRING_MASK) { - case INST_LSWX: - case INST_STSWX: - EA += NB_RB; - num_bytes = regs->xer & 0x7f; - break; - case INST_LSWI: - case INST_STSWI: - num_bytes = (NB_RB == 0) ? 
32 : NB_RB; - break; - default: - return -EINVAL; - } - - while (num_bytes != 0) - { - u8 val; - u32 shift = 8 * (3 - (pos & 0x3)); - - switch ((instword & INST_STRING_MASK)) { - case INST_LSWX: - case INST_LSWI: - if (get_user(val, (u8 __user *)EA)) - return -EFAULT; - /* first time updating this reg, - * zero it out */ - if (pos == 0) - regs->gpr[rT] = 0; - regs->gpr[rT] |= val << shift; - break; - case INST_STSWI: - case INST_STSWX: - val = regs->gpr[rT] >> shift; - if (put_user(val, (u8 __user *)EA)) - return -EFAULT; - break; - } - /* move EA to next address */ - EA += 1; - num_bytes--; - - /* manage our position within the register */ - if (++pos == 4) { - pos = 0; - if (++rT == 32) - rT = 0; - } - } - - return 0; -} - -static int emulate_instruction(struct pt_regs *regs) -{ - u32 instword; - u32 rd; - - if (!user_mode(regs)) - return -EINVAL; - CHECK_FULL_REGS(regs); - - if (get_user(instword, (u32 __user *)(regs->nip))) - return -EFAULT; - - /* Emulate the mfspr rD, PVR. */ - if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { - rd = (instword >> 21) & 0x1f; - regs->gpr[rd] = mfspr(SPRN_PVR); - return 0; - } - - /* Emulating the dcba insn is just a no-op. */ - if ((instword & INST_DCBA_MASK) == INST_DCBA) - return 0; - - /* Emulate the mcrxr insn. */ - if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { - int shift = (instword >> 21) & 0x1c; - unsigned long msk = 0xf0000000UL >> shift; - - regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); - regs->xer &= ~0xf0000000UL; - return 0; - } - - /* Emulate load/store string insn. */ - if ((instword & INST_STRING_GEN_MASK) == INST_STRING) - return emulate_string_inst(regs, instword); - - return -EINVAL; -} - -/* - * Look through the list of trap instructions that are used for BUG(), - * BUG_ON() and WARN_ON() and see if we hit one. At this point we know - * that the exception was caused by a trap instruction of some kind. - * Returns 1 if we should continue (i.e. 
it was a WARN_ON) or 0 - * otherwise. - */ -extern struct bug_entry __start___bug_table[], __stop___bug_table[]; - -#ifndef CONFIG_MODULES -#define module_find_bug(x) NULL -#endif - -struct bug_entry *find_bug(unsigned long bugaddr) -{ - struct bug_entry *bug; - - for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) - if (bugaddr == bug->bug_addr) - return bug; - return module_find_bug(bugaddr); -} - -int check_bug_trap(struct pt_regs *regs) -{ - struct bug_entry *bug; - unsigned long addr; - - if (regs->msr & MSR_PR) - return 0; /* not in kernel */ - addr = regs->nip; /* address of trap instruction */ - if (addr < PAGE_OFFSET) - return 0; - bug = find_bug(regs->nip); - if (bug == NULL) - return 0; - if (bug->line & BUG_WARNING_TRAP) { - /* this is a WARN_ON rather than BUG/BUG_ON */ -#ifdef CONFIG_XMON - xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); -#endif /* CONFIG_XMON */ - printk(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); - dump_stack(); - return 1; - } -#ifdef CONFIG_XMON - xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - xmon(regs); -#endif /* CONFIG_XMON */ - printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - - return 0; -} - -void ProgramCheckException(struct pt_regs *regs) -{ - unsigned int reason = get_reason(regs); - extern int do_mathemu(struct pt_regs *regs); - -#ifdef CONFIG_MATH_EMULATION - /* (reason & REASON_ILLEGAL) would be the obvious thing here, - * but there seems to be a hardware bug on the 405GP (RevD) - * that means ESR is sometimes set incorrectly - either to - * ESR_DST (!?) or 0. In the process of chasing this with the - * hardware people - not sure if it can happen on any illegal - * instruction or only on FP instructions, whether there is a - * pattern to occurences etc. 
-dgibson 31/Mar/2003 */ - if (!(reason & REASON_TRAP) && do_mathemu(regs) == 0) { - emulate_single_step(regs); - return; - } -#endif /* CONFIG_MATH_EMULATION */ - - if (reason & REASON_FP) { - /* IEEE FP exception */ - int code = 0; - u32 fpscr; - - /* We must make sure the FP state is consistent with - * our MSR_FP in regs - */ - preempt_disable(); - if (regs->msr & MSR_FP) - giveup_fpu(current); - preempt_enable(); - - fpscr = current->thread.fpscr; - fpscr &= fpscr << 22; /* mask summary bits with enables */ - if (fpscr & FPSCR_VX) - code = FPE_FLTINV; - else if (fpscr & FPSCR_OX) - code = FPE_FLTOVF; - else if (fpscr & FPSCR_UX) - code = FPE_FLTUND; - else if (fpscr & FPSCR_ZX) - code = FPE_FLTDIV; - else if (fpscr & FPSCR_XX) - code = FPE_FLTRES; - _exception(SIGFPE, regs, code, regs->nip); - return; - } - - if (reason & REASON_TRAP) { - /* trap exception */ - if (debugger_bpt(regs)) - return; - if (check_bug_trap(regs)) { - regs->nip += 4; - return; - } - _exception(SIGTRAP, regs, TRAP_BRKPT, 0); - return; - } - - /* Try to emulate it if we should. 
*/ - if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { - switch (emulate_instruction(regs)) { - case 0: - regs->nip += 4; - emulate_single_step(regs); - return; - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - } - } - - if (reason & REASON_PRIVILEGED) - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - else - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -} - -void AlignmentException(struct pt_regs *regs) -{ - int fixed; - - fixed = fix_alignment(regs); - - if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ - emulate_single_step(regs); - return; - } - - /* Operand address was bad */ - if (fixed == -EFAULT) { - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar); - else - /* Search exception table */ - bad_page_fault(regs, regs->dar, SIGSEGV); - return; - } - _exception(SIGBUS, regs, BUS_ADRALN, regs->dar); -} - -void StackOverflow(struct pt_regs *regs) -{ - printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", - current, regs->gpr[1]); - debugger(regs); - show_regs(regs); - panic("kernel stack overflow"); -} - -void nonrecoverable_exception(struct pt_regs *regs) -{ - printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", - regs->nip, regs->msr); - debugger(regs); - die("nonrecoverable exception", regs, SIGKILL); -} - -void trace_syscall(struct pt_regs *regs) -{ - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, current->pid, regs->nip, regs->link, regs->gpr[0], - regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); -} - -#ifdef CONFIG_8xx -void SoftwareEmulation(struct pt_regs *regs) -{ - extern int do_mathemu(struct pt_regs *); - extern int Soft_emulate_8xx(struct pt_regs *); - int errcode; - - CHECK_FULL_REGS(regs); - - if (!user_mode(regs)) { - debugger(regs); - die("Kernel Mode Software FPU Emulation", regs, SIGFPE); - } - -#ifdef CONFIG_MATH_EMULATION - errcode = do_mathemu(regs); -#else - errcode = 
Soft_emulate_8xx(regs); -#endif - if (errcode) { - if (errcode > 0) - _exception(SIGFPE, regs, 0, 0); - else if (errcode == -EFAULT) - _exception(SIGSEGV, regs, 0, 0); - else - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - } else - emulate_single_step(regs); -} -#endif /* CONFIG_8xx */ - -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - -void DebugException(struct pt_regs *regs, unsigned long debug_status) -{ - if (debug_status & DBSR_IC) { /* instruction completion */ - regs->msr &= ~MSR_DE; - if (user_mode(regs)) { - current->thread.dbcr0 &= ~DBCR0_IC; - } else { - /* Disable instruction completion */ - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); - /* Clear the instruction completion event */ - mtspr(SPRN_DBSR, DBSR_IC); - if (debugger_sstep(regs)) - return; - } - _exception(SIGTRAP, regs, TRAP_TRACE, 0); - } -} -#endif /* CONFIG_4xx || CONFIG_BOOKE */ - -#if !defined(CONFIG_TAU_INT) -void TAUException(struct pt_regs *regs) -{ - printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", - regs->nip, regs->msr, regs->trap, print_tainted()); -} -#endif /* CONFIG_INT_TAU */ - -void AltivecUnavailException(struct pt_regs *regs) -{ - static int kernel_altivec_count; - -#ifndef CONFIG_ALTIVEC - if (user_mode(regs)) { - /* A user program has executed an altivec instruction, - but this kernel doesn't support altivec. */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } -#endif - /* The kernel has executed an altivec instruction without - first enabling altivec. Whinge but let it do it. 
*/ - if (++kernel_altivec_count < 10) - printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", - current, regs->nip); - regs->msr |= MSR_VEC; -} - -#ifdef CONFIG_ALTIVEC -void AltivecAssistException(struct pt_regs *regs) -{ - int err; - - preempt_disable(); - if (regs->msr & MSR_VEC) - giveup_altivec(current); - preempt_enable(); - if (!user_mode(regs)) { - printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" - " at %lx\n", regs->nip); - die("Kernel Altivec assist exception", regs, SIGILL); - } - - err = emulate_altivec(regs); - if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ - emulate_single_step(regs); - return; - } - - if (err == -EFAULT) { - /* got an error reading the instruction */ - _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); - } else { - /* didn't recognize the instruction */ - /* XXX quick hack for now: set the non-Java bit in the VSCR */ - if (printk_ratelimit()) - printk(KERN_ERR "Unrecognized altivec instruction " - "in %s at %lx\n", current->comm, regs->nip); - current->thread.vscr.u[3] |= 0x10000; - } -} -#endif /* CONFIG_ALTIVEC */ - -#ifdef CONFIG_E500 -void PerformanceMonitorException(struct pt_regs *regs) -{ - perf_irq(regs); -} -#endif - -#ifdef CONFIG_FSL_BOOKE -void CacheLockingException(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - /* We treat cache locking instructions from the user - * as priv ops, in the future we could try to do - * something smarter - */ - if (error_code & (ESR_DLK|ESR_ILK)) - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - return; -} -#endif /* CONFIG_FSL_BOOKE */ - -#ifdef CONFIG_SPE -void SPEFloatingPointException(struct pt_regs *regs) -{ - unsigned long spefscr; - int fpexc_mode; - int code = 0; - - spefscr = current->thread.spefscr; - fpexc_mode = current->thread.fpexc_mode; - - /* Hardware does not neccessarily set sticky - * underflow/overflow/invalid flags */ - if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) { - code = 
FPE_FLTOVF; - spefscr |= SPEFSCR_FOVFS; - } - else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) { - code = FPE_FLTUND; - spefscr |= SPEFSCR_FUNFS; - } - else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV)) - code = FPE_FLTDIV; - else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) { - code = FPE_FLTINV; - spefscr |= SPEFSCR_FINVS; - } - else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES)) - code = FPE_FLTRES; - - current->thread.spefscr = spefscr; - - _exception(SIGFPE, regs, code, regs->nip); - return; -} -#endif - -#ifdef CONFIG_BOOKE_WDT -/* - * Default handler for a Watchdog exception, - * spins until a reboot occurs - */ -void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) -{ - /* Generic WatchdogHandler, implement your own */ - mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); - return; -} - -void WatchdogException(struct pt_regs *regs) -{ - printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); - WatchdogHandler(regs); -} -#endif - -void __init trap_init(void) -{ -} diff --git a/arch/powerpc/kernel/traps32.c b/arch/powerpc/kernel/traps32.c new file mode 100644 --- /dev/null +++ b/arch/powerpc/kernel/traps32.c @@ -0,0 +1,1047 @@ +/* + * arch/powerpc/kernel/traps.c + * + * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Modified by Cort Dougan (cort at cs.nmt.edu) + * and Paul Mackerras (paulus at samba.org) + */ + +/* + * This file handles the architecture-dependent parts of hardware exceptions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PMAC_BACKLIGHT +#include +#endif +#include + +#ifdef CONFIG_DEBUGGER +int (*__debugger)(struct pt_regs *regs); +int (*__debugger_ipi)(struct pt_regs *regs); +int (*__debugger_bpt)(struct pt_regs *regs); +int (*__debugger_sstep)(struct pt_regs *regs); +int (*__debugger_iabr_match)(struct pt_regs *regs); +int (*__debugger_dabr_match)(struct pt_regs *regs); +int (*__debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(__debugger); +EXPORT_SYMBOL(__debugger_ipi); +EXPORT_SYMBOL(__debugger_bpt); +EXPORT_SYMBOL(__debugger_sstep); +EXPORT_SYMBOL(__debugger_iabr_match); +EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_fault_handler); +#endif + +struct notifier_block *powerpc_die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&powerpc_die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + +/* + * Trap & Exception support + */ + +static DEFINE_SPINLOCK(die_lock); + +int die(const char *str, struct pt_regs *regs, long err) +{ + static int die_counter; + int nl = 0; + + if (debugger(regs)) + return 1; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); +#ifdef CONFIG_PMAC_BACKLIGHT + if (_machine == _MACH_Pmac) { + set_backlight_enable(1); + set_backlight_level(BACKLIGHT_MAX); + } +#endif + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef 
CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); + nl = 1; +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); + nl = 1; +#endif +#ifdef CONFIG_PPC64 + switch (systemcfg->platform) { + case PLATFORM_PSERIES: + printk("PSERIES "); + nl = 1; + break; + case PLATFORM_PSERIES_LPAR: + printk("PSERIES LPAR "); + nl = 1; + break; + case PLATFORM_ISERIES_LPAR: + printk("ISERIES LPAR "); + nl = 1; + break; + case PLATFORM_POWERMAC: + printk("POWERMAC "); + nl = 1; + break; + case PLATFORM_BPA: + printk("BPA "); + nl = 1; + break; + } +#endif + if (nl) + printk("\n"); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) { + panic("Fatal exception"); + } + do_exit(err); + + return 0; +} + +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + siginfo_t info; + + if (!user_mode(regs)) { + if (die("Exception in kernel mode", regs, signr)) + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = signr; + info.si_code = code; + info.si_addr = (void __user *) addr; + force_sig_info(signr, &info, current); + + /* + * Init gets no signals that it doesn't have a handler for. + * That's all very well, but if it has caused a synchronous + * exception and we ignore the resulting signal, it will just + * generate the same exception over and over again and we get + * nowhere. Better to kill it and let the kernel panic. 
+ */ + if (current->pid == 1) { + __sighandler_t handler; + + spin_lock_irq(&current->sighand->siglock); + handler = current->sighand->action[signr-1].sa.sa_handler; + spin_unlock_irq(&current->sighand->siglock); + if (handler == SIG_DFL) { + /* init has generated a synchronous exception + and it doesn't have a handler for the signal */ + printk(KERN_CRIT "init has generated signal %d " + "but has no handler for it\n", signr); + do_exit(signr); + } + } +} + +#ifdef CONFIG_PPC64 +void system_reset_exception(struct pt_regs *regs) +{ + /* See if any machine dependent calls */ + if (ppc_md.system_reset_exception) + ppc_md.system_reset_exception(regs); + + die("System Reset", regs, SIGABRT); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable System Reset"); + + /* What should we do here? We could issue a shutdown or hard reset. */ +} +#endif + +/* + * I/O accesses can cause machine checks on powermacs. + * Check if the NIP corresponds to the address of a sync + * instruction for which there is an entry in the exception + * table. + * Note that the 601 only takes a machine check on TEA + * (transfer error ack) signal assertion, and does not + * set any of the top 16 bits of SRR1. + * -- paulus. + */ +static inline int check_io_access(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_PMAC + unsigned long msr = regs->msr; + const struct exception_table_entry *entry; + unsigned int *nip = (unsigned int *)regs->nip; + + if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000))) + && (entry = search_exception_tables(regs->nip)) != NULL) { + /* + * Check that it's a sync instruction, or somewhere + * in the twi; isync; nop sequence that inb/inw/inl uses. + * As the address is in the exception table + * we should be able to read the instr there. + * For the debug message, we look at the preceding + * load or store. 
+ */ + if (*nip == 0x60000000) /* nop */ + nip -= 2; + else if (*nip == 0x4c00012c) /* isync */ + --nip; + if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { + /* sync or twi */ + unsigned int rb; + + --nip; + rb = (*nip >> 11) & 0x1f; + printk(KERN_DEBUG "%s bad port %lx at %p\n", + (*nip & 0x100)? "OUT to": "IN from", + regs->gpr[rb] - _IO_BASE, nip); + regs->msr |= MSR_RI; + regs->nip = entry->fixup; + return 1; + } + } +#endif /* CONFIG_PPC_PMAC */ + return 0; +} + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +/* On 4xx, the reason for the machine check or program exception + is in the ESR. */ +#define get_reason(regs) ((regs)->dsisr) +#ifndef CONFIG_FSL_BOOKE +#define get_mc_reason(regs) ((regs)->dsisr) +#else +#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) +#endif +#define REASON_FP ESR_FP +#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) +#define REASON_PRIVILEGED ESR_PPR +#define REASON_TRAP ESR_PTR + +/* single-step stuff */ +#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) +#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) + +#else +/* On non-4xx, the reason for the machine check or program + exception is in the MSR. 
*/ +#define get_reason(regs) ((regs)->msr) +#define get_mc_reason(regs) ((regs)->msr) +#define REASON_FP 0x100000 +#define REASON_ILLEGAL 0x80000 +#define REASON_PRIVILEGED 0x40000 +#define REASON_TRAP 0x20000 + +#define single_stepping(regs) ((regs)->msr & MSR_SE) +#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) +#endif + +/* + * This is "fall-back" implementation for configurations + * which don't provide platform-specific machine check info + */ +void __attribute__ ((weak)) +platform_machine_check(struct pt_regs *regs) +{ +} + +void MachineCheckException(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC64 + int recover = 0; + + /* See if any machine dependent calls */ + if (ppc_md.machine_check_exception) + recover = ppc_md.machine_check_exception(regs); + + if (recover) + return; +#else + unsigned long reason = get_mc_reason(regs); + + if (user_mode(regs)) { + regs->msr |= MSR_RI; + _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); + return; + } + +#if defined(CONFIG_8xx) && defined(CONFIG_PCI) + /* the qspan pci read routines can cause machine checks -- Cort */ + bad_page_fault(regs, regs->dar, SIGBUS); + return; +#endif + + if (debugger_fault_handler(regs)) { + regs->msr |= MSR_RI; + return; + } + + if (check_io_access(regs)) + return; + +#if defined(CONFIG_4xx) && !defined(CONFIG_440A) + if (reason & ESR_IMCP) { + printk("Instruction"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } else + printk("Data"); + printk(" machine check in kernel mode.\n"); +#elif defined(CONFIG_440A) + printk("Machine check in kernel mode.\n"); + if (reason & ESR_IMCP){ + printk("Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } + else { + u32 mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk("Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk("Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk("Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk("TLB Parity Error\n"); + if (mcsr & MCSR_ICP){ + 
flush_instruction_cache(); + printk("I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk("D-Cache Search Parity Error\n"); + if (mcsr & MCSR_DCFP) + printk("D-Cache Flush Parity Error\n"); + if (mcsr & MCSR_IMPE) + printk("Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + } +#elif defined (CONFIG_E500) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_ICPERR) + printk("Instruction Cache Parity Error\n"); + if (reason & MCSR_DCP_PERR) + printk("Data Cache Push Parity Error\n"); + if (reason & MCSR_DCPERR) + printk("Data Cache Parity Error\n"); + if (reason & MCSR_GL_CI) + printk("Guarded Load or Cache-Inhibited stwcx.\n"); + if (reason & MCSR_BUS_IAERR) + printk("Bus - Instruction Address Error\n"); + if (reason & MCSR_BUS_RAERR) + printk("Bus - Read Address Error\n"); + if (reason & MCSR_BUS_WAERR) + printk("Bus - Write Address Error\n"); + if (reason & MCSR_BUS_IBERR) + printk("Bus - Instruction Data Error\n"); + if (reason & MCSR_BUS_RBERR) + printk("Bus - Read Data Bus Error\n"); + if (reason & MCSR_BUS_WBERR) + printk("Bus - Read Data Bus Error\n"); + if (reason & MCSR_BUS_IPERR) + printk("Bus - Instruction Parity Error\n"); + if (reason & MCSR_BUS_RPERR) + printk("Bus - Read Parity Error\n"); +#elif defined (CONFIG_E200) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_CP_PERR) + printk("Cache Push Parity Error\n"); + if (reason & MCSR_CPERR) + printk("Cache Parity Error\n"); + if (reason & MCSR_EXCP_ERR) + printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); + if (reason & MCSR_BUS_IRERR) + printk("Bus - Read Bus Error on instruction fetch\n"); + if (reason & MCSR_BUS_DRERR) + printk("Bus - Read Bus Error on data load\n"); 
+ if (reason & MCSR_BUS_WRERR) + printk("Bus - Write Bus Error on buffered store or cache line push\n"); +#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */ + printk("Machine check in kernel mode.\n"); + printk("Caused by (from SRR1=%lx): ", reason); + switch (reason & 0x601F0000) { + case 0x80000: + printk("Machine check signal\n"); + break; + case 0: /* for 601 */ + case 0x40000: + case 0x140000: /* 7450 MSS error and TEA */ + printk("Transfer error ack signal\n"); + break; + case 0x20000: + printk("Data parity error signal\n"); + break; + case 0x10000: + printk("Address parity error signal\n"); + break; + case 0x20000000: + printk("L1 Data Cache error\n"); + break; + case 0x40000000: + printk("L1 Instruction Cache error\n"); + break; + case 0x00100000: + printk("L2 data cache parity error\n"); + break; + default: + printk("Unknown values in msr\n"); + } +#endif /* CONFIG_4xx */ + + /* + * Optional platform-provided routine to print out + * additional info, e.g. bus error registers. 
+ */ + platform_machine_check(regs); +#endif /* CONFIG_PPC64 */ + + if (debugger_fault_handler(regs)) + return; + die("Machine check", regs, SIGBUS); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable Machine check"); +} + +void SMIException(struct pt_regs *regs) +{ + die("System Management Interrupt", regs, SIGABRT); +} + +void UnknownException(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, 0, 0); +} + +void InstructionBreakpoint(struct pt_regs *regs) +{ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_iabr_match(regs)) + return; + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); +} + +void RunModeException(struct pt_regs *regs) +{ + _exception(SIGTRAP, regs, 0, 0); +} + +void SingleStepException(struct pt_regs *regs) +{ + regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_sstep(regs)) + return; + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); +} + +/* + * After we have successfully emulated an instruction, we have to + * check if the instruction was being single-stepped, and if so, + * pretend we got a single-step exception. This was pointed out + * by Kumar Gala. -- paulus + */ +static void emulate_single_step(struct pt_regs *regs) +{ + if (single_stepping(regs)) { + clear_single_step(regs); + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } +} + +/* Illegal instruction emulation support. Originally written to + * provide the PVR to user applications using the mfspr rd, PVR. + * Return non-zero if we can't emulate, or -EFAULT if the associated + * memory access caused an access fault. Return zero on success. + * + * There are a couple of ways to do this, either "decode" the instruction + * or directly match lots of bits. 
In this case, matching lots of + * bits is faster and easier. + * + */ +#define INST_MFSPR_PVR 0x7c1f42a6 +#define INST_MFSPR_PVR_MASK 0xfc1fffff + +#define INST_DCBA 0x7c0005ec +#define INST_DCBA_MASK 0x7c0007fe + +#define INST_MCRXR 0x7c000400 +#define INST_MCRXR_MASK 0x7c0007fe + +#define INST_STRING 0x7c00042a +#define INST_STRING_MASK 0x7c0007fe +#define INST_STRING_GEN_MASK 0x7c00067e +#define INST_LSWI 0x7c0004aa +#define INST_LSWX 0x7c00042a +#define INST_STSWI 0x7c0005aa +#define INST_STSWX 0x7c00052a + +static int emulate_string_inst(struct pt_regs *regs, u32 instword) +{ + u8 rT = (instword >> 21) & 0x1f; + u8 rA = (instword >> 16) & 0x1f; + u8 NB_RB = (instword >> 11) & 0x1f; + u32 num_bytes; + unsigned long EA; + int pos = 0; + + /* Early out if we are an invalid form of lswx */ + if ((instword & INST_STRING_MASK) == INST_LSWX) + if ((rT == rA) || (rT == NB_RB)) + return -EINVAL; + + EA = (rA == 0) ? 0 : regs->gpr[rA]; + + switch (instword & INST_STRING_MASK) { + case INST_LSWX: + case INST_STSWX: + EA += NB_RB; + num_bytes = regs->xer & 0x7f; + break; + case INST_LSWI: + case INST_STSWI: + num_bytes = (NB_RB == 0) ? 
32 : NB_RB; + break; + default: + return -EINVAL; + } + + while (num_bytes != 0) + { + u8 val; + u32 shift = 8 * (3 - (pos & 0x3)); + + switch ((instword & INST_STRING_MASK)) { + case INST_LSWX: + case INST_LSWI: + if (get_user(val, (u8 __user *)EA)) + return -EFAULT; + /* first time updating this reg, + * zero it out */ + if (pos == 0) + regs->gpr[rT] = 0; + regs->gpr[rT] |= val << shift; + break; + case INST_STSWI: + case INST_STSWX: + val = regs->gpr[rT] >> shift; + if (put_user(val, (u8 __user *)EA)) + return -EFAULT; + break; + } + /* move EA to next address */ + EA += 1; + num_bytes--; + + /* manage our position within the register */ + if (++pos == 4) { + pos = 0; + if (++rT == 32) + rT = 0; + } + } + + return 0; +} + +static int emulate_instruction(struct pt_regs *regs) +{ + u32 instword; + u32 rd; + + if (!user_mode(regs)) + return -EINVAL; + CHECK_FULL_REGS(regs); + + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EFAULT; + + /* Emulate the mfspr rD, PVR. */ + if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_PVR); + return 0; + } + + /* Emulating the dcba insn is just a no-op. */ + if ((instword & INST_DCBA_MASK) == INST_DCBA) + return 0; + + /* Emulate the mcrxr insn. */ + if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { + int shift = (instword >> 21) & 0x1c; + unsigned long msk = 0xf0000000UL >> shift; + + regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); + regs->xer &= ~0xf0000000UL; + return 0; + } + + /* Emulate load/store string insn. */ + if ((instword & INST_STRING_GEN_MASK) == INST_STRING) + return emulate_string_inst(regs, instword); + + return -EINVAL; +} + +/* + * Look through the list of trap instructions that are used for BUG(), + * BUG_ON() and WARN_ON() and see if we hit one. At this point we know + * that the exception was caused by a trap instruction of some kind. + * Returns 1 if we should continue (i.e. 
it was a WARN_ON) or 0 + * otherwise. + */ +extern struct bug_entry __start___bug_table[], __stop___bug_table[]; + +#ifndef CONFIG_MODULES +#define module_find_bug(x) NULL +#endif + +struct bug_entry *find_bug(unsigned long bugaddr) +{ + struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug->bug_addr) + return bug; + return module_find_bug(bugaddr); +} + +int check_bug_trap(struct pt_regs *regs) +{ + struct bug_entry *bug; + unsigned long addr; + + if (regs->msr & MSR_PR) + return 0; /* not in kernel */ + addr = regs->nip; /* address of trap instruction */ + if (addr < PAGE_OFFSET) + return 0; + bug = find_bug(regs->nip); + if (bug == NULL) + return 0; + if (bug->line & BUG_WARNING_TRAP) { + /* this is a WARN_ON rather than BUG/BUG_ON */ +#ifdef CONFIG_XMON + xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); +#endif /* CONFIG_XMON */ + printk(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); + dump_stack(); + return 1; + } +#ifdef CONFIG_XMON + xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + xmon(regs); +#endif /* CONFIG_XMON */ + printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + + return 0; +} + +void ProgramCheckException(struct pt_regs *regs) +{ + unsigned int reason = get_reason(regs); + extern int do_mathemu(struct pt_regs *regs); + +#ifdef CONFIG_MATH_EMULATION + /* (reason & REASON_ILLEGAL) would be the obvious thing here, + * but there seems to be a hardware bug on the 405GP (RevD) + * that means ESR is sometimes set incorrectly - either to + * ESR_DST (!?) or 0. In the process of chasing this with the + * hardware people - not sure if it can happen on any illegal + * instruction or only on FP instructions, whether there is a + * pattern to occurences etc. 
-dgibson 31/Mar/2003 */ + if (!(reason & REASON_TRAP) && do_mathemu(regs) == 0) { + emulate_single_step(regs); + return; + } +#endif /* CONFIG_MATH_EMULATION */ + + if (reason & REASON_FP) { + /* IEEE FP exception */ + int code = 0; + u32 fpscr; + + /* We must make sure the FP state is consistent with + * our MSR_FP in regs + */ + preempt_disable(); + if (regs->msr & MSR_FP) + giveup_fpu(current); + preempt_enable(); + + fpscr = current->thread.fpscr; + fpscr &= fpscr << 22; /* mask summary bits with enables */ + if (fpscr & FPSCR_VX) + code = FPE_FLTINV; + else if (fpscr & FPSCR_OX) + code = FPE_FLTOVF; + else if (fpscr & FPSCR_UX) + code = FPE_FLTUND; + else if (fpscr & FPSCR_ZX) + code = FPE_FLTDIV; + else if (fpscr & FPSCR_XX) + code = FPE_FLTRES; + _exception(SIGFPE, regs, code, regs->nip); + return; + } + + if (reason & REASON_TRAP) { + /* trap exception */ + if (debugger_bpt(regs)) + return; + if (check_bug_trap(regs)) { + regs->nip += 4; + return; + } + _exception(SIGTRAP, regs, TRAP_BRKPT, 0); + return; + } + + /* Try to emulate it if we should. 
*/ + if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { + switch (emulate_instruction(regs)) { + case 0: + regs->nip += 4; + emulate_single_step(regs); + return; + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + } + + if (reason & REASON_PRIVILEGED) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); +} + +void AlignmentException(struct pt_regs *regs) +{ + int fixed; + + fixed = fix_alignment(regs); + + if (fixed == 1) { + regs->nip += 4; /* skip over emulated instruction */ + emulate_single_step(regs); + return; + } + + /* Operand address was bad */ + if (fixed == -EFAULT) { + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar); + else + /* Search exception table */ + bad_page_fault(regs, regs->dar, SIGSEGV); + return; + } + _exception(SIGBUS, regs, BUS_ADRALN, regs->dar); +} + +void StackOverflow(struct pt_regs *regs) +{ + printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", + current, regs->gpr[1]); + debugger(regs); + show_regs(regs); + panic("kernel stack overflow"); +} + +void nonrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", + regs->nip, regs->msr); + debugger(regs); + die("nonrecoverable exception", regs, SIGKILL); +} + +void trace_syscall(struct pt_regs *regs) +{ + printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, current->pid, regs->nip, regs->link, regs->gpr[0], + regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); +} + +#ifdef CONFIG_8xx +void SoftwareEmulation(struct pt_regs *regs) +{ + extern int do_mathemu(struct pt_regs *); + extern int Soft_emulate_8xx(struct pt_regs *); + int errcode; + + CHECK_FULL_REGS(regs); + + if (!user_mode(regs)) { + debugger(regs); + die("Kernel Mode Software FPU Emulation", regs, SIGFPE); + } + +#ifdef CONFIG_MATH_EMULATION + errcode = do_mathemu(regs); +#else + errcode = 
Soft_emulate_8xx(regs); +#endif + if (errcode) { + if (errcode > 0) + _exception(SIGFPE, regs, 0, 0); + else if (errcode == -EFAULT) + _exception(SIGSEGV, regs, 0, 0); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + } else + emulate_single_step(regs); +} +#endif /* CONFIG_8xx */ + +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + +void DebugException(struct pt_regs *regs, unsigned long debug_status) +{ + if (debug_status & DBSR_IC) { /* instruction completion */ + regs->msr &= ~MSR_DE; + if (user_mode(regs)) { + current->thread.dbcr0 &= ~DBCR0_IC; + } else { + /* Disable instruction completion */ + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); + /* Clear the instruction completion event */ + mtspr(SPRN_DBSR, DBSR_IC); + if (debugger_sstep(regs)) + return; + } + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } +} +#endif /* CONFIG_4xx || CONFIG_BOOKE */ + +#if !defined(CONFIG_TAU_INT) +void TAUException(struct pt_regs *regs) +{ + printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", + regs->nip, regs->msr, regs->trap, print_tainted()); +} +#endif /* CONFIG_INT_TAU */ + +void AltivecUnavailException(struct pt_regs *regs) +{ + static int kernel_altivec_count; + +#ifndef CONFIG_ALTIVEC + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't support altivec. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } +#endif + /* The kernel has executed an altivec instruction without + first enabling altivec. Whinge but let it do it. 
*/ + if (++kernel_altivec_count < 10) + printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", + current, regs->nip); + regs->msr |= MSR_VEC; +} + +#ifdef CONFIG_ALTIVEC +void AltivecAssistException(struct pt_regs *regs) +{ + int err; + + preempt_disable(); + if (regs->msr & MSR_VEC) + giveup_altivec(current); + preempt_enable(); + if (!user_mode(regs)) { + printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" + " at %lx\n", regs->nip); + die("Kernel Altivec assist exception", regs, SIGILL); + } + + err = emulate_altivec(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); + } else { + /* didn't recognize the instruction */ + /* XXX quick hack for now: set the non-Java bit in the VSCR */ + if (printk_ratelimit()) + printk(KERN_ERR "Unrecognized altivec instruction " + "in %s at %lx\n", current->comm, regs->nip); + current->thread.vscr.u[3] |= 0x10000; + } +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_E500 +void PerformanceMonitorException(struct pt_regs *regs) +{ + perf_irq(regs); +} +#endif + +#ifdef CONFIG_FSL_BOOKE +void CacheLockingException(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + /* We treat cache locking instructions from the user + * as priv ops, in the future we could try to do + * something smarter + */ + if (error_code & (ESR_DLK|ESR_ILK)) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + return; +} +#endif /* CONFIG_FSL_BOOKE */ + +#ifdef CONFIG_SPE +void SPEFloatingPointException(struct pt_regs *regs) +{ + unsigned long spefscr; + int fpexc_mode; + int code = 0; + + spefscr = current->thread.spefscr; + fpexc_mode = current->thread.fpexc_mode; + + /* Hardware does not neccessarily set sticky + * underflow/overflow/invalid flags */ + if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) { + code = 
FPE_FLTOVF; + spefscr |= SPEFSCR_FOVFS; + } + else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) { + code = FPE_FLTUND; + spefscr |= SPEFSCR_FUNFS; + } + else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV)) + code = FPE_FLTDIV; + else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) { + code = FPE_FLTINV; + spefscr |= SPEFSCR_FINVS; + } + else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES)) + code = FPE_FLTRES; + + current->thread.spefscr = spefscr; + + _exception(SIGFPE, regs, code, regs->nip); + return; +} +#endif + +#ifdef CONFIG_BOOKE_WDT +/* + * Default handler for a Watchdog exception, + * spins until a reboot occurs + */ +void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) +{ + /* Generic WatchdogHandler, implement your own */ + mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); + return; +} + +void WatchdogException(struct pt_regs *regs) +{ + printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); + WatchdogHandler(regs); +} +#endif + +void __init trap_init(void) +{ +} diff --git a/arch/powerpc/kernel/traps64.c b/arch/powerpc/kernel/traps64.c new file mode 100644 --- /dev/null +++ b/arch/powerpc/kernel/traps64.c @@ -0,0 +1,568 @@ +/* + * linux/arch/ppc64/kernel/traps.c + * + * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Modified by Cort Dougan (cort at cs.nmt.edu) + * and Paul Mackerras (paulus at cs.anu.edu.au) + */ + +/* + * This file handles the architecture-dependent parts of hardware exceptions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUGGER +int (*__debugger)(struct pt_regs *regs); +int (*__debugger_ipi)(struct pt_regs *regs); +int (*__debugger_bpt)(struct pt_regs *regs); +int (*__debugger_sstep)(struct pt_regs *regs); +int (*__debugger_iabr_match)(struct pt_regs *regs); +int (*__debugger_dabr_match)(struct pt_regs *regs); +int (*__debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(__debugger); +EXPORT_SYMBOL(__debugger_ipi); +EXPORT_SYMBOL(__debugger_bpt); +EXPORT_SYMBOL(__debugger_sstep); +EXPORT_SYMBOL(__debugger_iabr_match); +EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_fault_handler); +#endif + +struct notifier_block *powerpc_die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&powerpc_die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + +/* + * Trap & Exception support + */ + +static DEFINE_SPINLOCK(die_lock); + +int die(const char *str, struct pt_regs *regs, long err) +{ + static int die_counter; + int nl = 0; + + if (debugger(regs)) + return 1; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC 
"); + nl = 1; +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); + nl = 1; +#endif + switch(systemcfg->platform) { + case PLATFORM_PSERIES: + printk("PSERIES "); + nl = 1; + break; + case PLATFORM_PSERIES_LPAR: + printk("PSERIES LPAR "); + nl = 1; + break; + case PLATFORM_ISERIES_LPAR: + printk("ISERIES LPAR "); + nl = 1; + break; + case PLATFORM_POWERMAC: + printk("POWERMAC "); + nl = 1; + break; + case PLATFORM_BPA: + printk("BPA "); + nl = 1; + break; + } + if (nl) + printk("\n"); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) { + printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); + ssleep(5); + panic("Fatal exception"); + } + do_exit(SIGSEGV); + + return 0; +} + +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + siginfo_t info; + + if (!user_mode(regs)) { + if (die("Exception in kernel mode", regs, signr)) + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = signr; + info.si_code = code; + info.si_addr = (void __user *) addr; + force_sig_info(signr, &info, current); +} + +void system_reset_exception(struct pt_regs *regs) +{ + /* See if any machine dependent calls */ + if (ppc_md.system_reset_exception) + ppc_md.system_reset_exception(regs); + + die("System Reset", regs, 0); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable System Reset"); + + /* What should we do here? We could issue a shutdown or hard reset. 
*/ +} + +void machine_check_exception(struct pt_regs *regs) +{ + int recover = 0; + + /* See if any machine dependent calls */ + if (ppc_md.machine_check_exception) + recover = ppc_md.machine_check_exception(regs); + + if (recover) + return; + + if (debugger_fault_handler(regs)) + return; + die("Machine check", regs, 0); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable Machine check"); +} + +void unknown_exception(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, 0, 0); +} + +void instruction_breakpoint_exception(struct pt_regs *regs) +{ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_iabr_match(regs)) + return; + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); +} + +void __kprobes single_step_exception(struct pt_regs *regs) +{ + regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_sstep(regs)) + return; + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); +} + +/* + * After we have successfully emulated an instruction, we have to + * check if the instruction was being single-stepped, and if so, + * pretend we got a single-step exception. This was pointed out + * by Kumar Gala. 
-- paulus + */ +static inline void emulate_single_step(struct pt_regs *regs) +{ + if (regs->msr & MSR_SE) + single_step_exception(regs); +} + +static void parse_fpe(struct pt_regs *regs) +{ + int code = 0; + unsigned long fpscr; + + flush_fp_to_thread(current); + + fpscr = current->thread.fpscr; + + /* Invalid operation */ + if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) + code = FPE_FLTINV; + + /* Overflow */ + else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) + code = FPE_FLTOVF; + + /* Underflow */ + else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) + code = FPE_FLTUND; + + /* Divide by zero */ + else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) + code = FPE_FLTDIV; + + /* Inexact result */ + else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) + code = FPE_FLTRES; + + _exception(SIGFPE, regs, code, regs->nip); +} + +/* + * Illegal instruction emulation support. Return non-zero if we can't + * emulate, or -EFAULT if the associated memory access caused an access + * fault. Return zero on success. + */ + +#define INST_MFSPR_PVR 0x7c1f42a6 +#define INST_MFSPR_PVR_MASK 0xfc1fffff + +#define INST_DCBA 0x7c0005ec +#define INST_DCBA_MASK 0x7c0007fe + +#define INST_MCRXR 0x7c000400 +#define INST_MCRXR_MASK 0x7c0007fe + +static int emulate_instruction(struct pt_regs *regs) +{ + unsigned int instword; + + if (!user_mode(regs)) + return -EINVAL; + + CHECK_FULL_REGS(regs); + + if (get_user(instword, (unsigned int __user *)(regs->nip))) + return -EFAULT; + + /* Emulate the mfspr rD, PVR. */ + if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { + unsigned int rd; + + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_PVR); + return 0; + } + + /* Emulating the dcba insn is just a no-op. */ + if ((instword & INST_DCBA_MASK) == INST_DCBA) { + static int warned; + + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'dcba' insn\n", + current->pid, current->comm); + warned = 1; + } + return 0; + } + + /* Emulate the mcrxr insn. 
*/ + if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { + static int warned; + unsigned int shift; + + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'mcrxr' insn\n", + current->pid, current->comm); + warned = 1; + } + + shift = (instword >> 21) & 0x1c; + regs->ccr &= ~(0xf0000000 >> shift); + regs->ccr |= (regs->xer & 0xf0000000) >> shift; + regs->xer &= ~0xf0000000; + return 0; + } + + return -EINVAL; +} + +/* + * Look through the list of trap instructions that are used for BUG(), + * BUG_ON() and WARN_ON() and see if we hit one. At this point we know + * that the exception was caused by a trap instruction of some kind. + * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 + * otherwise. + */ +extern struct bug_entry __start___bug_table[], __stop___bug_table[]; + +#ifndef CONFIG_MODULES +#define module_find_bug(x) NULL +#endif + +struct bug_entry *find_bug(unsigned long bugaddr) +{ + struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug->bug_addr) + return bug; + return module_find_bug(bugaddr); +} + +static int +check_bug_trap(struct pt_regs *regs) +{ + struct bug_entry *bug; + unsigned long addr; + + if (regs->msr & MSR_PR) + return 0; /* not in kernel */ + addr = regs->nip; /* address of trap instruction */ + if (addr < PAGE_OFFSET) + return 0; + bug = find_bug(regs->nip); + if (bug == NULL) + return 0; + if (bug->line & BUG_WARNING_TRAP) { + /* this is a WARN_ON rather than BUG/BUG_ON */ + printk(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); + show_stack(current, (void *)regs->gpr[1]); + return 1; + } + printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + return 0; +} + +void __kprobes program_check_exception(struct pt_regs *regs) +{ + if (debugger_fault_handler(regs)) + return; + + if (regs->msr & 0x100000) { + /* IEEE FP exception */ + parse_fpe(regs); + } else if (regs->msr & 
0x20000) { + /* trap exception */ + + if (notify_die(DIE_BPT, "breakpoint", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_bpt(regs)) + return; + + if (check_bug_trap(regs)) { + regs->nip += 4; + return; + } + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + + } else { + /* Privileged or illegal instruction; try to emulate it. */ + switch (emulate_instruction(regs)) { + case 0: + regs->nip += 4; + emulate_single_step(regs); + break; + + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + break; + + default: + if (regs->msr & 0x40000) + /* priveleged */ + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + /* illegal */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + break; + } + } +} + +void kernel_fp_unavailable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); +} + +void altivec_unavailable_exception(struct pt_regs *regs) +{ + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't support altivec. 
*/ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } + printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); +} + +extern perf_irq_t perf_irq; + +void performance_monitor_exception(struct pt_regs *regs) +{ + perf_irq(regs); +} + +void alignment_exception(struct pt_regs *regs) +{ + int fixed; + + fixed = fix_alignment(regs); + + if (fixed == 1) { + regs->nip += 4; /* skip over emulated instruction */ + emulate_single_step(regs); + return; + } + + /* Operand address was bad */ + if (fixed == -EFAULT) { + if (user_mode(regs)) { + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar); + } else { + /* Search exception table */ + bad_page_fault(regs, regs->dar, SIGSEGV); + } + + return; + } + + _exception(SIGBUS, regs, BUS_ADRALN, regs->nip); +} + +#ifdef CONFIG_ALTIVEC +void altivec_assist_exception(struct pt_regs *regs) +{ + int err; + siginfo_t info; + + if (!user_mode(regs)) { + printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" + " at %lx\n", regs->nip); + die("Kernel VMX/Altivec assist exception", regs, SIGILL); + } + + flush_altivec_to_thread(current); + + err = emulate_altivec(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) regs->nip; + force_sig_info(SIGSEGV, &info, current); + } else { + /* didn't recognize the instruction */ + /* XXX quick hack for now: set the non-Java bit in the VSCR */ + if (printk_ratelimit()) + printk(KERN_ERR "Unrecognized altivec instruction " + "in %s at %lx\n", current->comm, regs->nip); + current->thread.vscr.u[3] |= 0x10000; + } +} +#endif /* CONFIG_ALTIVEC */ + +/* + * We enter here if we get an unrecoverable exception, that is, one 
+ * that happened at a point where the RI (recoverable interrupt) bit + * in the MSR is 0. This indicates that SRR0/1 are live, and that + * we therefore lost state by taking this exception. + */ +void unrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", + regs->trap, regs->nip); + die("Unrecoverable exception", regs, SIGABRT); +} + +/* + * We enter here if we discover during exception entry that we are + * running in supervisor mode with a userspace value in the stack pointer. + */ +void kernel_bad_stack(struct pt_regs *regs) +{ + printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", + regs->gpr[1], regs->nip); + die("Bad kernel stack pointer", regs, SIGABRT); +} + +void __init trap_init(void) +{ +} diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -2,10 +2,12 @@ # Makefile for the linux ppc64 kernel. # +ifneq ($(CONFIG_PPC_MERGE),y) + EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds -obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ +obj-y := setup.o entry.o traps64.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ align.o semaphore.o bitops.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ @@ -77,3 +79,9 @@ endif # These are here while we do the architecture merge vecemu-y += ../../powerpc/kernel/vecemu.o vector-y += ../../powerpc/kernel/vector.o +idle_power4-y += ../../powerpc/kernel/idle_power4.o +traps64-y += ../../powerpc/kernel/traps64.o + +else + +endif diff --git a/arch/ppc64/kernel/idle_power4.S b/arch/ppc64/kernel/idle_power4.S deleted file mode 100644 --- a/arch/ppc64/kernel/idle_power4.S +++ /dev/null @@ -1,79 +0,0 @@ -/* - * This file contains the power_save function for 6xx & 7xxx CPUs - * rewritten in assembler - * - * Warning ! This code assumes that if your machine has a 750fx - * it will have PLL 1 set to low speed mode (used during NAP/DOZE). 
- * if this is not the case some additional changes will have to - * be done to check a runtime var (a bit like powersave-nap) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG - - .text - -/* - * Here is the power_save_6xx function. This could eventually be - * split into several functions & changing the function pointer - * depending on the various features. - */ -_GLOBAL(power4_idle) -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) - /* We must dynamically check for the NAP feature as it - * can be cleared by CPU init after the fixups are done - */ - LOADBASE(r3,cur_cpu_spec) - ld r4,cur_cpu_spec@l(r3) - ld r4,CPU_SPEC_FEATURES(r4) - andi. r0,r4,CPU_FTR_CAN_NAP - beqlr - /* Now check if user or arch enabled NAP mode */ - LOADBASE(r3,powersave_nap) - lwz r4,powersave_nap@l(r3) - cmpwi 0,r4,0 - beqlr - - /* Clear MSR:EE */ - mfmsr r7 - li r4,0 - ori r4,r4,MSR_EE - andc r0,r7,r4 - mtmsrd r0 - - /* Check current_thread_info()->flags */ - clrrdi r4,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r4) - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - mtmsrd r7 /* out of line this ?
*/ - blr -1: - /* Go to NAP now */ -BEGIN_FTR_SECTION - DSSALL - sync -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - oris r7,r7,MSR_POW@h - sync - isync - mtmsrd r7 - isync - sync - blr - diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c deleted file mode 100644 --- a/arch/ppc64/kernel/traps.c +++ /dev/null @@ -1,568 +0,0 @@ -/* - * linux/arch/ppc64/kernel/traps.c - * - * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Modified by Cort Dougan (cort at cs.nmt.edu) - * and Paul Mackerras (paulus at cs.anu.edu.au) - */ - -/* - * This file handles the architecture-dependent parts of hardware exceptions - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_DEBUGGER -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); - -EXPORT_SYMBOL(__debugger); -EXPORT_SYMBOL(__debugger_ipi); -EXPORT_SYMBOL(__debugger_bpt); -EXPORT_SYMBOL(__debugger_sstep); -EXPORT_SYMBOL(__debugger_iabr_match); -EXPORT_SYMBOL(__debugger_dabr_match); -EXPORT_SYMBOL(__debugger_fault_handler); -#endif - -struct notifier_block *powerpc_die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); - -int register_die_notifier(struct notifier_block *nb) -{ - int err = 0; - unsigned long flags; - -
spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&powerpc_die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; -} - -/* - * Trap & Exception support - */ - -static DEFINE_SPINLOCK(die_lock); - -int die(const char *str, struct pt_regs *regs, long err) -{ - static int die_counter; - int nl = 0; - - if (debugger(regs)) - return 1; - - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); - nl = 1; -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); - nl = 1; -#endif - switch(systemcfg->platform) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_BPA: - printk("BPA "); - nl = 1; - break; - } - if (nl) - printk("\n"); - print_modules(); - show_regs(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - - if (in_interrupt()) - panic("Fatal exception in interrupt"); - - if (panic_on_oops) { - printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); - ssleep(5); - panic("Fatal exception"); - } - do_exit(SIGSEGV); - - return 0; -} - -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) -{ - siginfo_t info; - - if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } - - memset(&info, 0, sizeof(info)); - info.si_signo = signr; - info.si_code = code; - info.si_addr = (void __user *) addr; - force_sig_info(signr, &info, current); -} - -void system_reset_exception(struct pt_regs *regs) -{ - /* See if any machine dependent calls */ - if 
(ppc_md.system_reset_exception) - ppc_md.system_reset_exception(regs); - - die("System Reset", regs, 0); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable System Reset"); - - /* What should we do here? We could issue a shutdown or hard reset. */ -} - -void machine_check_exception(struct pt_regs *regs) -{ - int recover = 0; - - /* See if any machine dependent calls */ - if (ppc_md.machine_check_exception) - recover = ppc_md.machine_check_exception(regs); - - if (recover) - return; - - if (debugger_fault_handler(regs)) - return; - die("Machine check", regs, 0); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable Machine check"); -} - -void unknown_exception(struct pt_regs *regs) -{ - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", - regs->nip, regs->msr, regs->trap); - - _exception(SIGTRAP, regs, 0, 0); -} - -void instruction_breakpoint_exception(struct pt_regs *regs) -{ - if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_iabr_match(regs)) - return; - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); -} - -void __kprobes single_step_exception(struct pt_regs *regs) -{ - regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ - - if (notify_die(DIE_SSTEP, "single_step", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_sstep(regs)) - return; - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); -} - -/* - * After we have successfully emulated an instruction, we have to - * check if the instruction was being single-stepped, and if so, - * pretend we got a single-step exception. This was pointed out - * by Kumar Gala. 
-- paulus - */ -static inline void emulate_single_step(struct pt_regs *regs) -{ - if (regs->msr & MSR_SE) - single_step_exception(regs); -} - -static void parse_fpe(struct pt_regs *regs) -{ - int code = 0; - unsigned long fpscr; - - flush_fp_to_thread(current); - - fpscr = current->thread.fpscr; - - /* Invalid operation */ - if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) - code = FPE_FLTINV; - - /* Overflow */ - else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) - code = FPE_FLTOVF; - - /* Underflow */ - else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) - code = FPE_FLTUND; - - /* Divide by zero */ - else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) - code = FPE_FLTDIV; - - /* Inexact result */ - else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) - code = FPE_FLTRES; - - _exception(SIGFPE, regs, code, regs->nip); -} - -/* - * Illegal instruction emulation support. Return non-zero if we can't - * emulate, or -EFAULT if the associated memory access caused an access - * fault. Return zero on success. - */ - -#define INST_MFSPR_PVR 0x7c1f42a6 -#define INST_MFSPR_PVR_MASK 0xfc1fffff - -#define INST_DCBA 0x7c0005ec -#define INST_DCBA_MASK 0x7c0007fe - -#define INST_MCRXR 0x7c000400 -#define INST_MCRXR_MASK 0x7c0007fe - -static int emulate_instruction(struct pt_regs *regs) -{ - unsigned int instword; - - if (!user_mode(regs)) - return -EINVAL; - - CHECK_FULL_REGS(regs); - - if (get_user(instword, (unsigned int __user *)(regs->nip))) - return -EFAULT; - - /* Emulate the mfspr rD, PVR. */ - if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { - unsigned int rd; - - rd = (instword >> 21) & 0x1f; - regs->gpr[rd] = mfspr(SPRN_PVR); - return 0; - } - - /* Emulating the dcba insn is just a no-op. */ - if ((instword & INST_DCBA_MASK) == INST_DCBA) { - static int warned; - - if (!warned) { - printk(KERN_WARNING - "process %d (%s) uses obsolete 'dcba' insn\n", - current->pid, current->comm); - warned = 1; - } - return 0; - } - - /* Emulate the mcrxr insn. 
*/ - if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { - static int warned; - unsigned int shift; - - if (!warned) { - printk(KERN_WARNING - "process %d (%s) uses obsolete 'mcrxr' insn\n", - current->pid, current->comm); - warned = 1; - } - - shift = (instword >> 21) & 0x1c; - regs->ccr &= ~(0xf0000000 >> shift); - regs->ccr |= (regs->xer & 0xf0000000) >> shift; - regs->xer &= ~0xf0000000; - return 0; - } - - return -EINVAL; -} - -/* - * Look through the list of trap instructions that are used for BUG(), - * BUG_ON() and WARN_ON() and see if we hit one. At this point we know - * that the exception was caused by a trap instruction of some kind. - * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 - * otherwise. - */ -extern struct bug_entry __start___bug_table[], __stop___bug_table[]; - -#ifndef CONFIG_MODULES -#define module_find_bug(x) NULL -#endif - -struct bug_entry *find_bug(unsigned long bugaddr) -{ - struct bug_entry *bug; - - for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) - if (bugaddr == bug->bug_addr) - return bug; - return module_find_bug(bugaddr); -} - -static int -check_bug_trap(struct pt_regs *regs) -{ - struct bug_entry *bug; - unsigned long addr; - - if (regs->msr & MSR_PR) - return 0; /* not in kernel */ - addr = regs->nip; /* address of trap instruction */ - if (addr < PAGE_OFFSET) - return 0; - bug = find_bug(regs->nip); - if (bug == NULL) - return 0; - if (bug->line & BUG_WARNING_TRAP) { - /* this is a WARN_ON rather than BUG/BUG_ON */ - printk(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); - show_stack(current, (void *)regs->gpr[1]); - return 1; - } - printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - return 0; -} - -void __kprobes program_check_exception(struct pt_regs *regs) -{ - if (debugger_fault_handler(regs)) - return; - - if (regs->msr & 0x100000) { - /* IEEE FP exception */ - parse_fpe(regs); - } else if (regs->msr & 
0x20000) { - /* trap exception */ - - if (notify_die(DIE_BPT, "breakpoint", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_bpt(regs)) - return; - - if (check_bug_trap(regs)) { - regs->nip += 4; - return; - } - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - - } else { - /* Privileged or illegal instruction; try to emulate it. */ - switch (emulate_instruction(regs)) { - case 0: - regs->nip += 4; - emulate_single_step(regs); - break; - - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - break; - - default: - if (regs->msr & 0x40000) - /* priveleged */ - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - else - /* illegal */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - break; - } - } -} - -void kernel_fp_unavailable_exception(struct pt_regs *regs) -{ - printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " - "%lx at %lx\n", regs->trap, regs->nip); - die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); -} - -void altivec_unavailable_exception(struct pt_regs *regs) -{ - if (user_mode(regs)) { - /* A user program has executed an altivec instruction, - but this kernel doesn't support altivec. 
*/ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } - printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " - "%lx at %lx\n", regs->trap, regs->nip); - die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); -} - -extern perf_irq_t perf_irq; - -void performance_monitor_exception(struct pt_regs *regs) -{ - perf_irq(regs); -} - -void alignment_exception(struct pt_regs *regs) -{ - int fixed; - - fixed = fix_alignment(regs); - - if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ - emulate_single_step(regs); - return; - } - - /* Operand address was bad */ - if (fixed == -EFAULT) { - if (user_mode(regs)) { - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar); - } else { - /* Search exception table */ - bad_page_fault(regs, regs->dar, SIGSEGV); - } - - return; - } - - _exception(SIGBUS, regs, BUS_ADRALN, regs->nip); -} - -#ifdef CONFIG_ALTIVEC -void altivec_assist_exception(struct pt_regs *regs) -{ - int err; - siginfo_t info; - - if (!user_mode(regs)) { - printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" - " at %lx\n", regs->nip); - die("Kernel VMX/Altivec assist exception", regs, SIGILL); - } - - flush_altivec_to_thread(current); - - err = emulate_altivec(regs); - if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ - emulate_single_step(regs); - return; - } - - if (err == -EFAULT) { - /* got an error reading the instruction */ - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) regs->nip; - force_sig_info(SIGSEGV, &info, current); - } else { - /* didn't recognize the instruction */ - /* XXX quick hack for now: set the non-Java bit in the VSCR */ - if (printk_ratelimit()) - printk(KERN_ERR "Unrecognized altivec instruction " - "in %s at %lx\n", current->comm, regs->nip); - current->thread.vscr.u[3] |= 0x10000; - } -} -#endif /* CONFIG_ALTIVEC */ - -/* - * We enter here if we get an unrecoverable exception, that is, one 
- * that happened at a point where the RI (recoverable interrupt) bit - * in the MSR is 0. This indicates that SRR0/1 are live, and that - * we therefore lost state by taking this exception. - */ -void unrecoverable_exception(struct pt_regs *regs) -{ - printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", - regs->trap, regs->nip); - die("Unrecoverable exception", regs, SIGABRT); -} - -/* - * We enter here if we discover during exception entry that we are - * running in supervisor mode with a userspace value in the stack pointer. - */ -void kernel_bad_stack(struct pt_regs *regs) -{ - printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", - regs->gpr[1], regs->nip); - die("Bad kernel stack pointer", regs, SIGABRT); -} - -void __init trap_init(void) -{ -} From sfr at canb.auug.org.au Sat Oct 1 00:14:39 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 00:14:39 +1000 Subject: [PATCH 9/9] powerpc: make iSeries boot In-Reply-To: <20050930233602.138b6e27.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> Message-ID: <20051001001439.2f04423d.sfr@canb.auug.org.au> Now that we use the device tree, it helps to build it in. It also helps to link the kernel at the correct address. 
Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 4 ++-- arch/powerpc/Kconfig.debug | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ 1b4c416e0cf237dce004392122db45eb22dbc416 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -317,7 +317,7 @@ config PPC_BPA config PPC_OF bool - depends on PPC_MULTIPLATFORM # for now + depends on PPC_MULTIPLATFORM || PPC_ISERIES default y config XICS @@ -836,7 +836,7 @@ endmenu if PPC64 config KERNEL_START hex - default "0xc0000000" + default "0xc000000000000000" endif source "net/Kconfig" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -55,7 +55,7 @@ config BDI_SWITCH config BOOTX_TEXT bool "Support for early boot text console (BootX or OpenFirmware only)" - depends PPC_OF + depends PPC_OF && !PPC_ISERIES help Say Y here to see progress messages from the boot firmware in text mode. Requires either BootX or Open Firmware. From arnd at arndb.de Sat Oct 1 00:28:48 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Fri, 30 Sep 2005 16:28:48 +0200 Subject: libspe for 2.6.13 spufs In-Reply-To: <20050929220009.146368000@localhost> References: <20050929220009.146368000@localhost> Message-ID: <200509301628.49277.arnd@arndb.de> As a companion to the spufs release posted yesterday, this is the user space libspe library from Dirk Herrendoerfer, together with the extracted interface documentation. This library gives an operating system independent abstraction on top of the spufs to work with asynchronous SPU threads. Arnd <>< -------------- next part -------------- A non-text attachment was scrubbed... 
Name: libspe-0.9.tar.gz Type: application/x-tgz Size: 21092 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20050930/91c930ab/attachment.bin -------------- next part -------------- A non-text attachment was scrubbed... Name: libspe-0.9.pdf.gz Type: application/x-gzip Size: 25758 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20050930/91c930ab/attachment-0001.bin From linas at austin.ibm.com Sat Oct 1 00:58:23 2005 From: linas at austin.ibm.com (linas) Date: Fri, 30 Sep 2005 09:58:23 -0500 Subject: [PATCH 7/7] ppc64: EEH Halt if bad drivers spin in error condition In-Reply-To: <200509300449.j8U4n94d014765@falcon30.maxeymade.com> References: <20050930010228.GG6173@austin.ibm.com> <200509300449.j8U4n94d014765@falcon30.maxeymade.com> Message-ID: <20050930145822.GM29826@austin.ibm.com> On Thu, Sep 29, 2005 at 11:49:09PM -0500, Doug Maxey was heard to remark: > > On Thu, 29 Sep 2005 20:02:28 CDT, linas wrote: > > > >07-eeh-spin-counter.patch > > > >Once an EEH event is triggered, all further I/O to a device is blocked (until > >reset). Bad device drivers may end up spinning in their interrupt handlers, > >trying to read an interrupt status register that will never change state. > >This patch moves that spin counter to a per-device structure, and adds > >some diagnostic prints to help locate the bad driver. > > > > Which struct gets the element? struct pci_dn, which Paulus recently introduced; it splits off the pci parts from struct device_node. Think of it as holding all the firmware and arch-specific pieces that can't be jammed in the generic struct pci_dev. --linas From haveblue at us.ibm.com Sat Oct 1 04:23:05 2005 From: haveblue at us.ibm.com (Dave Hansen) Date: Fri, 30 Sep 2005 11:23:05 -0700 Subject: [PATCH] fix 2.6.14-rc2-git8 compile errors Message-ID: <1128104585.8123.21.camel@localhost> I've been having some problems compiling the latest git snapshot.
Seems to be some missing includes, which keeps it from finding stuff like boot_cpuid. But, this is after applying Anton's discontig removal patches as well, so it might be just a side-effect of those. Here's my config: http://www.sr71.net/patches/2.6.14/2.6.14-rc1-mhp1/configs/config-ppc64-abat --- arch/ppc64/kernel/pSeries_lpar.c | 0 memhotplug-dave/arch/ppc64/kernel/pSeries_setup.c | 1 + memhotplug-dave/arch/ppc64/kernel/time.c | 1 + 3 files changed, 2 insertions(+) diff -puN arch/ppc64/kernel/time.c~no-found-boot_cpuid arch/ppc64/kernel/time.c --- memhotplug/arch/ppc64/kernel/time.c~no-found-boot_cpuid 2005-09-30 11:00:40.000000000 -0700 +++ memhotplug-dave/arch/ppc64/kernel/time.c 2005-09-30 11:00:40.000000000 -0700 @@ -65,6 +65,7 @@ #include #include #include +#include #include #include diff -puN arch/ppc64/kernel/pSeries_setup.c~no-found-boot_cpuid arch/ppc64/kernel/pSeries_setup.c --- memhotplug/arch/ppc64/kernel/pSeries_setup.c~no-found-boot_cpuid 2005-09-30 11:00:40.000000000 -0700 +++ memhotplug-dave/arch/ppc64/kernel/pSeries_setup.c 2005-09-30 11:00:40.000000000 -0700 @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include From kumar.gala at freescale.com Sat Oct 1 06:52:40 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Fri, 30 Sep 2005 15:52:40 -0500 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and fixup traps.c In-Reply-To: <20051001000001.1f1d8c48.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> Message-ID: <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> (My first attempt at posting to the list failed due to size) I really don't like the idea of splitting up traps.c into traps32.c and traps64.c. This defeats the purpose of the merge. I expect that a significant portion of traps.c is common (or can be made to be) between all powerpc's.
- kumar On Sep 30, 2005, at 9:00 AM, Stephen Rothwell wrote: > Use idle_power4.S from ppc64 as we are not going to support > 32 bit power4 in the merged tree. > > create traps{32,64}.c as these are hard to merge. > > Signed-off-by: Stephen Rothwell > --- > > arch/powerpc/Kconfig | 4 > arch/powerpc/kernel/Makefile | 4 > arch/powerpc/kernel/idle_power4.S | 78 +++ > arch/powerpc/kernel/traps.c | 1047 > ------------------------------------- > arch/powerpc/kernel/traps32.c | 1047 > +++++++++++++++++++++++++++++++++++++ > arch/powerpc/kernel/traps64.c | 568 ++++++++++++++++++++ > arch/ppc64/kernel/Makefile | 10 > arch/ppc64/kernel/idle_power4.S | 79 --- > arch/ppc64/kernel/traps.c | 568 -------------------- > 9 files changed, 1707 insertions(+), 1698 deletions(-) > create mode 100644 arch/powerpc/kernel/idle_power4.S > delete mode 100644 arch/powerpc/kernel/traps.c > create mode 100644 arch/powerpc/kernel/traps32.c > create mode 100644 arch/powerpc/kernel/traps64.c > delete mode 100644 arch/ppc64/kernel/idle_power4.S > delete mode 100644 arch/ppc64/kernel/traps.c > From linas at austin.ibm.com Sat Oct 1 08:29:18 2005 From: linas at austin.ibm.com (linas) Date: Fri, 30 Sep 2005 17:29:18 -0500 Subject: [PATCH 0/7] ppc64: Assorted minor EEH cleanups In-Reply-To: <20050930004800.GL29826@austin.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> Message-ID: <20050930222918.GN29826@austin.ibm.com> On Thu, Sep 29, 2005 at 07:48:00PM -0500, linas was heard to remark: > > They compile but (ahem) are not tested, They are now tested. They work (I had a corrupted initrd yesterday). Please apply and forward as soon as possible. During testing I found two unrelated bugs; wasn't able to squeeze out patches for today; maybe Monday. Paul, these are: 1) You added an eeh_capable flag that is never initialized, and so this blocks operation. I don't think this flag is needed, as it duplicates a bitflag in eeh_mode.
(Unless your plan is to use bitfields; do you want to use C language bitfields?) 2) PCI hotplug is broken because the flag phb->is_dynamic is never set to one. As a result, hotplug add calls __alloc_bootmem instead of kmalloc(), and crashes. I was testing a potential patch just now, but the clock ran out. --linas p.s. I hope to spit out the rest of the patches, including the kthread handling, early next week. I've got things mostly ported, and am testing. Let me know how to best coordinate on this. From jimix at watson.ibm.com Sat Oct 1 10:43:55 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Fri, 30 Sep 2005 20:43:55 -0400 Subject: To large page or not to large page Message-ID: <17213.56267.826654.651632@kitch0.watson.ibm.com> It seems as though Linux will map the kernel with large pages if the processor allows it, regardless of whether the lmb is sufficient to hold a large page, correct? Is there some runtime option to force the use of 4K pages? Ultimately, my desire is to define a 256 MB segment that, using a hypervisor, can be populated by shared pages (which can physically belong to the hypervisor or other partitions/domains) and restrict the mappings to 4K. I have some ideas, but am willing to hear any suggestions. -JX -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about."
-- Peter Griffin (Family Guy) From sfr at canb.auug.org.au Sat Oct 1 12:17:14 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 12:17:14 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and fixup traps.c In-Reply-To: <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> Message-ID: <20051001121714.1b5886aa.sfr@canb.auug.org.au> On Fri, 30 Sep 2005 15:52:40 -0500 Kumar Gala wrote: > > (My first attempt at posting to the list failed due to size) Yes, quoting the whole patch was probably not necessary :-) > I really don't like the idea of splitting up traps.c into traps32.c > and traps64.c. This defeats the purpose of the merge. I expect that > a significant portion of traps.c is common (or can be made to be) > between all powerpc's. My first attempt at the merge was a real mess and a right pain, so I put the two files in as a compromise. However, I have made another attempt and, although it took a while, it seems to be going ok. How about we put the two in for now (as that will allow the merge of more platforms to continue) and I will supply a further patch in a few days that combines the two trapsxx.c files? -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ -------------- next part -------------- A non-text attachment was scrubbed...
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051001/faf277cd/attachment.pgp From paulus at samba.org Sat Oct 1 13:28:56 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 1 Oct 2005 13:28:56 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and fixup traps.c In-Reply-To: <20051001121714.1b5886aa.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> <20051001121714.1b5886aa.sfr@canb.auug.org.au> Message-ID: <17214.632.655003.750819@cargo.ozlabs.ibm.com> Stephen Rothwell writes: > My first attempt at the merge was a real mess and a right pain, so I put > the two files in as a compromise. However, I have made another attempt > and ,although it took a while, it seems to be going ok. How about we put > the two in for now (as that will allow the merge of more platforms to > continue) and I will supply a further patch in a few days that combines > the two trapsxx.c files? In that case, what is the advantage of having two traps*.c files in arch/powerpc/kernel instead of having them in arch/ppc*/kernel? Paul. 
From sfr at canb.auug.org.au Sat Oct 1 21:37:53 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 21:37:53 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and traps.c In-Reply-To: <17214.632.655003.750819@cargo.ozlabs.ibm.com> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> <20051001121714.1b5886aa.sfr@canb.auug.org.au> <17214.632.655003.750819@cargo.ozlabs.ibm.com> Message-ID: <20051001213753.52d5d5c3.sfr@canb.auug.org.au> On Sat, 1 Oct 2005 13:28:56 +1000 Paul Mackerras wrote: > > Stephen Rothwell writes: > > > My first attempt at the merge was a real mess and a right pain, so I put > > the two files in as a compromise. However, I have made another attempt > > and, although it took a while, it seems to be going ok. How about we put > > the two in for now (as that will allow the merge of more platforms to > > continue) and I will supply a further patch in a few days that combines > > the two trapsxx.c files? > > In that case, what is the advantage of having two traps*.c files in > arch/powerpc/kernel instead of having them in arch/ppc*/kernel? OK, thanks for keeping me honest :-) Here are new versions of patches 6 and 7 (all the rest are the same as before). --------------- Use idle_power4.S from ppc64 as we are not going to support 32 bit power4 in the merged tree.
Merge ppc64 traps.c into powerpc traps.c: use ppc64 versions of exception routine names (as they don't have StudlyCaps) make all the versions if die() have the same prototype Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 4 arch/powerpc/kernel/head.S | 84 +++-- arch/powerpc/kernel/head_44x.S | 14 - arch/powerpc/kernel/head_4xx.S | 52 ++- arch/powerpc/kernel/head_8xx.S | 42 +-- arch/powerpc/kernel/head_fsl_booke.S | 24 + arch/powerpc/kernel/idle_power4.S | 78 +++++ arch/powerpc/kernel/ppc_ksyms.c | 16 - arch/powerpc/kernel/traps.c | 407 ++++++++++++++++++------ arch/ppc/kernel/head.S | 84 +++-- arch/ppc/kernel/head_44x.S | 14 - arch/ppc/kernel/head_4xx.S | 52 ++- arch/ppc/kernel/head_8xx.S | 42 +-- arch/ppc/kernel/head_booke.h | 4 arch/ppc/kernel/head_fsl_booke.S | 24 + arch/ppc/kernel/ppc_ksyms.c | 16 - arch/ppc/kernel/traps.c | 20 + arch/ppc/syslib/ibm44x_common.c | 2 arch/ppc/syslib/ppc4xx_setup.c | 2 arch/ppc64/kernel/Makefile | 8 arch/ppc64/kernel/idle_power4.S | 79 ----- arch/ppc64/kernel/traps.c | 568 ---------------------------------- include/asm-ppc/system.h | 2 23 files changed, 640 insertions(+), 998 deletions(-) create mode 100644 arch/powerpc/kernel/idle_power4.S delete mode 100644 arch/ppc64/kernel/idle_power4.S delete mode 100644 arch/ppc64/kernel/traps.c -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ dc1c1ca3dcd94c545c5e01d7c06b46824d43f4d0 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -142,8 +142,8 @@ config POWER4 def_bool y config PPC_FPU - bool - default y if PPC64 + depends on PPC32 + def_bool y config BOOKE bool diff --git a/arch/powerpc/kernel/head.S b/arch/powerpc/kernel/head.S --- a/arch/powerpc/kernel/head.S +++ b/arch/powerpc/kernel/head.S @@ -349,12 +349,12 @@ i##n: \ /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (UnknownException) when done. 
*/ + putting it back to what it was (unknown_exception) when done. */ #if defined(CONFIG_GEMINI) && defined(CONFIG_SMP) . = 0x100 b __secondary_start_gemini #else - EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) #endif /* Machine check */ @@ -389,7 +389,7 @@ i##n: \ cmpwi cr1,r4,0 bne cr1,1f #endif - EXC_XFER_STD(0x200, MachineCheckException) + EXC_XFER_STD(0x200, machine_check_exception) #ifdef CONFIG_PPC_CHRP 1: b machine_check_in_rtas #endif @@ -456,10 +456,10 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* Floating-point unavailable */ . = 0x800 @@ -472,8 +472,8 @@ FPUnavailable: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) /* System call */ . = 0xc00 @@ -482,8 +482,8 @@ SystemCall: EXC_XFER_EE_LITE(0xc00, DoSyscall) /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -502,7 +502,7 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0xf00, UnknownException) + EXC_XFER_EE(0xf00, unknown_exception) /* * Handle TLB miss for instruction on 603/603e. 
@@ -702,44 +702,44 @@ DataStoreTLBMiss: rfi #ifndef CONFIG_ALTIVEC -#define AltivecAssistException UnknownException +#define altivec_assist_exception unknown_exception #endif - EXCEPTION(0x1300, Trap_13, InstructionBreakpoint, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) #ifdef CONFIG_POWER4 - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, altivec_assist_exception, EXC_XFER_EE) EXCEPTION(0x1800, Trap_18, TAUException, EXC_XFER_STD) #else /* !CONFIG_POWER4 */ - EXCEPTION(0x1600, Trap_16, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_POWER4 */ - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, 
unknown_exception, EXC_XFER_EE) EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2d00, Trap_2d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2f00, MOLTrampoline, UnknownException, EXC_XFER_EE_LITE) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) .globl mol_trampoline .set mol_trampoline, i0x2f00 @@ -751,7 +751,7 @@ 
AltiVecUnavailable: #ifdef CONFIG_ALTIVEC bne load_up_altivec /* if from user, just load it up */ #endif /* CONFIG_ALTIVEC */ - EXC_XFER_EE_LITE(0xf20, AltivecUnavailException) + EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) #ifdef CONFIG_PPC64BRIDGE DataAccess: @@ -767,12 +767,12 @@ DataSegment: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_DAR stw r4,_DAR(r11) - EXC_XFER_STD(0x380, UnknownException) + EXC_XFER_STD(0x380, unknown_exception) InstructionSegment: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x480, UnknownException) + EXC_XFER_STD(0x480, unknown_exception) #endif /* CONFIG_PPC64BRIDGE */ #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -309,13 +309,13 @@ skpinv: addi r4,r4,1 /* Increment */ interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_440A - MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #else - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ @@ -442,7 +442,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x2010, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif /* System Call Interrupt */ @@ -451,21 +451,21 @@ interrupt_base: EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxillary Processor Unavailable Interrupt */ - EXCEPTION(0x2020, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ 
DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, UnknownException) + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ diff --git a/arch/powerpc/kernel/head_4xx.S b/arch/powerpc/kernel/head_4xx.S --- a/arch/powerpc/kernel/head_4xx.S +++ b/arch/powerpc/kernel/head_4xx.S @@ -245,12 +245,12 @@ label: /* * 0x0100 - Critical Interrupt Exception */ - CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) /* * 0x0200 - Machine Check Exception */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) /* * 0x0300 - Data Storage Exception @@ -405,7 +405,7 @@ label: mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ stw r4,_DEAR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) @@ -413,21 +413,21 @@ label: mfspr r4,SPRN_ESR /* Grab the ESR and save it */ stw r4,_ESR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x700, ProgramCheckException) + EXC_XFER_STD(0x700, program_check_exception) - EXCEPTION(0x0800, Trap_08, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0900, Trap_09, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0A00, Trap_0A, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0B00, Trap_0B, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) + 
EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) NORMAL_EXCEPTION_PROLOG EXC_XFER_EE_LITE(0xc00, DoSyscall) - EXCEPTION(0x0D00, Trap_0D, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0E00, Trap_0E, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0F00, Trap_0F, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ START_EXCEPTION(0x1000, Decrementer) @@ -444,14 +444,14 @@ label: /* 0x1010 - Fixed Interval Timer (FIT) Exception */ - STND_EXCEPTION(0x1010, FITException, UnknownException) + STND_EXCEPTION(0x1010, FITException, unknown_exception) /* 0x1020 - Watchdog Timer (WDT) Exception */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WDTException, UnknownException) + CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception) #endif #endif @@ -656,25 +656,25 @@ label: mfspr r10, SPRN_SPRG0 b InstructionAccess - EXCEPTION(0x1300, Trap_13, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1400, Trap_14, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) #ifdef CONFIG_IBM405_ERR51 /* 405GP errata 51 */ START_EXCEPTION(0x1700, Trap_17) b DTLBMiss #else - EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) #endif - EXCEPTION(0x1800, Trap_18, 
UnknownException, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1A00, Trap_1A, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1B00, Trap_1B, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1C00, Trap_1C, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1D00, Trap_1D, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1E00, Trap_1E, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1F00, Trap_1F, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -203,7 +203,7 @@ i##n: \ ret_from_except) /* System reset */ - EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) /* Machine check */ . = 0x200 @@ -214,7 +214,7 @@ MachineCheck: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x200, MachineCheckException) + EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. * This is "never generated" by the MPC8xx. 
We jump to it for other @@ -252,20 +252,20 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* No FPU on MPC8xx. This exception is not supposed to happen. */ - EXCEPTION(0x800, FPUnavailable, UnknownException, EXC_XFER_STD) + EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) /* System call */ . = 0xc00 @@ -274,9 +274,9 @@ SystemCall: EXC_XFER_EE_LITE(0xc00, DoSyscall) /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) - EXCEPTION(0xf00, Trap_0f, UnknownException, EXC_XFER_EE) + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. 
@@ -540,22 +540,22 @@ DataTLBError: #endif b DataAccess - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to * using them. */ - EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) . 
= 0x2000 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -426,14 +426,14 @@ skpinv: addi r6,r6,1 /* Increment */ interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_E200 /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #else - MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ @@ -542,9 +542,9 @@ interrupt_base: #else #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FloatingPointUnavailable, ProgramCheckException, EXC_XFER_EE) + EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) #else - EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif #endif @@ -554,20 +554,20 @@ interrupt_base: EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxillary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, 
UnknownException) + CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ @@ -696,21 +696,21 @@ interrupt_base: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else - EXCEPTION(0x2020, SPEUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); #else - EXCEPTION(0x2040, SPEFloatingPointData, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPEFloatingPointRound, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) /* Performance Monitor */ - EXCEPTION(0x2060, PerformanceMonitor, PerformanceMonitorException, EXC_XFER_STD) + EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) /* Debug Interrupt */ diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S new file mode 100644 --- /dev/null +++ b/arch/powerpc/kernel/idle_power4.S @@ -0,0 +1,78 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + + .text + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(power4_idle) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + LOADBASE(r3,cur_cpu_spec) + ld r4,cur_cpu_spec@l(r3) + ld r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beqlr + /* Now check if user or arch enabled NAP mode */ + LOADBASE(r3,powersave_nap) + lwz r4,powersave_nap@l(r3) + cmpwi 0,r4,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + li r4,0 + ori r4,r4,MSR_EE + andc r0,r7,r4 + mtmsrd r0 + + /* Check current_thread_info()->flags */ + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsrd r7 /* out of line this ?
*/ + blr +1: + /* Go to NAP now */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + oris r7,r7,MSR_POW@h + sync + isync + mtmsrd r7 + isync + sync + blr diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -53,10 +53,10 @@ extern void transfer_to_handler(void); extern void do_IRQ(struct pt_regs *regs); -extern void MachineCheckException(struct pt_regs *regs); -extern void AlignmentException(struct pt_regs *regs); -extern void ProgramCheckException(struct pt_regs *regs); -extern void SingleStepException(struct pt_regs *regs); +extern void machine_check_exception(struct pt_regs *regs); +extern void alignment_exception(struct pt_regs *regs); +extern void program_check_exception(struct pt_regs *regs); +extern void single_step_exception(struct pt_regs *regs); extern int do_signal(sigset_t *, struct pt_regs *); extern int pmac_newworld; extern int sys_sigreturn(struct pt_regs *regs); @@ -72,10 +72,10 @@ EXPORT_SYMBOL(clear_user_page); EXPORT_SYMBOL(do_signal); EXPORT_SYMBOL(transfer_to_handler); EXPORT_SYMBOL(do_IRQ); -EXPORT_SYMBOL(MachineCheckException); -EXPORT_SYMBOL(AlignmentException); -EXPORT_SYMBOL(ProgramCheckException); -EXPORT_SYMBOL(SingleStepException); +EXPORT_SYMBOL(machine_check_exception); +EXPORT_SYMBOL(alignment_exception); +EXPORT_SYMBOL(program_check_exception); +EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); EXPORT_SYMBOL(ppc_n_lost_interrupts); EXPORT_SYMBOL(ppc_lost_interrupts); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1,6 +1,4 @@ /* - * arch/powerpc/kernel/traps.c - * * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) * * This program is free software; you can redistribute it and/or @@ -23,29 +21,46 @@ #include #include #include -#include #include #include #include #include -#include
#include #include -#include #include #include #include +#ifdef CONFIG_PPC32 +#include +#include +#endif #include #include #include #include +#ifdef CONFIG_PPC32 #include #include #ifdef CONFIG_PMAC_BACKLIGHT #include #endif #include +#endif +#ifdef CONFIG_PPC64 +#include +#include +#include +#include +#include +#include +#endif + +#ifdef CONFIG_PPC64 +#define __KPROBES __kprobes +#else +#define __KPROBES +#endif #ifdef CONFIG_DEBUGGER int (*__debugger)(struct pt_regs *regs); @@ -96,7 +111,7 @@ int die(const char *str, struct pt_regs console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); -#ifdef CONFIG_PMAC_BACKLIGHT +#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC_BACKLIGHT) if (_machine == _MACH_Pmac) { set_backlight_enable(1); set_backlight_level(BACKLIGHT_MAX); @@ -154,9 +169,17 @@ int die(const char *str, struct pt_regs panic("Fatal exception in interrupt"); if (panic_on_oops) { +#ifdef CONFIG_PPC64 + printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); + ssleep(5); +#endif panic("Fatal exception"); } +#ifdef CONFIG_PPC32 do_exit(err); +#else + do_exit(SIGSEGV); +#endif return 0; } @@ -176,6 +199,7 @@ void _exception(int signr, struct pt_reg info.si_addr = (void __user *) addr; force_sig_info(signr, &info, current); +#ifdef CONFIG_PPC32 /* * Init gets no signals that it doesn't have a handler for. * That's all very well, but if it has caused a synchronous @@ -197,6 +221,7 @@ void _exception(int signr, struct pt_reg do_exit(signr); } } +#endif } #ifdef CONFIG_PPC64 @@ -206,7 +231,7 @@ void system_reset_exception(struct pt_re if (ppc_md.system_reset_exception) ppc_md.system_reset_exception(regs); - die("System Reset", regs, SIGABRT); + die("System Reset", regs, 0); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) @@ -216,6 +241,7 @@ void system_reset_exception(struct pt_re } #endif +#ifdef CONFIG_PPC32 /* * I/O accesses can cause machine checks on powermacs. 
* Check if the NIP corresponds to the address of a sync @@ -264,8 +290,10 @@ static inline int check_io_access(struct #endif /* CONFIG_PPC_PMAC */ return 0; } +#endif /* CONFIG_PPC32 */ #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + /* On 4xx, the reason for the machine check or program exception is in the ESR. */ #define get_reason(regs) ((regs)->dsisr) @@ -284,6 +312,7 @@ static inline int check_io_access(struct #define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) #else + /* On non-4xx, the reason for the machine check or program exception is in the MSR. */ #define get_reason(regs) ((regs)->msr) @@ -297,6 +326,7 @@ static inline int check_io_access(struct #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) #endif +#ifdef CONFIG_PPC32 /* * This is "fall-back" implementation for configurations * which don't provide platform-specific machine check info @@ -305,8 +335,9 @@ void __attribute__ ((weak)) platform_machine_check(struct pt_regs *regs) { } +#endif -void MachineCheckException(struct pt_regs *regs) +void machine_check_exception(struct pt_regs *regs) { #ifdef CONFIG_PPC64 int recover = 0; @@ -462,23 +493,31 @@ void MachineCheckException(struct pt_reg * additional info, e.g. bus error registers. 
*/ platform_machine_check(regs); -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC32 */ if (debugger_fault_handler(regs)) return; - die("Machine check", regs, SIGBUS); + die("Machine check", regs, +#ifdef CONFIG_PPC32 + SIGBUS +#else + 0 +#endif + ); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable Machine check"); } +#ifdef CONFIG_PPC32 void SMIException(struct pt_regs *regs) { die("System Management Interrupt", regs, SIGABRT); } +#endif -void UnknownException(struct pt_regs *regs) +void unknown_exception(struct pt_regs *regs) { printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); @@ -486,7 +525,7 @@ void UnknownException(struct pt_regs *re _exception(SIGTRAP, regs, 0, 0); } -void InstructionBreakpoint(struct pt_regs *regs) +void instruction_breakpoint_exception(struct pt_regs *regs) { if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -496,14 +535,20 @@ void InstructionBreakpoint(struct pt_reg _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); } +#ifdef CONFIG_PPC32 void RunModeException(struct pt_regs *regs) { _exception(SIGTRAP, regs, 0, 0); } +#endif -void SingleStepException(struct pt_regs *regs) +void __KPROBES single_step_exception(struct pt_regs *regs) { +#ifdef CONFIG_PPC32 regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ +#else + regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ +#endif if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -520,15 +565,62 @@ void SingleStepException(struct pt_regs * pretend we got a single-step exception. This was pointed out * by Kumar Gala. 
-- paulus */ -static void emulate_single_step(struct pt_regs *regs) +static inline void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) { +#ifdef CONFIG_PPC32 clear_single_step(regs); _exception(SIGTRAP, regs, TRAP_TRACE, 0); +#else + single_step_exception(regs); +#endif } } -/* Illegal instruction emulation support. Originally written to +static void parse_fpe(struct pt_regs *regs) +{ + int code = 0; + unsigned long fpscr; + +#ifdef CONFIG_PPC32 + /* We must make sure the FP state is consistent with + * our MSR_FP in regs + */ + preempt_disable(); + if (regs->msr & MSR_FP) + giveup_fpu(current); + preempt_enable(); +#else + flush_fp_to_thread(current); +#endif + + fpscr = current->thread.fpscr; + + /* Invalid operation */ + if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) + code = FPE_FLTINV; + + /* Overflow */ + else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) + code = FPE_FLTOVF; + + /* Underflow */ + else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) + code = FPE_FLTUND; + + /* Divide by zero */ + else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) + code = FPE_FLTDIV; + + /* Inexact result */ + else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) + code = FPE_FLTRES; + + _exception(SIGFPE, regs, code, regs->nip); +} + +/* + * Illegal instruction emulation support. Originally written to * provide the PVR to user applications using the mfspr rd, PVR. * Return non-zero if we can't emulate, or -EFAULT if the associated * memory access caused an access fault. Return zero on success. @@ -536,7 +628,6 @@ static void emulate_single_step(struct p * There are a couple of ways to do this, either "decode" the instruction * or directly match lots of bits. In this case, matching lots of * bits is faster and easier. 
- * */ #define INST_MFSPR_PVR 0x7c1f42a6 #define INST_MFSPR_PVR_MASK 0xfc1fffff @@ -547,6 +638,8 @@ static void emulate_single_step(struct p #define INST_MCRXR 0x7c000400 #define INST_MCRXR_MASK 0x7c0007fe +#ifdef CONFIG_PPC32 + #define INST_STRING 0x7c00042a #define INST_STRING_MASK 0x7c0007fe #define INST_STRING_GEN_MASK 0x7c00067e @@ -622,6 +715,7 @@ static int emulate_string_inst(struct pt return 0; } +#endif /* CONFIG_PPC32 */ static int emulate_instruction(struct pt_regs *regs) { @@ -643,22 +737,44 @@ static int emulate_instruction(struct pt } /* Emulating the dcba insn is just a no-op. */ - if ((instword & INST_DCBA_MASK) == INST_DCBA) + if ((instword & INST_DCBA_MASK) == INST_DCBA) { +#ifdef CONFIG_PPC64 + static int warned; + + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'dcba' insn\n", + current->pid, current->comm); + warned = 1; + } +#endif /* CONFIG_PPC64 */ return 0; + } /* Emulate the mcrxr insn. */ if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { - int shift = (instword >> 21) & 0x1c; + unsigned int shift = (instword >> 21) & 0x1c; unsigned long msk = 0xf0000000UL >> shift; +#ifdef CONFIG_PPC64 + static int warned; + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'mcrxr' insn\n", + current->pid, current->comm); + warned = 1; + } +#endif regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); regs->xer &= ~0xf0000000UL; return 0; } +#ifdef CONFIG_PPC32 /* Emulate load/store string insn. 
*/ if ((instword & INST_STRING_GEN_MASK) == INST_STRING) return emulate_string_inst(regs, instword); +#endif return -EINVAL; } @@ -686,7 +802,7 @@ struct bug_entry *find_bug(unsigned long return module_find_bug(bugaddr); } -int check_bug_trap(struct pt_regs *regs) +static int check_bug_trap(struct pt_regs *regs) { struct bug_entry *bug; unsigned long addr; @@ -701,34 +817,38 @@ int check_bug_trap(struct pt_regs *regs) return 0; if (bug->line & BUG_WARNING_TRAP) { /* this is a WARN_ON rather than BUG/BUG_ON */ -#ifdef CONFIG_XMON +#if defined(CONFIG_PPC32) && defined(CONFIG_XMON) xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", bug->function, bug->file, bug->line & ~BUG_WARNING_TRAP); -#endif /* CONFIG_XMON */ +#endif printk(KERN_ERR "Badness in %s at %s:%d\n", bug->function, bug->file, bug->line & ~BUG_WARNING_TRAP); +#ifdef CONFIG_PPC32 dump_stack(); +#else + show_stack(current, (void *)regs->gpr[1]); +#endif return 1; } -#ifdef CONFIG_XMON +#if defined(CONFIG_PPC32) && defined(CONFIG_XMON) xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", bug->function, bug->file, bug->line); xmon(regs); -#endif /* CONFIG_XMON */ +#endif printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", bug->function, bug->file, bug->line); return 0; } -void ProgramCheckException(struct pt_regs *regs) +void __KPROBES program_check_exception(struct pt_regs *regs) { unsigned int reason = get_reason(regs); +#if defined(CONFIG_PPC32) && defined(CONFIG_MATH_EMULATION) extern int do_mathemu(struct pt_regs *regs); -#ifdef CONFIG_MATH_EMULATION /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) * that means ESR is sometimes set incorrectly - either to @@ -740,69 +860,61 @@ void ProgramCheckException(struct pt_reg emulate_single_step(regs); return; } -#endif /* CONFIG_MATH_EMULATION */ - - if (reason & REASON_FP) { - /* IEEE FP exception */ - int code = 0; - u32 fpscr; +#endif - /* We must make sure the FP state is consistent with - 
* our MSR_FP in regs - */ - preempt_disable(); - if (regs->msr & MSR_FP) - giveup_fpu(current); - preempt_enable(); - - fpscr = current->thread.fpscr; - fpscr &= fpscr << 22; /* mask summary bits with enables */ - if (fpscr & FPSCR_VX) - code = FPE_FLTINV; - else if (fpscr & FPSCR_OX) - code = FPE_FLTOVF; - else if (fpscr & FPSCR_UX) - code = FPE_FLTUND; - else if (fpscr & FPSCR_ZX) - code = FPE_FLTDIV; - else if (fpscr & FPSCR_XX) - code = FPE_FLTRES; - _exception(SIGFPE, regs, code, regs->nip); +#ifdef CONFIG_PPC64 + if (debugger_fault_handler(regs)) return; - } +#endif - if (reason & REASON_TRAP) { + if (reason & REASON_FP) { + /* IEEE FP exception */ + parse_fpe(regs); + } else if (reason & REASON_TRAP) { /* trap exception */ +#ifdef CONFIG_PPC64 + if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) + == NOTIFY_STOP) + return; +#endif if (debugger_bpt(regs)) return; if (check_bug_trap(regs)) { regs->nip += 4; return; } - _exception(SIGTRAP, regs, TRAP_BRKPT, 0); - return; - } - - /* Try to emulate it if we should. */ - if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { + _exception(SIGTRAP, regs, TRAP_BRKPT, +#ifdef CONFIG_PPC32 + 0 +#else + regs->nip +#endif + ); + } else +#ifdef CONFIG_PPC32 + if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) +#endif + { + /* Privileged or illegal instruction; try to emulate it. 
*/ switch (emulate_instruction(regs)) { case 0: regs->nip += 4; emulate_single_step(regs); - return; + break; case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; + break; + default: + if (reason & REASON_PRIVILEGED) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + break; } } - - if (reason & REASON_PRIVILEGED) - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - else - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); } -void AlignmentException(struct pt_regs *regs) +void alignment_exception(struct pt_regs *regs) { int fixed; @@ -814,18 +926,31 @@ void AlignmentException(struct pt_regs * return; } - /* Operand address was bad */ + /* Operand address was bad */ if (fixed == -EFAULT) { if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar); + _exception(SIGSEGV, regs, +#ifdef CONFIG_PPC32 + SEGV_ACCERR, +#else + SEGV_MAPERR, +#endif + regs->dar); else /* Search exception table */ bad_page_fault(regs, regs->dar, SIGSEGV); return; } - _exception(SIGBUS, regs, BUS_ADRALN, regs->dar); + _exception(SIGBUS, regs, BUS_ADRALN, +#ifdef CONFIG_PPC32 + regs->dar +#else + regs->nip +#endif + ); } +#ifdef CONFIG_PPC32 void StackOverflow(struct pt_regs *regs) { printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", @@ -849,8 +974,58 @@ void trace_syscall(struct pt_regs *regs) current, current->pid, regs->nip, regs->link, regs->gpr[0], regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); } +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +void kernel_fp_unavailable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); +} +#endif + +void altivec_unavailable_exception(struct pt_regs *regs) +{ +#if !defined(CONFIG_ALTIVEC) || defined(CONFIG_PPC64) + if (user_mode(regs)) { + /* A user program has executed an 
altivec instruction, + but this kernel doesn't support altivec. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } +#endif +#ifdef CONFIG_PPC32 + { + static int kernel_altivec_count; + + /* The kernel has executed an altivec instruction without + first enabling altivec. Whinge but let it do it. */ + if (++kernel_altivec_count < 10) + printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", + current, regs->nip); + regs->msr |= MSR_VEC; + } +#else + printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); +#endif +} + +#ifdef CONFIG_PPC64 +extern perf_irq_t perf_irq; +#endif + +#if defined(CONFIG_PPC64) || defined(CONFIG_E500) +void performance_monitor_exception(struct pt_regs *regs) +{ + perf_irq(regs); +} +#endif + -#ifdef CONFIG_8xx +#if defined(CONFIG_PPC32) && defined(CONFIG_8xx) void SoftwareEmulation(struct pt_regs *regs) { extern int do_mathemu(struct pt_regs *); @@ -879,8 +1054,9 @@ void SoftwareEmulation(struct pt_regs *r } else emulate_single_step(regs); } -#endif /* CONFIG_8xx */ +#endif /* defined(CONFIG_PPC32) && defined(CONFIG_8xx) */ +#ifdef CONFIG_PPC32 #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) void DebugException(struct pt_regs *regs, unsigned long debug_status) @@ -909,42 +1085,36 @@ void TAUException(struct pt_regs *regs) regs->nip, regs->msr, regs->trap, print_tainted()); } #endif /* CONFIG_INT_TAU */ - -void AltivecUnavailException(struct pt_regs *regs) -{ - static int kernel_altivec_count; - -#ifndef CONFIG_ALTIVEC - if (user_mode(regs)) { - /* A user program has executed an altivec instruction, - but this kernel doesn't support altivec. */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } -#endif - /* The kernel has executed an altivec instruction without - first enabling altivec. Whinge but let it do it. 
*/ - if (++kernel_altivec_count < 10) - printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", - current, regs->nip); - regs->msr |= MSR_VEC; -} +#endif /* CONFIG_PPC32*/ #ifdef CONFIG_ALTIVEC -void AltivecAssistException(struct pt_regs *regs) +void altivec_assist_exception(struct pt_regs *regs) { int err; +#ifdef CONFIG_PPC64 + siginfo_t info; +#endif +#ifdef CONFIG_PPC32 preempt_disable(); if (regs->msr & MSR_VEC) giveup_altivec(current); preempt_enable(); +#endif if (!user_mode(regs)) { printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" " at %lx\n", regs->nip); - die("Kernel Altivec assist exception", regs, SIGILL); + die("Kernel " +#ifdef CONFIG_PPC64 + "VMX/" +#endif + "Altivec assist exception", regs, SIGILL); } +#ifdef CONFIG_PPC64 + flush_altivec_to_thread(current); +#endif /* CONFIG_PPC64 */ + err = emulate_altivec(regs); if (err == 0) { regs->nip += 4; /* skip emulated instruction */ @@ -954,7 +1124,15 @@ void AltivecAssistException(struct pt_re if (err == -EFAULT) { /* got an error reading the instruction */ +#ifdef CONFIG_PPC32 _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); +#else + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) regs->nip; + force_sig_info(SIGSEGV, &info, current); +#endif } else { /* didn't recognize the instruction */ /* XXX quick hack for now: set the non-Java bit in the VSCR */ @@ -966,13 +1144,7 @@ void AltivecAssistException(struct pt_re } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_E500 -void PerformanceMonitorException(struct pt_regs *regs) -{ - perf_irq(regs); -} -#endif - +#ifdef CONFIG_PPC32 #ifdef CONFIG_FSL_BOOKE void CacheLockingException(struct pt_regs *regs, unsigned long address, unsigned long error_code) @@ -1022,7 +1194,24 @@ void SPEFloatingPointException(struct pt return; } #endif +#endif /* CONFIG_PPC32 */ +#ifdef CONFIG_PPC64 +/* + * We enter here if we get an unrecoverable exception, that is, one + * that happened at a point 
where the RI (recoverable interrupt) bit + * in the MSR is 0. This indicates that SRR0/1 are live, and that + * we therefore lost state by taking this exception. + */ +void unrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", + regs->trap, regs->nip); + die("Unrecoverable exception", regs, SIGABRT); +} +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC32 #ifdef CONFIG_BOOKE_WDT /* * Default handler for a Watchdog exception, @@ -1041,6 +1230,20 @@ void WatchdogException(struct pt_regs *r WatchdogHandler(regs); } #endif +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +/* + * We enter here if we discover during exception entry that we are + * running in supervisor mode with a userspace value in the stack pointer. + */ +void kernel_bad_stack(struct pt_regs *regs) +{ + printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", + regs->gpr[1], regs->nip); + die("Bad kernel stack pointer", regs, SIGABRT); +} +#endif void __init trap_init(void) { diff --git a/arch/ppc/kernel/head.S b/arch/ppc/kernel/head.S --- a/arch/ppc/kernel/head.S +++ b/arch/ppc/kernel/head.S @@ -349,12 +349,12 @@ i##n: \ /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (UnknownException) when done. */ + putting it back to what it was (unknown_exception) when done. */ #if defined(CONFIG_GEMINI) && defined(CONFIG_SMP) . 
= 0x100 b __secondary_start_gemini #else - EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) #endif /* Machine check */ @@ -389,7 +389,7 @@ i##n: \ cmpwi cr1,r4,0 bne cr1,1f #endif - EXC_XFER_STD(0x200, MachineCheckException) + EXC_XFER_STD(0x200, machine_check_exception) #ifdef CONFIG_PPC_CHRP 1: b machine_check_in_rtas #endif @@ -456,10 +456,10 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* Floating-point unavailable */ . = 0x800 @@ -472,8 +472,8 @@ FPUnavailable: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) /* System call */ . = 0xc00 @@ -482,8 +482,8 @@ SystemCall: EXC_XFER_EE_LITE(0xc00, DoSyscall) /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -502,7 +502,7 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0xf00, UnknownException) + EXC_XFER_EE(0xf00, unknown_exception) /* * Handle TLB miss for instruction on 603/603e. 
@@ -702,44 +702,44 @@ DataStoreTLBMiss: rfi #ifndef CONFIG_ALTIVEC -#define AltivecAssistException UnknownException +#define altivec_assist_exception unknown_exception #endif - EXCEPTION(0x1300, Trap_13, InstructionBreakpoint, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) #ifdef CONFIG_POWER4 - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, altivec_assist_exception, EXC_XFER_EE) EXCEPTION(0x1800, Trap_18, TAUException, EXC_XFER_STD) #else /* !CONFIG_POWER4 */ - EXCEPTION(0x1600, Trap_16, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_POWER4 */ - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, 
unknown_exception, EXC_XFER_EE) EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2d00, Trap_2d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2f00, MOLTrampoline, UnknownException, EXC_XFER_EE_LITE) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) .globl mol_trampoline .set mol_trampoline, i0x2f00 @@ -751,7 +751,7 @@ 
AltiVecUnavailable: #ifdef CONFIG_ALTIVEC bne load_up_altivec /* if from user, just load it up */ #endif /* CONFIG_ALTIVEC */ - EXC_XFER_EE_LITE(0xf20, AltivecUnavailException) + EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) #ifdef CONFIG_PPC64BRIDGE DataAccess: @@ -767,12 +767,12 @@ DataSegment: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_DAR stw r4,_DAR(r11) - EXC_XFER_STD(0x380, UnknownException) + EXC_XFER_STD(0x380, unknown_exception) InstructionSegment: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x480, UnknownException) + EXC_XFER_STD(0x480, unknown_exception) #endif /* CONFIG_PPC64BRIDGE */ #ifdef CONFIG_ALTIVEC diff --git a/arch/ppc/kernel/head_44x.S b/arch/ppc/kernel/head_44x.S --- a/arch/ppc/kernel/head_44x.S +++ b/arch/ppc/kernel/head_44x.S @@ -309,13 +309,13 @@ skpinv: addi r4,r4,1 /* Increment */ interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_440A - MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #else - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ @@ -442,7 +442,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x2010, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif /* System Call Interrupt */ @@ -451,21 +451,21 @@ interrupt_base: EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxillary Processor Unavailable Interrupt */ - EXCEPTION(0x2020, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* 
Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, UnknownException) + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ diff --git a/arch/ppc/kernel/head_4xx.S b/arch/ppc/kernel/head_4xx.S --- a/arch/ppc/kernel/head_4xx.S +++ b/arch/ppc/kernel/head_4xx.S @@ -245,12 +245,12 @@ label: /* * 0x0100 - Critical Interrupt Exception */ - CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) /* * 0x0200 - Machine Check Exception */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) /* * 0x0300 - Data Storage Exception @@ -405,7 +405,7 @@ label: mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ stw r4,_DEAR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) @@ -413,21 +413,21 @@ label: mfspr r4,SPRN_ESR /* Grab the ESR and save it */ stw r4,_ESR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x700, ProgramCheckException) + EXC_XFER_STD(0x700, program_check_exception) - EXCEPTION(0x0800, Trap_08, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0900, Trap_09, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0A00, Trap_0A, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0B00, Trap_0B, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0A00, Trap_0A, unknown_exception, 
EXC_XFER_EE) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) NORMAL_EXCEPTION_PROLOG EXC_XFER_EE_LITE(0xc00, DoSyscall) - EXCEPTION(0x0D00, Trap_0D, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0E00, Trap_0E, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0F00, Trap_0F, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ START_EXCEPTION(0x1000, Decrementer) @@ -444,14 +444,14 @@ label: /* 0x1010 - Fixed Interval Timer (FIT) Exception */ - STND_EXCEPTION(0x1010, FITException, UnknownException) + STND_EXCEPTION(0x1010, FITException, unknown_exception) /* 0x1020 - Watchdog Timer (WDT) Exception */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WDTException, UnknownException) + CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception) #endif #endif @@ -656,25 +656,25 @@ label: mfspr r10, SPRN_SPRG0 b InstructionAccess - EXCEPTION(0x1300, Trap_13, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1400, Trap_14, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) #ifdef CONFIG_IBM405_ERR51 /* 405GP errata 51 */ START_EXCEPTION(0x1700, Trap_17) b DTLBMiss #else - EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) #endif - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1900, 
Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1A00, Trap_1A, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1B00, Trap_1B, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1C00, Trap_1C, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1D00, Trap_1D, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1E00, Trap_1E, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1F00, Trap_1F, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case diff --git a/arch/ppc/kernel/head_8xx.S b/arch/ppc/kernel/head_8xx.S --- a/arch/ppc/kernel/head_8xx.S +++ b/arch/ppc/kernel/head_8xx.S @@ -203,7 +203,7 @@ i##n: \ ret_from_except) /* System reset */ - EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) /* Machine check */ . = 0x200 @@ -214,7 +214,7 @@ MachineCheck: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x200, MachineCheckException) + EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. * This is "never generated" by the MPC8xx. 
We jump to it for other @@ -252,20 +252,20 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* No FPU on MPC8xx. This exception is not supposed to happen. */ - EXCEPTION(0x800, FPUnavailable, UnknownException, EXC_XFER_STD) + EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) /* System call */ . = 0xc00 @@ -274,9 +274,9 @@ SystemCall: EXC_XFER_EE_LITE(0xc00, DoSyscall) /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) - EXCEPTION(0xf00, Trap_0f, UnknownException, EXC_XFER_EE) + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. 
@@ -540,22 +540,22 @@ DataTLBError: #endif b DataAccess - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to * using them. */ - EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) . 
= 0x2000 diff --git a/arch/ppc/kernel/head_booke.h b/arch/ppc/kernel/head_booke.h --- a/arch/ppc/kernel/head_booke.h +++ b/arch/ppc/kernel/head_booke.h @@ -335,7 +335,7 @@ label: mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_EE(0x0600, AlignmentException) + EXC_XFER_EE(0x0600, alignment_exception) #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ @@ -343,7 +343,7 @@ label: mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x0700, ProgramCheckException) + EXC_XFER_STD(0x0700, program_check_exception) #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ diff --git a/arch/ppc/kernel/head_fsl_booke.S b/arch/ppc/kernel/head_fsl_booke.S --- a/arch/ppc/kernel/head_fsl_booke.S +++ b/arch/ppc/kernel/head_fsl_booke.S @@ -426,14 +426,14 @@ skpinv: addi r6,r6,1 /* Increment */ interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_E200 /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #else - MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ @@ -542,9 +542,9 @@ interrupt_base: #else #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FloatingPointUnavailable, ProgramCheckException, EXC_XFER_EE) + EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) #else - EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif #endif @@ -554,20 +554,20 @@ 
interrupt_base: EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxillary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, UnknownException) + CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ @@ -696,21 +696,21 @@ interrupt_base: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else - EXCEPTION(0x2020, SPEUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); #else - EXCEPTION(0x2040, SPEFloatingPointData, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPEFloatingPointRound, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) /* Performance Monitor */ - EXCEPTION(0x2060, PerformanceMonitor, PerformanceMonitorException, EXC_XFER_STD) + EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) /* Debug Interrupt */ diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -53,10 +53,10 @@ extern void transfer_to_handler(void); extern void 
do_IRQ(struct pt_regs *regs); -extern void MachineCheckException(struct pt_regs *regs); -extern void AlignmentException(struct pt_regs *regs); -extern void ProgramCheckException(struct pt_regs *regs); -extern void SingleStepException(struct pt_regs *regs); +extern void machine_check_exception(struct pt_regs *regs); +extern void alignment_exception(struct pt_regs *regs); +extern void program_check_exception(struct pt_regs *regs); +extern void single_step_exception(struct pt_regs *regs); extern int do_signal(sigset_t *, struct pt_regs *); extern int pmac_newworld; extern int sys_sigreturn(struct pt_regs *regs); @@ -72,10 +72,10 @@ EXPORT_SYMBOL(clear_user_page); EXPORT_SYMBOL(do_signal); EXPORT_SYMBOL(transfer_to_handler); EXPORT_SYMBOL(do_IRQ); -EXPORT_SYMBOL(MachineCheckException); -EXPORT_SYMBOL(AlignmentException); -EXPORT_SYMBOL(ProgramCheckException); -EXPORT_SYMBOL(SingleStepException); +EXPORT_SYMBOL(machine_check_exception); +EXPORT_SYMBOL(alignment_exception); +EXPORT_SYMBOL(program_check_exception); +EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); EXPORT_SYMBOL(ppc_n_lost_interrupts); EXPORT_SYMBOL(ppc_lost_interrupts); diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -74,7 +74,7 @@ void (*debugger_fault_handler)(struct pt DEFINE_SPINLOCK(die_lock); -void die(const char * str, struct pt_regs * fp, long err) +int die(const char * str, struct pt_regs * fp, long err) { static int die_counter; int nl = 0; @@ -232,7 +232,7 @@ platform_machine_check(struct pt_regs *r { } -void MachineCheckException(struct pt_regs *regs) +void machine_check_exception(struct pt_regs *regs) { unsigned long reason = get_mc_reason(regs); @@ -393,14 +393,14 @@ void SMIException(struct pt_regs *regs) #endif } -void UnknownException(struct pt_regs *regs) +void unknown_exception(struct pt_regs *regs) { printk("Bad trap at PC: %lx, MSR: %lx, vector=%lx %s\n", regs->nip, regs->msr, regs->trap, 
print_tainted()); _exception(SIGTRAP, regs, 0, 0); } -void InstructionBreakpoint(struct pt_regs *regs) +void instruction_breakpoint_exception(struct pt_regs *regs) { if (debugger_iabr_match(regs)) return; @@ -622,7 +622,7 @@ int check_bug_trap(struct pt_regs *regs) return 0; } -void ProgramCheckException(struct pt_regs *regs) +void program_check_exception(struct pt_regs *regs) { unsigned int reason = get_reason(regs); extern int do_mathemu(struct pt_regs *regs); @@ -701,7 +701,7 @@ void ProgramCheckException(struct pt_reg _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); } -void SingleStepException(struct pt_regs *regs) +void single_step_exception(struct pt_regs *regs) { regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ if (debugger_sstep(regs)) @@ -709,7 +709,7 @@ void SingleStepException(struct pt_regs _exception(SIGTRAP, regs, TRAP_TRACE, 0); } -void AlignmentException(struct pt_regs *regs) +void alignment_exception(struct pt_regs *regs) { int fixed; @@ -814,7 +814,7 @@ void TAUException(struct pt_regs *regs) } #endif /* CONFIG_INT_TAU */ -void AltivecUnavailException(struct pt_regs *regs) +void altivec_unavailable_exception(struct pt_regs *regs) { static int kernel_altivec_count; @@ -835,7 +835,7 @@ void AltivecUnavailException(struct pt_r } #ifdef CONFIG_ALTIVEC -void AltivecAssistException(struct pt_regs *regs) +void altivec_assist_exception(struct pt_regs *regs) { int err; @@ -872,7 +872,7 @@ void AltivecAssistException(struct pt_re #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_E500 -void PerformanceMonitorException(struct pt_regs *regs) +void performance_monitor_exception(struct pt_regs *regs) { perf_irq(regs); } diff --git a/arch/ppc/syslib/ibm44x_common.c b/arch/ppc/syslib/ibm44x_common.c --- a/arch/ppc/syslib/ibm44x_common.c +++ b/arch/ppc/syslib/ibm44x_common.c @@ -178,7 +178,7 @@ void __init ibm44x_platform_init(void) #endif } -/* Called from MachineCheckException */ +/* Called from machine_check_exception */ void platform_machine_check(struct 
pt_regs *regs) { printk("PLB0: BEAR=0x%08x%08x ACR= 0x%08x BESR= 0x%08x\n", diff --git a/arch/ppc/syslib/ppc4xx_setup.c b/arch/ppc/syslib/ppc4xx_setup.c --- a/arch/ppc/syslib/ppc4xx_setup.c +++ b/arch/ppc/syslib/ppc4xx_setup.c @@ -279,7 +279,7 @@ ppc4xx_init(unsigned long r3, unsigned l #endif /* defined(CONFIG_PCI) && defined(CONFIG_IDE) */ } -/* Called from MachineCheckException */ +/* Called from machine_check_exception */ void platform_machine_check(struct pt_regs *regs) { #if defined(DCRN_PLB0_BEAR) diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -2,6 +2,8 @@ # Makefile for the linux ppc64 kernel. # +ifneq ($(CONFIG_PPC_MERGE),y) + EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds @@ -77,3 +79,9 @@ endif # These are here while we do the architecture merge vecemu-y += ../../powerpc/kernel/vecemu.o vector-y += ../../powerpc/kernel/vector.o +idle_power4-y += ../../powerpc/kernel/idle_power4.o +traps-y += ../../powerpc/kernel/traps.o + +else + +endif diff --git a/arch/ppc64/kernel/idle_power4.S b/arch/ppc64/kernel/idle_power4.S deleted file mode 100644 --- a/arch/ppc64/kernel/idle_power4.S +++ /dev/null @@ -1,79 +0,0 @@ -/* - * This file contains the power_save function for 6xx & 7xxx CPUs - * rewritten in assembler - * - * Warning ! This code assumes that if your machine has a 750fx - * it will have PLL 1 set to low speed mode (used during NAP/DOZE). - * if this is not the case some additional changes will have to - * be done to check a runtime var (a bit like powersave-nap) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG - - .text - -/* - * Here is the power_save_6xx function. This could eventually be - * split into several functions & changing the function pointer - * depending on the various features. - */ -_GLOBAL(power4_idle) -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) - /* We must dynamically check for the NAP feature as it - * can be cleared by CPU init after the fixups are done - */ - LOADBASE(r3,cur_cpu_spec) - ld r4,cur_cpu_spec@l(r3) - ld r4,CPU_SPEC_FEATURES(r4) - andi. r0,r4,CPU_FTR_CAN_NAP - beqlr - /* Now check if user or arch enabled NAP mode */ - LOADBASE(r3,powersave_nap) - lwz r4,powersave_nap@l(r3) - cmpwi 0,r4,0 - beqlr - - /* Clear MSR:EE */ - mfmsr r7 - li r4,0 - ori r4,r4,MSR_EE - andc r0,r7,r4 - mtmsrd r0 - - /* Check current_thread_info()->flags */ - clrrdi r4,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r4) - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - mtmsrd r7 /* out of line this ? */ - blr -1: - /* Go to NAP now */ -BEGIN_FTR_SECTION - DSSALL - sync -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - oris r7,r7,MSR_POW@h - sync - isync - mtmsrd r7 - isync - sync - blr - diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c deleted file mode 100644 --- a/arch/ppc64/kernel/traps.c +++ /dev/null @@ -1,568 +0,0 @@ -/* - * linux/arch/ppc64/kernel/traps.c - * - * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version.
- * - * Modified by Cort Dougan (cort at cs.nmt.edu) - * and Paul Mackerras (paulus at cs.anu.edu.au) - */ - -/* - * This file handles the architecture-dependent parts of hardware exceptions - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_DEBUGGER -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); - -EXPORT_SYMBOL(__debugger); -EXPORT_SYMBOL(__debugger_ipi); -EXPORT_SYMBOL(__debugger_bpt); -EXPORT_SYMBOL(__debugger_sstep); -EXPORT_SYMBOL(__debugger_iabr_match); -EXPORT_SYMBOL(__debugger_dabr_match); -EXPORT_SYMBOL(__debugger_fault_handler); -#endif - -struct notifier_block *powerpc_die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); - -int register_die_notifier(struct notifier_block *nb) -{ - int err = 0; - unsigned long flags; - - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&powerpc_die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; -} - -/* - * Trap & Exception support - */ - -static DEFINE_SPINLOCK(die_lock); - -int die(const char *str, struct pt_regs *regs, long err) -{ - static int die_counter; - int nl = 0; - - if (debugger(regs)) - return 1; - - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC 
"); - nl = 1; -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); - nl = 1; -#endif - switch(systemcfg->platform) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_BPA: - printk("BPA "); - nl = 1; - break; - } - if (nl) - printk("\n"); - print_modules(); - show_regs(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - - if (in_interrupt()) - panic("Fatal exception in interrupt"); - - if (panic_on_oops) { - printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); - ssleep(5); - panic("Fatal exception"); - } - do_exit(SIGSEGV); - - return 0; -} - -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) -{ - siginfo_t info; - - if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } - - memset(&info, 0, sizeof(info)); - info.si_signo = signr; - info.si_code = code; - info.si_addr = (void __user *) addr; - force_sig_info(signr, &info, current); -} - -void system_reset_exception(struct pt_regs *regs) -{ - /* See if any machine dependent calls */ - if (ppc_md.system_reset_exception) - ppc_md.system_reset_exception(regs); - - die("System Reset", regs, 0); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable System Reset"); - - /* What should we do here? We could issue a shutdown or hard reset. 
*/ -} - -void machine_check_exception(struct pt_regs *regs) -{ - int recover = 0; - - /* See if any machine dependent calls */ - if (ppc_md.machine_check_exception) - recover = ppc_md.machine_check_exception(regs); - - if (recover) - return; - - if (debugger_fault_handler(regs)) - return; - die("Machine check", regs, 0); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable Machine check"); -} - -void unknown_exception(struct pt_regs *regs) -{ - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", - regs->nip, regs->msr, regs->trap); - - _exception(SIGTRAP, regs, 0, 0); -} - -void instruction_breakpoint_exception(struct pt_regs *regs) -{ - if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_iabr_match(regs)) - return; - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); -} - -void __kprobes single_step_exception(struct pt_regs *regs) -{ - regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ - - if (notify_die(DIE_SSTEP, "single_step", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_sstep(regs)) - return; - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); -} - -/* - * After we have successfully emulated an instruction, we have to - * check if the instruction was being single-stepped, and if so, - * pretend we got a single-step exception. This was pointed out - * by Kumar Gala. 
-- paulus - */ -static inline void emulate_single_step(struct pt_regs *regs) -{ - if (regs->msr & MSR_SE) - single_step_exception(regs); -} - -static void parse_fpe(struct pt_regs *regs) -{ - int code = 0; - unsigned long fpscr; - - flush_fp_to_thread(current); - - fpscr = current->thread.fpscr; - - /* Invalid operation */ - if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) - code = FPE_FLTINV; - - /* Overflow */ - else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) - code = FPE_FLTOVF; - - /* Underflow */ - else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) - code = FPE_FLTUND; - - /* Divide by zero */ - else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) - code = FPE_FLTDIV; - - /* Inexact result */ - else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) - code = FPE_FLTRES; - - _exception(SIGFPE, regs, code, regs->nip); -} - -/* - * Illegal instruction emulation support. Return non-zero if we can't - * emulate, or -EFAULT if the associated memory access caused an access - * fault. Return zero on success. - */ - -#define INST_MFSPR_PVR 0x7c1f42a6 -#define INST_MFSPR_PVR_MASK 0xfc1fffff - -#define INST_DCBA 0x7c0005ec -#define INST_DCBA_MASK 0x7c0007fe - -#define INST_MCRXR 0x7c000400 -#define INST_MCRXR_MASK 0x7c0007fe - -static int emulate_instruction(struct pt_regs *regs) -{ - unsigned int instword; - - if (!user_mode(regs)) - return -EINVAL; - - CHECK_FULL_REGS(regs); - - if (get_user(instword, (unsigned int __user *)(regs->nip))) - return -EFAULT; - - /* Emulate the mfspr rD, PVR. */ - if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { - unsigned int rd; - - rd = (instword >> 21) & 0x1f; - regs->gpr[rd] = mfspr(SPRN_PVR); - return 0; - } - - /* Emulating the dcba insn is just a no-op. */ - if ((instword & INST_DCBA_MASK) == INST_DCBA) { - static int warned; - - if (!warned) { - printk(KERN_WARNING - "process %d (%s) uses obsolete 'dcba' insn\n", - current->pid, current->comm); - warned = 1; - } - return 0; - } - - /* Emulate the mcrxr insn. 
*/ - if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { - static int warned; - unsigned int shift; - - if (!warned) { - printk(KERN_WARNING - "process %d (%s) uses obsolete 'mcrxr' insn\n", - current->pid, current->comm); - warned = 1; - } - - shift = (instword >> 21) & 0x1c; - regs->ccr &= ~(0xf0000000 >> shift); - regs->ccr |= (regs->xer & 0xf0000000) >> shift; - regs->xer &= ~0xf0000000; - return 0; - } - - return -EINVAL; -} - -/* - * Look through the list of trap instructions that are used for BUG(), - * BUG_ON() and WARN_ON() and see if we hit one. At this point we know - * that the exception was caused by a trap instruction of some kind. - * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 - * otherwise. - */ -extern struct bug_entry __start___bug_table[], __stop___bug_table[]; - -#ifndef CONFIG_MODULES -#define module_find_bug(x) NULL -#endif - -struct bug_entry *find_bug(unsigned long bugaddr) -{ - struct bug_entry *bug; - - for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) - if (bugaddr == bug->bug_addr) - return bug; - return module_find_bug(bugaddr); -} - -static int -check_bug_trap(struct pt_regs *regs) -{ - struct bug_entry *bug; - unsigned long addr; - - if (regs->msr & MSR_PR) - return 0; /* not in kernel */ - addr = regs->nip; /* address of trap instruction */ - if (addr < PAGE_OFFSET) - return 0; - bug = find_bug(regs->nip); - if (bug == NULL) - return 0; - if (bug->line & BUG_WARNING_TRAP) { - /* this is a WARN_ON rather than BUG/BUG_ON */ - printk(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); - show_stack(current, (void *)regs->gpr[1]); - return 1; - } - printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - return 0; -} - -void __kprobes program_check_exception(struct pt_regs *regs) -{ - if (debugger_fault_handler(regs)) - return; - - if (regs->msr & 0x100000) { - /* IEEE FP exception */ - parse_fpe(regs); - } else if (regs->msr & 
0x20000) { - /* trap exception */ - - if (notify_die(DIE_BPT, "breakpoint", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_bpt(regs)) - return; - - if (check_bug_trap(regs)) { - regs->nip += 4; - return; - } - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - - } else { - /* Privileged or illegal instruction; try to emulate it. */ - switch (emulate_instruction(regs)) { - case 0: - regs->nip += 4; - emulate_single_step(regs); - break; - - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - break; - - default: - if (regs->msr & 0x40000) - /* priveleged */ - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - else - /* illegal */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - break; - } - } -} - -void kernel_fp_unavailable_exception(struct pt_regs *regs) -{ - printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " - "%lx at %lx\n", regs->trap, regs->nip); - die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); -} - -void altivec_unavailable_exception(struct pt_regs *regs) -{ - if (user_mode(regs)) { - /* A user program has executed an altivec instruction, - but this kernel doesn't support altivec. 
*/ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } - printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " - "%lx at %lx\n", regs->trap, regs->nip); - die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); -} - -extern perf_irq_t perf_irq; - -void performance_monitor_exception(struct pt_regs *regs) -{ - perf_irq(regs); -} - -void alignment_exception(struct pt_regs *regs) -{ - int fixed; - - fixed = fix_alignment(regs); - - if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ - emulate_single_step(regs); - return; - } - - /* Operand address was bad */ - if (fixed == -EFAULT) { - if (user_mode(regs)) { - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar); - } else { - /* Search exception table */ - bad_page_fault(regs, regs->dar, SIGSEGV); - } - - return; - } - - _exception(SIGBUS, regs, BUS_ADRALN, regs->nip); -} - -#ifdef CONFIG_ALTIVEC -void altivec_assist_exception(struct pt_regs *regs) -{ - int err; - siginfo_t info; - - if (!user_mode(regs)) { - printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" - " at %lx\n", regs->nip); - die("Kernel VMX/Altivec assist exception", regs, SIGILL); - } - - flush_altivec_to_thread(current); - - err = emulate_altivec(regs); - if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ - emulate_single_step(regs); - return; - } - - if (err == -EFAULT) { - /* got an error reading the instruction */ - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) regs->nip; - force_sig_info(SIGSEGV, &info, current); - } else { - /* didn't recognize the instruction */ - /* XXX quick hack for now: set the non-Java bit in the VSCR */ - if (printk_ratelimit()) - printk(KERN_ERR "Unrecognized altivec instruction " - "in %s at %lx\n", current->comm, regs->nip); - current->thread.vscr.u[3] |= 0x10000; - } -} -#endif /* CONFIG_ALTIVEC */ - -/* - * We enter here if we get an unrecoverable exception, that is, one 
- * that happened at a point where the RI (recoverable interrupt) bit - * in the MSR is 0. This indicates that SRR0/1 are live, and that - * we therefore lost state by taking this exception. - */ -void unrecoverable_exception(struct pt_regs *regs) -{ - printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", - regs->trap, regs->nip); - die("Unrecoverable exception", regs, SIGABRT); -} - -/* - * We enter here if we discover during exception entry that we are - * running in supervisor mode with a userspace value in the stack pointer. - */ -void kernel_bad_stack(struct pt_regs *regs) -{ - printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", - regs->gpr[1], regs->nip); - die("Bad kernel stack pointer", regs, SIGABRT); -} - -void __init trap_init(void) -{ -} diff --git a/include/asm-ppc/system.h b/include/asm-ppc/system.h --- a/include/asm-ppc/system.h +++ b/include/asm-ppc/system.h @@ -87,7 +87,7 @@ extern void cacheable_memzero(void *p, u extern void *cacheable_memcpy(void *, const void *, unsigned int); extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); -extern void die(const char *, struct pt_regs *, long); +extern int die(const char *, struct pt_regs *, long); extern void _exception(int, struct pt_regs *, int, unsigned long); #ifdef CONFIG_BOOKE_WDT extern u32 booke_wdt_enabled; From sfr at canb.auug.org.au Sat Oct 1 21:40:29 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 21:40:29 +1000 Subject: [PATCH 7/9] ppc64: simplify the build a little In-Reply-To: <20051001000516.1d444d51.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000516.1d444d51.sfr@canb.auug.org.au> Message-ID: <20051001214029.5d66fdb4.sfr@canb.auug.org.au> New version to because of changes in 6/9 Signed-off-by: Stephen Rothwell --- arch/powerpc/Makefile | 1 - arch/powerpc/kernel/Makefile | 11 +++++++++-- arch/ppc64/Makefile | 2 +- 
arch/ppc64/kernel/Makefile | 11 ++--------- 4 files changed, 12 insertions(+), 13 deletions(-) -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ bd142b70a6bd5522f7d95f0cec06091b93bb0715 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -121,7 +121,6 @@ head-$(CONFIG_FSL_BOOKE) := arch/powerpc ifeq ($(CONFIG_PPC32),y) head-$(CONFIG_6xx) += arch/powerpc/kernel/idle_6xx.o -head-$(CONFIG_POWER4) += arch/powerpc/kernel/idle_power4.o head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,6 +2,10 @@ # Makefile for the linux kernel. # +ifeq ($(CONFIG_PPC64),y) +EXTRA_CFLAGS += -mno-minimal-toc +endif + extra-$(CONFIG_PPC_STD_MMU) := head.o extra_$(CONFIG_PPC64) := head_64.o extra-$(CONFIG_40x) := head_4xx.o @@ -9,10 +13,13 @@ extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-$(CONFIG_6xx) += idle_6xx.o -extra-$(CONFIG_POWER4) += idle_power4.o extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds -obj-y := semaphore.o traps.o process.o +obj-y := traps.o +obj-$(CONFIG_PPC32) += semaphore.o process.o +obj-$(CONFIG_PPC64) += idle_power4.o +ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_MODULES) += ppc_ksyms.o +endif obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -82,7 +82,7 @@ CFLAGS += $(call cc-option,-funit-at-a-t head-y := arch/ppc64/kernel/head.o libs-y += arch/ppc64/lib/ -core-y += arch/ppc64/kernel/ +core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ core-y += arch/ppc64/mm/ core-y += arch/powerpc/platforms/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ 
b/arch/ppc64/kernel/Makefile @@ -7,12 +7,12 @@ ifneq ($(CONFIG_PPC_MERGE),y) EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds -obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ +obj-y := setup.o entry.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ align.o semaphore.o bitops.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ - lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ + lmb.o cputable.o cpu_setup_power4.o \ iommu.o sysfs.o vdso.o pmc.o firmware.o prom.o obj-y += vdso32/ vdso64/ @@ -66,7 +66,6 @@ obj-$(CONFIG_PPC_BPA) += pSeries_smp.o obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o endif -obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_KPROBES) += kprobes.o CFLAGS_ioctl32.o += -Ifs/ @@ -76,12 +75,6 @@ arch/ppc64/kernel/head.o: arch/powerpc/p AFLAGS_head.o += -Iarch/powerpc/platforms/iseries endif -# These are here while we do the architecture merge -vecemu-y += ../../powerpc/kernel/vecemu.o -vector-y += ../../powerpc/kernel/vector.o -idle_power4-y += ../../powerpc/kernel/idle_power4.o -traps-y += ../../powerpc/kernel/traps.o - else endif From sfr at canb.auug.org.au Sat Oct 1 22:30:28 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 22:30:28 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and traps.c In-Reply-To: <20051001213753.52d5d5c3.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> <20051001121714.1b5886aa.sfr@canb.auug.org.au> <17214.632.655003.750819@cargo.ozlabs.ibm.com> <20051001213753.52d5d5c3.sfr@canb.auug.org.au> Message-ID: <20051001223028.56ef9bfd.sfr@canb.auug.org.au> On Sat, 1 Oct 2005 21:37:53 +1000 Stephen Rothwell wrote: > > OK, thanks for keeping me honest :-) Here is new versions of patches 6 > and 7 (all the rest are the same as before). 
Just in case anyone is wondering, the new patchset has been built for (my configs) pSeries, iSeries, g5, ARCH=ppc, ARCH=powerpc ppc32 pmac, ARCH=powerpc iSeries and I have booted the ARCH=powerpc iSeries kernel. -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051001/1d938420/attachment.pgp From benh at kernel.crashing.org Sun Oct 2 18:45:59 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Sun, 02 Oct 2005 18:45:59 +1000 Subject: To large page or not to large page In-Reply-To: <17213.56267.826654.651632@kitch0.watson.ibm.com> References: <17213.56267.826654.651632@kitch0.watson.ibm.com> Message-ID: <1128242759.8267.45.camel@gaston> On Fri, 2005-09-30 at 20:43 -0400, Jimi Xenidis wrote: > It seems as tho Linux will map the kernel with large pages if the > processor allows it regardless if the lmb is sufficient to hold a > large page, correct? > > Is there some runtime option to force the use of 4K pages. > > Ultimately, my desire is to define a 256Mig segment that, using a > Hypervisor, that can be populated by shared pages that can physically > belong to the hypervisor or other partions/domains) and restrict the > mappings to 4k. I have some ideas, but am willing to hear any suggestions. Does that segment has to be part of the linear mapping ? Can't it just be mapped afterward using a kernel virtual mapping ? Also, don't forget that the 64k pages patch won't support 4k pages at all for performances reasons when CONFIG_PPC_64K_PAGES is enabled (at least on processors that have HW support for 64k pages) . Ben? 
From jimix at watson.ibm.com Sun Oct 2 22:19:09 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Sun, 2 Oct 2005 08:19:09 -0400 Subject: To large page or not to large page In-Reply-To: <1128242759.8267.45.camel@gaston> References: <17213.56267.826654.651632@kitch0.watson.ibm.com> <1128242759.8267.45.camel@gaston> Message-ID: <17215.53309.488586.919028@kitch0.watson.ibm.com> >>>>> "BH" == Benjamin Herrenschmidt writes: BH> On Fri, 2005-09-30 at 20:43 -0400, Jimi Xenidis wrote: >> It seems as tho Linux will map the kernel with large pages if the >> processor allows it regardless if the lmb is sufficient to hold a >> large page, correct? >> >> Is there some runtime option to force the use of 4K pages. >> >> Ultimately, my desire is to define a 256Mig segment that, using a >> Hypervisor, that can be populated by shared pages that can physically >> belong to the hypervisor or other partions/domains) and restrict the >> mappings to 4k. I have some ideas, but am willing to hear any suggestions. BH> Does that segment has to be part of the linear mapping ? Not sure what _you_ mean by "linear mapping". More specifically I would like for: #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) to work, and the segment to be managed. I think that is the linear map. BH> Can't it just be mapped afterward using a kernel virtual mapping BH> ? Can it? given the use of __pa()? Should I consider a new REGION_ID()? BH> Also, don't forget that the 64k pages patch won't support 4k BH> pages at all for performances reasons when CONFIG_PPC_64K_PAGES BH> is enabled (at least on processors that have HW support for 64k BH> pages) . "at all", surely, IO space will require and continue to use 4k pages. -JX -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about." 
-- Peter Griffin (Family Guy) From schwab at suse.de Sun Oct 2 23:25:16 2005 From: schwab at suse.de (Andreas Schwab) Date: Sun, 02 Oct 2005 15:25:16 +0200 Subject: PMac motherboard information missing from /proc/cpuinfo Message-ID: With 2.6.14-rc3 I no longer get the motherboard information in /proc/cpuinfo. With 2.6.13 I got this: machine : PowerMac7,3 motherboard : PowerMac7,3 MacRISC4 Power Macintosh Now I only get this: machine : PowerMac The information in /proc/device-tree is still complete. Andreas. -- Andreas Schwab, SuSE Labs, schwab at suse.de SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 "And now for something completely different." From vatsa at in.ibm.com Mon Oct 3 03:46:30 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Sun, 2 Oct 2005 23:16:30 +0530 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 Message-ID: <20051002174630.GA6786@in.ibm.com> Hello, The patch below implements NO_IDLE_HZ support for pSeries/PPC64. It basically lets idle CPUs to cut off their timer ticks until they can. Some notes about the patch: - Patch is against 2.6.14-rc1 and has undergone some basic test (with an additional patch - also in the mail) on a Power4 box. I intend to test on a Power5 box also sometime soon. - Only pseries_shared_idle and pseries_dedicated_idle routines have been converted over to use this support, since I felt cutting off ticks doesnt make sense if the idle routine is poll-based. - Boot CPU cannot skip ticks. This is because of the current design wherein only boot CPU updates wall-clock/jiffies. I didn't see any particular reason why it has been designed like that (maybe to reduce lock contention on xtime_lock?). If we have to allow boot CPU also to skip ticks (which IMO we should), then this design needs to change, i.e we should allow xtime/jiffies to be updated from any CPU (like S390 allows).
If people agree that this is the right direction, then I can give it a shot next. - By default the feature is disabled at bootup and has to be enabled by writing 0 to /proc/sys/kernel/hz_timer. This can be modifed later after the patch has undergone sufficient test. Also we can introduce a boottime argument to control this behaviour. - One requirement is that a call to start_hz_timer should be inserted in every possible interrupt path. Towards this end, have I missed some interrupt paths? Or have I included some exception path which I shouldn't be! Below are both the patches - actual patch and the patch which I used to test on Power4 box. First the actual NO_IDLE_HZ patch: --- linux-2.6.14-rc1-root/arch/ppc64/Kconfig | 6 + linux-2.6.14-rc1-root/arch/ppc64/kernel/head.S | 9 + linux-2.6.14-rc1-root/arch/ppc64/kernel/irq.c | 3 linux-2.6.14-rc1-root/arch/ppc64/kernel/pSeries_setup.c | 10 + linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c | 96 ++++++++++++++-- linux-2.6.14-rc1-root/include/asm-ppc64/time.h | 8 + linux-2.6.14-rc1-root/kernel/sysctl.c | 20 +-- 7 files changed, 127 insertions(+), 25 deletions(-) diff -puN arch/ppc64/kernel/time.c~ppc64 arch/ppc64/kernel/time.c --- linux-2.6.14-rc1/arch/ppc64/kernel/time.c~ppc64 2005-09-28 19:35:36.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c 2005-10-02 22:53:44.000000000 +0530 @@ -315,23 +315,13 @@ static void iSeries_tb_recal(void) unsigned long tb_last_stamp __cacheline_aligned_in_smp; -/* - * timer_interrupt - gets called when the decrementer overflows, - * with interrupts disabled. 
- */ -int timer_interrupt(struct pt_regs * regs) +static void account_ticks(struct pt_regs *regs) { int next_dec; unsigned long cur_tb; struct paca_struct *lpaca = get_paca(); unsigned long cpu = smp_processor_id(); - irq_enter(); - - profile_tick(CPU_PROFILING, regs); - - lpaca->lppaca.int_dword.fields.decr_int = 0; - while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) { /* * We cannot disable the decrementer, so in the period @@ -364,6 +354,23 @@ int timer_interrupt(struct pt_regs * reg if (next_dec > lpaca->default_decr) next_dec = lpaca->default_decr; set_dec(next_dec); +} + +/* + * timer_interrupt - gets called when the decrementer overflows, + * with interrupts disabled. + */ +int timer_interrupt(struct pt_regs * regs) +{ + struct paca_struct *lpaca = get_paca(); + + irq_enter(); + + profile_tick(CPU_PROFILING, regs); + + lpaca->lppaca.int_dword.fields.decr_int = 0; + + account_ticks(regs); #ifdef CONFIG_PPC_ISERIES if (hvlpevent_is_pending()) @@ -381,6 +388,73 @@ int timer_interrupt(struct pt_regs * reg return 1; } +#ifdef CONFIG_NO_IDLE_HZ + +#define MAX_DEC_COUNT (UINT_MAX) /* Decrementer is 32-bit */ +#define MIN_SKIP 2 +#define MAX_SKIP (MAX_DEC_COUNT/tb_ticks_per_jiffy) + +int sysctl_hz_timer = 1; + +/* Avoid the HZ timer (decrementer) exception on this CPU for "some" time. + * Has to be called with interrupts disabled. + * + * The HZ timer frequency is restored upon the occurence of an interrupt or + * exception on this CPU. 
+ */ +void stop_hz_timer(void) +{ + unsigned long cpu = smp_processor_id(), seq, delta; + int next_dec; + + if (sysctl_hz_timer != 0 || cpu == boot_cpuid) + return; + + cpu_set(cpu, nohz_cpu_mask); + mb(); + if (rcu_pending(cpu) || local_softirq_pending()) { + cpu_clear(cpu, nohz_cpu_mask); + return; + } + + do { + seq = read_seqbegin(&xtime_lock); + + delta = next_timer_interrupt() - jiffies; + + if (delta < MIN_SKIP) { + cpu_clear(cpu, nohz_cpu_mask); + return; + } + + if (delta > MAX_SKIP) + delta = MAX_SKIP; + + next_dec = tb_last_stamp + (delta-1) * tb_ticks_per_jiffy; + + } while (read_seqretry(&xtime_lock, seq)); + + next_dec -= get_tb(); + set_dec(next_dec); + + return; +} + +/* Take into account skipped ticks and restore the HZ timer frequency */ +void start_hz_timer(struct pt_regs *regs) +{ + unsigned long cpu = smp_processor_id(); + + if (!cpu_isset(cpu, nohz_cpu_mask)) + return; + + cpu_clear(cpu, nohz_cpu_mask); + account_ticks(regs); +} + +#endif /* CONFIG_NO_IDLE_HZ */ + + /* * Scheduler clock - returns current time in nanosec units. * diff -puN arch/ppc64/kernel/irq.c~ppc64 arch/ppc64/kernel/irq.c --- linux-2.6.14-rc1/arch/ppc64/kernel/irq.c~ppc64 2005-09-28 19:35:36.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/irq.c 2005-10-02 21:42:03.000000000 +0530 @@ -55,6 +55,7 @@ #include #include #include +#include #ifdef CONFIG_SMP extern void iSeries_smp_message_recv( struct pt_regs * ); @@ -313,6 +314,8 @@ void do_IRQ(struct pt_regs *regs) irq_enter(); + start_hz_timer(regs); + #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 2KB free? 
*/ { diff -puN arch/ppc64/kernel/head.S~ppc64 arch/ppc64/kernel/head.S --- linux-2.6.14-rc1/arch/ppc64/kernel/head.S~ppc64 2005-09-28 19:35:36.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/head.S 2005-10-02 22:45:44.000000000 +0530 @@ -355,6 +355,13 @@ label##_iSeries: \ #endif +#ifdef CONFIG_NO_IDLE_HZ +#define START_HZ_TIMER \ + bl .start_hz_timer +#else +#define START_HZ_TIMER +#endif + #define STD_EXCEPTION_COMMON(trap, label, hdlr) \ .align 7; \ .globl label##_common; \ @@ -363,6 +370,7 @@ label##_common: \ DISABLE_INTS; \ bl .save_nvgprs; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ + START_HZ_TIMER; \ bl hdlr; \ b .ret_from_except @@ -373,6 +381,7 @@ label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ DISABLE_INTS; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ + START_HZ_TIMER; \ bl hdlr; \ b .ret_from_except_lite diff -puN include/asm-ppc64/time.h~ppc64 include/asm-ppc64/time.h --- linux-2.6.14-rc1/include/asm-ppc64/time.h~ppc64 2005-09-28 19:43:54.000000000 +0530 +++ linux-2.6.14-rc1-root/include/asm-ppc64/time.h 2005-10-02 21:32:08.000000000 +0530 @@ -102,6 +102,14 @@ static inline unsigned long tb_ticks_sin return get_tb() - tstamp; } +#ifdef CONFIG_NO_IDLE_HZ +extern void stop_hz_timer(void); +extern void start_hz_timer(struct pt_regs *); +#else +static inline void stop_hz_timer(void) { } +static inline void start_hz_timer(struct pt_regs *regs) { } +#endif + #define mulhwu(x,y) \ ({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;}) #define mulhdu(x,y) \ diff -puN arch/ppc64/Kconfig~ppc64 arch/ppc64/Kconfig --- linux-2.6.14-rc1/arch/ppc64/Kconfig~ppc64 2005-09-28 20:08:39.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/Kconfig 2005-10-01 15:45:06.000000000 +0530 @@ -146,6 +146,12 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. 
+config NO_IDLE_HZ + depends on EXPERIMENTAL && PPC_PSERIES + bool "No HZ timer ticks in idle" + help + Switches the HZ timer interrupts off when a CPU is idle. + config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on PPC_MULTIPLATFORM && EXPERIMENTAL diff -puN kernel/sysctl.c~ppc64 kernel/sysctl.c --- linux-2.6.14-rc1/kernel/sysctl.c~ppc64 2005-09-28 21:08:05.000000000 +0530 +++ linux-2.6.14-rc1-root/kernel/sysctl.c 2005-10-01 10:55:32.000000000 +0530 @@ -544,6 +544,16 @@ static ctl_table kern_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, +#ifdef CONFIG_NO_IDLE_HZ + { + .ctl_name = KERN_HZ_TIMER, + .procname = "hz_timer", + .data = &sysctl_hz_timer, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_ARCH_S390 #ifdef CONFIG_MATHEMU { @@ -555,16 +565,6 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_NO_IDLE_HZ - { - .ctl_name = KERN_HZ_TIMER, - .procname = "hz_timer", - .data = &sysctl_hz_timer, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif { .ctl_name = KERN_S390_USER_DEBUG_LOGGING, .procname = "userprocess_debug", diff -puN arch/ppc64/kernel/pSeries_setup.c~ppc64 arch/ppc64/kernel/pSeries_setup.c --- linux-2.6.14-rc1/arch/ppc64/kernel/pSeries_setup.c~ppc64 2005-10-01 11:02:18.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/pSeries_setup.c 2005-10-01 11:10:50.000000000 +0530 @@ -475,9 +475,10 @@ static inline void dedicated_idle_sleep( * a prod occurs. Returning from the cede enables external * interrupts. */ - if (!need_resched()) + if (!need_resched()) { + stop_hz_timer(); cede_processor(); - else + } else local_irq_enable(); } else { /* @@ -570,9 +571,10 @@ static int pseries_shared_idle(void) * Check need_resched() again with interrupts disabled * to avoid a race. 
*/ - if (!need_resched()) + if (!need_resched()) { + stop_hz_timer(); cede_processor(); - else + } else local_irq_enable(); HMT_medium(); _ Now the test patch. It was something quick that I wrote to get the data I needed. Are the decrementer exception statistics available somewhere already? Also I assume that there are 4 CPUs in the m/c! --- linux-2.6.14-rc1-root/arch/ppc64/kernel/idle.c | 5 +++++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c | 4 ++++ linux-2.6.14-rc1-root/fs/proc/proc_misc.c | 12 ++++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff -puN arch/ppc64/kernel/time.c~debug arch/ppc64/kernel/time.c --- linux-2.6.14-rc1/arch/ppc64/kernel/time.c~debug 2005-10-02 22:56:58.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c 2005-10-02 22:56:58.000000000 +0530 @@ -315,6 +315,8 @@ static void iSeries_tb_recal(void) unsigned long tb_last_stamp __cacheline_aligned_in_smp; +DEFINE_PER_CPU(int, dec_ticks); + static void account_ticks(struct pt_regs *regs) { int next_dec; @@ -366,6 +368,8 @@ int timer_interrupt(struct pt_regs * reg irq_enter(); + __get_cpu_var(dec_ticks) += 1; + profile_tick(CPU_PROFILING, regs); lpaca->lppaca.int_dword.fields.decr_int = 0; diff -puN fs/proc/proc_misc.c~debug fs/proc/proc_misc.c --- linux-2.6.14-rc1/fs/proc/proc_misc.c~debug 2005-10-02 22:56:58.000000000 +0530 +++ linux-2.6.14-rc1-root/fs/proc/proc_misc.c 2005-10-02 22:56:58.000000000 +0530 @@ -233,13 +233,21 @@ static struct file_operations proc_zonei .release = seq_release, }; +DECLARE_PER_CPU(int, dec_ticks); + static int version_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { int len; + char *cp = page; - strcpy(page, linux_banner); - len = strlen(page); + cp += sprintf(cp, "%s\n", linux_banner); + cp += sprintf (cp, "%d %d %d %d \n", + per_cpu(dec_ticks, 0), + per_cpu(dec_ticks, 1), + per_cpu(dec_ticks, 2), + per_cpu(dec_ticks, 3)); + len = cp - page; return proc_calc_metrics(page, start, off, count, eof, len); } 
diff -puN arch/ppc64/kernel/idle.c~debug arch/ppc64/kernel/idle.c --- linux-2.6.14-rc1/arch/ppc64/kernel/idle.c~debug 2005-10-02 22:56:58.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/idle.c 2005-10-02 22:56:58.000000000 +0530 @@ -45,6 +45,11 @@ int default_idle(void) while (!need_resched() && !cpu_is_offline(cpu)) { ppc64_runlatch_off(); + local_irq_disable(); + if (!need_resched()) + stop_hz_timer(); + local_irq_enable(); + /* * Go into low thread priority and possibly * low power mode. _ -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From benh at kernel.crashing.org Mon Oct 3 18:08:12 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Mon, 03 Oct 2005 18:08:12 +1000 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 In-Reply-To: <20051002174630.GA6786@in.ibm.com> References: <20051002174630.GA6786@in.ibm.com> Message-ID: <1128326892.8267.89.camel@gaston> On Sun, 2005-10-02 at 23:16 +0530, Srivatsa Vaddagiri wrote: > Hello, > The patch below implements NO_IDLE_HZ support for pSeries/PPC64. It > basically lets idle CPUs to cut off their timer ticks until they can. Hi ! I haven't looked at the patch itself yet, but I have a few comments on your notes: > Some notes about the patch: > > - Only pseries_shared_idle and pseries_dedicated_idle routines > have been converted over to use this support, since I felt > cutting off ticks doesnt make sense if the idle routine is > poll-based. It should be fine to do that on native_idle as well (for things like G5 machines). > - Boot CPU cannot skip ticks. This is because of the current design wherein > only boot CPU updates wall-clock/jiffies. That should be fairly easy to fix. > I didn't see any particular reason why it has been designed like that > (maybe to reduce lock contention on xtime_lock?). 
If we have to allow > boot CPU also to skip ticks (which IMO we should), then this design > needs to change, i.e we should allow xtime/jiffies to be updated > from any CPU (like S390 allows). If people agree that this is the > right direction, then I can give it a shot next. I don't think we care that much which CPU updates xtime and jiffies. I would love xtime to die anyway, kernel should get do_gettimeofday() internally. On ppc64, we only seldom update xtime. Doing it on CPU0 was, I suppose, a matter of simplicity of the implementation. > - One requirement is that a call to start_hz_timer should be inserted > in every possible interrupt path. Towards this end, have I missed > some interrupt paths? Or have I included some exception path which > I shouldn't be! I think C code is good enough, you probably only need to add a call to do_IRQ(). The decrementer interrupt should naturally playback lost ticks. Ben. From jimix at watson.ibm.com Tue Oct 4 00:59:40 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Mon, 3 Oct 2005 10:59:40 -0400 Subject: To large page or not to large page In-Reply-To: <1128242759.8267.45.camel@gaston> References: <17213.56267.826654.651632@kitch0.watson.ibm.com> <1128242759.8267.45.camel@gaston> Message-ID: <17217.18268.120919.459499@kitch0.watson.ibm.com> >>>>> "BH" == Benjamin Herrenschmidt writes: BH> Does that segment has to be part of the linear mapping ? Can't it just BH> be mapped afterward using a kernel virtual mapping ? Thanks for the kick, I see now that I can use __ioremap() without forcing CI. thanks. -JX -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about."
-- Peter Griffin (Family Guy) From vatsa at in.ibm.com Tue Oct 4 02:18:51 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Mon, 3 Oct 2005 21:48:51 +0530 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 In-Reply-To: <1128326892.8267.89.camel@gaston> References: <20051002174630.GA6786@in.ibm.com> <1128326892.8267.89.camel@gaston> Message-ID: <20051003161851.GA4244@in.ibm.com> On Mon, Oct 03, 2005 at 06:08:12PM +1000, Benjamin Herrenschmidt wrote: > I don't think we care that much which CPU updates xtime and jiffies. I > would love xtime to die anyway, kernel should get do_gettimeofday() > internally. On ppc64, we only seldom update xtime. Doing it on CPU0 > was, I suppose, a matter of simplicity of the implementation. Ok, in that case I will fix that as well in the next version of the patch. > I think C code is good enough, you probably only need to add a call > to do_IRQ(). The decrementer interrupt should naturally playback lost > ticks. Aren't other exceptions possible when a CPU is in its idle loop? If not, then yes we can insert a call to start_hz_timer only in do_IRQ. BTW, in the patch that I had sent, I had got the calculation of next_dec (in stop_hz_timer) wrong. I had considered that tb_last_stamp is ahead of jiffies, which is not correct.
So the modified code should be something like below: do { seq = read_seqbegin(&xtime_lock); delta = next_timer_interrupt() - jiffies; if (delta < MIN_SKIP) { cpu_clear(cpu, nohz_cpu_mask); return; } if (delta > MAX_SKIP) delta = MAX_SKIP; next_dec = tb_last_stamp + delta * tb_ticks_per_jiffy; } while (read_seqretry(&xtime_lock, seq)); next_dec -= get_tb(); set_dec(next_dec); -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From linas at austin.ibm.com Tue Oct 4 04:57:39 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 13:57:39 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add Message-ID: <20051003185739.GR29826@austin.ibm.com> 08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that the phb was not marked "dynamic", and so instead of having kmalloc() being called, the __init __alloc_bootmem() was called, resulting in access of garbage data. The patch below fixes this crash, and adds some docs to clarify the code.
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.011393833 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:52:26.421786761 -0500 @@ -121,6 +121,12 @@ return NULL; } +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,17 +207,19 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** pci_devs_phb_init -- Initialize phbs and pci devs under them. + * + * When this is called, the buswalk of PHB's has not happened yet. */ void __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { pci_devs_phb_init_dynamic(phb); + phb->is_dynamic = 1; + } pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); } From brking at us.ibm.com Tue Oct 4 06:26:30 2005 From: brking at us.ibm.com (brking at us.ibm.com) Date: Mon, 03 Oct 2005 15:26:30 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option Message-ID: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Add a .config option to default the scsi scan inquiry timeout. Due to a broken device (SCSI/ATA converter card) that is very common on IBM iSeries/pSeries machines, these architectures need a longer default inquiry timeout. 
Signed-off-by: Brian King --- linux-2.6-bjking1/arch/ppc64/configs/iSeries_defconfig | 1 + linux-2.6-bjking1/arch/ppc64/configs/pSeries_defconfig | 1 + linux-2.6-bjking1/drivers/scsi/Kconfig | 8 ++++++++ linux-2.6-bjking1/drivers/scsi/scsi_scan.c | 2 +- 4 files changed, 11 insertions(+), 1 deletion(-) diff -puN drivers/scsi/Kconfig~scsi_inq_timeout_config drivers/scsi/Kconfig --- linux-2.6/drivers/scsi/Kconfig~scsi_inq_timeout_config 2005-10-03 11:15:08.000000000 -0500 +++ linux-2.6-bjking1/drivers/scsi/Kconfig 2005-10-03 11:17:37.000000000 -0500 @@ -209,6 +209,14 @@ config SCSI_LOGGING there should be no noticeable performance impact as long as you have logging turned off. +config SCSI_INQUIRY_TIMEOUT + int "default timeout in seconds for INQUIRY scan" + depends on SCSI + default "5" + ---help--- + Timeout (in seconds) waiting for devices to answer INQUIRY. + Default is 5. Some non-compliant devices need more. + menu "SCSI Transport Attributes" depends on SCSI diff -puN drivers/scsi/scsi_scan.c~scsi_inq_timeout_config drivers/scsi/scsi_scan.c --- linux-2.6/drivers/scsi/scsi_scan.c~scsi_inq_timeout_config 2005-10-03 11:17:44.000000000 -0500 +++ linux-2.6-bjking1/drivers/scsi/scsi_scan.c 2005-10-03 11:18:58.000000000 -0500 @@ -102,7 +102,7 @@ MODULE_PARM_DESC(max_report_luns, "REPORT LUNS maximum number of LUNS received (should be" " between 1 and 16384)"); -static unsigned int scsi_inq_timeout = SCSI_TIMEOUT/HZ+3; +static unsigned int scsi_inq_timeout = CONFIG_SCSI_INQUIRY_TIMEOUT; module_param_named(inq_timeout, scsi_inq_timeout, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(inq_timeout, diff -puN arch/ppc64/configs/pSeries_defconfig~scsi_inq_timeout_config arch/ppc64/configs/pSeries_defconfig --- linux-2.6/arch/ppc64/configs/pSeries_defconfig~scsi_inq_timeout_config 2005-10-03 11:19:18.000000000 -0500 +++ linux-2.6-bjking1/arch/ppc64/configs/pSeries_defconfig 2005-10-03 11:21:18.000000000 -0500 @@ -436,6 +436,7 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_MULTI_LUN=y 
CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set +CONFIG_SCSI_INQUIRY_TIMEOUT=30 # # SCSI Transport Attributes diff -puN arch/ppc64/configs/iSeries_defconfig~scsi_inq_timeout_config arch/ppc64/configs/iSeries_defconfig --- linux-2.6/arch/ppc64/configs/iSeries_defconfig~scsi_inq_timeout_config 2005-10-03 11:24:14.000000000 -0500 +++ linux-2.6-bjking1/arch/ppc64/configs/iSeries_defconfig 2005-10-03 11:24:29.000000000 -0500 @@ -343,6 +343,7 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set +CONFIG_SCSI_INQUIRY_TIMEOUT=30 # # SCSI Transport Attributes _ From linas at austin.ibm.com Tue Oct 4 06:39:24 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 15:39:24 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Message-ID: <20051003203924.GS29826@austin.ibm.com> On Mon, Oct 03, 2005 at 03:26:30PM -0500, brking at us.ibm.com was heard to remark: > > Add a .config option to default the scsi scan inquiry timeout. > Due to a broken device (SCSI/ATA converter card) that is very > common on IBM iSeries/pSeries machines, these architectures > need a longer default inquiry timeout. Hmm, I thought I fixed this several years ago. I guess it didn't stay fixed? --linas From brking at us.ibm.com Tue Oct 4 06:44:28 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 15:44:28 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <20051003203924.GS29826@austin.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <20051003203924.GS29826@austin.ibm.com> Message-ID: <4341982C.8070702@us.ibm.com> linas wrote: > On Mon, Oct 03, 2005 at 03:26:30PM -0500, brking at us.ibm.com was heard to remark: > >>Add a .config option to default the scsi scan inquiry timeout. 
>>Due to a broken device (SCSI/ATA converter card) that is very >>common on IBM iSeries/pSeries machines, these architectures >>need a longer default inquiry timeout. > > > Hmm, I thought I fixed this several years ago. I guess it didn't > stay fixed? Some distro kernels have the default inquiry timeout changed for ppc64, but mainline only has the module parameter. -- Brian King eServer Storage I/O IBM Linux Technology Center From James.Bottomley at SteelEye.com Tue Oct 4 07:01:45 2005 From: James.Bottomley at SteelEye.com (James Bottomley) Date: Mon, 03 Oct 2005 17:01:45 -0400 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Message-ID: <1128373305.5825.3.camel@mulgrave> On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > Add a .config option to default the scsi scan inquiry timeout. > Due to a broken device (SCSI/ATA converter card) that is very > common on IBM iSeries/pSeries machines, these architectures > need a longer default inquiry timeout. The inquiry timeout has already been changed a while ago to be a module parameter (or kernel parameter) for this very case. Why does it now need to be a config option as well? James From jdl at freescale.com Tue Oct 4 07:19:38 2005 From: jdl at freescale.com (Jon Loeliger) Date: Mon, 03 Oct 2005 16:19:38 -0500 Subject: EXPORT_SYMBOL(foo) Trend? Message-ID: <1128374378.22452.16.camel@cashmere.sps.mot.com> Guys, Is the trend these days towards EXPORT_SYMBOL(foo) near its definition, or in the one large mondo ppc_ksym.c file? Thanks, jdl From arnd at arndb.de Tue Oct 4 07:23:48 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Mon, 3 Oct 2005 23:23:48 +0200 Subject: EXPORT_SYMBOL(foo) Trend? 
In-Reply-To: <1128374378.22452.16.camel@cashmere.sps.mot.com> References: <1128374378.22452.16.camel@cashmere.sps.mot.com> Message-ID: <200510032323.48967.arnd@arndb.de> On Maandag 03 Oktober 2005 23:19, Jon Loeliger wrote: > Is the trend these days towards EXPORT_SYMBOL(foo) near > its definition, or in the one large mondo ppc_ksym.c file? > Near the definition. We're trying to phase out ppc_ksym.c for anything coming from C files, though it's still needed for assembly files. Arnd <>< From brking at us.ibm.com Tue Oct 4 07:31:17 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 16:31:17 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <1128373305.5825.3.camel@mulgrave> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> Message-ID: <4341A325.5070406@us.ibm.com> James Bottomley wrote: > On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > >>Add a .config option to default the scsi scan inquiry timeout. >>Due to a broken device (SCSI/ATA converter card) that is very >>common on IBM iSeries/pSeries machines, these architectures >>need a longer default inquiry timeout. > > > The inquiry timeout has already been changed a while ago to be a module > parameter (or kernel parameter) for this very case. Why does it now > need to be a config option as well? So that when a distro goes off and builds a PPC64 install kernel, it can boot on one of these broken DVD-ROMs without requiring each distro to add special module options inside their initrd. 
-- Brian King eServer Storage I/O IBM Linux Technology Center From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. */ From brking at us.ibm.com Tue Oct 4 07:42:54 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 16:42:54 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> Message-ID: <4341A5DE.5030804@us.ibm.com> Randy.Dunlap wrote: > On Mon, 3 Oct 2005, Brian King wrote: > > >>James Bottomley wrote: >> >>>On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: >>> >>> >>>>Add a .config option to default the scsi scan inquiry timeout. >>>>Due to a broken device (SCSI/ATA converter card) that is very >>>>common on IBM iSeries/pSeries machines, these architectures >>>>need a longer default inquiry timeout. 
>>> >>> >>>The inquiry timeout has already been changed a while ago to be a module >>>parameter (or kernel parameter) for this very case. Why does it now >>>need to be a config option as well? >> >>So that when a distro goes off and builds a PPC64 install kernel, >>it can boot on one of these broken DVD-ROMs without requiring each >>distro to add special module options inside their initrd. > > > My reading on mailing lists is that distros prefer > module or kernel parameters more than they do kernel config > options (in general) because they are more dynamic -- it > allows them to build one kernel instead of many kernels > with various config options. I agree with that statement, however.... Right now I am sitting with an install ISO, trying to boot off this broken DVD-ROM, with scsi compiled as a module in the install kernel, so I can't pass any boot parameters to it in order to make it boot... Additionally, this kernel config option does not decrease this dynamic ability. The config option simply alters the default inquiry timeout. If a module parm is passed at module load time, it will override the default. -- Brian King eServer Storage I/O IBM Linux Technology Center From rdunlap at xenotime.net Tue Oct 4 07:34:26 2005 From: rdunlap at xenotime.net (Randy.Dunlap) Date: Mon, 3 Oct 2005 14:34:26 -0700 (PDT) Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <4341A325.5070406@us.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> Message-ID: On Mon, 3 Oct 2005, Brian King wrote: > James Bottomley wrote: > > On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > > > >>Add a .config option to default the scsi scan inquiry timeout. > >>Due to a broken device (SCSI/ATA converter card) that is very > >>common on IBM iSeries/pSeries machines, these architectures > >>need a longer default inquiry timeout. 
> > > > > > The inquiry timeout has already been changed a while ago to be a module > > parameter (or kernel parameter) for this very case. Why does it now > > need to be a config option as well? > > So that when a distro goes off and builds a PPC64 install kernel, > it can boot on one of these broken DVD-ROMs without requiring each > distro to add special module options inside their initrd. My reading on mailing lists is that distros prefer module or kernel parameters more than they do kernel config options (in general) because they are more dynamic -- it allows them to build one kernel instead of many kernels with various config options. -- ~Randy From rdunlap at xenotime.net Tue Oct 4 07:47:22 2005 From: rdunlap at xenotime.net (Randy.Dunlap) Date: Mon, 3 Oct 2005 14:47:22 -0700 (PDT) Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <4341A5DE.5030804@us.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> <4341A5DE.5030804@us.ibm.com> Message-ID: On Mon, 3 Oct 2005, Brian King wrote: > Randy.Dunlap wrote: > > On Mon, 3 Oct 2005, Brian King wrote: > > > > > >>James Bottomley wrote: > >> > >>>On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > >>> > >>> > >>>>Add a .config option to default the scsi scan inquiry timeout. > >>>>Due to a broken device (SCSI/ATA converter card) that is very > >>>>common on IBM iSeries/pSeries machines, these architectures > >>>>need a longer default inquiry timeout. > >>> > >>> > >>>The inquiry timeout has already been changed a while ago to be a module > >>>parameter (or kernel parameter) for this very case. Why does it now > >>>need to be a config option as well? > >> > >>So that when a distro goes off and builds a PPC64 install kernel, > >>it can boot on one of these broken DVD-ROMs without requiring each > >>distro to add special module options inside their initrd. 
> > > > > > My reading on mailing lists is that distros prefer > > module or kernel parameters more than they do kernel config > > options (in general) because they are more dynamic -- it > > allows them to build one kernel instead of many kernels > > with various config options. > > I agree with that statement, however.... Right now I am sitting with > an install ISO, trying to boot off this broken DVD-ROM, with scsi > compiled as a module in the install kernel, so I can't pass any > boot parameters to it in order to make it boot... and you can't modify the module parameters (on the DVD)... Yes, I've seen that kind of problem recently too (not the same problem, however). > Additionally, this kernel config option does not decrease this dynamic > ability. The config option simply alters the default inquiry timeout. > If a module parm is passed at module load time, it will override the > default. True. and we have precedent(s) for some options living in both .config and as kernel/module parameters/options. -- ~Randy From linas at austin.ibm.com Tue Oct 4 07:49:40 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 16:49:40 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003213430.GD7554@mipter.zuzino.mipt.ru> References: <20051003185739.GR29826@austin.ibm.com> <20051003213430.GD7554@mipter.zuzino.mipt.ru> Message-ID: <20051003214940.GT29826@austin.ibm.com> On Tue, Oct 04, 2005 at 01:34:30AM +0400, Alexey Dobriyan was heard to remark: > > Please, add docs in a proper way: Done, new patch attached. 
--linas 08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that the phb was not marked "dynamic", and so instead of having kmalloc() being called, the __init __alloc_bootmem() was called, resulting in access of garage data. The patch below fixes this crash, and adds some docs to clarify the code. Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 16:46:33.816658976 -0500 @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,17 +209,24 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. 
+ * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { pci_devs_phb_init_dynamic(phb); + phb->is_dynamic = 1; + } pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); } From benh at kernel.crashing.org Tue Oct 4 08:23:26 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 08:23:26 +1000 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 In-Reply-To: <20051003161851.GA4244@in.ibm.com> References: <20051002174630.GA6786@in.ibm.com> <1128326892.8267.89.camel@gaston> <20051003161851.GA4244@in.ibm.com> Message-ID: <1128378206.8267.102.camel@gaston> > Aren't other exceptions possible when a CPU is in its idle loop? If not, > then yes we can insert a call to start_hz_timer only in do_IRQ. Nothing that matters imho.. You can probably get machine checks or system reset, but I wouldn't even try to replay ticks in those as they aren't synchronized with the rest (not blocked by MSR:EE). No, you really only should care about External Interrupts, Decrementer Interrupts and Performance Monitor interrupts (yah, forgot about those). > BTW, in the patch that I had sent, I had got the calculation of next_dec > (in stop_hz_timer) wrong. I had considered that tb_last_stamp is ahead of > jiffies, which is not correct. So the modified code should be something like > below: Why not just call the decrementer interrupt ? It should replay already... Ben. 
From paulus at samba.org Tue Oct 4 10:06:56 2005 From: paulus at samba.org (Paul Mackerras) Date: Tue, 4 Oct 2005 10:06:56 +1000 Subject: EXPORT_SYMBOL(foo) Trend? In-Reply-To: <1128374378.22452.16.camel@cashmere.sps.mot.com> References: <1128374378.22452.16.camel@cashmere.sps.mot.com> Message-ID: <17217.51104.316654.114287@cargo.ozlabs.ibm.com> Jon Loeliger writes: > Is the trend these days towards EXPORT_SYMBOL(foo) near > its definition, or in the one large mondo ppc_ksym.c file? The former. We should only need to have exports from assembly code in ppc_ksyms.c. Paul. From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From geoffrey.levand at am.sony.com Tue Oct 4 11:18:25 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Mon, 03 Oct 2005 18:18:25 -0700 Subject: spufs: User space thread library In-Reply-To: <200509300014.21756.arnd@arndb.de> References: <200509160840.31071.arnd@arndb.de> <433C314C.80409@am.sony.com> <200509300014.21756.arnd@arndb.de> Message-ID: <4341D861.2050306@am.sony.com> Arnd Bergmann wrote: > On Dunnersdag 29 September 2005 20:24, Geoff Levand wrote: > >>I noticed some parts of the package are not very cross-build >>friendly. Have you thought about using an autoconf based >>build system? >> > > I think we now have some very rudimentary cross build support > in our internal version, basically just setting CROSS and > DESTDIR to some predefined values when not building on a > ppc64 system. > > A patch to add a configure.ac is certainly welcome ;-) > OK, I set up an autoconf based build system. I moved some things around to make coding the makefiles easier. I also found a conflict with the system's spe.h, so renamed that file. I put the results here: http://tree.celinuxforum.org/downloads/libspe-0.9-autoconf-05.10.03.18.01.19.tar.bz2 It's just a first cut. I think more work is needed in setting up the spu compiler. Also, more work is needed to make the scripts in the tools directory use the host's cross toolchain. 
-Geoff From rdunlap at xenotime.net Tue Oct 4 07:47:22 2005 From: rdunlap at xenotime.net (Randy.Dunlap) Date: Mon, 3 Oct 2005 14:47:22 -0700 (PDT) Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <4341A5DE.5030804@us.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> <4341A5DE.5030804@us.ibm.com> Message-ID: On Mon, 3 Oct 2005, Brian King wrote: > Randy.Dunlap wrote: > > On Mon, 3 Oct 2005, Brian King wrote: > > > > > >>James Bottomley wrote: > >> > >>>On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > >>> > >>> > >>>>Add a .config option to default the scsi scan inquiry timeout. > >>>>Due to a broken device (SCSI/ATA converter card) that is very > >>>>common on IBM iSeries/pSeries machines, these architectures > >>>>need a longer default inquiry timeout. > >>> > >>> > >>>The inquiry timeout has already been changed a while ago to be a module > >>>parameter (or kernel parameter) for this very case. Why does it now > >>>need to be a config option as well? > >> > >>So that when a distro goes off and builds a PPC64 install kernel, > >>it can boot on one of these broken DVD-ROMs without requiring each > >>distro to add special module options inside their initrd. > > > > > > My reading on mailing lists is that distros prefer > > module or kernel parameters more than they do kernel config > > options (in general) because they are more dynamic -- it > > allows them to build one kernel instead of many kernels > > with various config options. > > I agree with that statement, however.... Right now I am sitting with > an install ISO, trying to boot off this broken DVD-ROM, with scsi > compiled as a module in the install kernel, so I can't pass any > boot parameters to it in order to make it boot... and you can't modify the module parameters (on the DVD)... Yes, I've seen that kind of problem recently too (not the same problem, however). 
> Additionally, this kernel config option does not decrease this dynamic > ability. The config option simply alters the default inquiry timeout. > If a module parm is passed at module load time, it will override the > default. True. and we have precedent(s) for some options living in both .config and as kernel/module parameters/options. -- ~Randy - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From brking at us.ibm.com Tue Oct 4 06:44:28 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 15:44:28 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <20051003203924.GS29826@austin.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <20051003203924.GS29826@austin.ibm.com> Message-ID: <4341982C.8070702@us.ibm.com> linas wrote: > On Mon, Oct 03, 2005 at 03:26:30PM -0500, brking at us.ibm.com was heard to remark: > >>Add a .config option to default the scsi scan inquiry timeout. >>Due to a broken device (SCSI/ATA converter card) that is very >>common on IBM iSeries/pSeries machines, these architectures >>need a longer default inquiry timeout. > > > Hmm, I thought I fixed this several yeas ago. I guess it didn't > stay fixed? Some distro kernels have the default inquiry timeout changed for ppc64, but mainline only has the module parameter. 
-- Brian King eServer Storage I/O IBM Linux Technology Center - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From rdunlap at xenotime.net Tue Oct 4 07:34:26 2005 From: rdunlap at xenotime.net (Randy.Dunlap) Date: Mon, 3 Oct 2005 14:34:26 -0700 (PDT) Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <4341A325.5070406@us.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> Message-ID: On Mon, 3 Oct 2005, Brian King wrote: > James Bottomley wrote: > > On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > > > >>Add a .config option to default the scsi scan inquiry timeout. > >>Due to a broken device (SCSI/ATA converter card) that is very > >>common on IBM iSeries/pSeries machines, these architectures > >>need a longer default inquiry timeout. > > > > > > The inquiry timeout has already been changed a while ago to be a module > > parameter (or kernel parameter) for this very case. Why does it now > > need to be a config option as well? > > So that when a distro goes off and builds a PPC64 install kernel, > it can boot on one of these broken DVD-ROMs without requiring each > distro to add special module options inside their initrd. My reading on mailing lists is that distros prefer module or kernel parameters more than they do kernel config options (in general) because they are more dynamic -- it allows them to build one kernel instead of many kernels with various config options. 
-- ~Randy - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From linas at austin.ibm.com Tue Oct 4 07:49:40 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 16:49:40 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003213430.GD7554@mipter.zuzino.mipt.ru> References: <20051003185739.GR29826@austin.ibm.com> <20051003213430.GD7554@mipter.zuzino.mipt.ru> Message-ID: <20051003214940.GT29826@austin.ibm.com> On Tue, Oct 04, 2005 at 01:34:30AM +0400, Alexey Dobriyan was heard to remark: > > Please, add docs in a proper way: Done, new patch attached. --linas 08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that the phb was not marked "dynamic", and so instead of having kmalloc() being called, the __init __alloc_bootmem() was called, resulting in access of garage data. The patch below fixes this crash, and adds some docs to clarify the code. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 16:46:33.816658976 -0500 @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,17 +209,24 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. + * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! 
*/ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { pci_devs_phb_init_dynamic(phb); + phb->is_dynamic = 1; + } pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); } - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From James.Bottomley at SteelEye.com Tue Oct 4 07:01:45 2005 From: James.Bottomley at SteelEye.com (James Bottomley) Date: Mon, 03 Oct 2005 17:01:45 -0400 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Message-ID: <1128373305.5825.3.camel@mulgrave> On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > Add a .config option to default the scsi scan inquiry timeout. > Due to a broken device (SCSI/ATA converter card) that is very > common on IBM iSeries/pSeries machines, these architectures > need a longer default inquiry timeout. The inquiry timeout has already been changed a while ago to be a module parameter (or kernel parameter) for this very case. Why does it now need to be a config option as well? James - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From brking at us.ibm.com Tue Oct 4 07:31:17 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 16:31:17 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <1128373305.5825.3.camel@mulgrave> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> Message-ID: <4341A325.5070406@us.ibm.com> James Bottomley wrote: > On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > >>Add a .config option to default the scsi scan inquiry timeout. 
>>Due to a broken device (SCSI/ATA converter card) that is very >>common on IBM iSeries/pSeries machines, these architectures >>need a longer default inquiry timeout. > > > The inquiry timeout has already been changed a while ago to be a module > parameter (or kernel parameter) for this very case. Why does it now > need to be a config option as well? So that when a distro goes off and builds a PPC64 install kernel, it can boot on one of these broken DVD-ROMs without requiring each distro to add special module options inside their initrd. -- Brian King eServer Storage I/O IBM Linux Technology Center - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From linas at austin.ibm.com Tue Oct 4 07:49:40 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 16:49:40 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003213430.GD7554@mipter.zuzino.mipt.ru> References: <20051003185739.GR29826@austin.ibm.com> <20051003213430.GD7554@mipter.zuzino.mipt.ru> Message-ID: <20051003214940.GT29826@austin.ibm.com> On Tue, Oct 04, 2005 at 01:34:30AM +0400, Alexey Dobriyan was heard to remark: > > Please, add docs in a proper way: Done, new patch attached. 
--linas 08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that the phb was not marked "dynamic", and so instead of having kmalloc() being called, the __init __alloc_bootmem() was called, resulting in access of garbage data. The patch below fixes this crash, and adds some docs to clarify the code. Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 16:46:33.816658976 -0500 @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,17 +209,24 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them.
+ * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { pci_devs_phb_init_dynamic(phb); + phb->is_dynamic = 1; + } pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); } - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From benh at kernel.crashing.org Tue Oct 4 15:30:41 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 15:30:41 +1000 Subject: [PATCH] ppc64: Add cpufreq support for SMU based G5 Message-ID: <1128403842.31063.24.camel@gaston> iMac G5 and latest single CPU desktop G5 (SMU based machines) have a 970FX DD3 CPU that supports frequency & voltage switching. This patch adds support for simple dual frequency switch. It is required for the upcoming thermal control patch for these machines.
Signed-off-by: Benjamin Herrenschmidt Index: linux-work/arch/ppc64/kernel/misc.S =================================================================== --- linux-work.orig/arch/ppc64/kernel/misc.S 2005-09-23 12:43:21.000000000 +1000 +++ linux-work/arch/ppc64/kernel/misc.S 2005-09-29 17:03:04.000000000 +1000 @@ -616,7 +616,7 @@ isync blr - /* +/* * Do an IO access in real mode */ _GLOBAL(real_writeb) @@ -649,6 +649,76 @@ #endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ /* + * SCOM access functions for 970 (FX only for now) + * + * unsigned long scom970_read(unsigned int address); + * void scom970_write(unsigned int address, unsigned long value); + * + * The address passed in is the 24 bits register address. This code + * is 970 specific and will not check the status bits, so you should + * know what you are doing. + */ +_GLOBAL(scom970_read) + /* interrupts off */ + mfmsr r4 + ori r0,r4,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). On current CPUs they must be 0'd, + * and finally or in RW bit + */ + rlwinm r3,r3,8,0,15 + ori r3,r3,0x8000 + + /* do the actual scom read */ + sync + mtspr SPRN_SCOMC,r3 + isync + mfspr r3,SPRN_SCOMD + isync + mfspr r0,SPRN_SCOMC + isync + + /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah + * that's the best we can do). Not implemented yet as we don't use + * the scom on any of the bogus CPUs yet, but may have to be done + * ultimately + */ + + /* restore interrupts */ + mtmsrd r4,1 + blr + + +_GLOBAL(scom970_write) + /* interrupts off */ + mfmsr r5 + ori r0,r5,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). On current CPUs they must be 0'd. 
+ */ + + rlwinm r3,r3,8,0,15 + + sync + mtspr SPRN_SCOMD,r4 /* write data */ + isync + mtspr SPRN_SCOMC,r3 /* write command */ + isync + mfspr 3,SPRN_SCOMC + isync + + /* restore interrupts */ + mtmsrd r5,1 + blr + + +/* * Create a kernel thread * kernel_thread(fn, arg, flags) */ Index: linux-work/include/asm-ppc64/processor.h =================================================================== --- linux-work.orig/include/asm-ppc64/processor.h 2005-09-23 12:44:12.000000000 +1000 +++ linux-work/include/asm-ppc64/processor.h 2005-09-27 11:42:50.000000000 +1000 @@ -177,6 +177,9 @@ #define SPRN_CTRLT 0x098 #define CTRL_RUNLATCH 0x1 +#define SPRN_SCOMC 0x114 +#define SPRN_SCOMD 0x115 + /* Performance monitor SPRs */ #define SPRN_SIAR 780 #define SPRN_SDAR 781 @@ -536,6 +539,9 @@ } } +extern unsigned long scom970_read(unsigned int address); +extern void scom970_write(unsigned int address, unsigned long value); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ Index: linux-work/arch/ppc64/Kconfig =================================================================== --- linux-work.orig/arch/ppc64/Kconfig 2005-09-23 12:43:21.000000000 +1000 +++ linux-work/arch/ppc64/Kconfig 2005-09-28 10:41:27.000000000 +1000 @@ -159,6 +159,17 @@ support. As of this writing the exact hardware interface is strongly in flux, so no good recommendation can be made. +source "drivers/cpufreq/Kconfig" + +config CPU_FREQ_PMAC + bool "Support for Apple G5" + depends on CPU_FREQ && PPC_PMAC64 + select CPU_FREQ_TABLE + help + This adds support for frequency switching on some Apple G5 + machine. This is currently very experimental and works only + on some iMac G5. 
+ config IBMVIO depends on PPC_PSERIES || PPC_ISERIES bool Index: linux-work/arch/ppc64/kernel/Makefile =================================================================== --- linux-work.orig/arch/ppc64/kernel/Makefile 2005-09-23 12:43:21.000000000 +1000 +++ linux-work/arch/ppc64/kernel/Makefile 2005-09-27 11:42:50.000000000 +1000 @@ -60,6 +60,7 @@ obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \ pmac_time.o pmac_nvram.o pmac_low_i2c.o \ udbg_scc.o +obj-$(CONFIG_CPU_FREQ_PMAC) += pmac_cpufreq.o obj-$(CONFIG_PPC_MAPLE) += maple_setup.o maple_pci.o maple_time.o \ udbg_16550.o Index: linux-work/arch/ppc64/kernel/pmac_cpufreq.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/arch/ppc64/kernel/pmac_cpufreq.c 2005-09-27 11:42:50.000000000 +1000 @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt + * and Markus Demleitner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs, + * that is iMac G5 and latest single CPU desktop. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) 
+#endif + +/* see 970FX user manual */ + +#define SCOM_PCR 0x0aa001 /* PCR scom addr */ + +#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */ +#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */ +#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */ +#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */ +#define PCR_SPEED_MASK 0x000e0000U /* speed mask */ +#define PCR_SPEED_SHIFT 17 +#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */ +#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */ +#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */ +#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */ +#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */ +#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */ + +#define SCOM_PSR 0x408001 /* PSR scom addr */ +/* warning: PSR is a 64 bits register */ +#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */ +#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */ +#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */ +#define PSR_CUR_SPEED_SHIFT (56) + +/* + * The G5 only supports two frequencies (Quarter speed is not supported) + */ +#define CPUFREQ_HIGH 0 +#define CPUFREQ_LOW 1 + +static struct cpufreq_frequency_table g5_cpu_freqs[] = { + {CPUFREQ_HIGH, 0}, + {CPUFREQ_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +static struct freq_attr* g5_cpu_freqs_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +/* Power mode data is an array of the 32 bits PCR values to use for + * the various frequencies, retreived from the device-tree + */ +static u32 *g5_pmode_data; +static int g5_pmode_max; +static int g5_pmode_cur; + + +static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ +static int g5_fvt_count; /* number of op. points */ +static int g5_fvt_cur; /* current op. 
point */ + +/* ----------------- real hardware interface */ + +static void g5_switch_volt(int speed_mode) +{ + struct smu_simple_cmd cmd; + + DECLARE_COMPLETION(comp); + smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete, + &comp, 'V', 'S', 'L', 'E', 'W', + 0xff, g5_fvt_cur+1, speed_mode); + wait_for_completion(&comp); +} + +static int g5_switch_freq(int speed_mode) +{ + int to; + + if (g5_pmode_cur == speed_mode) + return 0; + + /* If frequency is going up, first ramp up the voltage */ + if (speed_mode < g5_pmode_cur) + g5_switch_volt(speed_mode); + + /* Clear PCR high */ + scom970_write(SCOM_PCR, 0); + /* Clear PCR low */ + scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0); + /* Set PCR low */ + scom970_write(SCOM_PCR, PCR_HILO_SELECT | + g5_pmode_data[speed_mode]); + + /* Wait for completion */ + for (to = 0; to < 10; to++) { + unsigned long psr = scom970_read(SCOM_PSR); + + if ((psr & PSR_CMD_RECEIVED) == 0 && + (((psr >> PSR_CUR_SPEED_SHIFT) ^ + (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3) + == 0) + break; + if (psr & PSR_CMD_COMPLETED) + break; + udelay(100); + } + + /* If frequency is going down, last ramp the voltage */ + if (speed_mode > g5_pmode_cur) + g5_switch_volt(speed_mode); + + g5_pmode_cur = speed_mode; + ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; + + return 0; +} + +static int g5_query_freq(void) +{ + unsigned long psr = scom970_read(SCOM_PSR); + int i; + + for (i = 0; i <= g5_pmode_max; i++) + if ((((psr >> PSR_CUR_SPEED_SHIFT) ^ + (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0) + break; + return i; +} + +/* ----------------- cpufreq bookkeeping */ +static int __pmac g5_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); +} + +static int __pmac g5_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, g5_cpu_freqs, + 
target_freq, relation, &newstate)) + return -EINVAL; + + return g5_switch_freq(newstate); +} + +static int __pmac g5_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + if (policy->cpu != 0) + return -ENODEV; + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = g5_cpu_freqs[g5_query_freq()].frequency; + cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu); + + return cpufreq_frequency_table_cpuinfo(policy, + g5_cpu_freqs); +} + + +static struct cpufreq_driver g5_cpufreq_driver = { + .name = "powermac", + .owner = THIS_MODULE, + .flags = CPUFREQ_CONST_LOOPS, + .init = g5_cpufreq_cpu_init, + .verify = g5_cpufreq_verify, + .target = g5_cpufreq_target, + .attr = g5_cpu_freqs_attr, +}; + + +static int __init g5_cpufreq_init(void) +{ + struct device_node *cpunode; + unsigned int psize, ssize; + struct smu_sdbp_header *shdr; + unsigned long max_freq; + u32 *valp; + int rc = -ENODEV; + + /* Look for CPU and SMU nodes */ + cpunode = of_find_node_by_type(NULL, "cpu"); + if (!cpunode) { + DBG("No CPU node !\n"); + return -ENODEV; + } + + /* Check 970FX for now */ + valp = (u32 *)get_property(cpunode, "cpu-version", NULL); + if (!valp) { + DBG("No cpu-version property !\n"); + goto bail_noprops; + } + if (((*valp) >> 16) != 0x3c) { + DBG("Wrong CPU version: %08x\n", *valp); + goto bail_noprops; + } + + /* Look for the powertune data in the device-tree */ + g5_pmode_data = (u32 *)get_property(cpunode, "power-mode-data",&psize); + if (!g5_pmode_data) { + DBG("No power-mode-data !\n"); + goto bail_noprops; + } + g5_pmode_max = psize / sizeof(u32) - 1; + + /* Look for the FVT table */ + shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); + if (!shdr) + goto bail_noprops; + g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1]; + ssize = (shdr->len * sizeof(u32)) - sizeof(struct smu_sdbp_header); + g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt); + g5_fvt_cur = 0; + + /* Sanity checking */ + if (g5_fvt_count < 
1 || g5_pmode_max < 1) + goto bail_noprops; + + /* + * From what I see, clock-frequency is always the maximal frequency. + * The current driver can not slew sysclk yet, so we really only deal + * with powertune steps for now. We also only implement full freq and + * half freq in this version. So far, I haven't yet seen a machine + * supporting anything else. + */ + valp = (u32 *)get_property(cpunode, "clock-frequency", NULL); + if (!valp) + return -ENODEV; + max_freq = (*valp)/1000; + g5_cpu_freqs[0].frequency = max_freq; + g5_cpu_freqs[1].frequency = max_freq/2; + + /* Check current frequency */ + g5_pmode_cur = g5_query_freq(); + if (g5_pmode_cur > 1) { + /* We don't support anything but 1:1 and 1:2, fixup ... */ + g5_switch_freq(1); + g5_pmode_cur = 1; + } + + printk(KERN_INFO "Registering G5 CPU frequency driver\n"); + printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", + g5_cpu_freqs[1].frequency/1000, + g5_cpu_freqs[0].frequency/1000, + g5_cpu_freqs[g5_pmode_cur].frequency/1000); + + rc = cpufreq_register_driver(&g5_cpufreq_driver); + + /* We keep the CPU node on hold... hopefully, Apple G5 don't have + * hotplug CPU with a dynamic device-tree ... 
+ */ + return rc; + + bail_noprops: + of_node_put(cpunode); + + return rc; +} + +module_init(g5_cpufreq_init); + + +MODULE_LICENSE("GPL"); Index: linux-work/drivers/macintosh/smu.c =================================================================== --- linux-work.orig/drivers/macintosh/smu.c 2005-09-26 11:48:36.000000000 +1000 +++ linux-work/drivers/macintosh/smu.c 2005-09-29 16:56:59.000000000 +1000 @@ -843,6 +843,18 @@ return 0; } +struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +{ + char pname[32]; + + if (!smu) + return NULL; + + sprintf(pname, "sdb-partition-%02x", id); + return (struct smu_sdbp_header *)get_property(smu->of_node, + pname, size); +} +EXPORT_SYMBOL(smu_get_sdb_partition); /* Index: linux-work/include/asm-ppc64/smu.h =================================================================== --- linux-work.orig/include/asm-ppc64/smu.h 2005-09-26 11:48:37.000000000 +1000 +++ linux-work/include/asm-ppc64/smu.h 2005-09-29 16:56:59.000000000 +1000 @@ -144,7 +144,11 @@ * - lenght 8 ("VSLEWxyz") has 3 additional bytes appended, and is * used to set the voltage slewing point. The SMU replies with "DONE" * I yet have to figure out their exact meaning of those 3 bytes in - * both cases. + * both cases. They seem to be: + * x = processor mask + * y = op. point index + * z = processor freq. step index + * I haven't yet decyphered result codes * */ #define SMU_CMD_POWER_COMMAND 0xaa @@ -244,6 +248,7 @@ */ extern void smu_done_complete(struct smu_cmd *cmd, void *misc); + /* * Synchronous helpers. 
Will spin-wait for completion of a command */ @@ -334,6 +339,59 @@ #endif /* __KERNEL__ */ /* + * - SMU "sdb" partitions informations - + */ + + +/* + * Partition header format + */ +struct smu_sdbp_header { + __u8 id; + __u8 len; + __u8 version; + __u8 flags; +}; + +/* + * 32 bits integers are usually encoded with 2x16 bits swapped, + * this demangles them + */ +#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) + +/* This is the definition of the SMU sdb-partition-0x12 table (called + * CPU F/V/T operating points in Darwin). The definition for all those + * SMU tables should be moved to some separate file + */ +#define SMU_SDB_FVT_ID 0x12 + +struct smu_sdbp_fvt { + __u32 sysclk; /* Base SysClk frequency in Hz for + * this operating point + */ + __u8 pad; + __u8 maxtemp; /* Max temp. supported by this + * operating point + */ + + __u16 volts[3]; /* CPU core voltage for the 3 + * PowerTune modes, a mode with + * 0V = not supported. + */ +}; + +#ifdef __KERNEL__ +/* + * This returns the pointer to an SMU "sdb" partition data or NULL + * if not found. The data format is described below + */ +extern struct smu_sdbp_header *smu_get_sdb_partition(int id, + unsigned int *size); + +#endif /* __KERNEL__ */ + + +/* * - Userland interface - */ @@ -376,4 +434,5 @@ __u32 reply_len; /* Lenght of data follwing */ }; + #endif /* _SMU_H */ From benh at kernel.crashing.org Tue Oct 4 15:34:33 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 15:34:33 +1000 Subject: [PATCH] ppc64: Support retreiving missing SMU partitions Message-ID: <1128404073.31063.28.camel@gaston> The SMU chip has an EEPROM that contains various informations about the motherboard, like thermal calibration infos, etc... This EEPROM is divided in "partitions", and the firmware only extracts some of these and publish them in the device-tree. 
This patch adds a mecanism to retreive the missing ones which is necessary for the upcoming thermal control patch. In order to make this accessible to userland as well, the patch adds the ability to the /proc/device-tree code to get new properties added at runtime and simplify the code. Signed-off-by: Benjamin Herrenschmidt Index: linux-work/drivers/macintosh/smu.c =================================================================== --- linux-work.orig/drivers/macintosh/smu.c 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/drivers/macintosh/smu.c 2005-09-29 17:06:05.000000000 +1000 @@ -47,13 +47,13 @@ #include #include -#define VERSION "0.6" +#define VERSION "0.7" #define AUTHOR "(c) 2005 Benjamin Herrenschmidt, IBM Corp." #undef DEBUG_SMU #ifdef DEBUG_SMU -#define DPRINTK(fmt, args...) do { printk(KERN_DEBUG fmt , ##args); } while (0) +#define DPRINTK(fmt, args...) do { udbg_printf(KERN_DEBUG fmt , ##args); } while (0) #else #define DPRINTK(fmt, args...) do { } while (0) #endif @@ -92,7 +92,7 @@ * for now, just hard code that */ static struct smu_device *smu; - +static DECLARE_MUTEX(smu_part_access); /* * SMU driver low level stuff @@ -113,9 +113,11 @@ DPRINTK("SMU: starting cmd %x, %d bytes data\n", cmd->cmd, cmd->data_len); - DPRINTK("SMU: data buffer: %02x %02x %02x %02x ...\n", + DPRINTK("SMU: data buffer: %02x %02x %02x %02x %02x %02x %02x %02x\n", ((u8 *)cmd->data_buf)[0], ((u8 *)cmd->data_buf)[1], - ((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3]); + ((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3], + ((u8 *)cmd->data_buf)[4], ((u8 *)cmd->data_buf)[5], + ((u8 *)cmd->data_buf)[6], ((u8 *)cmd->data_buf)[7]); /* Fill the SMU command buffer */ smu->cmd_buf->cmd = cmd->cmd; @@ -438,7 +440,7 @@ EXPORT_SYMBOL(smu_present); -int smu_init (void) +int __init smu_init (void) { struct device_node *np; u32 *data; @@ -843,16 +845,154 @@ return 0; } -struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +/* + * Handling of "partitions" + */ + 
+static int smu_read_datablock(u8 *dest, unsigned int addr, unsigned int len) +{ + DECLARE_COMPLETION(comp); + unsigned int chunk; + struct smu_cmd cmd; + int rc; + u8 params[8]; + + /* We currently use a chunk size of 0xe. We could check the + * SMU firmware version and use bigger sizes though + */ + chunk = 0xe; + + while (len) { + unsigned int clen = min(len, chunk); + + cmd.cmd = SMU_CMD_MISC_ee_COMMAND; + cmd.data_len = 7; + cmd.data_buf = params; + cmd.reply_len = chunk; + cmd.reply_buf = dest; + cmd.done = smu_done_complete; + cmd.misc = &comp; + params[0] = SMU_CMD_MISC_ee_GET_DATABLOCK_REC; + params[1] = 0x4; + *((u32 *)&params[2]) = addr; + params[6] = clen; + + rc = smu_queue_cmd(&cmd); + if (rc) + return rc; + wait_for_completion(&comp); + if (cmd.status != 0) + return rc; + if (cmd.reply_len != clen) { + printk(KERN_DEBUG "SMU: short read in " + "smu_read_datablock, got: %d, want: %d\n", + cmd.reply_len, clen); + return -EIO; + } + len -= clen; + addr += clen; + dest += clen; + } + return 0; +} + +static struct smu_sdbp_header *smu_create_sdb_partition(int id) +{ + DECLARE_COMPLETION(comp); + struct smu_simple_cmd cmd; + unsigned int addr, len, tlen; + struct smu_sdbp_header *hdr; + struct property *prop; + + /* First query the partition info */ + smu_queue_simple(&cmd, SMU_CMD_PARTITION_COMMAND, 2, + smu_done_complete, &comp, + SMU_CMD_PARTITION_LATEST, id); + wait_for_completion(&comp); + + /* Partition doesn't exist (or other error) */ + if (cmd.cmd.status != 0 || cmd.cmd.reply_len != 6) + return NULL; + + /* Fetch address and length from reply */ + addr = *((u16 *)cmd.buffer); + len = cmd.buffer[3] << 2; + /* Calculate total length to allocate, including the 17 bytes + * for "sdb-partition-XX" that we append at the end of the buffer + */ + tlen = sizeof(struct property) + len + 18; + + prop = kcalloc(tlen, 1, GFP_KERNEL); + if (prop == NULL) + return NULL; + hdr = (struct smu_sdbp_header *)(prop + 1); + prop->name = ((char *)prop) + tlen - 18; +
sprintf(prop->name, "sdb-partition-%02x", id); + prop->length = len; + prop->value = (unsigned char *)hdr; + prop->next = NULL; + + /* Read the datablock */ + if (smu_read_datablock((u8 *)hdr, addr, len)) { + printk(KERN_DEBUG "SMU: datablock read failed while reading " + "partition %02x !\n", id); + goto failure; + } + + /* Got it, check a few things and create the property */ + if (hdr->id != id) { + printk(KERN_DEBUG "SMU: Reading partition %02x and got " + "%02x !\n", id, hdr->id); + goto failure; + } + if (prom_add_property(smu->of_node, prop)) { + printk(KERN_DEBUG "SMU: Failed creating sdb-partition-%02x " + "property !\n", id); + goto failure; + } + + return hdr; + failure: + kfree(prop); + return NULL; +} + +/* Note: Only allowed to return error code in pointers (using ERR_PTR) + * when interruptible is 1 + */ +struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size, + int interruptible) { char pname[32]; + struct smu_sdbp_header *part; if (!smu) return NULL; sprintf(pname, "sdb-partition-%02x", id); - return (struct smu_sdbp_header *)get_property(smu->of_node, + + if (interruptible) { + int rc; + rc = down_interruptible(&smu_part_access); + if (rc) + return ERR_PTR(rc); + } else + down(&smu_part_access); + + part = (struct smu_sdbp_header *)get_property(smu->of_node, pname, size); + if (part == NULL) { + part = smu_create_sdb_partition(id); + if (part != NULL && size) + *size = part->len << 2; + } + up(&smu_part_access); + return part; +} + +struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +{ + return __smu_get_sdb_partition(id, size, 0); } EXPORT_SYMBOL(smu_get_sdb_partition); @@ -928,6 +1068,14 @@ else if (hdr.cmdtype == SMU_CMDTYPE_WANTS_EVENTS) { pp->mode = smu_file_events; return 0; + } else if (hdr.cmdtype == SMU_CMDTYPE_GET_PARTITION) { + struct smu_sdbp_header *part; + part = __smu_get_sdb_partition(hdr.cmd, NULL, 1); + if (part == NULL) + return -EINVAL; + else if (IS_ERR(part)) + return PTR_ERR(part); 
+ return 0; } else if (hdr.cmdtype != SMU_CMDTYPE_SMU) return -EINVAL; else if (pp->mode != smu_file_commands) Index: linux-work/include/asm-ppc64/smu.h =================================================================== --- linux-work.orig/include/asm-ppc64/smu.h 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/include/asm-ppc64/smu.h 2005-09-29 17:06:05.000000000 +1000 @@ -20,10 +20,23 @@ /* * Partition info commands * - * I do not know what those are for at this point + * These commands are used to retreive the sdb-partition-XX datas from + * the SMU. The lenght is always 2. First byte is the subcommand code + * and second byte is the partition ID. + * + * The reply is 6 bytes: + * + * - 0..1 : partition address + * - 2 : a byte containing the partition ID + * - 3 : length (maybe other bits are rest of header ?) + * + * The data must then be obtained with calls to another command: + * SMU_CMD_MISC_ee_GET_DATABLOCK_REC (described below). */ #define SMU_CMD_PARTITION_COMMAND 0x3e - +#define SMU_CMD_PARTITION_LATEST 0x01 +#define SMU_CMD_PARTITION_BASE 0x02 +#define SMU_CMD_PARTITION_UPDATE 0x03 /* * Fan control @@ -176,6 +189,25 @@ * Misc commands * * This command seem to be a grab bag of various things + * + * SMU_CMD_MISC_ee_GET_DATABLOCK_REC is used, among others, to + * transfer blocks of data from the SMU. So far, I've decrypted it's + * usage to retreive partition data. In order to do that, you have to + * break your transfer in "chunks" since that command cannot transfer + * more than a chunk at a time. The chunk size used by OF is 0xe bytes, + * but it seems that the darwin driver will let you do 0x1e bytes if + * your "PMU" version is >= 0x30. 
You can get the "PMU" version apparently + * either in the last 16 bits of property "smu-version-pmu" or as the 16 + * bytes at offset 1 of "smu-version-info" + * + * For each chunk, the command takes 7 bytes of arguments: + * byte 0: subcommand code (0x02) + * byte 1: 0x04 (always, I don't know what it means, maybe the address + * space to use or some other nicety. It's hard coded in OF) + * byte 2..5: SMU address of the chunk (big endian 32 bits) + * byte 6: size to transfer (up to max chunk size) + * + * The data is returned directly */ #define SMU_CMD_MISC_ee_COMMAND 0xee #define SMU_CMD_MISC_ee_GET_DATABLOCK_REC 0x02 @@ -357,13 +389,13 @@ * 32 bits integers are usually encoded with 2x16 bits swapped, * this demangles them */ -#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) +//#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) /* This is the definition of the SMU sdb-partition-0x12 table (called * CPU F/V/T operating points in Darwin). The definition for all those * SMU tables should be moved to some separate file */ -#define SMU_SDB_FVT_ID 0x12 +#define SMU_SDB_FVT_ID 0x12 struct smu_sdbp_fvt { __u32 sysclk; /* Base SysClk frequency in Hz for @@ -380,6 +412,9 @@ */ }; +/* Other partitions without known structures */ +#define SMU_SDB_DEBUG_SWITCHES_ID 0x05 + #ifdef __KERNEL__ /* * This returns the pointer to an SMU "sdb" partition data or NULL @@ -417,14 +452,22 @@ * It is illegal to send SMU commands through a file descriptor configured * for events reception * + * The special SMU_CMDTYPE_GET_PARTITION command can be used to retreive + * SMU sdb-partition's from the SMU when not available. The command will also + * cause the new partition to be added to the device-tree. That command has + * a data_len of 0, you pass the partition ID in the "cmd" field. It will + * not trigger any reply and is not asynchronous. Just fetch the partition + * from the device-tree after it's done. 
*/ struct smu_user_cmd_hdr { __u32 cmdtype; #define SMU_CMDTYPE_SMU 0 /* SMU command */ #define SMU_CMDTYPE_WANTS_EVENTS 1 /* switch fd to events mode */ +#define SMU_CMDTYPE_GET_PARTITION 2 /* retreive an sdb partition */ __u8 cmd; /* SMU command byte */ + __u8 pad[3]; /* padding */ __u32 data_len; /* Lenght of data following */ }; Index: linux-work/arch/ppc64/kernel/prom.c =================================================================== --- linux-work.orig/arch/ppc64/kernel/prom.c 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/arch/ppc64/kernel/prom.c 2005-09-29 17:06:05.000000000 +1000 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1893,17 +1894,32 @@ EXPORT_SYMBOL(get_property); /* - * Add a property to a node + * Add a property to a node. */ -void +int prom_add_property(struct device_node* np, struct property* prop) { - struct property **next = &np->properties; + struct property **next; prop->next = NULL; - while (*next) + write_lock(&devtree_lock); + next = &np->properties; + while (*next) { + if (strcmp(prop->name, (*next)->name) == 0) { + /* duplicate ! don't insert it */ + write_unlock(&devtree_lock); + return -1; + } next = &(*next)->next; + } *next = prop; + write_unlock(&devtree_lock); + + /* try to add to proc as well if it was initialized */ + if (np->pde) + proc_device_tree_add_prop(np->pde, prop); + + return 0; } #if 0 Index: linux-work/fs/proc/proc_devtree.c =================================================================== --- linux-work.orig/fs/proc/proc_devtree.c 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/fs/proc/proc_devtree.c 2005-09-29 17:06:05.000000000 +1000 @@ -49,6 +49,39 @@ */ /* + * Add a property to a node + */ +static struct proc_dir_entry * +__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp) +{ + struct proc_dir_entry *ent; + + /* + * Unfortunately proc_register puts each new entry + * at the beginning of the list. So we rearrange them. 
+ */ + ent = create_proc_read_entry(pp->name, + strncmp(pp->name, "security-", 9) + ? S_IRUGO : S_IRUSR, de, + property_read_proc, pp); + if (ent == NULL) + return NULL; + + if (!strncmp(pp->name, "security-", 9)) + ent->size = 0; /* don't leak number of password chars */ + else + ent->size = pp->length; + + return ent; +} + + +void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop) +{ + __proc_device_tree_add_prop(pde, prop); +} + +/* * Process a node, adding entries for its children and its properties. */ void proc_device_tree_add_node(struct device_node *np, @@ -57,11 +90,9 @@ struct property *pp; struct proc_dir_entry *ent; struct device_node *child; - struct proc_dir_entry *list = NULL, **lastp; const char *p; set_node_proc_entry(np, de); - lastp = &list; for (child = NULL; (child = of_get_next_child(np, child));) { p = strrchr(child->full_name, '/'); if (!p) @@ -71,9 +102,6 @@ ent = proc_mkdir(p, de); if (ent == 0) break; - *lastp = ent; - ent->next = NULL; - lastp = &ent->next; proc_device_tree_add_node(child, ent); } of_node_put(child); @@ -84,7 +112,7 @@ * properties are quite unimportant for us though, thus we * simply "skip" them here, but we do have to check. */ - for (ent = list; ent != NULL; ent = ent->next) + for (ent = de->subdir; ent != NULL; ent = ent->next) if (!strcmp(ent->name, pp->name)) break; if (ent != NULL) { @@ -94,25 +122,10 @@ continue; } - /* - * Unfortunately proc_register puts each new entry - * at the beginning of the list. So we rearrange them. - */ - ent = create_proc_read_entry(pp->name, - strncmp(pp->name, "security-", 9) - ? 
S_IRUGO : S_IRUSR, de, - property_read_proc, pp); + ent = __proc_device_tree_add_prop(de, pp); if (ent == 0) break; - if (!strncmp(pp->name, "security-", 9)) - ent->size = 0; /* don't leak number of password chars */ - else - ent->size = pp->length; - ent->next = NULL; - *lastp = ent; - lastp = &ent->next; } - de->subdir = list; } /* Index: linux-work/include/asm-ppc/prom.h =================================================================== --- linux-work.orig/include/asm-ppc/prom.h 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/include/asm-ppc/prom.h 2005-09-29 17:06:05.000000000 +1000 @@ -93,7 +93,7 @@ extern int machine_is_compatible(const char *compat); extern unsigned char *get_property(struct device_node *node, const char *name, int *lenp); -extern void prom_add_property(struct device_node* np, struct property* prop); +extern int prom_add_property(struct device_node* np, struct property* prop); extern void prom_get_irq_senses(unsigned char *, int, int); extern int prom_n_addr_cells(struct device_node* np); extern int prom_n_size_cells(struct device_node* np); Index: linux-work/include/asm-ppc64/prom.h =================================================================== --- linux-work.orig/include/asm-ppc64/prom.h 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/include/asm-ppc64/prom.h 2005-09-29 17:06:05.000000000 +1000 @@ -201,6 +201,6 @@ extern int prom_n_size_cells(struct device_node* np); extern int prom_n_intr_cells(struct device_node* np); extern void prom_get_irq_senses(unsigned char *senses, int off, int max); -extern void prom_add_property(struct device_node* np, struct property* prop); +extern int prom_add_property(struct device_node* np, struct property* prop); #endif /* _PPC64_PROM_H */ Index: linux-work/include/linux/proc_fs.h =================================================================== --- linux-work.orig/include/linux/proc_fs.h 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/include/linux/proc_fs.h 2005-09-29 17:06:05.000000000 
+1000 @@ -139,15 +139,12 @@ /* * proc_devtree.c */ +#ifdef CONFIG_PROC_DEVICETREE struct device_node; +struct property; extern void proc_device_tree_init(void); -#ifdef CONFIG_PROC_DEVICETREE extern void proc_device_tree_add_node(struct device_node *, struct proc_dir_entry *); -#else /* !CONFIG_PROC_DEVICETREE */ -static inline void proc_device_tree_add_node(struct device_node *np, struct proc_dir_entry *pde) -{ - return; -} +extern void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop); #endif /* CONFIG_PROC_DEVICETREE */ extern struct proc_dir_entry *proc_symlink(const char *, Index: linux-work/arch/ppc/syslib/prom.c =================================================================== --- linux-work.orig/arch/ppc/syslib/prom.c 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/arch/ppc/syslib/prom.c 2005-09-29 17:06:05.000000000 +1000 @@ -1165,7 +1165,7 @@ /* * Add a property to a node */ -void __openfirmware +int __openfirmware prom_add_property(struct device_node* np, struct property* prop) { struct property **next = &np->properties; @@ -1174,6 +1174,8 @@ while (*next) next = &(*next)->next; *next = prop; + + return 0; } /* I quickly hacked that one, check against spec ! */ From benh at kernel.crashing.org Tue Oct 4 15:36:54 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 15:36:54 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines Message-ID: <1128404215.31063.32.camel@gaston> This is the actual thermal control support for PowerMac8,1, PowerMac8,2 and PowerMac9,1 machines (SMU based), that is iMac G5 and single CPU desktop. It requires CPUFREQ to be enabled to properly deal with overtemp conditions. The new thermal control code implements a new framework (nicknamed "windfarm") to which I expect to port the old G5 thermal control, and possibly some of the powerbook thermal control drivers as well in the future. 
Signed-off-by: Benjamin Herrenschmidt Index: linux-work/drivers/macintosh/smu.c =================================================================== --- linux-work.orig/drivers/macintosh/smu.c 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/drivers/macintosh/smu.c 2005-10-04 15:17:32.000000000 +1000 @@ -590,6 +590,8 @@ sprintf(name, "smu-i2c-%02x", *reg); of_platform_device_create(np, name, &smu->of_dev->dev); } + if (device_is_compatible(np, "smu-sensors")) + of_platform_device_create(np, "smu-sensors", &smu->of_dev->dev); } } Index: linux-work/drivers/macintosh/Kconfig =================================================================== --- linux-work.orig/drivers/macintosh/Kconfig 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/drivers/macintosh/Kconfig 2005-10-04 15:17:33.000000000 +1000 @@ -169,6 +169,16 @@ This driver provides thermostat and fan control for the desktop G5 machines. +config WINDFARM + tristate "New PowerMac thermal control infrastructure" + +config WINDFARM_SMU + tristate "Support for thermal management on SMU based PowerMacs" + depends on WINDFARM && I2C && CPU_FREQ_PMAC && PMAC_SMU + select I2C_PMAC_SMU + help + This driver provides thermal control for iMacG5 and newer + config ANSLCD tristate "Support for ANS LCD display" depends on ADB_CUDA && PPC_PMAC Index: linux-work/drivers/macintosh/Makefile =================================================================== --- linux-work.orig/drivers/macintosh/Makefile 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/drivers/macintosh/Makefile 2005-10-04 15:17:33.000000000 +1000 @@ -26,3 +26,8 @@ obj-$(CONFIG_THERM_PM72) += therm_pm72.o obj-$(CONFIG_THERM_WINDTUNNEL) += therm_windtunnel.o obj-$(CONFIG_THERM_ADT746X) += therm_adt746x.o +obj-$(CONFIG_WINDFARM) += windfarm_core.o +obj-$(CONFIG_WINDFARM_SMU) += windfarm_smu_controls.o \ + windfarm_smu_sensors.o \ + windfarm_lm75_sensor.o windfarm_pid.o \ + windfarm_cpufreq_clamp.o windfarm_smu.o Index: linux-work/drivers/macintosh/windfarm.h 
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm.h 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,122 @@ +#ifndef __WINDFARM_H__ +#define __WINDFARM_H__ + +#include +#include +#include +#include + +/* Display a 16.16 fixed point value */ +#define FIX32TOPRINT(f) ((f) >> 16),((((f) & 0xffff) * 1000) >> 16) + +/* + * Control objects + */ + +struct wf_control; + +struct wf_control_ops { + int (*set_value)(struct wf_control *ct, s32 val); + int (*get_value)(struct wf_control *ct, s32 *val); + s32 (*get_min)(struct wf_control *ct); + s32 (*get_max)(struct wf_control *ct); + void (*release)(struct wf_control *ct); + struct module *owner; +}; + +struct wf_control { + struct list_head link; + struct wf_control_ops *ops; + char *name; + int type; + struct kref ref; +}; + +#define WF_CONTROL_TYPE_GENERIC 0 +#define WF_CONTROL_RPM_FAN 1 +#define WF_CONTROL_PWM_FAN 2 + + +/* Note about lifetime rules: wf_register_control() will initialize + * the kref and wf_unregister_control will decrement it, thus the + * object creating/disposing a given control shouldn't assume it + * still exists after wf_unregister_control has been called. 
+ * wf_find_control will inc the refcount for you + */ +extern int wf_register_control(struct wf_control *ct); +extern void wf_unregister_control(struct wf_control *ct); +extern struct wf_control * wf_find_control(const char *name); +extern int wf_get_control(struct wf_control *ct); +extern void wf_put_control(struct wf_control *ct); + +static inline int wf_control_set_max(struct wf_control *ct) +{ + s32 vmax = ct->ops->get_max(ct); + return ct->ops->set_value(ct, vmax); +} + +static inline int wf_control_set_min(struct wf_control *ct) +{ + s32 vmin = ct->ops->get_min(ct); + return ct->ops->set_value(ct, vmin); +} + +/* + * Sensor objects + */ + +struct wf_sensor; + +struct wf_sensor_ops { + int (*get_value)(struct wf_sensor *sr, s32 *val); + void (*release)(struct wf_sensor *sr); + struct module *owner; +}; + +struct wf_sensor { + struct list_head link; + struct wf_sensor_ops *ops; + char *name; + struct kref ref; +}; + +/* Same lifetime rules as controls */ +extern int wf_register_sensor(struct wf_sensor *sr); +extern void wf_unregister_sensor(struct wf_sensor *sr); +extern struct wf_sensor * wf_find_sensor(const char *name); +extern int wf_get_sensor(struct wf_sensor *sr); +extern void wf_put_sensor(struct wf_sensor *sr); + +/* For use by clients. Note that we are a bit racy here since + * notifier_block doesn't have a module owner field. I may fix + * it one day ... + * + * LOCKING NOTE ! + * + * All "events" except WF_EVENT_TICK are called with an internal mutex + * held which will deadlock if you call basically any core routine. + * So don't ! Just take note of the event and do your actual operations + * from the ticker. + * + */ +extern int wf_register_client(struct notifier_block *nb); +extern int wf_unregister_client(struct notifier_block *nb); + +/* Overtemp conditions. 
Those are refcounted */ +extern void wf_set_overtemp(void); +extern void wf_clear_overtemp(void); +extern int wf_is_overtemp(void); + +#define WF_EVENT_NEW_CONTROL 0 /* param is wf_control * */ +#define WF_EVENT_NEW_SENSOR 1 /* param is wf_sensor * */ +#define WF_EVENT_OVERTEMP 2 /* no param */ +#define WF_EVENT_NORMALTEMP 3 /* overtemp condition cleared */ +#define WF_EVENT_TICK 4 /* 1 second tick */ + +/* Note: If that driver gets more broad use, we could replace the + * simplistic overtemp bits with "environmental conditions". That + * could then be used to also notify of things like fan failure, + * case open, battery conditions, ... + */ + +#endif /* __WINDFARM_H__ */ Index: linux-work/drivers/macintosh/windfarm_core.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_core.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,428 @@ +/* + * Windfarm PowerMac thermal control. Core + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + * + * This core code tracks the list of sensors & controls, register + * clients, and holds the kernel thread used for control. + * + * TODO: + * + * Add some information about sensor/control type and data format to + * sensors/controls, and have the sysfs attribute stuff be moved + * generically here instead of hard coded in the platform specific + * driver as it us currently + * + * This however requires solving some annoying lifetime issues with + * sysfs which doesn't seem to have lifetime rules for struct attribute, + * I may have to create full features kobjects for every sensor/control + * instead which is a bit of an overkill imho + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.2" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) 
printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +static LIST_HEAD(wf_controls); +static LIST_HEAD(wf_sensors); +static DECLARE_MUTEX(wf_lock); +static struct notifier_block *wf_client_list; +static int wf_client_count; +static unsigned int wf_overtemp; +static unsigned int wf_overtemp_counter; +struct task_struct *wf_thread; + +/* + * Utilities & tick thread + */ + +static inline void wf_notify(int event, void *param) +{ + notifier_call_chain(&wf_client_list, event, param); +} + +int wf_critical_overtemp(void) +{ + static char * critical_overtemp_path = "/sbin/critical_overtemp"; + char *argv[] = { critical_overtemp_path, NULL }; + static char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + + return call_usermodehelper(critical_overtemp_path, argv, envp, 0); +} +EXPORT_SYMBOL_GPL(wf_critical_overtemp); + +static int wf_thread_func(void *data) +{ + unsigned long next, delay; + + next = jiffies; + + DBG("wf: thread started\n"); + + while(!kthread_should_stop()) { + try_to_freeze(); + + if (time_after_eq(jiffies, next)) { + wf_notify(WF_EVENT_TICK, NULL); + if (wf_overtemp) { + wf_overtemp_counter++; + /* 10 seconds overtemp, notify userland */ + if (wf_overtemp_counter > 10) + wf_critical_overtemp(); + /* 30 seconds, shutdown */ + if (wf_overtemp_counter > 30) { + printk(KERN_ERR "windfarm: Overtemp " + "for more than 30" + " seconds, shutting down\n"); + machine_power_off(); + } + } + next += HZ; + } + + set_current_state(TASK_INTERRUPTIBLE); + delay = next - jiffies; + if (delay <= HZ) + schedule_timeout(delay); + set_current_state(TASK_RUNNING); + + /* there should be no signal, but oh well */ + if (signal_pending(current)) { + printk(KERN_WARNING "windfarm: thread got sigl !\n"); + break; + } + } + + DBG("wf: thread stopped\n"); + + return 0; +} + +static void wf_start_thread(void) +{ + wf_thread = kthread_run(wf_thread_func, NULL, "kwindfarm"); + if (IS_ERR(wf_thread)) { + printk(KERN_ERR "windfarm: 
failed to create thread,err %ld\n", + PTR_ERR(wf_thread)); + wf_thread = NULL; + } +} + + +static void wf_stop_thread(void) +{ + if (wf_thread) + kthread_stop(wf_thread); + wf_thread = NULL; +} + +/* + * Controls + */ + +static void wf_control_release(struct kref *kref) +{ + struct wf_control *ct = container_of(kref, struct wf_control, ref); + + DBG("wf: Deleting control %s\n", ct->name); + + if (ct->ops && ct->ops->release) + ct->ops->release(ct); + else + kfree(ct); +} + +int wf_register_control(struct wf_control *new_ct) +{ + struct wf_control *ct; + + down(&wf_lock); + list_for_each_entry(ct, &wf_controls, link) { + if (!strcmp(ct->name, new_ct->name)) { + printk(KERN_WARNING "windfarm: trying to register" + " duplicate control %s\n", ct->name); + up(&wf_lock); + return -EEXIST; + } + } + kref_init(&new_ct->ref); + list_add(&new_ct->link, &wf_controls); + + DBG("wf: Registered control %s\n", new_ct->name); + + wf_notify(WF_EVENT_NEW_CONTROL, new_ct); + up(&wf_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(wf_register_control); + +void wf_unregister_control(struct wf_control *ct) +{ + down(&wf_lock); + list_del(&ct->link); + up(&wf_lock); + + DBG("wf: Unregistered control %s\n", ct->name); + + kref_put(&ct->ref, wf_control_release); +} +EXPORT_SYMBOL_GPL(wf_unregister_control); + +struct wf_control * wf_find_control(const char *name) +{ + struct wf_control *ct; + + down(&wf_lock); + list_for_each_entry(ct, &wf_controls, link) { + if (!strcmp(ct->name, name)) { + if (wf_get_control(ct)) + ct = NULL; + up(&wf_lock); + return ct; + } + } + up(&wf_lock); + return NULL; +} +EXPORT_SYMBOL_GPL(wf_find_control); + +int wf_get_control(struct wf_control *ct) +{ + if (!try_module_get(ct->ops->owner)) + return -ENODEV; + kref_get(&ct->ref); + return 0; +} +EXPORT_SYMBOL_GPL(wf_get_control); + +void wf_put_control(struct wf_control *ct) +{ + struct module *mod = ct->ops->owner; + kref_put(&ct->ref, wf_control_release); + module_put(mod); +} +EXPORT_SYMBOL_GPL(wf_put_control); + 
+
+/*
+ * Sensors
+ */
+
+
+/*
+ * kref release callback for a sensor: runs when the last reference is
+ * dropped. Hands the object to the owner's ->release() hook when one is
+ * provided, otherwise frees it with kfree().
+ */
+static void wf_sensor_release(struct kref *kref)
+{
+	struct wf_sensor *sr = container_of(kref, struct wf_sensor, ref);
+
+	DBG("wf: Deleting sensor %s\n", sr->name);
+
+	if (sr->ops && sr->ops->release)
+		sr->ops->release(sr);
+	else
+		kfree(sr);
+}
+
+/*
+ * Add a sensor to the global list and announce it to registered clients.
+ *
+ * Returns 0 on success or -EEXIST when a sensor with the same name is
+ * already registered. On success the sensor's kref is initialized here.
+ * WF_EVENT_NEW_SENSOR is sent with wf_lock held.
+ */
+int wf_register_sensor(struct wf_sensor *new_sr)
+{
+	struct wf_sensor *sr;
+
+	down(&wf_lock);
+	list_for_each_entry(sr, &wf_sensors, link) {
+		if (!strcmp(sr->name, new_sr->name)) {
+			printk(KERN_WARNING "windfarm: trying to register"
+			       " duplicate sensor %s\n", sr->name);
+			up(&wf_lock);
+			return -EEXIST;
+		}
+	}
+	kref_init(&new_sr->ref);
+	list_add(&new_sr->link, &wf_sensors);
+
+	DBG("wf: Registered sensor %s\n", new_sr->name);
+
+	wf_notify(WF_EVENT_NEW_SENSOR, new_sr);
+	up(&wf_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wf_register_sensor);
+
+/*
+ * Remove a sensor from the global list and drop a reference on it.
+ *
+ * NOTE(review): this goes through wf_put_sensor(), which also does a
+ * module_put(), although wf_register_sensor() takes no module reference.
+ * wf_unregister_control() calls kref_put() directly instead. Looks like
+ * an unbalanced module_put -- confirm before changing.
+ */
+void wf_unregister_sensor(struct wf_sensor *sr)
+{
+	down(&wf_lock);
+	list_del(&sr->link);
+	up(&wf_lock);
+
+	DBG("wf: Unregistered sensor %s\n", sr->name);
+
+	wf_put_sensor(sr);
+}
+EXPORT_SYMBOL_GPL(wf_unregister_sensor);
+
+/*
+ * Look up a sensor by name. On a match, a reference is taken through
+ * wf_get_sensor(); returns NULL when no sensor matches or when that
+ * reference cannot be taken.
+ */
+struct wf_sensor * wf_find_sensor(const char *name)
+{
+	struct wf_sensor *sr;
+
+	down(&wf_lock);
+	list_for_each_entry(sr, &wf_sensors, link) {
+		if (!strcmp(sr->name, name)) {
+			if (wf_get_sensor(sr))
+				sr = NULL;
+			up(&wf_lock);
+			return sr;
+		}
+	}
+	up(&wf_lock);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(wf_find_sensor);
+
+/*
+ * Take a reference on a sensor: pins the owning module first, then
+ * bumps the kref. Returns -ENODEV when the module cannot be pinned.
+ */
+int wf_get_sensor(struct wf_sensor *sr)
+{
+	if (!try_module_get(sr->ops->owner))
+		return -ENODEV;
+	kref_get(&sr->ref);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wf_get_sensor);
+
+/*
+ * Drop a reference taken with wf_get_sensor(). The owner pointer is
+ * read before kref_put() because the sensor may be released by it.
+ */
+void wf_put_sensor(struct wf_sensor *sr)
+{
+	struct module *mod = sr->ops->owner;
+	kref_put(&sr->ref, wf_sensor_release);
+	module_put(mod);
+}
+EXPORT_SYMBOL_GPL(wf_put_sensor);
+
+
+/*
+ * Client & notification
+ */
+
+/*
+ * Register a notifier client.
+ *
+ * After a successful registration, WF_EVENT_NEW_CONTROL and
+ * WF_EVENT_NEW_SENSOR are sent for every control and sensor already
+ * known, and the tick thread is started when this is the first client.
+ *
+ * Returns 0 on success or the error from notifier_chain_register().
+ */
+int wf_register_client(struct notifier_block *nb)
+{
+	int rc;
+	struct wf_control *ct;
+	struct wf_sensor *sr;
+
+	down(&wf_lock);
+	rc = notifier_chain_register(&wf_client_list, nb);
+	if (rc != 0)
+		goto bail;
+	wf_client_count++;
+	list_for_each_entry(ct, &wf_controls, link)
+		wf_notify(WF_EVENT_NEW_CONTROL, ct);
+	list_for_each_entry(sr, &wf_sensors, link)
+		wf_notify(WF_EVENT_NEW_SENSOR, sr);
+	if (wf_client_count == 1)
+		wf_start_thread();
+ bail:
+	/*
+	 * The unlock must sit after the label: the failure path jumps
+	 * here with wf_lock still held and used to return without
+	 * releasing it.
+	 */
+	up(&wf_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(wf_register_client);
+
+/*
+ * Unregister a notifier client and stop the tick thread once the last
+ * client is gone. Always returns 0.
+ */
+int wf_unregister_client(struct notifier_block *nb)
+{
+	down(&wf_lock);
+	notifier_chain_unregister(&wf_client_list, nb);
+	/*
+	 * Must be a decrement: with an increment the count never gets
+	 * back to 0, so the thread is never stopped and the BUG_ON() in
+	 * windfarm_core_exit() fires.
+	 */
+	wf_client_count--;
+	if (wf_client_count == 0)
+		wf_stop_thread();
+	up(&wf_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wf_unregister_client);
+
+/*
+ * Raise the overtemp condition. The condition is counted: only the
+ * 0 -> 1 transition resets the overtemp counter and sends
+ * WF_EVENT_OVERTEMP.
+ */
+void wf_set_overtemp(void)
+{
+	down(&wf_lock);
+	wf_overtemp++;
+	if (wf_overtemp == 1) {
+		printk(KERN_WARNING "windfarm: Overtemp condition detected !\n");
+		wf_overtemp_counter = 0;
+		wf_notify(WF_EVENT_OVERTEMP, NULL);
+	}
+	up(&wf_lock);
+}
+EXPORT_SYMBOL_GPL(wf_set_overtemp);
+
+/*
+ * Drop one overtemp count. Warns and does nothing when the count is
+ * already 0; sends WF_EVENT_NORMALTEMP when it reaches 0.
+ */
+void wf_clear_overtemp(void)
+{
+	down(&wf_lock);
+	WARN_ON(wf_overtemp == 0);
+	if (wf_overtemp == 0) {
+		up(&wf_lock);
+		return;
+	}
+	wf_overtemp--;
+	if (wf_overtemp == 0) {
+		printk(KERN_WARNING "windfarm: Overtemp condition cleared !\n");
+		wf_notify(WF_EVENT_NORMALTEMP, NULL);
+	}
+	up(&wf_lock);
+}
+EXPORT_SYMBOL_GPL(wf_clear_overtemp);
+
+/* Non-zero while at least one overtemp count is held (read without wf_lock) */
+int wf_is_overtemp(void)
+{
+	return (wf_overtemp != 0);
+}
+EXPORT_SYMBOL_GPL(wf_is_overtemp);
+
+static struct platform_device wf_platform_device = {
+	.name	= "windfarm",
+};
+
+/* Module init: registers the "windfarm" platform device */
+static int __init windfarm_core_init(void)
+{
+	DBG("wf: core loaded\n");
+
+	platform_device_register(&wf_platform_device);
+	return 0;
+}
+
+/* Module exit: all clients must have unregistered by now */
+static void __exit windfarm_core_exit(void)
+{
+	BUG_ON(wf_client_count != 0);
+
+	DBG("wf: core unloaded\n");
+
+	platform_device_unregister(&wf_platform_device);
+}
+
+
+module_init(windfarm_core_init);
+module_exit(windfarm_core_exit);
+
+MODULE_AUTHOR("Benjamin Herrenschmidt ");
+MODULE_DESCRIPTION("Core component of PowerMac thermal control");
+MODULE_LICENSE("GPL");
+
Index: linux-work/drivers/macintosh/windfarm_smu_controls.c
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_smu_controls.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,274 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.3" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +/* + * SMU fans control object + */ + +static LIST_HEAD(smu_fans); + +struct smu_fan_control { + struct list_head link; + int fan_type; /* 0 = rpm, 1 = pwm */ + u32 reg; /* index in SMU */ + s32 value; /* current value */ + s32 min, max; /* min/max values */ + struct wf_control ctrl; +}; +#define to_smu_fan(c) container_of(c, struct smu_fan_control, ctrl) + +static int smu_set_fan(int pwm, u8 id, u16 value) +{ + struct smu_cmd cmd; + u8 buffer[16]; + DECLARE_COMPLETION(comp); + int rc; + + /* Fill SMU command structure */ + cmd.cmd = SMU_CMD_FAN_COMMAND; + cmd.data_len = 14; + cmd.reply_len = 16; + cmd.data_buf = cmd.reply_buf = buffer; + cmd.status = 0; + cmd.done = smu_done_complete; + cmd.misc = &comp; + + /* Fill argument buffer */ + memset(buffer, 0, 16); + buffer[0] = pwm ?
0x10 : 0x00; + buffer[1] = 0x01 << id; + *((u16 *)&buffer[2 + id * 2]) = value; + + rc = smu_queue_cmd(&cmd); + if (rc) + return rc; + wait_for_completion(&comp); + return cmd.status; +} + +static void smu_fan_release(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + + kfree(fct); +} + +static int smu_fan_set(struct wf_control *ct, s32 value) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + + if (value < fct->min) + value = fct->min; + if (value > fct->max) + value = fct->max; + fct->value = value; + + return smu_set_fan(fct->fan_type, fct->reg, value); +} + +static int smu_fan_get(struct wf_control *ct, s32 *value) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + *value = fct->value; /* todo: read from SMU */ + return 0; +} + +static s32 smu_fan_min(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + return fct->min; +} + +static s32 smu_fan_max(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + return fct->max; +} + +static struct wf_control_ops smu_fan_ops = { + .set_value = smu_fan_set, + .get_value = smu_fan_get, + .get_min = smu_fan_min, + .get_max = smu_fan_max, + .release = smu_fan_release, + .owner = THIS_MODULE, +}; + +static struct smu_fan_control *smu_fan_create(struct device_node *node, + int pwm_fan) +{ + struct smu_fan_control *fct; + s32 *v; u32 *reg; + char *l; + + fct = kmalloc(sizeof(struct smu_fan_control), GFP_KERNEL); + if (fct == NULL) + return NULL; + fct->ctrl.ops = &smu_fan_ops; + l = (char *)get_property(node, "location", NULL); + if (l == NULL) + goto fail; + + fct->fan_type = pwm_fan; + fct->ctrl.type = pwm_fan ? WF_CONTROL_PWM_FAN : WF_CONTROL_RPM_FAN; + + /* We use the name & location here the same way we do for SMU sensors, + * see the comment in windfarm_smu_sensors.c. The locations are a bit + * less consistent here between the iMac and the desktop models, but + * that is good enough for our needs for now at least. 
+ * + * One problem though is that Apple seem to be inconsistent with case + * and the kernel doesn't have strcasecmp =P + */ + + fct->ctrl.name = NULL; + + /* Names used on desktop models */ + if (!strcmp(l, "Rear Fan 0") || !strcmp(l, "Rear Fan") || + !strcmp(l, "Rear fan 0") || !strcmp(l, "Rear fan")) + fct->ctrl.name = "cpu-rear-fan-0"; + else if (!strcmp(l, "Rear Fan 1") || !strcmp(l, "Rear fan 1")) + fct->ctrl.name = "cpu-rear-fan-1"; + else if (!strcmp(l, "Front Fan 0") || !strcmp(l, "Front Fan") || + !strcmp(l, "Front fan 0") || !strcmp(l, "Front fan")) + fct->ctrl.name = "cpu-front-fan-0"; + else if (!strcmp(l, "Front Fan 1") || !strcmp(l, "Front fan 1")) + fct->ctrl.name = "cpu-front-fan-1"; + else if (!strcmp(l, "Slots Fan") || !strcmp(l, "Slots fan")) + fct->ctrl.name = "slots-fan"; + else if (!strcmp(l, "Drive Bay") || !strcmp(l, "Drive bay")) + fct->ctrl.name = "drive-bay-fan"; + + /* Names used on iMac models */ + if (!strcmp(l, "System Fan") || !strcmp(l, "System fan")) + fct->ctrl.name = "system-fan"; + else if (!strcmp(l, "CPU Fan") || !strcmp(l, "CPU fan")) + fct->ctrl.name = "cpu-fan"; + else if (!strcmp(l, "Hard Drive") || !strcmp(l, "Hard drive")) + fct->ctrl.name = "drive-bay-fan"; + + /* Unrecognized fan, bail out */ + if (fct->ctrl.name == NULL) + goto fail; + + /* Get min & max values*/ + v = (s32 *)get_property(node, "min-value", NULL); + if (v == NULL) + goto fail; + fct->min = *v; + v = (s32 *)get_property(node, "max-value", NULL); + if (v == NULL) + goto fail; + fct->max = *v; + + /* Get "reg" value */ + reg = (u32 *)get_property(node, "reg", NULL); + if (reg == NULL) + goto fail; + fct->reg = *reg; + + if (wf_register_control(&fct->ctrl)) + goto fail; + + return fct; + fail: + kfree(fct); + return NULL; +} + + +static int __init smu_controls_init(void) +{ + struct device_node *smu, *fans, *fan; + + if (!smu_present()) + return -ENODEV; + + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return -ENODEV; + + /* Look for 
RPM fans */ + for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + if (!strcmp(fans->name, "rpm-fans")) + break; + for (fan = NULL; + fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + struct smu_fan_control *fct; + + fct = smu_fan_create(fan, 0); + if (fct == NULL) { + printk(KERN_WARNING "windfarm: Failed to create SMU " + "RPM fan %s\n", fan->name); + continue; + } + list_add(&fct->link, &smu_fans); + } + of_node_put(fans); + + + /* Look for PWM fans */ + for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + if (!strcmp(fans->name, "pwm-fans")) + break; + for (fan = NULL; + fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + struct smu_fan_control *fct; + + fct = smu_fan_create(fan, 1); + if (fct == NULL) { + printk(KERN_WARNING "windfarm: Failed to create SMU " + "PWM fan %s\n", fan->name); + continue; + } + list_add(&fct->link, &smu_fans); + } + of_node_put(fans); + of_node_put(smu); + + return 0; +} + +static void __exit smu_controls_exit(void) +{ + struct smu_fan_control *fct; + + while (!list_empty(&smu_fans)) { + fct = list_entry(smu_fans.next, struct smu_fan_control, link); + list_del(&fct->link); + wf_unregister_control(&fct->ctrl); + } +} + + +module_init(smu_controls_init); +module_exit(smu_controls_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("SMU control objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + Index: linux-work/drivers/macintosh/windfarm_smu_sensors.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_smu_sensors.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,471 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.2" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) 
printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +/* + * Various SMU "partitions" calibration objects for which we + * keep pointers here for use by bits & pieces of the driver + */ +static struct smu_sdbp_cpuvcp *cpuvcp; +static int cpuvcp_version; +static struct smu_sdbp_cpudiode *cpudiode; +static struct smu_sdbp_slotspow *slotspow; +static u8 *debugswitches; + +/* + * SMU basic sensors objects + */ + +static LIST_HEAD(smu_ads); + +struct smu_ad_sensor { + struct list_head link; + u32 reg; /* index in SMU */ + struct wf_sensor sens; +}; +#define to_smu_ads(c) container_of(c, struct smu_ad_sensor, sens) + +static void smu_ads_release(struct wf_sensor *sr) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + + kfree(ads); +} + +static int smu_read_adc(u8 id, s32 *value) +{ + struct smu_simple_cmd cmd; + DECLARE_COMPLETION(comp); + int rc; + + rc = smu_queue_simple(&cmd, SMU_CMD_READ_ADC, 1, + smu_done_complete, &comp, id); + if (rc) + return rc; + wait_for_completion(&comp); + if (cmd.cmd.status != 0) + return cmd.cmd.status; + if (cmd.cmd.reply_len != 2) { + printk(KERN_ERR "winfarm: read ADC 0x%x returned %d bytes !\n", + id, cmd.cmd.reply_len); + return -EIO; + } + *value = *((u16 *)cmd.buffer); + return 0; +} + +static int smu_cputemp_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + int rc; + s32 val; + s64 scaled; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU temp failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s64)(((u64)val) * (u64)cpudiode->m_value); + scaled >>= 3; + scaled += ((s64)cpudiode->b_value) << 9; + *value = (s32)(scaled << 1); + + return 0; +} + +static int smu_cpuamp_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU 
current failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)cpuvcp->curr_scale); + scaled += (s32)cpuvcp->curr_offset; + *value = scaled << 4; + + return 0; +} + +static int smu_cpuvolt_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU voltage failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)cpuvcp->volt_scale); + scaled += (s32)cpuvcp->volt_offset; + *value = scaled << 4; + + return 0; +} + +static int smu_slotspow_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read slots power failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)slotspow->pow_scale); + scaled += (s32)slotspow->pow_offset; + *value = scaled << 4; + + return 0; +} + + +static struct wf_sensor_ops smu_cputemp_ops = { + .get_value = smu_cputemp_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_cpuamp_ops = { + .get_value = smu_cpuamp_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_cpuvolt_ops = { + .get_value = smu_cpuvolt_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_slotspow_ops = { + .get_value = smu_slotspow_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; + + +static struct smu_ad_sensor *smu_ads_create(struct device_node *node) +{ + struct smu_ad_sensor *ads; + char *c, *l; + u32 *v; + + ads = kmalloc(sizeof(struct smu_ad_sensor), GFP_KERNEL); + if (ads == NULL) + return NULL; + c = 
(char *)get_property(node, "device_type", NULL); + l = (char *)get_property(node, "location", NULL); + if (c == NULL || l == NULL) + goto fail; + + /* We currently pick the sensors based on the OF name and location + * properties, while Darwin uses the sensor-id's. + * The problem with the IDs is that they are model specific while it + * looks like apple has been doing a reasonably good job at keeping + * the names and locations consistents so I'll stick with the names + * and locations for now. + */ + if (!strcmp(c, "temp-sensor") && + !strcmp(l, "CPU T-Diode")) { + ads->sens.ops = &smu_cputemp_ops; + ads->sens.name = "cpu-temp"; + } else if (!strcmp(c, "current-sensor") && + !strcmp(l, "CPU Current")) { + ads->sens.ops = &smu_cpuamp_ops; + ads->sens.name = "cpu-current"; + } else if (!strcmp(c, "voltage-sensor") && + !strcmp(l, "CPU Voltage")) { + ads->sens.ops = &smu_cpuvolt_ops; + ads->sens.name = "cpu-voltage"; + } else if (!strcmp(c, "power-sensor") && + !strcmp(l, "Slots Power")) { + ads->sens.ops = &smu_slotspow_ops; + ads->sens.name = "slots-power"; + if (slotspow == NULL) { + DBG("wf: slotspow partition (%02x) not found\n", + SMU_SDB_SLOTSPOW_ID); + goto fail; + } + } else + goto fail; + + v = (u32 *)get_property(node, "reg", NULL); + if (v == NULL) + goto fail; + ads->reg = *v; + + if (wf_register_sensor(&ads->sens)) + goto fail; + return ads; + fail: + kfree(ads); + return NULL; +} + +/* + * SMU Power combo sensor object + */ + +struct smu_cpu_power_sensor { + struct list_head link; + struct wf_sensor *volts; + struct wf_sensor *amps; + int fake_volts : 1; + int quadratic : 1; + struct wf_sensor sens; +}; +#define to_smu_cpu_power(c) container_of(c, struct smu_cpu_power_sensor, sens) + +static struct smu_cpu_power_sensor *smu_cpu_power; + +static void smu_cpu_power_release(struct wf_sensor *sr) +{ + struct smu_cpu_power_sensor *pow = to_smu_cpu_power(sr); + + if (pow->volts) + wf_put_sensor(pow->volts); + if (pow->amps) + wf_put_sensor(pow->amps); + 
kfree(pow); +} + +static int smu_cpu_power_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_cpu_power_sensor *pow = to_smu_cpu_power(sr); + s32 volts, amps, power; + u64 tmps, tmpa, tmpb; + int rc; + + rc = pow->amps->ops->get_value(pow->amps, &amps); + if (rc) + return rc; + + if (pow->fake_volts) { + *value = amps * 12 - 0x30000; + return 0; + } + + rc = pow->volts->ops->get_value(pow->volts, &volts); + if (rc) + return rc; + + power = (s32)((((u64)volts) * ((u64)amps)) >> 16); + if (!pow->quadratic) { + *value = power; + return 0; + } + tmps = (((u64)power) * ((u64)power)) >> 16; + tmpa = ((u64)cpuvcp->power_quads[0]) * tmps; + tmpb = ((u64)cpuvcp->power_quads[1]) * ((u64)power); + *value = (tmpa >> 28) + (tmpb >> 28) + (cpuvcp->power_quads[2] >> 12); + + return 0; +} + +static struct wf_sensor_ops smu_cpu_power_ops = { + .get_value = smu_cpu_power_get, + .release = smu_cpu_power_release, + .owner = THIS_MODULE, +}; + + +static struct smu_cpu_power_sensor * +smu_cpu_power_create(struct wf_sensor *volts, struct wf_sensor *amps) +{ + struct smu_cpu_power_sensor *pow; + + pow = kmalloc(sizeof(struct smu_cpu_power_sensor), GFP_KERNEL); + if (pow == NULL) + return NULL; + pow->sens.ops = &smu_cpu_power_ops; + pow->sens.name = "cpu-power"; + + wf_get_sensor(volts); + pow->volts = volts; + wf_get_sensor(amps); + pow->amps = amps; + + /* Some early machines need a faked voltage */ + if (debugswitches && ((*debugswitches) & 0x80)) { + printk(KERN_INFO "windfarm: CPU Power sensor using faked" + " voltage !\n"); + pow->fake_volts = 1; + } else + pow->fake_volts = 0; + + /* Try to use quadratic transforms on PowerMac8,1 and 9,1 for now, + * I yet have to figure out what's up with 8,2 and will have to + * adjust for later, unless we can 100% trust the SDB partition...
+ */ + if ((machine_is_compatible("PowerMac8,1") || + machine_is_compatible("PowerMac8,2") || + machine_is_compatible("PowerMac9,1")) && + cpuvcp_version >= 2) { + pow->quadratic = 1; + DBG("windfarm: CPU Power using quadratic transform\n"); + } else + pow->quadratic = 0; + + if (wf_register_sensor(&pow->sens)) + goto fail; + return pow; + fail: + kfree(pow); + return NULL; +} + +static int smu_fetch_param_partitions(void) +{ + struct smu_sdbp_header *hdr; + + /* Get CPU voltage/current/power calibration data */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUVCP_ID, NULL); + if (hdr == NULL) { + DBG("wf: cpuvcp partition (%02x) not found\n", + SMU_SDB_CPUVCP_ID); + return -ENODEV; + } + cpuvcp = (struct smu_sdbp_cpuvcp *)&hdr[1]; + /* Keep version around */ + cpuvcp_version = hdr->version; + + /* Get CPU diode calibration data */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUDIODE_ID, NULL); + if (hdr == NULL) { + DBG("wf: cpudiode partition (%02x) not found\n", + SMU_SDB_CPUDIODE_ID); + return -ENODEV; + } + cpudiode = (struct smu_sdbp_cpudiode *)&hdr[1]; + + /* Get slots power calibration data if any */ + hdr = smu_get_sdb_partition(SMU_SDB_SLOTSPOW_ID, NULL); + if (hdr != NULL) + slotspow = (struct smu_sdbp_slotspow *)&hdr[1]; + + /* Get debug switches if any */ + hdr = smu_get_sdb_partition(SMU_SDB_DEBUG_SWITCHES_ID, NULL); + if (hdr != NULL) + debugswitches = (u8 *)&hdr[1]; + + return 0; +} + +static int __init smu_sensors_init(void) +{ + struct device_node *smu, *sensors, *s; + struct smu_ad_sensor *volt_sensor = NULL, *curr_sensor = NULL; + int rc; + + if (!smu_present()) + return -ENODEV; + + /* Get parameters partitions */ + rc = smu_fetch_param_partitions(); + if (rc) + return rc; + + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return -ENODEV; + + /* Look for sensors subdir */ + for (sensors = NULL; + (sensors = of_get_next_child(smu, sensors)) != NULL;) + if (!strcmp(sensors->name, "sensors")) + break; + + of_node_put(smu); + + /* Create basic 
sensors */ + for (s = NULL; + sensors && (s = of_get_next_child(sensors, s)) != NULL;) { + struct smu_ad_sensor *ads; + + ads = smu_ads_create(s); + if (ads == NULL) + continue; + list_add(&ads->link, &smu_ads); + /* keep track of cpu voltage & current */ + if (!strcmp(ads->sens.name, "cpu-voltage")) + volt_sensor = ads; + else if (!strcmp(ads->sens.name, "cpu-current")) + curr_sensor = ads; + } + + of_node_put(sensors); + + /* Create CPU power sensor if possible */ + if (volt_sensor && curr_sensor) + smu_cpu_power = smu_cpu_power_create(&volt_sensor->sens, + &curr_sensor->sens); + + return 0; +} + +static void __exit smu_sensors_exit(void) +{ + struct smu_ad_sensor *ads; + + /* dispose of power sensor */ + if (smu_cpu_power) + wf_unregister_sensor(&smu_cpu_power->sens); + + /* dispose of basic sensors */ + while (!list_empty(&smu_ads)) { + ads = list_entry(smu_ads.next, struct smu_ad_sensor, link); + list_del(&ads->link); + wf_unregister_sensor(&ads->sens); + } +} + + +module_init(smu_sensors_init); +module_exit(smu_sensors_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("SMU sensor objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + Index: linux-work/drivers/macintosh/windfarm_smu.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_smu.c 2005-10-04 15:19:19.000000000 +1000 @@ -0,0 +1,1221 @@ +/* + * Windfarm PowerMac thermal control. SMU based machines control loops + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. 
+ * + * The algorithm used is the PID control algorithm, used the same + * way the published Darwin code does, using the same values that + * are present in the Darwin 8.2 snapshot property lists (note however + * that none of the code has been re-used, it's a complete re-implementation + * + * The various control loops found in Darwin config file are: + * + * PowerMac8,1 and PowerMac8,2 + * =========================== + * + * System Fans control loop. Different based on models. In addition to the + * usual PID algorithm, the control loop gets 2 additional pairs of linear + * scaling factors (scale/offsets) expressed as 4.12 fixed point values + * signed offset, unsigned scale) + * + * The targets are modified such as: + * - the linked control (second control) gets the target value as-is + * (typically the drive fan) + * - the main control (first control) gets the target value scaled with + * the first pair of factors, and is then modified as below + * - the value of the target of the CPU Fan control loop is retreived, + * scaled with the second pair of factors, and the max of that and + * the scaled target is applied to the main control. 
+ * + * # model_id: 2 + * controls : system-fan, drive-bay-fan + * sensors : hd-temp + * PID params : G_d = 0x15400000 + * G_p = 0x00200000 + * G_r = 0x000002fd + * History = 2 entries + * Input target = 0x3a0000 + * Interval = 5s + * linear-factors : offset = 0xff38 scale = 0x0ccd + * offset = 0x0208 scale = 0x07ae + * + * # model_id: 3 + * controls : system-fan, drive-bay-fan + * sensors : hd-temp + * PID params : G_d = 0x08e00000 + * G_p = 0x00566666 + * G_r = 0x0000072b + * History = 2 entries + * Input target = 0x350000 + * Interval = 5s + * linear-factors : offset = 0xff38 scale = 0x0ccd + * offset = 0x0000 scale = 0x0000 + * + * # model_id: 5 + * controls : system-fan + * sensors : hd-temp + * PID params : G_d = 0x15400000 + * G_p = 0x00233333 + * G_r = 0x000002fd + * History = 2 entries + * Input target = 0x3a0000 + * Interval = 5s + * linear-factors : offset = 0x0000 scale = 0x1000 + * offset = 0x0091 scale = 0x0bae + * + * CPU Fan control loop. The loop is identical for all models. it + * has an additional pair of scaling factor. This is used to scale the + * systems fan control loop target result (the one before it gets scaled + * by the System Fans control loop itself). Then, the max value of the + * calculated target value and system fan value is sent to the fans + * + * controls : cpu-fan + * sensors : cpu-temp cpu-power + * PID params : From SMU sdb partition + * linear-factors : offset = 0xfb50 scale = 0x1000 + * + * CPU Slew control loop. Not implemented. The cpufreq driver in linux is + * completely separate for now, though we could find a way to link it, either + * as a client reacting to overtemp notifications, or directling monitoring + * the CPU temperature + * + * WARNING ! The CPU control loop requires the CPU tmax for the current + * operating point. However, we currently are completely separated from + * the cpufreq driver and thus do not know what the current operating + * point is. 
Fortunately, we also do not have any hardware supporting anything + * but operating point 0 at the moment, thus we just peek that value directly + * from the SDB partition. If we ever end up with actually slewing the system + * clock and thus changing operating points, we'll have to find a way to + * communicate with the CPU freq driver; + * + * PowerMac9,1 + * =========== + * + * Has 3 control loops: CPU fans is similar to PowerMac8,1 (though it doesn't + * try to play with other control loops fans). Drive bay is rather basic PID + * with one sensor and one fan. Slots area is a bit different as the Darwin + * driver is supposed to be capable of working in a special "AGP" mode which + * involves the presence of an AGP sensor and an AGP fan (possibly on the + * AGP card itself). I can't deal with that special mode as I don't have + * access to those additional sensor/fans for now (though ultimately, it would + * be possible to add sensor objects for them) so I'm only implementing the + * basic PCI slot control loop + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" +#include "windfarm_pid.h" + +#define VERSION "0.3" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) 
do { } while(0) +#endif + +/* define this to force CPU overtemp to 74 degree, useful for testing + * the overtemp code + */ +#undef HACKED_OVERTEMP + +/* Machine identification */ +#define MACHINE_PM81 81 /* PM81, PM82 */ +#define MACHINE_PM91 91 /* PM91 */ + +static int machine; /* machine */ +static int mach_model; /* machine model id */ + +static struct device *wf_dev; + +/* Controls & sensors */ +static struct wf_sensor *sensor_cpu_power; +static struct wf_sensor *sensor_cpu_temp; +static struct wf_sensor *sensor_hd_temp; +static struct wf_sensor *sensor_slots_power; +static struct wf_control *fan_cpu_main; +static struct wf_control *fan_cpu_second; +static struct wf_control *fan_cpu_third; +static struct wf_control *fan_hd; +static struct wf_control *fan_system; +static struct wf_control *fan_slots; +static struct wf_control *cpufreq_clamp; + +/* Set to kick the control loop into life */ +static int all_controls_ok, all_sensors_ok, started; + +/* Failure handling.. could be nicer */ +#define FAILURE_FAN 0x01 +#define FAILURE_SENSOR 0x02 +#define FAILURE_OVERTEMP 0x04 + +static unsigned int failure_state; +static int readjust, skipping; + +/* + * ****** System Fans Control Loop ****** + * + * (PowerMac8,1 and 8,2 only) + */ + +/* Parameters for the System Fans control loop. Parameters + * not in this table such as interval, history size, ... + * are common to all versions and thus hard coded for now. 
+ */ +struct wf_smu_sys_fans_param { + int model_id; + s32 itarget; + s32 gd, gp, gr; + + s16 offset0; + u16 scale0; + s16 offset1; + u16 scale1; +}; + +#define WF_SMU_SYS_FANS_INTERVAL 5 +#define WF_SMU_SYS_FANS_HISTORY_SIZE 2 + +/* State data used by the system fans control loop + * (MACHINE_PM81 only) + */ +struct wf_smu_sys_fans_state { + int ticks; + s32 sys_setpoint; + s32 hd_setpoint; + s16 offset0; + u16 scale0; + s16 offset1; + u16 scale1; + struct wf_pid_state pid; +}; + +/* Only 3 known configs */ +#define WF_SMU_SYS_FANS_NUM_CONFIGS 3 + +/* + * Configs for SMU System Fan control loop. + * + * wf_smu_create_sys_fans() selects an entry by comparing model_id with + * mach_model, so every entry must carry its own distinct model id; + * an entry with a wrong id is never selected and that model falls + * back to max fan speed. + */ +static struct wf_smu_sys_fans_param wf_smu_sys_all_params[] = { + /* Model ID 2 */ + { + .model_id = 2, + .itarget = 0x3a0000, + .gd = 0x15400000, + .gp = 0x00200000, + .gr = 0x000002fd, + .offset0 = 0xff38, + .scale0 = 0x0ccd, + .offset1 = 0x0208, + .scale1 = 0x07ae, + }, + /* Model ID 3 */ + { + .model_id = 3, + .itarget = 0x350000, + .gd = 0x08e00000, + .gp = 0x00566666, + .gr = 0x0000072b, + .offset0 = 0xff38, + .scale0 = 0x0ccd, + .offset1 = 0x0000, + .scale1 = 0x0000, + }, + /* Model ID 5 */ + { + .model_id = 5, + .itarget = 0x3a0000, + .gd = 0x15400000, + .gp = 0x00233333, + .gr = 0x000002fd, + .offset0 = 0x0000, + .scale0 = 0x1000, + .offset1 = 0x0091, + .scale1 = 0x0bae, + }, +}; + +static struct wf_smu_sys_fans_state *wf_smu_sys_fans; + +/* + * ****** CPU Fans Control Loop ****** + * + */ + + +#define WF_SMU_CPU_FANS_INTERVAL 1 +#define WF_SMU_CPU_FANS_MAX_HISTORY 16 +#define WF_SMU_CPU_FANS_SIBLING_SCALE 0x00001000 +#define WF_SMU_CPU_FANS_SIBLING_OFFSET 0xfffffb50 + +/* State data used by the cpu fans control loop + */ +struct wf_smu_cpu_fans_state { + int ticks; + s32 cpu_setpoint; + s32 scale; + s32 offset; + struct wf_cpu_pid_state pid; +}; + +static struct wf_smu_cpu_fans_state *wf_smu_cpu_fans; + + + +/* + * ****** Drive Fan Control Loop ****** + * + */ + +struct wf_smu_drive_fans_state { + int ticks; + s32 setpoint; + struct wf_pid_state pid; +}; +
+static struct wf_smu_drive_fans_state *wf_smu_drive_fans; + +/* + * ****** Slots Fan Control Loop ****** + * + */ + +struct wf_smu_slots_fans_state { + int ticks; + s32 setpoint; + struct wf_pid_state pid; +}; + +static struct wf_smu_slots_fans_state *wf_smu_slots_fans; + +/* + * ***** Implementation ***** + * + */ + +static void wf_smu_create_sys_fans(void) +{ + struct wf_smu_sys_fans_param *param = NULL; + struct wf_pid_param pid_param; + int i; + + /* First, locate the params for this model */ + for (i = 0; i < WF_SMU_SYS_FANS_NUM_CONFIGS; i++) + if (wf_smu_sys_all_params[i].model_id == mach_model) { + param = &wf_smu_sys_all_params[i]; + break; + } + /* No params found, put fans to max */ + if (param == NULL) { + printk(KERN_WARNING "windfarm: System fan config not found " + "for this machine model, max fan speed\n"); + goto fail; + } + + /* Alloc & initialize state */ + wf_smu_sys_fans = kmalloc(sizeof(struct wf_smu_sys_fans_state), + GFP_KERNEL); + if (wf_smu_sys_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_sys_fans->ticks = 1; + wf_smu_sys_fans->scale0 = param->scale0; + wf_smu_sys_fans->offset0 = param->offset0; + wf_smu_sys_fans->scale1 = param->scale1; + wf_smu_sys_fans->offset1 = param->offset1; + + /* Fill PID params */ + pid_param.gd = param->gd; + pid_param.gp = param->gp; + pid_param.gr = param->gr; + pid_param.interval = WF_SMU_SYS_FANS_INTERVAL; + pid_param.history_len = WF_SMU_SYS_FANS_HISTORY_SIZE; + pid_param.itarget = param->itarget; + pid_param.min = fan_system->ops->get_min(fan_system); + pid_param.max = fan_system->ops->get_max(fan_system); + if (fan_hd) { + pid_param.min =max(pid_param.min,fan_hd->ops->get_min(fan_hd)); + pid_param.max =min(pid_param.max,fan_hd->ops->get_max(fan_hd)); + } + wf_pid_init(&wf_smu_sys_fans->pid, &pid_param); + + DBG("wf: System Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + 
FIX32TOPRINT(pid_param.itarget), pid_param.min, pid_param.max); + return; + + fail: + + if (fan_system) + wf_control_set_max(fan_system); + if (fan_hd) + wf_control_set_max(fan_hd); +} + +static void wf_smu_sys_fans_tick(struct wf_smu_sys_fans_state *st) +{ + s32 new_setpoint, temp, scaled, cputarget; + int rc; + + if (--st->ticks != 0) { + if (readjust) + goto readjust; + return; + } + st->ticks = WF_SMU_SYS_FANS_INTERVAL; + + rc = sensor_hd_temp->ops->get_value(sensor_hd_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: HD temp sensor error %d\n", + rc); + failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: System Fans tick ! HD temp: %d.%03d\n", + FIX32TOPRINT(temp)); + + if (temp > (st->pid.param.itarget + 0x50000)) + failure_state |= FAILURE_OVERTEMP; + + new_setpoint = wf_pid_run(&st->pid, temp); + + DBG("wf_smu: new_setpoint: %d RPM\n", (int)new_setpoint); + + scaled = ((((s64)new_setpoint) * (s64)st->scale0) >> 12) + st->offset0; + + DBG("wf_smu: scaled setpoint: %d RPM\n", (int)scaled); + + cputarget = wf_smu_cpu_fans ? 
wf_smu_cpu_fans->pid.target : 0; + cputarget = ((((s64)cputarget) * (s64)st->scale1) >> 12) + st->offset1; + scaled = max(scaled, cputarget); + scaled = max(scaled, st->pid.param.min); + scaled = min(scaled, st->pid.param.max); + + DBG("wf_smu: adjusted setpoint: %d RPM\n", (int)scaled); + + if (st->sys_setpoint == scaled && new_setpoint == st->hd_setpoint) + return; + st->sys_setpoint = scaled; + st->hd_setpoint = new_setpoint; + readjust: + if (fan_system && failure_state == 0) { + rc = fan_system->ops->set_value(fan_system, st->sys_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: Sys fan error %d\n", + rc); + failure_state |= FAILURE_FAN; + } + } + if (fan_hd && failure_state == 0) { + rc = fan_hd->ops->set_value(fan_hd, st->hd_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: HD fan error %d\n", + rc); + failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_cpu_fans(void) +{ + struct wf_cpu_pid_param pid_param; + struct smu_sdbp_header *hdr; + struct smu_sdbp_cpupiddata *piddata; + struct smu_sdbp_fvt *fvt; + s32 tmax, tdelta, maxpow, powadj; + + /* First, locate the PID params in SMU SBD */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); + if (hdr == 0) { + printk(KERN_WARNING "windfarm: CPU PID fan config not found " + "max fan speed\n"); + goto fail; + } + piddata = (struct smu_sdbp_cpupiddata *)&hdr[1]; + + /* Get the FVT params for operating point 0 (the only supported one + * for now) in order to get tmax + */ + hdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); + if (hdr) { + fvt = (struct smu_sdbp_fvt *)&hdr[1]; + tmax = ((s32)fvt->maxtemp) << 16; + } else + tmax = 0x5e0000; /* 94 degree default */ + + /* Alloc & initialize state */ + wf_smu_cpu_fans = kmalloc(sizeof(struct wf_smu_cpu_fans_state), + GFP_KERNEL); + if (wf_smu_cpu_fans == NULL) + goto fail; + wf_smu_cpu_fans->ticks = 1; + + if (machine == MACHINE_PM81) { + wf_smu_cpu_fans->scale = WF_SMU_CPU_FANS_SIBLING_SCALE; + wf_smu_cpu_fans->offset = 
WF_SMU_CPU_FANS_SIBLING_OFFSET; + } + + /* Fill PID params */ + pid_param.interval = WF_SMU_CPU_FANS_INTERVAL; + pid_param.history_len = piddata->history_len; + if (pid_param.history_len > WF_CPU_PID_MAX_HISTORY) { + printk(KERN_WARNING "windfarm: History size overflow on " + "CPU control loop (%d)\n", piddata->history_len); + pid_param.history_len = WF_CPU_PID_MAX_HISTORY; + } + pid_param.gd = piddata->gd; + pid_param.gp = piddata->gp; + pid_param.gr = piddata->gr / pid_param.history_len; + + tdelta = ((s32)piddata->target_temp_delta) << 16; + maxpow = ((s32)piddata->max_power) << 16; + powadj = ((s32)piddata->power_adj) << 16; + + pid_param.tmax = tmax; + pid_param.ttarget = tmax - tdelta; + pid_param.pmaxadj = maxpow - powadj; + + pid_param.min = fan_cpu_main->ops->get_min(fan_cpu_main); + pid_param.max = fan_cpu_main->ops->get_max(fan_cpu_main); + + wf_cpu_pid_init(&wf_smu_cpu_fans->pid, &pid_param); + + DBG("wf: CPU Fan control initialized.\n"); + DBG(" ttarged=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(pid_param.ttarget), FIX32TOPRINT(pid_param.tmax), + pid_param.min, pid_param.max); + + return; + + fail: + printk(KERN_WARNING "windfarm: CPU fan config not found\n" + "for this machine model, max fan speed\n"); + + if (cpufreq_clamp) + wf_control_set_max(cpufreq_clamp); + if (fan_cpu_main) + wf_control_set_max(fan_cpu_main); +} + +static void wf_smu_cpu_fans_tick(struct wf_smu_cpu_fans_state *st) +{ + s32 new_setpoint, temp, power, systarget; + int rc; + + if (--st->ticks != 0) { + if (readjust) + goto readjust; + return; + } + st->ticks = WF_SMU_CPU_FANS_INTERVAL; + + rc = sensor_cpu_temp->ops->get_value(sensor_cpu_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: CPU temp sensor error %d\n", + rc); + failure_state |= FAILURE_SENSOR; + return; + } + + rc = sensor_cpu_power->ops->get_value(sensor_cpu_power, &power); + if (rc) { + printk(KERN_WARNING "windfarm: CPU power sensor error %d\n", + rc); + failure_state |= 
FAILURE_SENSOR; + return; + } + + DBG("wf_smu: CPU Fans tick ! CPU temp: %d.%03d, power: %d.%03d\n", + FIX32TOPRINT(temp), FIX32TOPRINT(power)); + +#ifdef HACKED_OVERTEMP + if (temp > 0x4a0000) + failure_state |= FAILURE_OVERTEMP; +#else + if (temp > st->pid.param.tmax) + failure_state |= FAILURE_OVERTEMP; +#endif + new_setpoint = wf_cpu_pid_run(&st->pid, power, temp); + + DBG("wf_smu: new_setpoint: %d RPM\n", (int)new_setpoint); + + if (machine == MACHINE_PM81) { + systarget = wf_smu_sys_fans ? wf_smu_sys_fans->pid.target : 0; + systarget = ((((s64)systarget) * (s64)st->scale) >> 12) + + st->offset; + new_setpoint = max(new_setpoint, systarget); + new_setpoint = max(new_setpoint, st->pid.param.min); + new_setpoint = min(new_setpoint, st->pid.param.max); + + DBG("wf_smu: adjusted setpoint: %d RPM\n", (int)new_setpoint); + } + if (st->cpu_setpoint == new_setpoint) + return; + st->cpu_setpoint = new_setpoint; + readjust: + if (fan_cpu_main && failure_state == 0) { + rc = fan_cpu_main->ops->set_value(fan_cpu_main, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU main fan" + " error %d\n", rc); + failure_state |= FAILURE_FAN; + } + } + if (fan_cpu_second && failure_state == 0) { + rc = fan_cpu_second->ops->set_value(fan_cpu_second, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU second fan" + " error %d\n", rc); + failure_state |= FAILURE_FAN; + } + } + if (fan_cpu_third && failure_state == 0) { + rc = fan_cpu_main->ops->set_value(fan_cpu_third, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU third fan" + " error %d\n", rc); + failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_drive_fans(void) +{ + struct wf_pid_param param = { + .interval = 5, + .history_len = 2, + .gd = 0x01e00000, + .gp = 0x00500000, + .gr = 0x00000000, + .itarget = 0x00200000, + }; + + /* Alloc & initialize state */ + wf_smu_drive_fans = kmalloc(sizeof(struct wf_smu_drive_fans_state), + GFP_KERNEL); + if 
(wf_smu_drive_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_drive_fans->ticks = 1; + + /* Fill PID params */ + param.additive = (fan_hd->type == WF_CONTROL_RPM_FAN); + param.min = fan_hd->ops->get_min(fan_hd); + param.max = fan_hd->ops->get_max(fan_hd); + wf_pid_init(&wf_smu_drive_fans->pid, &param); + + DBG("wf: Drive Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(param.itarget), param.min, param.max); + return; + + fail: + if (fan_hd) + wf_control_set_max(fan_hd); +} + +static void wf_smu_drive_fans_tick(struct wf_smu_drive_fans_state *st) +{ + s32 new_setpoint, temp; + int rc; + + if (--st->ticks != 0) { + if (readjust) + goto readjust; + return; + } + st->ticks = st->pid.param.interval; + + rc = sensor_hd_temp->ops->get_value(sensor_hd_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: HD temp sensor error %d\n", + rc); + failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: Drive Fans tick !
HD temp: %d.%03d\n", + FIX32TOPRINT(temp)); + + if (temp > (st->pid.param.itarget + 0x50000)) + failure_state |= FAILURE_OVERTEMP; + + new_setpoint = wf_pid_run(&st->pid, temp); + + DBG("wf_smu: new_setpoint: %d\n", (int)new_setpoint); + + if (st->setpoint == new_setpoint) + return; + st->setpoint = new_setpoint; + readjust: + if (fan_hd && failure_state == 0) { + rc = fan_hd->ops->set_value(fan_hd, st->setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: HD fan error %d\n", + rc); + failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_slots_fans(void) +{ + struct wf_pid_param param = { + .interval = 1, + .history_len = 8, + .gd = 0x00000000, + .gp = 0x00000000, + .gr = 0x00020000, + .itarget = 0x00000000 + }; + + /* Alloc & initialize state */ + wf_smu_slots_fans = kmalloc(sizeof(struct wf_smu_slots_fans_state), + GFP_KERNEL); + if (wf_smu_slots_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_slots_fans->ticks = 1; + + /* Fill PID params */ + param.additive = (fan_slots->type == WF_CONTROL_RPM_FAN); + param.min = fan_slots->ops->get_min(fan_slots); + param.max = fan_slots->ops->get_max(fan_slots); + wf_pid_init(&wf_smu_slots_fans->pid, &param); + + DBG("wf: Slots Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(param.itarget), param.min, param.max); + return; + + fail: + if (fan_slots) + wf_control_set_max(fan_slots); +} + +static void wf_smu_slots_fans_tick(struct wf_smu_slots_fans_state *st) +{ + s32 new_setpoint, power; + int rc; + + if (--st->ticks != 0) { + if (readjust) + goto readjust; + return; + } + st->ticks = st->pid.param.interval; + + rc = sensor_slots_power->ops->get_value(sensor_slots_power, &power); + if (rc) { + printk(KERN_WARNING "windfarm: Slots power sensor error %d\n", + rc); + failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: Slots Fans tick !
Slots power: %d.%03d\n", + FIX32TOPRINT(power)); + +#if 0 /* Check what makes a good overtemp condition */ + if (power > (st->pid.param.itarget + 0x50000)) + failure_state |= FAILURE_OVERTEMP; +#endif + + new_setpoint = wf_pid_run(&st->pid, power); + + DBG("wf_smu: new_setpoint: %d\n", (int)new_setpoint); + + if (st->setpoint == new_setpoint) + return; + st->setpoint = new_setpoint; + readjust: + if (fan_slots && failure_state == 0) { + rc = fan_slots->ops->set_value(fan_slots, st->setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: Slots fan error %d\n", + rc); + failure_state |= FAILURE_FAN; + } + } +} + + +/* + * ****** Attributes ****** + * + */ + +#define BUILD_SHOW_FUNC_FIX(name, data) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + ssize_t r; \ + s32 val = 0; \ + data->ops->get_value(data, &val); \ + r = sprintf(buf, "%d.%03d", FIX32TOPRINT(val)); \ + return r; \ +} \ +static DEVICE_ATTR(name,S_IRUGO,show_##name, NULL); + + +#define BUILD_SHOW_FUNC_INT(name, data) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + s32 val = 0; \ + data->ops->get_value(data, &val); \ + return sprintf(buf, "%d", val); \ +} \ +static DEVICE_ATTR(name,S_IRUGO,show_##name, NULL); + +BUILD_SHOW_FUNC_INT(cpu_fan, fan_cpu_main); +BUILD_SHOW_FUNC_INT(sys_fan, fan_system); +BUILD_SHOW_FUNC_INT(hd_fan, fan_hd); +BUILD_SHOW_FUNC_INT(slots_fan, fan_slots); + +BUILD_SHOW_FUNC_FIX(cpu_temp, sensor_cpu_temp); +BUILD_SHOW_FUNC_FIX(cpu_power, sensor_cpu_power); +BUILD_SHOW_FUNC_FIX(hd_temp, sensor_hd_temp); +BUILD_SHOW_FUNC_FIX(slots_power, sensor_slots_power); + +/* + * ****** Setup / Init / Misc ... 
****** + * + */ + +static void wf_smu_tick(void) +{ + unsigned int last_failure = failure_state; + unsigned int new_failure; + + if (!started) { + DBG("wf: creating control loops !\n"); + if (machine == MACHINE_PM81) { + wf_smu_create_sys_fans(); + wf_smu_create_cpu_fans(); + } else if (machine == MACHINE_PM91) { + wf_smu_create_drive_fans(); + wf_smu_create_slots_fans(); + wf_smu_create_cpu_fans(); + } + started = 1; + } + + /* Skipping ticks */ + if (skipping && --skipping) + return; + + failure_state = 0; + if (wf_smu_sys_fans) + wf_smu_sys_fans_tick(wf_smu_sys_fans); + if (wf_smu_drive_fans) + wf_smu_drive_fans_tick(wf_smu_drive_fans); + if (wf_smu_slots_fans) + wf_smu_slots_fans_tick(wf_smu_slots_fans); + if (wf_smu_cpu_fans) + wf_smu_cpu_fans_tick(wf_smu_cpu_fans); + + readjust = 0; + new_failure = failure_state & ~last_failure; + + /* If entering failure mode, clamp cpufreq and ramp all + * fans to full speed. + */ + if (failure_state && !last_failure) { + if (cpufreq_clamp) + wf_control_set_max(cpufreq_clamp); + if (fan_system) + wf_control_set_max(fan_system); + if (fan_cpu_main) + wf_control_set_max(fan_cpu_main); + if (fan_cpu_second) + wf_control_set_max(fan_cpu_second); + if (fan_cpu_third) + wf_control_set_max(fan_cpu_third); + if (fan_hd) + wf_control_set_max(fan_hd); + if (fan_slots) + wf_control_set_max(fan_slots); + } + + /* If leaving failure mode, unclamp cpufreq and readjust + * all fans on next iteration + */ + if (!failure_state && last_failure) { + if (cpufreq_clamp) + wf_control_set_min(cpufreq_clamp); + readjust = 1; + } + + /* Overtemp condition detected, notify and start skipping a couple + * ticks to let the temperature go down + */ + if (new_failure & FAILURE_OVERTEMP) { + wf_set_overtemp(); + skipping = 2; + } + + /* We only clear the overtemp condition if overtemp is cleared + * _and_ no other failure is present. 
Since a sensor error will + * clear the overtemp condition (can't measure temperature) at + * the control loop levels, but we don't want to keep it clear + * here in this case + */ + if (new_failure == 0 && last_failure & FAILURE_OVERTEMP) + wf_clear_overtemp(); +} + +static void wf_smu_new_control81(struct wf_control *ct) +{ + if (all_controls_ok) + return; + + if (fan_cpu_main == NULL && !strcmp(ct->name, "cpu-fan")) { + if (wf_get_control(ct) == 0) { + fan_cpu_main = ct; + device_create_file(wf_dev, &dev_attr_cpu_fan); + } + } + + if (fan_system == NULL && !strcmp(ct->name, "system-fan")) { + if (wf_get_control(ct) == 0) { + fan_system = ct; + device_create_file(wf_dev, &dev_attr_sys_fan); + } + } + + if (cpufreq_clamp == NULL && !strcmp(ct->name, "cpufreq-clamp")) { + if (wf_get_control(ct) == 0) + cpufreq_clamp = ct; + } + + /* Darwin property list says the HD fan is only for model ID + * 0, 1, 2 and 3 + */ + + if (mach_model > 3) { + if (fan_system && fan_cpu_main && cpufreq_clamp) + all_controls_ok = 1; + return; + } + + if (fan_hd == NULL && !strcmp(ct->name, "drive-bay-fan")) { + if (wf_get_control(ct) == 0) { + fan_hd = ct; + device_create_file(wf_dev, &dev_attr_hd_fan); + } + } + + if (fan_system && fan_hd && fan_cpu_main && cpufreq_clamp) + all_controls_ok = 1; +} + +static void wf_smu_new_control91(struct wf_control *ct) +{ + if (all_controls_ok) + return; + + if (fan_cpu_main == NULL && !strcmp(ct->name, "cpu-rear-fan-0")) { + if (wf_get_control(ct) == 0) { + fan_cpu_main = ct; + device_create_file(wf_dev, &dev_attr_cpu_fan); + } + } + + if (fan_cpu_second == NULL && !strcmp(ct->name, "cpu-rear-fan-1")) { + if (wf_get_control(ct) == 0) + fan_cpu_second = ct; + } + + if (fan_cpu_third == NULL && !strcmp(ct->name, "cpu-front-fan-0")) { + if (wf_get_control(ct) == 0) + fan_cpu_third = ct; + } + + if (cpufreq_clamp == NULL && !strcmp(ct->name, "cpufreq-clamp")) { + if (wf_get_control(ct) == 0) + cpufreq_clamp = ct; + } + + if (fan_hd == NULL && 
!strcmp(ct->name, "drive-bay-fan")) { + if (wf_get_control(ct) == 0) { + fan_hd = ct; + device_create_file(wf_dev, &dev_attr_hd_fan); + } + } + + if (fan_slots == NULL && !strcmp(ct->name, "slots-fan")) { + if (wf_get_control(ct) == 0) { + fan_slots = ct; + device_create_file(wf_dev, &dev_attr_slots_fan); + } + } + + if (fan_cpu_main && (fan_cpu_second || fan_cpu_third) && fan_hd && + fan_slots && cpufreq_clamp) + all_controls_ok = 1; +} + +static void wf_smu_new_sensor(struct wf_sensor *sr) +{ + if (all_sensors_ok) + return; + + if (sensor_cpu_power == NULL && !strcmp(sr->name, "cpu-power")) { + if (wf_get_sensor(sr) == 0) { + sensor_cpu_power = sr; + device_create_file(wf_dev, &dev_attr_cpu_power); + } + } + + if (sensor_cpu_temp == NULL && !strcmp(sr->name, "cpu-temp")) { + if (wf_get_sensor(sr) == 0) { + sensor_cpu_temp = sr; + device_create_file(wf_dev, &dev_attr_cpu_temp); + } + } + + if (sensor_hd_temp == NULL && !strcmp(sr->name, "hd-temp")) { + if (wf_get_sensor(sr) == 0) { + sensor_hd_temp = sr; + device_create_file(wf_dev, &dev_attr_hd_temp); + } + } + + if (sensor_slots_power == NULL && !strcmp(sr->name, "slots-power")) { + if (wf_get_sensor(sr) == 0) { + sensor_slots_power = sr; + device_create_file(wf_dev, &dev_attr_slots_power); + } + } + + if (machine == MACHINE_PM81 && sensor_cpu_power && + sensor_cpu_temp && sensor_hd_temp) + all_sensors_ok = 1; + + if (machine == MACHINE_PM91 && sensor_cpu_power && + sensor_cpu_temp && sensor_hd_temp && sensor_slots_power) + all_sensors_ok = 1; +} + + +static int wf_smu_notify(struct notifier_block *self, + unsigned long event, void *data) +{ + switch(event) { + case WF_EVENT_NEW_CONTROL: + DBG("wf: new control %s detected\n", + ((struct wf_control *)data)->name); + if (machine == MACHINE_PM81) + wf_smu_new_control81(data); + else + wf_smu_new_control91(data); + readjust = 1; + break; + case WF_EVENT_NEW_SENSOR: + DBG("wf: new sensor %s detected\n", + ((struct wf_sensor *)data)->name); + wf_smu_new_sensor(data); 
+ break; + case WF_EVENT_TICK: + if (all_controls_ok && all_sensors_ok) + wf_smu_tick(); + }; + + return 0; +} + +static struct notifier_block events = { + .notifier_call = wf_smu_notify, +}; + +static int wf_init_pm81(void) +{ + struct smu_sdbp_header *hdr; + + machine = MACHINE_PM81; + + hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); + if (hdr != 0) { + struct smu_sdbp_sensortree *st = + (struct smu_sdbp_sensortree *)&hdr[1]; + mach_model = st->model_id; + } + + printk(KERN_INFO "windfarm: Initializing for iMacG5 model ID %d\n", + mach_model); + + return 0; +} + +static int wf_init_pm91(void) +{ + machine = MACHINE_PM91; + + printk(KERN_INFO "windfarm: Initializing for Desktop G5 model\n"); + + return 0; +} + +static int wf_smu_probe(struct device *ddev) +{ + wf_dev = ddev; + + wf_register_client(&events); + + return 0; +} + +static int wf_smu_remove(struct device *ddev) +{ + wf_unregister_client(&events); + + /* XXX We don't have yet a guarantee that our callback isn't + * in progress when returning from wf_unregister_client, so + * we add an arbitrary delay. I'll have to fix that in the core + */ + msleep(1000); + + /* Release all sensors */ + /* One more crappy race: I don't think we have any guarantee here + * that the attribute callback won't race with the sensor beeing + * disposed of, and I'm not 100% certain what best way to deal + * with that except by adding locks all over... I'll do that + * eventually but heh, who ever rmmod this module anyway ? 
+ */ + if (sensor_cpu_power) { + device_remove_file(wf_dev, &dev_attr_cpu_power); + wf_put_sensor(sensor_cpu_power); + } + if (sensor_cpu_temp) { + device_remove_file(wf_dev, &dev_attr_cpu_temp); + wf_put_sensor(sensor_cpu_temp); + } + if (sensor_hd_temp) { + device_remove_file(wf_dev, &dev_attr_hd_temp); + wf_put_sensor(sensor_hd_temp); + } + if (sensor_slots_power) { + device_remove_file(wf_dev, &dev_attr_slots_power); + wf_put_sensor(sensor_slots_power); + } + + /* Release all controls */ + if (fan_cpu_main) { + device_remove_file(wf_dev, &dev_attr_cpu_fan); + wf_put_control(fan_cpu_main); + } + if (fan_cpu_second) + wf_put_control(fan_cpu_second); + if (fan_cpu_third) + wf_put_control(fan_cpu_third); + if (fan_hd) { + device_remove_file(wf_dev, &dev_attr_hd_fan); + wf_put_control(fan_hd); + } + if (fan_system) { + device_remove_file(wf_dev, &dev_attr_sys_fan); + wf_put_control(fan_system); + } + if (fan_slots) { + device_remove_file(wf_dev, &dev_attr_slots_fan); + wf_put_control(fan_slots); + } + if (cpufreq_clamp) + wf_put_control(cpufreq_clamp); + + /* Destroy control loops state structures */ + if (wf_smu_sys_fans) + kfree(wf_smu_sys_fans); + if (wf_smu_slots_fans) + kfree(wf_smu_cpu_fans); + if (wf_smu_drive_fans) + kfree(wf_smu_cpu_fans); + if (wf_smu_cpu_fans) + kfree(wf_smu_cpu_fans); + + wf_dev = NULL; + + return 0; +} + +static struct device_driver wf_smu_driver = { + .name = "windfarm", + .bus = &platform_bus_type, + .probe = wf_smu_probe, + .remove = wf_smu_remove, +}; + + +static int __init wf_smu_init(void) +{ + int rc = -ENODEV; + + if (machine_is_compatible("PowerMac8,1") || + machine_is_compatible("PowerMac8,2")) + rc = wf_init_pm81(); + else if (machine_is_compatible("PowerMac9,1")) + rc = wf_init_pm91(); + + if (rc == 0) { +#ifdef MODULE + request_module("windfarm_smu_controls"); + request_module("windfarm_smu_sensors"); + request_module("windfarm_lm75_sensor"); + +#endif /* MODULE */ + driver_register(&wf_smu_driver); + } + + return rc; +} + 
+static void __exit wf_smu_exit(void) +{ + + driver_unregister(&wf_smu_driver); +} + + +module_init(wf_smu_init); +module_exit(wf_smu_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("Thermal control logic for SMU based PowerMacs"); +MODULE_LICENSE("GPL"); + Index: linux-work/include/asm-ppc64/smu.h =================================================================== --- linux-work.orig/include/asm-ppc64/smu.h 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/include/asm-ppc64/smu.h 2005-10-04 15:17:33.000000000 +1000 @@ -41,8 +41,30 @@ /* * Fan control * - * This is a "mux" for fan control commands, first byte is the - * "sub" command. + * This is a "mux" for fan control commands. The command seem to + * act differently based on the number of arguments. With 1 byte + * of argument, this seem to be queries for fans status, setpoint, + * etc..., while with 0xe arguments, we will set the fans speeds. + * + * Queries (1 byte arg): + * --------------------- + * + * arg=0x01: read RPM fans status + * arg=0x02: read RPM fans setpoint + * arg=0x11: read PWM fans status + * arg=0x12: read PWM fans setpoint + * + * the "status" queries return the current speed while the "setpoint" ones + * return the programmed/target speed. It _seems_ that the result is a bit + * mask in the first byte of active/available fans, followed by 6 words (16 + * bits) containing the requested speed. + * + * Setpoint (14 bytes arg): + * ------------------------ + * + * first arg byte is 0 for RPM fans and 0x10 for PWM. Second arg byte is the + * mask of fans affected by the command. 
Followed by 6 words containing the + * setpoint value for selected fans in the mask (or 0 if mask value is 0) */ #define SMU_CMD_FAN_COMMAND 0x4a @@ -169,7 +191,16 @@ #define SMU_CMD_POWER_SHUTDOWN "SHUTDOWN" #define SMU_CMD_POWER_VOLTAGE_SLEW "VSLEW" -/* Misc commands +/* + * Read ADC sensors + * + * This command takes one byte of parameter: the sensor ID (or "reg" + * value in the device-tree) and returns a 16 bits value + */ +#define SMU_CMD_READ_ADC 0xd8 + +/* + * Misc commands * * This command seem to be a grab bag of various things */ @@ -386,10 +417,12 @@ }; /* - * 32 bits integers are usually encoded with 2x16 bits swapped, - * this demangles them + * demangle 16 and 32 bits integer in some SMU partitions + * (currently, afaik, this concerns only the FVT partition + * (0x12) */ -//#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) +#define SMU_U16_MIX(x) le16_to_cpu(x); +#define SMU_U32_MIX(x) ((((x) & 0xff00ff00u) >> 8)|(((x) & 0x00ff00ffu) << 8)) /* This is the definition of the SMU sdb-partition-0x12 table (called * CPU F/V/T operating points in Darwin). The definition for all those @@ -399,7 +432,8 @@ struct smu_sdbp_fvt { __u32 sysclk; /* Base SysClk frequency in Hz for - * this operating point + * this operating point. Value need to + * be unmixed with SMU_U32_MIX() */ __u8 pad; __u8 maxtemp; /* Max temp. supported by this @@ -408,10 +442,69 @@ __u16 volts[3]; /* CPU core voltage for the 3 * PowerTune modes, a mode with - * 0V = not supported. + * 0V = not supported. 
Value need + * to be unmixed with SMU_U16_MIX() */ }; +/* This partition contains voltage & current sensor calibration + * informations + */ +#define SMU_SDB_CPUVCP_ID 0x21 + +struct smu_sdbp_cpuvcp { + __u16 volt_scale; /* u4.12 fixed point */ + __s16 volt_offset; /* s4.12 fixed point */ + __u16 curr_scale; /* u4.12 fixed point */ + __s16 curr_offset; /* s4.12 fixed point */ + __s32 power_quads[3]; /* s4.28 fixed point */ +}; + +/* This partition contains CPU thermal diode calibration + */ +#define SMU_SDB_CPUDIODE_ID 0x18 + +struct smu_sdbp_cpudiode { + __u16 m_value; /* u1.15 fixed point */ + __s16 b_value; /* s10.6 fixed point */ + +}; + +/* This partition contains Slots power calibration + */ +#define SMU_SDB_SLOTSPOW_ID 0x78 + +struct smu_sdbp_slotspow { + __u16 pow_scale; /* u4.12 fixed point */ + __s16 pow_offset; /* s4.12 fixed point */ +}; + +/* This partition contains machine specific version information about + * the sensor/control layout + */ +#define SMU_SDB_SENSORTREE_ID 0x25 + +struct smu_sdbp_sensortree { + u8 model_id; + u8 unknown[3]; +}; + +/* This partition contains CPU thermal control PID informations. 
So far + * only single CPU machines have been seen with an SMU, so we assume this + * carries only informations for those + */ +#define SMU_SDB_CPUPIDDATA_ID 0x17 + +struct smu_sdbp_cpupiddata { + u8 unknown1; + u8 target_temp_delta; + u8 unknown2; + u8 history_len; + s16 power_adj; + u16 max_power; + s32 gp,gr,gd; +}; + /* Other partitions without known structures */ #define SMU_SDB_DEBUG_SWITCHES_ID 0x05 Index: linux-work/drivers/macintosh/windfarm_lm75_sensor.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_lm75_sensor.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,255 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.1" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +struct wf_lm75_sensor { + int ds1775 : 1; + int inited : 1; + struct i2c_client i2c; + struct wf_sensor sens; +}; +#define wf_to_lm75(c) container_of(c, struct wf_lm75_sensor, sens) +#define i2c_to_lm75(c) container_of(c, struct wf_lm75_sensor, i2c) + +static int wf_lm75_attach(struct i2c_adapter *adapter); +static int wf_lm75_detach(struct i2c_client *client); + +static struct i2c_driver wf_lm75_driver = { + .owner = THIS_MODULE, + .name = "wf_lm75", + .flags = I2C_DF_NOTIFY, + .attach_adapter = wf_lm75_attach, + .detach_client = wf_lm75_detach, +}; + +static int wf_lm75_get(struct wf_sensor *sr, s32 *value) +{ + struct wf_lm75_sensor *lm = wf_to_lm75(sr); + s32 data; + + if (lm->i2c.adapter == NULL) + return -ENODEV; + + /* Init chip if necessary */ + if (!lm->inited) { + u8 cfg_new, cfg = (u8)i2c_smbus_read_byte_data(&lm->i2c, 1); + + DBG("wf_lm75: Initializing %s, cfg was: %02x\n", + sr->name, cfg); + + /* clear shutdown bit, keep other settings as left by + * 
the firmware for now + */ + cfg_new = cfg & ~0x01; + i2c_smbus_write_byte_data(&lm->i2c, 1, cfg_new); + lm->inited = 1; + + /* If we just powered it up, let's wait 200 ms */ + msleep(200); + } + + /* Read temperature register */ + data = (s32)le16_to_cpu(i2c_smbus_read_word_data(&lm->i2c, 0)); + data <<= 8; + *value = data; + + return 0; +} + +static void wf_lm75_release(struct wf_sensor *sr) +{ + struct wf_lm75_sensor *lm = wf_to_lm75(sr); + + /* check if client is registered and detach from i2c */ + if (lm->i2c.adapter) { + i2c_detach_client(&lm->i2c); + lm->i2c.adapter = NULL; + } + + kfree(lm); +} + +static struct wf_sensor_ops wf_lm75_ops = { + .get_value = wf_lm75_get, + .release = wf_lm75_release, + .owner = THIS_MODULE, +}; + +static struct wf_lm75_sensor *wf_lm75_create(struct i2c_adapter *adapter, + u8 addr, int ds1775, + const char *loc) +{ + struct wf_lm75_sensor *lm; + + DBG("wf_lm75: creating %s device at address 0x%02x\n", + ds1775 ? "ds1775" : "lm75", addr); + + lm = kmalloc(sizeof(struct wf_lm75_sensor), GFP_KERNEL); + if (lm == NULL) + return NULL; + memset(lm, 0, sizeof(struct wf_lm75_sensor)); + + /* Usual rant about sensor names not beeing very consistent in + * the device-tree, oh well ... + * Add more entries below as you deal with more setups + */ + if (!strcmp(loc, "Hard drive") || !strcmp(loc, "DRIVE BAY")) + lm->sens.name = "hd-temp"; + else + goto fail; + + lm->inited = 0; + lm->sens.ops = &wf_lm75_ops; + lm->ds1775 = ds1775; + lm->i2c.addr = (addr >> 1) & 0x7f; + lm->i2c.adapter = adapter; + lm->i2c.driver = &wf_lm75_driver; + strncpy(lm->i2c.name, lm->sens.name, I2C_NAME_SIZE-1); + + if (i2c_attach_client(&lm->i2c)) { + printk(KERN_ERR "windfarm: failed to attach %s %s to i2c\n", + ds1775 ? 
"ds1775" : "lm75", lm->i2c.name); + goto fail; + } + + if (wf_register_sensor(&lm->sens)) { + i2c_detach_client(&lm->i2c); + goto fail; + } + + return lm; + fail: + kfree(lm); + return NULL; +} + +static int wf_lm75_attach(struct i2c_adapter *adapter) +{ + u8 bus_id; + struct device_node *smu, *bus, *dev; + + /* We currently only deal with LM75's hanging off the SMU + * i2c busses. If we extend that driver to other/older + * machines, we should split this function into SMU-i2c, + * keywest-i2c, PMU-i2c, ... + */ + + DBG("wf_lm75: adapter %s detected\n", adapter->name); + + if (strncmp(adapter->name, "smu-i2c-", 8) != 0) + return 0; + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return 0; + + /* Look for the bus in the device-tree */ + bus_id = (u8)simple_strtoul(adapter->name + 8, NULL, 16); + + DBG("wf_lm75: bus ID is %x\n", bus_id); + + /* Look for sensors subdir */ + for (bus = NULL; + (bus = of_get_next_child(smu, bus)) != NULL;) { + u32 *reg; + + if (strcmp(bus->name, "i2c")) + continue; + reg = (u32 *)get_property(bus, "reg", NULL); + if (reg == NULL) + continue; + if (bus_id == *reg) + break; + } + of_node_put(smu); + if (bus == NULL) { + printk(KERN_WARNING "windfarm: SMU i2c bus 0x%x not found" + " in device-tree !\n", bus_id); + return 0; + } + + DBG("wf_lm75: bus found, looking for device...\n"); + + /* Now look for lm75(s) in there */ + for (dev = NULL; + (dev = of_get_next_child(bus, dev)) != NULL;) { + const char *loc = + get_property(dev, "hwsensor-location", NULL); + u32 *reg = (u32 *)get_property(dev, "reg", NULL); + DBG(" dev: %s... 
(loc: %p, reg: %p)\n", dev->name, loc, reg); + if (loc == NULL || reg == NULL) + continue; + /* real lm75 */ + if (device_is_compatible(dev, "lm75")) + wf_lm75_create(adapter, *reg, 0, loc); + /* ds1775 (compatible, better resolution */ + else if (device_is_compatible(dev, "ds1775")) + wf_lm75_create(adapter, *reg, 1, loc); + } + + of_node_put(bus); + + return 0; +} + +static int wf_lm75_detach(struct i2c_client *client) +{ + struct wf_lm75_sensor *lm = i2c_to_lm75(client); + + DBG("wf_lm75: i2c detatch called for %s\n", lm->sens.name); + + /* Mark client detached */ + lm->i2c.adapter = NULL; + + /* release sensor */ + wf_unregister_sensor(&lm->sens); + + return 0; +} + +static int __init wf_lm75_sensor_init(void) +{ + int rc; + + rc = i2c_add_driver(&wf_lm75_driver); + if (rc < 0) + return rc; + return 0; +} + +static void __exit wf_lm75_sensor_exit(void) +{ + i2c_del_driver(&wf_lm75_driver); +} + + +module_init(wf_lm75_sensor_init); +module_exit(wf_lm75_sensor_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("LM75 sensor objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + Index: linux-work/drivers/macintosh/windfarm_pid.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_pid.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,146 @@ +/* + * Windfarm PowerMac thermal control. Generic PID helpers + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + */ + +#include +#include +#include +#include +#include +#include + +#include "windfarm_pid.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) 
do { } while(0) +#endif + +void wf_pid_init(struct wf_pid_state *st, struct wf_pid_param *param) +{ + memset(st, 0, sizeof(struct wf_pid_state)); + st->param = *param; + st->first = 1; +} +EXPORT_SYMBOL_GPL(wf_pid_init); + +s32 wf_pid_run(struct wf_pid_state *st, s32 new_sample) +{ + s64 error, integ, deriv; + s32 target; + int i, hlen = st->param.history_len; + + /* Calculate error term */ + error = new_sample - st->param.itarget; + + /* Get samples into our history buffer */ + if (st->first) { + for (i = 0; i < hlen; i++) { + st->samples[i] = new_sample; + st->errors[i] = error; + } + st->first = 0; + st->index = 0; + } else { + st->index = (st->index + 1) % hlen; + st->samples[st->index] = new_sample; + st->errors[st->index] = error; + } + + /* Calculate integral term */ + for (i = 0, integ = 0; i < hlen; i++) + integ += st->errors[(st->index + hlen - i) % hlen]; + integ *= st->param.interval; + + /* Calculate derivative term */ + deriv = st->errors[st->index] - + st->errors[(st->index + hlen - 1) % hlen]; + deriv /= st->param.interval; + + /* Calculate target */ + target = (s32)((integ * (s64)st->param.gr + deriv * (s64)st->param.gd + + error * (s64)st->param.gp) >> 36); + if (st->param.additive) + target += st->target; + target = max(target, st->param.min); + target = min(target, st->param.max); + st->target = target; + + return st->target; +} +EXPORT_SYMBOL_GPL(wf_pid_run); + +void wf_cpu_pid_init(struct wf_cpu_pid_state *st, + struct wf_cpu_pid_param *param) +{ + memset(st, 0, sizeof(struct wf_cpu_pid_state)); + st->param = *param; + st->first = 1; +} +EXPORT_SYMBOL_GPL(wf_cpu_pid_init); + +s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 new_power, s32 new_temp) +{ + s64 error, integ, deriv, prop; + s32 target, sval, adj; + int i, hlen = st->param.history_len; + + /* Calculate error term */ + error = st->param.pmaxadj - new_power; + + /* Get samples into our history buffer */ + if (st->first) { + for (i = 0; i < hlen; i++) { + st->powers[i] = new_power; 
+ st->errors[i] = error; + } + st->temps[0] = st->temps[1] = new_temp; + st->first = 0; + st->index = st->tindex = 0; + } else { + st->index = (st->index + 1) % hlen; + st->powers[st->index] = new_power; + st->errors[st->index] = error; + st->tindex = (st->tindex + 1) % 2; + st->temps[st->tindex] = new_temp; + } + + /* Calculate integral term */ + for (i = 0, integ = 0; i < hlen; i++) + integ += st->errors[(st->index + hlen - i) % hlen]; + integ *= st->param.interval; + integ *= st->param.gr; + sval = st->param.tmax - ((integ >> 20) & 0xffffffff); + adj = min(st->param.ttarget, sval); + + DBG("integ: %lx, sval: %lx, adj: %lx\n", integ, sval, adj); + + /* Calculate derivative term */ + deriv = st->temps[st->tindex] - + st->temps[(st->tindex + 2 - 1) % 2]; + deriv /= st->param.interval; + deriv *= st->param.gd; + + /* Calculate proportional term */ + prop = (new_temp - adj); + prop *= st->param.gp; + + DBG("deriv: %lx, prop: %lx\n", deriv, prop); + + /* Calculate target */ + target = st->target + (s32)((deriv + prop) >> 36); + target = max(target, st->param.min); + target = min(target, st->param.max); + st->target = target; + + return st->target; +} +EXPORT_SYMBOL_GPL(wf_cpu_pid_run); Index: linux-work/drivers/macintosh/windfarm_pid.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_pid.h 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,84 @@ +/* + * Windfarm PowerMac thermal control. Generic PID helpers + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + * + * This is a pair of generic PID helpers that can be used by + * control loops. 
One is the basic PID implementation, the + * other one is more specifically tailored to the loops used + * for CPU control with 2 input sample types (temp and power) + */ + +/* + * *** Simple PID *** + */ + +#define WF_PID_MAX_HISTORY 32 + +/* This parameter array is passed to the PID algorithm. Currently, + * we don't support changing parameters on the fly as it's not needed + * but could be implemented (with necessary adjustment of the history + * buffer + */ +struct wf_pid_param { + int interval; /* Interval between samples in seconds */ + int history_len; /* Size of history buffer */ + int additive; /* 1: target relative to previous value */ + s32 gd, gp, gr; /* PID gains */ + s32 itarget; /* PID input target */ + s32 min,max; /* min and max target values */ +}; + +struct wf_pid_state { + int first; /* first run of the loop */ + int index; /* index of current sample */ + s32 target; /* current target value */ + s32 samples[WF_PID_MAX_HISTORY]; /* samples history buffer */ + s32 errors[WF_PID_MAX_HISTORY]; /* error history buffer */ + + struct wf_pid_param param; +}; + +extern void wf_pid_init(struct wf_pid_state *st, struct wf_pid_param *param); +extern s32 wf_pid_run(struct wf_pid_state *st, s32 sample); + + +/* + * *** CPU PID *** + */ + +#define WF_CPU_PID_MAX_HISTORY 32 + +/* This parameter array is passed to the CPU PID algorithm. 
Currently, + * we don't support changing parameters on the fly as it's not needed + * but could be implemented (with necessary adjustment of the history + * buffer + */ +struct wf_cpu_pid_param { + int interval; /* Interval between samples in seconds */ + int history_len; /* Size of history buffer */ + s32 gd, gp, gr; /* PID gains */ + s32 pmaxadj; /* PID max power adjust */ + s32 ttarget; /* PID input target */ + s32 tmax; /* PID input max */ + s32 min,max; /* min and max target values */ +}; + +struct wf_cpu_pid_state { + int first; /* first run of the loop */ + int index; /* index of current power */ + int tindex; /* index of current temp */ + s32 target; /* current target value */ + s32 powers[WF_PID_MAX_HISTORY]; /* power history buffer */ + s32 errors[WF_PID_MAX_HISTORY]; /* error history buffer */ + s32 temps[2]; /* temp. history buffer */ + + struct wf_cpu_pid_param param; +}; + +extern void wf_cpu_pid_init(struct wf_cpu_pid_state *st, + struct wf_cpu_pid_param *param); +extern s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 power, s32 temp); Index: linux-work/arch/ppc64/kernel/pmac_cpufreq.c =================================================================== --- linux-work.orig/arch/ppc64/kernel/pmac_cpufreq.c 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/arch/ppc64/kernel/pmac_cpufreq.c 2005-10-04 15:18:26.000000000 +1000 @@ -84,7 +84,8 @@ static u32 *g5_pmode_data; static int g5_pmode_max; static int g5_pmode_cur; - +static int g5_driver_active; +static DECLARE_MUTEX(g5_switch_mutex); static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ static int g5_fvt_count; /* number of op. 
points */ @@ -105,11 +106,20 @@ static int g5_switch_freq(int speed_mode) { + struct cpufreq_freqs freqs; int to; if (g5_pmode_cur == speed_mode) return 0; + down(&g5_switch_mutex); + + freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency; + freqs.new = g5_cpu_freqs[speed_mode].frequency; + freqs.cpu = 0; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + /* If frequency is going up, first ramp up the voltage */ if (speed_mode < g5_pmode_cur) g5_switch_volt(speed_mode); @@ -143,6 +153,10 @@ g5_pmode_cur = speed_mode; ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + up(&g5_switch_mutex); + return 0; } @@ -159,12 +173,12 @@ } /* ----------------- cpufreq bookkeeping */ -static int __pmac g5_cpufreq_verify(struct cpufreq_policy *policy) +static int g5_cpufreq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); } -static int __pmac g5_cpufreq_target(struct cpufreq_policy *policy, +static int g5_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int newstate = 0; @@ -173,10 +187,20 @@ target_freq, relation, &newstate)) return -EINVAL; + DBG("g5_cpufreq: Request to switch to %d state: %d\n", + target_freq, newstate); + return g5_switch_freq(newstate); } -static int __pmac g5_cpufreq_cpu_init(struct cpufreq_policy *policy) +static unsigned int g5_cpufreq_get_speed(unsigned int cpu) +{ + DBG("g5_cpufreq: Get speed %d\n", + g5_cpu_freqs[g5_pmode_cur].frequency); + return g5_cpu_freqs[g5_pmode_cur].frequency; +} + +static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy) { if (policy->cpu != 0) return -ENODEV; @@ -198,6 +222,7 @@ .init = g5_cpufreq_cpu_init, .verify = g5_cpufreq_verify, .target = g5_cpufreq_target, + .get = g5_cpufreq_get_speed, .attr = g5_cpu_freqs_attr, }; @@ -266,11 +291,14 @@ /* Check current frequency */ g5_pmode_cur = g5_query_freq(); - if (g5_pmode_cur > 1) { + if 
(g5_pmode_cur > 1) /* We don't support anything but 1:1 and 1:2, fixup ... */ - g5_switch_freq(1); g5_pmode_cur = 1; - } + + /* Force apply current frequency to make sure everything is in + * sync (voltage is right for example) + */ + g5_switch_freq(g5_pmode_cur); printk(KERN_INFO "Registering G5 CPU frequency driver\n"); printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", @@ -279,6 +307,8 @@ g5_cpu_freqs[g5_pmode_cur].frequency/1000); rc = cpufreq_register_driver(&g5_cpufreq_driver); + if (rc == 0) + g5_driver_active = 1; /* We keep the CPU node on hold... hopefully, Apple G5 don't have * hotplug CPU with a dynamic device-tree ... Index: linux-work/drivers/macintosh/windfarm_cpufreq_clamp.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_cpufreq_clamp.c 2005-10-04 15:19:48.000000000 +1000 @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.3" + +static int clamped; +static struct wf_control *clamp_control; + +static int clamp_notifier_call(struct notifier_block *self, + unsigned long event, void *data) +{ + struct cpufreq_policy *p = data; + unsigned long max_freq; + + if (event != CPUFREQ_ADJUST) + return 0; + + max_freq = clamped ? 
(p->cpuinfo.min_freq) : (p->cpuinfo.max_freq); + cpufreq_verify_within_limits(p, 0, max_freq); + + return 0; +} + +static struct notifier_block clamp_notifier = { + .notifier_call = clamp_notifier_call, +}; + +static int clamp_set(struct wf_control *ct, s32 value) +{ + if (value) + printk(KERN_INFO "windfarm: Clamping CPU frequency to " + "minimum !\n"); + else + printk(KERN_INFO "windfarm: CPU frequency unclamped !\n"); + clamped = value; + cpufreq_update_policy(0); + return 0; +} + +static int clamp_get(struct wf_control *ct, s32 *value) +{ + *value = clamped; + return 0; +} + +static s32 clamp_min(struct wf_control *ct) +{ + return 0; +} + +static s32 clamp_max(struct wf_control *ct) +{ + return 1; +} + +static struct wf_control_ops clamp_ops = { + .set_value = clamp_set, + .get_value = clamp_get, + .get_min = clamp_min, + .get_max = clamp_max, + .owner = THIS_MODULE, +}; + +static int __init wf_cpufreq_clamp_init(void) +{ + struct wf_control *clamp; + + clamp = kmalloc(sizeof(struct wf_control), GFP_KERNEL); + if (clamp == NULL) + return -ENOMEM; + cpufreq_register_notifier(&clamp_notifier, CPUFREQ_POLICY_NOTIFIER); + clamp->ops = &clamp_ops; + clamp->name = "cpufreq-clamp"; + if (wf_register_control(clamp)) + goto fail; + clamp_control = clamp; + return 0; + fail: + kfree(clamp); + return -ENODEV; +} + +static void __exit wf_cpufreq_clamp_exit(void) +{ + if (clamp_control) + wf_unregister_control(clamp_control); +} + + +module_init(wf_cpufreq_clamp_init); +module_exit(wf_cpufreq_clamp_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("CPU frequency clamp for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + From Eric.Piel at lifl.fr Tue Oct 4 18:49:06 2005 From: Eric.Piel at lifl.fr (Eric Piel) Date: Tue, 04 Oct 2005 10:49:06 +0200 Subject: [PATCH] ppc64: Add cpufreq support for SMU based G5 In-Reply-To: <1128403842.31063.24.camel@gaston> References: <1128403842.31063.24.camel@gaston> Message-ID: <43424202.7070600@lifl.fr> 10/04/2005 
07:30 AM, Benjamin Herrenschmidt wrote/a écrit: > iMac G5 and latest single CPU desktop G5 (SMU based machines) have a > 970FX DD3 CPU that supports frequency & voltage switching. This patch > adds support for simple dual frequency switch. It is required for the > upcoming thermal control patch for these machines. > Hello, I know only very little about cpufreq, probably you could post your patch to the cpufreq mailing list for better review : cpufreq at lists.linux.org.uk (you may have to subscribe before posting, don't remember). From what I have seen, your patch looks pretty good in general. However, is this kind of CPU only in one CPU machines? Your patch doesn't seem to support SMP, then it's probably safer to prevent compilation on an SMP kernel in the Makefile? Or you can add SMP support (shouldn't be so hard in theory, but with no hardware to test it might be pointless), you can have a look at other drivers that support it, like in arch/i386/kernel/cpu/cpufreq/p4-clockmod.c . Just a little more thing, concerning: + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; Could you have a look if you could find the real info about how long it takes to change the speed (put the worst-case latency)? Maybe the info can be found in some parts of the ROM you read? I don't know if conservative or ondemand governors are supposed to be able to mix with your code (especially wrt Windfarm) but not putting this info will prevent them from ever working... 
Cheers, Eric From benh at kernel.crashing.org Tue Oct 4 19:12:24 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 19:12:24 +1000 Subject: [PATCH] ppc64: Add cpufreq support for SMU based G5 In-Reply-To: <43424202.7070600@lifl.fr> References: <1128403842.31063.24.camel@gaston> <43424202.7070600@lifl.fr> Message-ID: <1128417145.6291.25.camel@gaston> > I know only very little about cpufreq, probably you could post your > patch to the cpufreq mailing list for better review : > cpufreq at lists.linux.org.uk (you may have to subscribe before posting, > don't remember). I should probably have CC'd it... oh well, this isn't terribly important at this point but I'll do so if I post a new release. It's powermac specific anyway. > From what I have seen, your patch looks pretty good in general. However, is > this kind of CPU only in one CPU machines? So far, only single CPU machines shipped with an SMU. > Your patch doesn't seem > to support SMP, then it's probably safer to prevent compilation on an SMP > kernel in the Makefile? Or you can add SMP support (shouldn't be so hard > in theory, but with no hardware to test it might be pointless), you can > have a look at other drivers that support it, like in > arch/i386/kernel/cpu/cpufreq/p4-clockmod.c . There are several problems (and that leads to problems in the cpufreq core too btw). The problem with the cpufreq core is that it disables adjusting of loops_per_jiffies when CONFIG_SMP is set. That can lead to pretty disastrous results when running an SMP kernel on a laptop... Fortunately, the driver provided by this patch doesn't need it as ppc64 has constant loops_per_jiffies (it uses the HW timebase which doesn't change frequency). The other problem is that the 970FX "PowerTune" mechanism will actually broadcast messages to the bus that sync all CPUs to the same speed. 
That is, all CPUs in the machine will always change frequency simultaneously, thus the whole SMP stuff doesn't make that much sense, and I'm not sure how to "inform" the cpufreq core of that fact (that changing one CPU actually triggered a change of all of them). But as I wrote earlier, there is currently no PowerMac SMP machine that has an SMU chip and a 970FX to which this driver would apply. Finally, as for preventing build with CONFIG_SMP, I think distros would kill me as I don't know any of them who intends to ship a G5 kernel with CONFIG_SMP disabled :) > Just a little more thing, concerning: > + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; > Could you have a look if you could find the real info about how long it > takes to change the speed (put the worse case latency)? I didn't find. Apple didn't bother putting it in the OF device-tree afaik, and while it might be in one of the undocumented SMU data partitions, I have no way to know. The problem isn't the frequency switch per-se which is extremely fast (and I could know), but the voltage switch that goes with it. I suppose I could measure and put an overestimated value in there, but that isn't critical for now. userland powernowd & friends work fine and I need the reduced frequency mostly for the thermal control driver so it can clamp it down when the CPU overtemps. > Maybe the info > can be found in some parts of the ROM you read? I don't know if > conservative or ondemand governors are supposed to be able to mix with > your code (especially wrt Windfarm) but not putting this info will > prevent them from ever working... Which is what I want for now, until I find out more about how well I can make them to work with those machines :) Let's call that a "conservative" approach ;) Ben. 
From segher at kernel.crashing.org Sun Oct 2 11:06:04 2005 From: segher at kernel.crashing.org (Segher Boessenkool) Date: Sun, 2 Oct 2005 03:06:04 +0200 Subject: [PATCH 8/9] powerpc: make iSeries build In-Reply-To: <20051001001005.348d7798.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001001005.348d7798.sfr@canb.auug.org.au> Message-ID: <27c25b49c3f8c5d3f563a0b9b39bda8c@kernel.crashing.org> > The merge of vmlinux.lds.S would be much cleaner if it is clear that > putting the ..start/end symbols inside the section definitions is OK on > ppc32. It is okay, and putting those symbol definitions outside the section definition (like the ppc32 linker script used to do) can be wrong even, for example, when the output sections need extra alignment. Segher From segher at kernel.crashing.org Sun Oct 2 11:27:11 2005 From: segher at kernel.crashing.org (Segher Boessenkool) Date: Sun, 2 Oct 2005 03:27:11 +0200 Subject: RFC on mem_pieces to LMB work In-Reply-To: <17211.25234.160085.163327@cargo.ozlabs.ibm.com> References: <17211.25234.160085.163327@cargo.ozlabs.ibm.com> Message-ID: > Of course, since > everybody has a device tree with at least one /memory node (right? :) > we can just populate the LMB struct from the device tree > unconditionally. Not everyone has at least one /memory node. Everyone has a "memory" property in /chosen though, which is the integer encoded ihandle of a package that handles memory allocation. To find _all_ random access memory, just walk the whole device tree looking for all nodes with "device_type" set to "memory". 
Segher From nish.aravamudan at gmail.com Wed Oct 5 02:44:10 2005 From: nish.aravamudan at gmail.com (Nish Aravamudan) Date: Tue, 4 Oct 2005 09:44:10 -0700 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <1128404215.31063.32.camel@gaston> References: <1128404215.31063.32.camel@gaston> Message-ID: <29495f1d0510040944i6d8eb36aud85b63ff12608e8a@mail.gmail.com> On 10/3/05, Benjamin Herrenschmidt wrote: > This is the actual thermal control support for PowerMac8,1, PowerMac8,2 > and PowerMac9,1 machines (SMU based), that is iMac G5 and single CPU desktop. > It requires CPUFREQ to be enabled to properly deal with overtemp conditions. > The new thermal control code implements a new framework (nicknamed "windfarm") > to which I expect to port the old G5 thermal control, and possibly some of the > powerbook thermal control drivers as well in the future. > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ linux-work/drivers/macintosh/windfarm_core.c 2005-10-04 15:17:33.000000000 +1000 > +static int wf_thread_func(void *data) > +{ > + unsigned long next, delay; > + > + next = jiffies; > + > + DBG("wf: thread started\n"); > + > + while(!kthread_should_stop()) { > + try_to_freeze(); > + > + if (time_after_eq(jiffies, next)) { > + wf_notify(WF_EVENT_TICK, NULL); > + if (wf_overtemp) { > + wf_overtemp_counter++; > + /* 10 seconds overtemp, notify userland */ > + if (wf_overtemp_counter > 10) > + wf_critical_overtemp(); > + /* 30 seconds, shutdown */ > + if (wf_overtemp_counter > 30) { > + printk(KERN_ERR "windfarm: Overtemp " > + "for more than 30" > + " seconds, shutting down\n"); > + machine_power_off(); > + } > + } > + next += HZ; > + } > + > + set_current_state(TASK_INTERRUPTIBLE); > + delay = next - jiffies; > + if (delay <= HZ) > + schedule_timeout(delay); > + set_current_state(TASK_RUNNING); This can be schedule_timeout_interruptible(delay); and then you can get rid of the set_current_state(TASK_RUNNING); Thanks, NIsh From hch at 
infradead.org Wed Oct 5 04:46:01 2005 From: hch at infradead.org (Christoph Hellwig) Date: Tue, 4 Oct 2005 19:46:01 +0100 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Message-ID: <20051004184601.GA30667@infradead.org> On Mon, Oct 03, 2005 at 03:26:30PM -0500, brking at us.ibm.com wrote: > > Add a .config option to default the scsi scan inquiry timeout. > Due to a broken device (SCSI/ATA converter card) that is very > common on IBM iSeries/pSeries machines, these architectures > need a longer default inquiry timeout. This is really not something that should be a CONFIG_ symbol. We have a perfectly fine working module option for it. From jimix at watson.ibm.com Wed Oct 5 05:35:00 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Tue, 4 Oct 2005 15:35:00 -0400 Subject: [PATCH] PCI dev node without an OF node Message-ID: <17218.55652.57402.465678@kitch0.watson.ibm.com> Handing the pSeries_iommu*LP logic the device tree from a Maple-D results in PCI dev nodes that do not have a corresponding OF node. I cannot be certain if this is a bug with the devtree in PIBS, or if this case is normally possible, I believe it is the latter. The check for dn==NULL used to happen in iommu_dev_setup_pSeries() but that is no longer called as of: http://kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blobdiff;h=d17f0108a03200c0437146f199acaab21ca6f678;hp=f0fd7fbd6531cd01fb8984d2c81e82a25825b484;hb=1635317facea3094ddf34082cd86797efb1d9f7e;f=arch/ppc64/kernel/pSeries_iommu.c so the following patch catches it. Signed-off-by: Jimi Xenidis diff -r fbe71a6b8d00 arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 19:14:08 2005 +++ b/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 15:15:32 2005 @@ -513,6 +513,11 @@ * already allocated. 
*/ dn = pci_device_to_OF_node(dev); + if (dn == NULL) { + DBG("%s, dev %p (%s) has no iommu table\n", + dev, pci_name(dev)); + return; + } for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; pdn = pdn->parent) { -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about." -- Peter Griffin (Family Guy) From jimix at watson.ibm.com Wed Oct 5 06:06:40 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Tue, 4 Oct 2005 16:06:40 -0400 Subject: [PATCH] PCI dev node without an OF node In-Reply-To: <17218.55652.57402.465678@kitch0.watson.ibm.com> References: <17218.55652.57402.465678@kitch0.watson.ibm.com> Message-ID: <17218.57552.497977.897826@kitch0.watson.ibm.com> >>>>> "JX" == Jimi Xenidis writes: oops sorry sent an early patch file and forgot the __func__ in the DBG statement. Signed-off-by: Jimi Xenidis diff -r 549d78c4d7ed arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Mon Oct 3 15:07:10 2005 +++ b/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 16:04:00 2005 @@ -513,6 +513,11 @@ * already allocated. */ dn = pci_device_to_OF_node(dev); + if (dn == NULL) { + DBG("%s, dev %p (%s) has no iommu table\n", __func__, + dev, pci_name(dev)); + return; + } for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; pdn = pdn->parent) { From linas at austin.ibm.com Wed Oct 5 06:30:19 2005 From: linas at austin.ibm.com (linas) Date: Tue, 4 Oct 2005 15:30:19 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051004203019.GV29826@austin.ibm.com> After discussion with John Rose, I realize that this patch breaks something else, and so it's no good. I'll try to come up with a different patch, which will unfortunately be a bit more complex. 
--linas On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas was heard to remark: > > 08-hotplug-bugfix.patch > > In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add > of a hotplug slot will crash the system, with the following (abbreviated) > stack trace: > > cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] > pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 > lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 > c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) > c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 > c000000000060754 .notifier_call_chain+0x68/0x9c > > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garbage data. The patch below fixes this crash, > and adds some docs to clarify the code. > > Signed-off-by: Linas Vepstas > > > Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c > =================================================================== > --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.011393833 -0500 > +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:52:26.421786761 -0500 > @@ -121,6 +121,12 @@ > return NULL; > } > > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) > { > struct device_node * dn = (struct device_node *) phb->arch_data; > @@ -201,17 +207,19 @@ > .notifier_call = pci_dn_reconfig_notifier, > }; > > -/* > - * Actually initialize the phbs. > - * The buswalk on this phb has not happened yet. > +/** pci_devs_phb_init -- Initialize phbs and pci devs under them. 
> + * > + * When this is called, the buswalk of PHB's has not happened yet. > */ > void __init pci_devs_phb_init(void) > { > struct pci_controller *phb, *tmp; > > /* This must be done first so the device nodes have valid pci info! */ > - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) > + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { > pci_devs_phb_init_dynamic(phb); > + phb->is_dynamic = 1; > + } > > pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); > } > _______________________________________________ > Linuxppc64-dev mailing list > Linuxppc64-dev at ozlabs.org > https://ozlabs.org/mailman/listinfo/linuxppc64-dev > From johnrose at austin.ibm.com Wed Oct 5 06:47:12 2005 From: johnrose at austin.ibm.com (John Rose) Date: Tue, 04 Oct 2005 15:47:12 -0500 Subject: [PATCH] Separate pci bits out of struct device_node In-Reply-To: <17181.2658.910786.938698@cargo.ozlabs.ibm.com> References: <17181.2658.910786.938698@cargo.ozlabs.ibm.com> Message-ID: <1128458832.9315.17.camel@sinatra.austin.ibm.com> Hi Paul- > diff -urN linux-2.6/arch/ppc64/kernel/pci_dn.c pcidn/arch/ppc64/kernel/pci_dn.c > --- linux-2.6/arch/ppc64/kernel/pci_dn.c 2005-04-26 15:37:55.000000000 +1000 > +++ pcidn/arch/ppc64/kernel/pci_dn.c 2005-09-06 11:39:47.000000000 +1000 ... > @@ -40,16 +42,26 @@ > struct pci_controller *phb = data; > int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL); > u32 *regs; > + struct pci_dn *pdn; > > - dn->phb = phb; > + if (phb->is_dynamic) > + pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); > + else > + pdn = alloc_bootmem(sizeof(*pdn)); I didn't notice it at first, but this check seems incorrect. The phb->is_dynamic flag indicates whether a PHB was present at boot. Suppose I try to hotplug add a device to a slot with a parent PHB that was present at boot. This code path gets called for every dynamic device node add. With this as-is, we get a runtime call to alloc_bootmem() for the new device nodes - crash. 
Linas reported this: http://www.ussg.iu.edu/hypermail/linux/kernel/0510.0/0510.html It would seem that the check should be asking whether we are at boot or not, and this flag probably isn't the one for that. I don't know whether mem_init_done is a better idea. This goes back to the "global var vs. init/dynamic versions of allocs" debate, for which we have historically followed the latter. Thoughts? Thanks- John From benh at kernel.crashing.org Wed Oct 5 07:59:10 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 05 Oct 2005 07:59:10 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <29495f1d0510040944i6d8eb36aud85b63ff12608e8a@mail.gmail.com> References: <1128404215.31063.32.camel@gaston> <29495f1d0510040944i6d8eb36aud85b63ff12608e8a@mail.gmail.com> Message-ID: <1128463151.6417.18.camel@gaston> On Tue, 2005-10-04 at 09:44 -0700, Nish Aravamudan wrote: > > This can be schedule_timeout_interruptible(delay); and then you can > get rid of the set_current_state(TASK_RUNNING); Ah, those lovely new "do-it-all" helpers :) Thanks. Ben. From jdl at freescale.com Wed Oct 5 07:59:50 2005 From: jdl at freescale.com (Jon Loeliger) Date: Tue, 04 Oct 2005 16:59:50 -0500 Subject: PATCH powerpc Move LMB from ppc64 to powerpc Message-ID: <1128463190.22452.29.camel@cashmere.sps.mot.com> Move the LMB code from ppc64 to powerpc. Only compile ppc32's tlb.c code on "standard" mmu machines. 
Signed-off-by: Jon Loeliger --- arch/powerpc/mm/Makefile | 8 + arch/powerpc/mm/lmb.c | 303 ++++++++++++++++++++++++++++++++++++++++++++ arch/ppc64/kernel/Makefile | 1 arch/ppc64/kernel/lmb.c | 299 ------------------------------------------- include/asm-powerpc/lmb.h | 78 +++++++++++ include/asm-ppc/page.h | 6 + 6 files changed, 391 insertions(+), 304 deletions(-) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -2,11 +2,11 @@ # Makefile for the linux ppc-specific parts of the memory manager. # -obj-y := fault.o mem.o -obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ - mem_pieces.o tlb.o +obj-y := fault.o lmb.o mem.o + +obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o mem_pieces.o obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o tlb.o obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c new file mode 100644 --- /dev/null +++ b/arch/powerpc/mm/lmb.c @@ -0,0 +1,303 @@ +/* + * Procedures for interfacing to Open Firmware. + * + * Peter Bergner, IBM Corp. June 2001. + * Copyright (C) 2001 Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct lmb lmb; + +#undef DEBUG + +void lmb_dump_all(void) +{ +#ifdef DEBUG + unsigned long i; + + udbg_printf("lmb_dump_all:\n"); + udbg_printf(" memory.cnt = 0x%lx\n", + lmb.memory.cnt); + udbg_printf(" memory.size = 0x%lx\n", + lmb.memory.size); + for (i=0; i < lmb.memory.cnt ;i++) { + udbg_printf(" memory.region[0x%x].base = 0x%lx\n", + i, lmb.memory.region[i].base); + udbg_printf(" .size = 0x%lx\n", + lmb.memory.region[i].size); + } + + udbg_printf("\n reserved.cnt = 0x%lx\n", + lmb.reserved.cnt); + udbg_printf(" reserved.size = 0x%lx\n", + lmb.reserved.size); + for (i=0; i < lmb.reserved.cnt ;i++) { + udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", + i, lmb.reserved.region[i].base); + udbg_printf(" .size = 0x%lx\n", + lmb.reserved.region[i].size); + } +#endif /* DEBUG */ +} + +static unsigned long __init +lmb_addrs_overlap(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + return ((base1 < (base2+size2)) && (base2 < (base1+size1))); +} + +static long __init +lmb_addrs_adjacent(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + if (base2 == base1 + size1) + return 1; + else if (base1 == base2 + size2) + return -1; + + return 0; +} + +static long __init +lmb_regions_adjacent(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + unsigned long base1 = rgn->region[r1].base; + unsigned long size1 = rgn->region[r1].size; + unsigned long base2 = rgn->region[r2].base; + unsigned long size2 = rgn->region[r2].size; + + return lmb_addrs_adjacent(base1, size1, base2, size2); +} + +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void __init +lmb_coalesce_regions(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + unsigned long i; + + rgn->region[r1].size += rgn->region[r2].size; + for (i=r2; i < rgn->cnt-1; i++) { + rgn->region[i].base = 
rgn->region[i+1].base; + rgn->region[i].size = rgn->region[i+1].size; + } + rgn->cnt--; +} + +/* This routine called with relocation disabled. */ +void __init +lmb_init(void) +{ + /* Create a dummy zero size LMB which will get coalesced away later. + * This simplifies the lmb_add() code below... + */ + lmb.memory.region[0].base = 0; + lmb.memory.region[0].size = 0; + lmb.memory.cnt = 1; + + /* Ditto. */ + lmb.reserved.region[0].base = 0; + lmb.reserved.region[0].size = 0; + lmb.reserved.cnt = 1; +} + +/* This routine called with relocation disabled. */ +void __init +lmb_analyze(void) +{ + int i; + + lmb.memory.size = 0; + + for (i = 0; i < lmb.memory.cnt; i++) + lmb.memory.size += lmb.memory.region[i].size; +} + +/* This routine called with relocation disabled. */ +static long __init +lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) +{ + unsigned long i, coalesced = 0; + long adjacent; + + /* First try and coalesce this LMB with another. */ + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + + adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); + if ( adjacent > 0 ) { + rgn->region[i].base -= size; + rgn->region[i].size += size; + coalesced++; + break; + } + else if ( adjacent < 0 ) { + rgn->region[i].size += size; + coalesced++; + break; + } + } + + if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { + lmb_coalesce_regions(rgn, i, i+1); + coalesced++; + } + + if ( coalesced ) { + return coalesced; + } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { + return -1; + } + + /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ + for (i=rgn->cnt-1; i >= 0; i--) { + if (base < rgn->region[i].base) { + rgn->region[i+1].base = rgn->region[i].base; + rgn->region[i+1].size = rgn->region[i].size; + } else { + rgn->region[i+1].base = base; + rgn->region[i+1].size = size; + break; + } + } + rgn->cnt++; + + return 0; +} + +/* This routine called with relocation disabled. */ +long __init +lmb_add(unsigned long base, unsigned long size) +{ + struct lmb_region *_rgn = &(lmb.memory); + + /* On pSeries LPAR systems, the first LMB is our RMO region. */ + if ( base == 0 ) + lmb.rmo_size = size; + + return lmb_add_region(_rgn, base, size); + +} + +long __init +lmb_reserve(unsigned long base, unsigned long size) +{ + struct lmb_region *_rgn = &(lmb.reserved); + + return lmb_add_region(_rgn, base, size); +} + +long __init +lmb_overlaps_region(struct lmb_region *rgn, + unsigned long base, unsigned long size) +{ + unsigned long i; + + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { + break; + } + } + + return (i < rgn->cnt) ? 
i : -1; +} + +unsigned long __init +lmb_alloc(unsigned long size, unsigned long align) +{ + return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); +} + +unsigned long __init +lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) +{ + long i, j; + unsigned long base = 0; + + for (i=lmb.memory.cnt-1; i >= 0; i--) { + unsigned long lmbbase = lmb.memory.region[i].base; + unsigned long lmbsize = lmb.memory.region[i].size; + + if ( max_addr == LMB_ALLOC_ANYWHERE ) + base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); + else if ( lmbbase < max_addr ) + base = _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, + align); + else + continue; + + while ( (lmbbase <= base) && + ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { + base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, + align); + } + + if ( (base != 0) && (lmbbase <= base) ) + break; + } + + if ( i < 0 ) + return 0; + + lmb_add_region(&lmb.reserved, base, size); + + return base; +} + +/* You must call lmb_analyze() before this. */ +unsigned long __init +lmb_phys_mem_size(void) +{ + return lmb.memory.size; +} + +unsigned long __init +lmb_end_of_DRAM(void) +{ + int idx = lmb.memory.cnt - 1; + + return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); +} + +/* + * Truncate the lmb list to memory_limit if it's set + * You must call lmb_analyze() after this. + */ +void __init lmb_enforce_memory_limit(void) +{ + extern unsigned long memory_limit; + unsigned long i, limit; + + if (! 
memory_limit) + return; + + limit = memory_limit; + for (i = 0; i < lmb.memory.cnt; i++) { + if (limit > lmb.memory.region[i].size) { + limit -= lmb.memory.region[i].size; + continue; + } + + lmb.memory.region[i].size = limit; + lmb.memory.cnt = i + 1; + break; + } +} diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -76,3 +76,4 @@ endif # These are here while we do the architecture merge vecemu-y += ../../powerpc/kernel/vecemu.o +lmb-y += ../../powerpc/mm/lmb.o diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c deleted file mode 100644 --- a/arch/ppc64/kernel/lmb.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Procedures for interfacing to Open Firmware. - * - * Peter Bergner, IBM Corp. June 2001. - * Copyright (C) 2001 Peter Bergner. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct lmb lmb; - -#undef DEBUG - -void lmb_dump_all(void) -{ -#ifdef DEBUG - unsigned long i; - - udbg_printf("lmb_dump_all:\n"); - udbg_printf(" memory.cnt = 0x%lx\n", - lmb.memory.cnt); - udbg_printf(" memory.size = 0x%lx\n", - lmb.memory.size); - for (i=0; i < lmb.memory.cnt ;i++) { - udbg_printf(" memory.region[0x%x].base = 0x%lx\n", - i, lmb.memory.region[i].base); - udbg_printf(" .size = 0x%lx\n", - lmb.memory.region[i].size); - } - - udbg_printf("\n reserved.cnt = 0x%lx\n", - lmb.reserved.cnt); - udbg_printf(" reserved.size = 0x%lx\n", - lmb.reserved.size); - for (i=0; i < lmb.reserved.cnt ;i++) { - udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", - i, lmb.reserved.region[i].base); - udbg_printf(" .size = 0x%lx\n", - lmb.reserved.region[i].size); - } -#endif /* DEBUG */ -} - -static unsigned long __init -lmb_addrs_overlap(unsigned long base1, unsigned long size1, - unsigned long base2, unsigned long size2) -{ - return ((base1 < (base2+size2)) && (base2 < (base1+size1))); -} - -static long __init -lmb_addrs_adjacent(unsigned long base1, unsigned long size1, - unsigned long base2, unsigned long size2) -{ - if (base2 == base1 + size1) - return 1; - else if (base1 == base2 + size2) - return -1; - - return 0; -} - -static long __init -lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2) -{ - unsigned long base1 = rgn->region[r1].base; - unsigned long size1 = rgn->region[r1].size; - unsigned long base2 = rgn->region[r2].base; - unsigned long size2 = rgn->region[r2].size; - - return lmb_addrs_adjacent(base1, size1, base2, size2); -} - -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void __init -lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) -{ - unsigned long i; - - rgn->region[r1].size += rgn->region[r2].size; - for (i=r2; i < rgn->cnt-1; i++) { - rgn->region[i].base = 
rgn->region[i+1].base; - rgn->region[i].size = rgn->region[i+1].size; - } - rgn->cnt--; -} - -/* This routine called with relocation disabled. */ -void __init -lmb_init(void) -{ - /* Create a dummy zero size LMB which will get coalesced away later. - * This simplifies the lmb_add() code below... - */ - lmb.memory.region[0].base = 0; - lmb.memory.region[0].size = 0; - lmb.memory.cnt = 1; - - /* Ditto. */ - lmb.reserved.region[0].base = 0; - lmb.reserved.region[0].size = 0; - lmb.reserved.cnt = 1; -} - -/* This routine called with relocation disabled. */ -void __init -lmb_analyze(void) -{ - int i; - - lmb.memory.size = 0; - - for (i = 0; i < lmb.memory.cnt; i++) - lmb.memory.size += lmb.memory.region[i].size; -} - -/* This routine called with relocation disabled. */ -static long __init -lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) -{ - unsigned long i, coalesced = 0; - long adjacent; - - /* First try and coalesce this LMB with another. */ - for (i=0; i < rgn->cnt; i++) { - unsigned long rgnbase = rgn->region[i].base; - unsigned long rgnsize = rgn->region[i].size; - - adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); - if ( adjacent > 0 ) { - rgn->region[i].base -= size; - rgn->region[i].size += size; - coalesced++; - break; - } - else if ( adjacent < 0 ) { - rgn->region[i].size += size; - coalesced++; - break; - } - } - - if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { - lmb_coalesce_regions(rgn, i, i+1); - coalesced++; - } - - if ( coalesced ) { - return coalesced; - } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { - return -1; - } - - /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ - for (i=rgn->cnt-1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; - } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; - break; - } - } - rgn->cnt++; - - return 0; -} - -/* This routine called with relocation disabled. */ -long __init -lmb_add(unsigned long base, unsigned long size) -{ - struct lmb_region *_rgn = &(lmb.memory); - - /* On pSeries LPAR systems, the first LMB is our RMO region. */ - if ( base == 0 ) - lmb.rmo_size = size; - - return lmb_add_region(_rgn, base, size); - -} - -long __init -lmb_reserve(unsigned long base, unsigned long size) -{ - struct lmb_region *_rgn = &(lmb.reserved); - - return lmb_add_region(_rgn, base, size); -} - -long __init -lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, unsigned long size) -{ - unsigned long i; - - for (i=0; i < rgn->cnt; i++) { - unsigned long rgnbase = rgn->region[i].base; - unsigned long rgnsize = rgn->region[i].size; - if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { - break; - } - } - - return (i < rgn->cnt) ? 
i : -1; -} - -unsigned long __init -lmb_alloc(unsigned long size, unsigned long align) -{ - return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); -} - -unsigned long __init -lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) -{ - long i, j; - unsigned long base = 0; - - for (i=lmb.memory.cnt-1; i >= 0; i--) { - unsigned long lmbbase = lmb.memory.region[i].base; - unsigned long lmbsize = lmb.memory.region[i].size; - - if ( max_addr == LMB_ALLOC_ANYWHERE ) - base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); - else if ( lmbbase < max_addr ) - base = _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, align); - else - continue; - - while ( (lmbbase <= base) && - ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { - base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align); - } - - if ( (base != 0) && (lmbbase <= base) ) - break; - } - - if ( i < 0 ) - return 0; - - lmb_add_region(&lmb.reserved, base, size); - - return base; -} - -/* You must call lmb_analyze() before this. */ -unsigned long __init -lmb_phys_mem_size(void) -{ - return lmb.memory.size; -} - -unsigned long __init -lmb_end_of_DRAM(void) -{ - int idx = lmb.memory.cnt - 1; - - return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); -} - -/* - * Truncate the lmb list to memory_limit if it's set - * You must call lmb_analyze() after this. - */ -void __init lmb_enforce_memory_limit(void) -{ - extern unsigned long memory_limit; - unsigned long i, limit; - - if (! 
memory_limit) - return; - - limit = memory_limit; - for (i = 0; i < lmb.memory.cnt; i++) { - if (limit > lmb.memory.region[i].size) { - limit -= lmb.memory.region[i].size; - continue; - } - - lmb.memory.region[i].size = limit; - lmb.memory.cnt = i + 1; - break; - } -} diff --git a/include/asm-powerpc/lmb.h b/include/asm-powerpc/lmb.h new file mode 100644 --- /dev/null +++ b/include/asm-powerpc/lmb.h @@ -0,0 +1,78 @@ +#ifndef _ASM_POWERPC_LMB_H +#define _ASM_POWERPC_LMB_H + +/* + * Low-level Memory Block management. + * + * Copyright (C) 2001 Peter Bergner, IBM Corp. + */ + +#include +#include + +#define MAX_LMB_REGIONS 128 + +#define LMB_ALLOC_ANYWHERE 0 + +struct lmb_block { + unsigned long base; + unsigned long size; +}; + +struct lmb_region { + unsigned long cnt; + unsigned long size; + struct lmb_block region[MAX_LMB_REGIONS+1]; +}; + +struct lmb { + unsigned long debug; + unsigned long rmo_size; + struct lmb_region memory; + struct lmb_region reserved; +}; + +extern struct lmb lmb; + +extern void __init lmb_init(void); +extern void __init lmb_analyze(void); +extern long __init lmb_add(unsigned long, unsigned long); +extern long __init lmb_reserve(unsigned long, unsigned long); +extern unsigned long __init lmb_alloc(unsigned long, unsigned long); +extern unsigned long __init lmb_alloc_base(unsigned long, unsigned long, + unsigned long); +extern unsigned long __init lmb_phys_mem_size(void); +extern unsigned long __init lmb_end_of_DRAM(void); +extern unsigned long __init lmb_abs_to_phys(unsigned long); +extern void __init lmb_enforce_memory_limit(void); + +extern void lmb_dump_all(void); + +extern unsigned long io_hole_start; + +static inline unsigned long +lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].size; +} + +static inline unsigned long +lmb_size_pages(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; +} + +static inline unsigned long 
+lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].base >> PAGE_SHIFT; +} + +static inline unsigned long +lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_start_pfn(type, region_nr) + + lmb_size_pages(type, region_nr); +} + +#endif /* _ASM_POWERPC_LMB_H */ diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h --- a/include/asm-ppc/page.h +++ b/include/asm-ppc/page.h @@ -77,8 +77,12 @@ typedef unsigned long pgprot_t; #endif +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + /* align addr on a size boundary - adjust address up if needed -- Cort */ -#define _ALIGN(addr,size) (((addr)+(size)-1)&(~((size)-1))) +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) From benh at kernel.crashing.org Wed Oct 5 08:08:05 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 05 Oct 2005 08:08:05 +1000 Subject: [PATCH] PCI dev node without an OF node In-Reply-To: <17218.55652.57402.465678@kitch0.watson.ibm.com> References: <17218.55652.57402.465678@kitch0.watson.ibm.com> Message-ID: <1128463686.6417.25.camel@gaston> On Tue, 2005-10-04 at 15:35 -0400, Jimi Xenidis wrote: > Handing the pSeries_iommu*LP logic the device tree from a Maple-D results > in PCI dev nodes that do not have a corresponding OF node. > > I cannot be certain if this is a bug with the devtree in PIBS, or if > this case is normally possible, I believe it is the latter. Yes, it's accepted to have only OF nodes for bridges. 
> The check for dn==NULL used to happen in iommu_dev_setup_pSeries() but > that is no longer called as of: > http://kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blobdiff;h=d17f0108a03200c0437146f199acaab21ca6f678;hp=f0fd7fbd6531cd01fb8984d2c81e82a25825b484;hb=1635317facea3094ddf34082cd86797efb1d9f7e;f=arch/ppc64/kernel/pSeries_iommu.c > > so the following patch catches it. I'm not sure your patch is 100% correct, but then, we also shouldn't have called pSeries_iommu.c code on Maple either in the first place unless you have put some kind of PAPR-like hypervisor on it (but still, the Maple has no TCEs so it shouldn't call this code). One must make sure that the dma_map* routines always get the closest parent device node though if the device has NULL. We used to populate all PCI devices with the PHB node by default but that may have been broken. Ben. > Signed-off-by: Jimi Xenidis > > > diff -r fbe71a6b8d00 arch/ppc64/kernel/pSeries_iommu.c > --- a/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 19:14:08 2005 > +++ b/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 15:15:32 2005 > @@ -513,6 +513,11 @@ > * already allocated. 
> */ > dn = pci_device_to_OF_node(dev); > + if (dn == NULL) { > + DBG("%s, dev %p (%s) has no iommu table\n", > + dev, pci_name(dev)); > + return; > + } > > for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; > pdn = pdn->parent) { > From jimix at watson.ibm.com Wed Oct 5 08:15:36 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Tue, 4 Oct 2005 18:15:36 -0400 Subject: [PATCH] PCI dev node without an OF node In-Reply-To: <1128463686.6417.25.camel@gaston> References: <17218.55652.57402.465678@kitch0.watson.ibm.com> <1128463686.6417.25.camel@gaston> Message-ID: <17218.65288.929638.35946@kitch0.watson.ibm.com> >>>>> "BH" == Benjamin Herrenschmidt writes: BH> On Tue, 2005-10-04 at 15:35 -0400, Jimi Xenidis wrote: >> Handing the pSeries_iommu*LP logic the device tree from a Maple-D results >> in PCI dev nodes that do not have a corresponding OF node. BH> I'm not sure your patch is 100% correct, but then, we also shouldn't BH> have called pSeries_iommu.c code on Maple neither in the first place BH> unless you have put some kind of PAPR-like hypervisor on it (but still, BH> the Maple has no TCEs so it shouldn't call this code). In our world of Xen/Hypervisor on Maple this is entirely possible for the Dom0/Alpha-LPAR, where the HTAB and DART are controlled via PAPR H_CALLS. :-) -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about." 
-- Peter Griffin (Family Guy) From benh at kernel.crashing.org Wed Oct 5 08:43:13 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 05 Oct 2005 08:43:13 +1000 Subject: [PATCH] PCI dev node without an OF node In-Reply-To: <17218.65288.929638.35946@kitch0.watson.ibm.com> References: <17218.55652.57402.465678@kitch0.watson.ibm.com> <1128463686.6417.25.camel@gaston> <17218.65288.929638.35946@kitch0.watson.ibm.com> Message-ID: <1128465794.6417.38.camel@gaston> On Tue, 2005-10-04 at 18:15 -0400, Jimi Xenidis wrote: > >>>>> "BH" == Benjamin Herrenschmidt writes: > > BH> On Tue, 2005-10-04 at 15:35 -0400, Jimi Xenidis wrote: > >> Handing the pSeries_iommu*LP logic the device tree from a Maple-D results > >> in PCI dev nodes that do not have a corresponding OF node. > > BH> I'm not sure your patch is 100% correct, but then, we also shouldn't > BH> have called pSeries_iommu.c code on Maple neither in the first place > BH> unless you have put some kind of PAPR-like hypervisor on it (but still, > BH> the Maple has no TCEs so it shouldn't call this code). > > In our world of Xen/Hypervisor on Maple this is entirely possible for > the Dom0/Alpha-LPAR, where the HTAB and DART are controlled via PAPR > H_CALLS. :-) Ah, indeed... Oh well, just make sure that when a device calls pci_map_blah() it works whether the device has a node or not. Ben. From linas at austin.ibm.com Wed Oct 5 09:59:00 2005 From: linas at austin.ibm.com (linas) Date: Tue, 4 Oct 2005 18:59:00 -0500 Subject: [PATCH 1/2] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051004203019.GV29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> <20051004203019.GV29826@austin.ibm.com> Message-ID: <20051004235900.GW29826@austin.ibm.com> Paul, A new-improved variant of the previous patch in this thread. Please apply. 
08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that __init __alloc_bootmem() was called long after boot had finished, resulting in a crash because this routine is undefined after boot time. The patch below fixes this crash, and adds some docs to clarify the code. p.s. congrats to all for getting slashdotted on this yesterday! Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-04 15:37:49.761245845 -0500 @@ -44,7 +44,7 @@ u32 *regs; struct pci_dn *pdn; - if (phb->is_dynamic) + if (mem_init_done) pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); else pdn = alloc_bootmem(sizeof(*pdn)); @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,9 +209,14 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. 
+/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. + * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { From linas at austin.ibm.com Wed Oct 5 10:01:16 2005 From: linas at austin.ibm.com (linas) Date: Tue, 4 Oct 2005 19:01:16 -0500 Subject: [PATCH 2/2] ppc64: Crash in DLPAR code on remove operation In-Reply-To: <20051004203019.GV29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> <20051004203019.GV29826@austin.ibm.com> Message-ID: <20051005000116.GX29826@austin.ibm.com> This patch fixes two bugs related to dlpar slot removal and add. -- Both crashes are due to the fact that some children of pci nodes are not pci nodes themselves, and thus do not have pci_dn structures. For example: /pci at 800000020000002/pci at 2,3/usb at 1/hub at 1 /pci at 800000020000002/pci at 2,3/usb at 1,1/hub at 1 Strangely, though, sometimes the following appears, and I don't quite understand why. /interrupt-controller at 3fe0000a400 A typical stack trace: Vector: 300 (Data Access) at [c0000000555637d0] pc: c000000000202a50: .dlpar_add_slot+0x108/0x410 c000000000202e78 .add_slot_store+0x7c/0xac c000000000202da0 .dlpar_attr_store+0x48/0x64 c0000000000f8ee4 .sysfs_write_file+0x100/0x1a0 A similar stack trace is involved for the slot remove. This code survived testing, of adding and removing different slots, 23 times each, so far, as of this writing. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pSeries_iommu.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pSeries_iommu.c 2005-10-04 16:47:09.175705100 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pSeries_iommu.c 2005-10-04 17:12:54.123928903 -0500 @@ -478,10 +478,13 @@ { int err = NOTIFY_OK; struct device_node *np = node; - struct pci_dn *pci = np->data; + struct pci_dn *pci; switch (action) { case PSERIES_RECONFIG_REMOVE: + pci = PCI_DN(np); + if (!pci) + return NOTIFY_OK; if (pci->iommu_table && get_property(np, "ibm,dma-window", NULL)) iommu_free_table(np); Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-04 15:37:49.761245845 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-04 17:58:47.344628793 -0500 @@ -195,7 +195,10 @@ switch (action) { case PSERIES_RECONFIG_ADD: - pci = np->parent->data; + pci = PCI_DN(np->parent); + if (!pci) + return NOTIFY_OK; + update_dn_pci_info(np, pci->phb); break; default: From linas at austin.ibm.com Wed Oct 5 10:04:26 2005 From: linas at austin.ibm.com (linas) Date: Tue, 4 Oct 2005 19:04:26 -0500 Subject: [PATCH] rpaphp: PCI Hotplug crash on PHB DLPAR add In-Reply-To: <20051004203019.GV29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> <20051004203019.GV29826@austin.ibm.com> Message-ID: <20051005000426.GY29826@austin.ibm.com> This patch fixes a bug related to dlpar PHB add, after a PHB removal. -- The crash was due to the PHB not having a pci_dn structure yet, when the phb is being added. This code survived testing, of adding and removing the PHB and all slots underneath it, 17 times so far, as of this writing. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpadlpar_core.c 2005-10-04 16:40:12.539168432 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c 2005-10-04 17:55:43.165471615 -0500 @@ -303,7 +303,7 @@ { struct pci_controller *phb; - if (PCI_DN(dn)->phb) { + if (PCI_DN(dn) && PCI_DN(dn)->phb) { /* PHB already exists */ return -EINVAL; } From kumar.gala at freescale.com Wed Oct 5 15:17:35 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Wed, 5 Oct 2005 00:17:35 -0500 Subject: PATCH powerpc Move LMB from ppc64 to powerpc In-Reply-To: <1128463190.22452.29.camel@cashmere.sps.mot.com> References: <1128463190.22452.29.camel@cashmere.sps.mot.com> Message-ID: <01889CD2-412A-495A-9F2B-25605B543F00@freescale.com> Jon, Looks good. As you're removing the use of mem_pieces from arch/powerpc, we should see if the LMB data structures can always have 64-bit addresses. The reason for this is to handle the > 32-bit address case on ppc32. Anyways, something to keep in the back of your mind while looking at this code. - kumar On Oct 4, 2005, at 4:59 PM, Loeliger Jon-LOELIGER wrote: > Move the LMB code from ppc64 to powerpc. > Only compile ppc32's tlb.c code on "standard" mmu machines. > > Signed-off-by: Jon Loeliger > --- > > arch/powerpc/mm/Makefile | 8 + > arch/powerpc/mm/lmb.c | 303 > ++++++++++++++++++++++++++++++++++++++++++++ > arch/ppc64/kernel/Makefile | 1 > arch/ppc64/kernel/lmb.c | 299 > ------------------------------------------- > include/asm-powerpc/lmb.h | 78 +++++++++++ > include/asm-ppc/page.h | 6 + > 6 files changed, 391 insertions(+), 304 deletions(-) > > > diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile > --- a/arch/powerpc/mm/Makefile > +++ b/arch/powerpc/mm/Makefile > @@ -2,11 +2,11 @@ > # Makefile for the linux ppc-specific parts of the memory manager. 
> # > > -obj-y := fault.o mem.o > -obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ > - mem_pieces.o tlb.o > +obj-y := fault.o lmb.o mem.o > + > +obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o > mem_pieces.o > obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o > -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o > +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o tlb.o > obj-$(CONFIG_40x) += 4xx_mmu.o > obj-$(CONFIG_44x) += 44x_mmu.o > obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o > diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c > new file mode 100644 > --- /dev/null > +++ b/arch/powerpc/mm/lmb.c > @@ -0,0 +1,303 @@ > +/* > + * Procedures for interfacing to Open Firmware. > + * > + * Peter Bergner, IBM Corp. June 2001. > + * Copyright (C) 2001 Peter Bergner. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. 
> + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +struct lmb lmb; > + > +#undef DEBUG > + > +void lmb_dump_all(void) > +{ > +#ifdef DEBUG > + unsigned long i; > + > + udbg_printf("lmb_dump_all:\n"); > + udbg_printf(" memory.cnt = 0x%lx\n", > + lmb.memory.cnt); > + udbg_printf(" memory.size = 0x%lx\n", > + lmb.memory.size); > + for (i=0; i < lmb.memory.cnt ;i++) { > + udbg_printf(" memory.region[0x%x].base = > 0x%lx\n", > + i, lmb.memory.region[i].base); > + udbg_printf(" .size = 0x%lx\n", > + lmb.memory.region[i].size); > + } > + > + udbg_printf("\n reserved.cnt = 0x%lx\n", > + lmb.reserved.cnt); > + udbg_printf(" reserved.size = 0x%lx\n", > + lmb.reserved.size); > + for (i=0; i < lmb.reserved.cnt ;i++) { > + udbg_printf(" reserved.region[0x%x].base = > 0x%lx\n", > + i, lmb.reserved.region[i].base); > + udbg_printf(" .size = 0x%lx\n", > + lmb.reserved.region[i].size); > + } > +#endif /* DEBUG */ > +} > + > +static unsigned long __init > +lmb_addrs_overlap(unsigned long base1, unsigned long size1, > + unsigned long base2, unsigned long size2) > +{ > + return ((base1 < (base2+size2)) && (base2 < (base1+size1))); > +} > + > +static long __init > +lmb_addrs_adjacent(unsigned long base1, unsigned long size1, > + unsigned long base2, unsigned long size2) > +{ > + if (base2 == base1 + size1) > + return 1; > + else if (base1 == base2 + size2) > + return -1; > + > + return 0; > +} > + > +static long __init > +lmb_regions_adjacent(struct lmb_region *rgn, > + unsigned long r1, unsigned long r2) > +{ > + unsigned long base1 = rgn->region[r1].base; > + unsigned long size1 = rgn->region[r1].size; > + unsigned long base2 = rgn->region[r2].base; > + unsigned long size2 = rgn->region[r2].size; > + > + return lmb_addrs_adjacent(base1, size1, base2, size2); > +} > + > +/* Assumption: base addr of region 1 < base addr of region 2 */ > +static void __init > +lmb_coalesce_regions(struct lmb_region *rgn, > + unsigned long 
r1, unsigned long r2) > +{ > + unsigned long i; > + > + rgn->region[r1].size += rgn->region[r2].size; > + for (i=r2; i < rgn->cnt-1; i++) { > + rgn->region[i].base = rgn->region[i+1].base; > + rgn->region[i].size = rgn->region[i+1].size; > + } > + rgn->cnt--; > +} > + > +/* This routine called with relocation disabled. */ > +void __init > +lmb_init(void) > +{ > + /* Create a dummy zero size LMB which will get coalesced away > later. > + * This simplifies the lmb_add() code below... > + */ > + lmb.memory.region[0].base = 0; > + lmb.memory.region[0].size = 0; > + lmb.memory.cnt = 1; > + > + /* Ditto. */ > + lmb.reserved.region[0].base = 0; > + lmb.reserved.region[0].size = 0; > + lmb.reserved.cnt = 1; > +} > + > +/* This routine called with relocation disabled. */ > +void __init > +lmb_analyze(void) > +{ > + int i; > + > + lmb.memory.size = 0; > + > + for (i = 0; i < lmb.memory.cnt; i++) > + lmb.memory.size += lmb.memory.region[i].size; > +} > + > +/* This routine called with relocation disabled. */ > +static long __init > +lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned > long size) > +{ > + unsigned long i, coalesced = 0; > + long adjacent; > + > + /* First try and coalesce this LMB with another. */ > + for (i=0; i < rgn->cnt; i++) { > + unsigned long rgnbase = rgn->region[i].base; > + unsigned long rgnsize = rgn->region[i].size; > + > + adjacent = > lmb_addrs_adjacent(base,size,rgnbase,rgnsize); > + if ( adjacent > 0 ) { > + rgn->region[i].base -= size; > + rgn->region[i].size += size; > + coalesced++; > + break; > + } > + else if ( adjacent < 0 ) { > + rgn->region[i].size += size; > + coalesced++; > + break; > + } > + } > + > + if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { > + lmb_coalesce_regions(rgn, i, i+1); > + coalesced++; > + } > + > + if ( coalesced ) { > + return coalesced; > + } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { > + return -1; > + } > + > + /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ > + for (i=rgn->cnt-1; i >= 0; i--) { > + if (base < rgn->region[i].base) { > + rgn->region[i+1].base = rgn->region[i].base; > + rgn->region[i+1].size = rgn->region[i].size; > + } else { > + rgn->region[i+1].base = base; > + rgn->region[i+1].size = size; > + break; > + } > + } > + rgn->cnt++; > + > + return 0; > +} > + > +/* This routine called with relocation disabled. */ > +long __init > +lmb_add(unsigned long base, unsigned long size) > +{ > + struct lmb_region *_rgn = &(lmb.memory); > + > + /* On pSeries LPAR systems, the first LMB is our RMO region. */ > + if ( base == 0 ) > + lmb.rmo_size = size; > + > + return lmb_add_region(_rgn, base, size); > + > +} > + > +long __init > +lmb_reserve(unsigned long base, unsigned long size) > +{ > + struct lmb_region *_rgn = &(lmb.reserved); > + > + return lmb_add_region(_rgn, base, size); > +} > + > +long __init > +lmb_overlaps_region(struct lmb_region *rgn, > + unsigned long base, unsigned long size) > +{ > + unsigned long i; > + > + for (i=0; i < rgn->cnt; i++) { > + unsigned long rgnbase = rgn->region[i].base; > + unsigned long rgnsize = rgn->region[i].size; > + if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { > + break; > + } > + } > + > + return (i < rgn->cnt) ? 
i : -1; > +} > + > +unsigned long __init > +lmb_alloc(unsigned long size, unsigned long align) > +{ > + return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); > +} > + > +unsigned long __init > +lmb_alloc_base(unsigned long size, unsigned long align, unsigned long > max_addr) > +{ > + long i, j; > + unsigned long base = 0; > + > + for (i=lmb.memory.cnt-1; i >= 0; i--) { > + unsigned long lmbbase = lmb.memory.region[i].base; > + unsigned long lmbsize = lmb.memory.region[i].size; > + > + if ( max_addr == LMB_ALLOC_ANYWHERE ) > + base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); > + else if ( lmbbase < max_addr ) > + base = > _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, > + align); > + else > + continue; > + > + while ( (lmbbase <= base) && > + ((j = > lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { > + base = > _ALIGN_DOWN(lmb.reserved.region[j].base-size, > + align); > + } > + > + if ( (base != 0) && (lmbbase <= base) ) > + break; > + } > + > + if ( i < 0 ) > + return 0; > + > + lmb_add_region(&lmb.reserved, base, size); > + > + return base; > +} > + > +/* You must call lmb_analyze() before this. */ > +unsigned long __init > +lmb_phys_mem_size(void) > +{ > + return lmb.memory.size; > +} > + > +unsigned long __init > +lmb_end_of_DRAM(void) > +{ > + int idx = lmb.memory.cnt - 1; > + > + return (lmb.memory.region[idx].base + > lmb.memory.region[idx].size); > +} > + > +/* > + * Truncate the lmb list to memory_limit if it's set > + * You must call lmb_analyze() after this. > + */ > +void __init lmb_enforce_memory_limit(void) > +{ > + extern unsigned long memory_limit; > + unsigned long i, limit; > + > + if (! 
memory_limit) > + return; > + > + limit = memory_limit; > + for (i = 0; i < lmb.memory.cnt; i++) { > + if (limit > lmb.memory.region[i].size) { > + limit -= lmb.memory.region[i].size; > + continue; > + } > + > + lmb.memory.region[i].size = limit; > + lmb.memory.cnt = i + 1; > + break; > + } > +} > diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile > --- a/arch/ppc64/kernel/Makefile > +++ b/arch/ppc64/kernel/Makefile > @@ -76,3 +76,4 @@ endif > > # These are here while we do the architecture merge > vecemu-y += ../../powerpc/kernel/vecemu.o > +lmb-y += ../../powerpc/mm/lmb.o > diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c > deleted file mode 100644 > --- a/arch/ppc64/kernel/lmb.c > +++ /dev/null > @@ -1,299 +0,0 @@ > -/* > - * Procedures for interfacing to Open Firmware. > - * > - * Peter Bergner, IBM Corp. June 2001. > - * Copyright (C) 2001 Peter Bergner. > - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of the GNU General Public License > - * as published by the Free Software Foundation; either version > - * 2 of the License, or (at your option) any later version. 
> - */ > - > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > - > -struct lmb lmb; > - > -#undef DEBUG > - > -void lmb_dump_all(void) > -{ > -#ifdef DEBUG > - unsigned long i; > - > - udbg_printf("lmb_dump_all:\n"); > - udbg_printf(" memory.cnt = 0x%lx\n", > - lmb.memory.cnt); > - udbg_printf(" memory.size = 0x%lx\n", > - lmb.memory.size); > - for (i=0; i < lmb.memory.cnt ;i++) { > - udbg_printf(" memory.region[0x%x].base = > 0x%lx\n", > - i, lmb.memory.region[i].base); > - udbg_printf(" .size = 0x%lx\n", > - lmb.memory.region[i].size); > - } > - > - udbg_printf("\n reserved.cnt = 0x%lx\n", > - lmb.reserved.cnt); > - udbg_printf(" reserved.size = 0x%lx\n", > - lmb.reserved.size); > - for (i=0; i < lmb.reserved.cnt ;i++) { > - udbg_printf(" reserved.region[0x%x].base = > 0x%lx\n", > - i, lmb.reserved.region[i].base); > - udbg_printf(" .size = 0x%lx\n", > - lmb.reserved.region[i].size); > - } > -#endif /* DEBUG */ > -} > - > -static unsigned long __init > -lmb_addrs_overlap(unsigned long base1, unsigned long size1, > - unsigned long base2, unsigned long size2) > -{ > - return ((base1 < (base2+size2)) && (base2 < (base1+size1))); > -} > - > -static long __init > -lmb_addrs_adjacent(unsigned long base1, unsigned long size1, > - unsigned long base2, unsigned long size2) > -{ > - if (base2 == base1 + size1) > - return 1; > - else if (base1 == base2 + size2) > - return -1; > - > - return 0; > -} > - > -static long __init > -lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, > unsigned > long r2) > -{ > - unsigned long base1 = rgn->region[r1].base; > - unsigned long size1 = rgn->region[r1].size; > - unsigned long base2 = rgn->region[r2].base; > - unsigned long size2 = rgn->region[r2].size; > - > - return lmb_addrs_adjacent(base1, size1, base2, size2); > -} > - > -/* Assumption: base addr of region 1 < base addr of region 2 */ > -static void __init > -lmb_coalesce_regions(struct lmb_region *rgn, unsigned 
long r1, > unsigned > long r2) > -{ > - unsigned long i; > - > - rgn->region[r1].size += rgn->region[r2].size; > - for (i=r2; i < rgn->cnt-1; i++) { > - rgn->region[i].base = rgn->region[i+1].base; > - rgn->region[i].size = rgn->region[i+1].size; > - } > - rgn->cnt--; > -} > - > -/* This routine called with relocation disabled. */ > -void __init > -lmb_init(void) > -{ > - /* Create a dummy zero size LMB which will get coalesced away > later. > - * This simplifies the lmb_add() code below... > - */ > - lmb.memory.region[0].base = 0; > - lmb.memory.region[0].size = 0; > - lmb.memory.cnt = 1; > - > - /* Ditto. */ > - lmb.reserved.region[0].base = 0; > - lmb.reserved.region[0].size = 0; > - lmb.reserved.cnt = 1; > -} > - > -/* This routine called with relocation disabled. */ > -void __init > -lmb_analyze(void) > -{ > - int i; > - > - lmb.memory.size = 0; > - > - for (i = 0; i < lmb.memory.cnt; i++) > - lmb.memory.size += lmb.memory.region[i].size; > -} > - > -/* This routine called with relocation disabled. */ > -static long __init > -lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned > long size) > -{ > - unsigned long i, coalesced = 0; > - long adjacent; > - > - /* First try and coalesce this LMB with another. */ > - for (i=0; i < rgn->cnt; i++) { > - unsigned long rgnbase = rgn->region[i].base; > - unsigned long rgnsize = rgn->region[i].size; > - > - adjacent = > lmb_addrs_adjacent(base,size,rgnbase,rgnsize); > - if ( adjacent > 0 ) { > - rgn->region[i].base -= size; > - rgn->region[i].size += size; > - coalesced++; > - break; > - } > - else if ( adjacent < 0 ) { > - rgn->region[i].size += size; > - coalesced++; > - break; > - } > - } > - > - if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { > - lmb_coalesce_regions(rgn, i, i+1); > - coalesced++; > - } > - > - if ( coalesced ) { > - return coalesced; > - } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { > - return -1; > - } > - > - /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ > - for (i=rgn->cnt-1; i >= 0; i--) { > - if (base < rgn->region[i].base) { > - rgn->region[i+1].base = rgn->region[i].base; > - rgn->region[i+1].size = rgn->region[i].size; > - } else { > - rgn->region[i+1].base = base; > - rgn->region[i+1].size = size; > - break; > - } > - } > - rgn->cnt++; > - > - return 0; > -} > - > -/* This routine called with relocation disabled. */ > -long __init > -lmb_add(unsigned long base, unsigned long size) > -{ > - struct lmb_region *_rgn = &(lmb.memory); > - > - /* On pSeries LPAR systems, the first LMB is our RMO region. */ > - if ( base == 0 ) > - lmb.rmo_size = size; > - > - return lmb_add_region(_rgn, base, size); > - > -} > - > -long __init > -lmb_reserve(unsigned long base, unsigned long size) > -{ > - struct lmb_region *_rgn = &(lmb.reserved); > - > - return lmb_add_region(_rgn, base, size); > -} > - > -long __init > -lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, > unsigned long size) > -{ > - unsigned long i; > - > - for (i=0; i < rgn->cnt; i++) { > - unsigned long rgnbase = rgn->region[i].base; > - unsigned long rgnsize = rgn->region[i].size; > - if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { > - break; > - } > - } > - > - return (i < rgn->cnt) ? 
i : -1; > -} > - > -unsigned long __init > -lmb_alloc(unsigned long size, unsigned long align) > -{ > - return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); > -} > - > -unsigned long __init > -lmb_alloc_base(unsigned long size, unsigned long align, unsigned long > max_addr) > -{ > - long i, j; > - unsigned long base = 0; > - > - for (i=lmb.memory.cnt-1; i >= 0; i--) { > - unsigned long lmbbase = lmb.memory.region[i].base; > - unsigned long lmbsize = lmb.memory.region[i].size; > - > - if ( max_addr == LMB_ALLOC_ANYWHERE ) > - base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); > - else if ( lmbbase < max_addr ) > - base = > _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, align); > - else > - continue; > - > - while ( (lmbbase <= base) && > - ((j = > lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { > - base = > _ALIGN_DOWN(lmb.reserved.region[j].base-size, align); > - } > - > - if ( (base != 0) && (lmbbase <= base) ) > - break; > - } > - > - if ( i < 0 ) > - return 0; > - > - lmb_add_region(&lmb.reserved, base, size); > - > - return base; > -} > - > -/* You must call lmb_analyze() before this. */ > -unsigned long __init > -lmb_phys_mem_size(void) > -{ > - return lmb.memory.size; > -} > - > -unsigned long __init > -lmb_end_of_DRAM(void) > -{ > - int idx = lmb.memory.cnt - 1; > - > - return (lmb.memory.region[idx].base + > lmb.memory.region[idx].size); > -} > - > -/* > - * Truncate the lmb list to memory_limit if it's set > - * You must call lmb_analyze() after this. > - */ > -void __init lmb_enforce_memory_limit(void) > -{ > - extern unsigned long memory_limit; > - unsigned long i, limit; > - > - if (! 
memory_limit) > - return; > - > - limit = memory_limit; > - for (i = 0; i < lmb.memory.cnt; i++) { > - if (limit > lmb.memory.region[i].size) { > - limit -= lmb.memory.region[i].size; > - continue; > - } > - > - lmb.memory.region[i].size = limit; > - lmb.memory.cnt = i + 1; > - break; > - } > -} > diff --git a/include/asm-powerpc/lmb.h b/include/asm-powerpc/lmb.h > new file mode 100644 > --- /dev/null > +++ b/include/asm-powerpc/lmb.h > @@ -0,0 +1,78 @@ > +#ifndef _ASM_POWERPC_LMB_H > +#define _ASM_POWERPC_LMB_H > + > +/* > + * Low-level Memory Block management. > + * > + * Copyright (C) 2001 Peter Bergner, IBM Corp. > + */ > + > +#include > +#include > + > +#define MAX_LMB_REGIONS 128 > + > +#define LMB_ALLOC_ANYWHERE 0 > + > +struct lmb_block { > + unsigned long base; > + unsigned long size; > +}; > + > +struct lmb_region { > + unsigned long cnt; > + unsigned long size; > + struct lmb_block region[MAX_LMB_REGIONS+1]; > +}; > + > +struct lmb { > + unsigned long debug; > + unsigned long rmo_size; > + struct lmb_region memory; > + struct lmb_region reserved; > +}; > + > +extern struct lmb lmb; > + > +extern void __init lmb_init(void); > +extern void __init lmb_analyze(void); > +extern long __init lmb_add(unsigned long, unsigned long); > +extern long __init lmb_reserve(unsigned long, unsigned long); > +extern unsigned long __init lmb_alloc(unsigned long, unsigned long); > +extern unsigned long __init lmb_alloc_base(unsigned long, unsigned > long, > + unsigned long); > +extern unsigned long __init lmb_phys_mem_size(void); > +extern unsigned long __init lmb_end_of_DRAM(void); > +extern unsigned long __init lmb_abs_to_phys(unsigned long); > +extern void __init lmb_enforce_memory_limit(void); > + > +extern void lmb_dump_all(void); > + > +extern unsigned long io_hole_start; > + > +static inline unsigned long > +lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) > +{ > + return type->region[region_nr].size; > +} > + > +static inline unsigned long > 
+lmb_size_pages(struct lmb_region *type, unsigned long region_nr) > +{ > + return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; > +} > + > +static inline unsigned long > +lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) > +{ > + return type->region[region_nr].base >> PAGE_SHIFT; > +} > + > +static inline unsigned long > +lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) > +{ > + return lmb_start_pfn(type, region_nr) + > + lmb_size_pages(type, region_nr); > +} > + > +#endif /* _ASM_POWERPC_LMB_H */ > diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h > --- a/include/asm-ppc/page.h > +++ b/include/asm-ppc/page.h > @@ -77,8 +77,12 @@ typedef unsigned long pgprot_t; > #endif > > > +/* align addr on a size boundary - adjust address up/down if > needed */ > +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) > +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) > + > /* align addr on a size boundary - adjust address up if needed -- > Cort > */ > -#define _ALIGN(addr,size) (((addr)+(size)-1)&(~((size)-1))) > +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) > > /* to align the pointer to the (next) page boundary */ > #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) > > > _______________________________________________ > Linuxppc64-dev mailing list > Linuxppc64-dev at ozlabs.org > https://ozlabs.org/mailman/listinfo/linuxppc64-dev > From paulus at samba.org Wed Oct 5 21:11:43 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 5 Oct 2005 21:11:43 +1000 Subject: [PATCH 1/7] ppc64: EEH typos, include files, macros, whitespace In-Reply-To: <20050930005141.GA6173@austin.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930005141.GA6173@austin.ibm.com> Message-ID: <17219.46319.501091.93202@cargo.ozlabs.ibm.com> Linas writes: > 01-eeh-minor-cleanup.patch Some trivial comments on a trivial patch... 
:) > - printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", > - pci_name(dev)); > + printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); This makes the line go over 80 columns, which seems unnecessary. > - * @token i/o token, should be address in the form 0xE.... > + * @token i/o token, should be address in the form 0xA.... I think the virtual addresses we get from ioremap these days start with 0xD00008... Regards, Paul. From paulus at samba.org Wed Oct 5 21:23:11 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 5 Oct 2005 21:23:11 +1000 Subject: [PATCH 6/7] ppc64: EEH Avoid racing reports of errors In-Reply-To: <20050930010038.GF6173@austin.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930010038.GF6173@austin.ibm.com> Message-ID: <17219.47007.44643.148022@cargo.ozlabs.ibm.com> Linas writes: > 06-eeh-report-race.patch > +/** Mark all devices that are peers of this device as failed. > + * Mark the device driver too, so that it can see the failure > + * immediately; this is critical, since some drivers poll > + * status registers in interrupts ... If a driver is polling, > + * and the slot is frozen, then the driver can deadlock in > + * an interrupt context, which is bad. > + */ > + > +static inline void __eeh_mark_slot (struct device_node *dn) > +{ > + while (dn) { > + PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED; > + > + if (dn->child) > + __eeh_mark_slot (dn->child); > + dn = dn->sibling; > + } > +} So this does the device node that we pass in, plus all the nodes that come after it in its parent's list of children. On that basis I expected you to pass in the first child of the EADS bridge, but I see: > + pe_dn = find_device_pe (dn); > + __eeh_mark_slot (pe_dn); My understanding is that pe_dn will end up pointing to the device node for the EADS bridge. Shouldn't you pass in pe_dn->child here, or alternatively rearrange __eeh_mark_slot to do the node you give it plus its children (recursively)? 
Two other comments about __eeh_mark_slot: (1) despite the comment, the function doesn't do anything to any pci_dev or pci_driver (not that it should be touching any pci_driver), and (2) a recursive function can't really be inline (unless gcc is smart enough to turn arbitrary recursive functions into iterative functions, which I doubt :). Regards, Paul. From paulus at samba.org Wed Oct 5 21:14:58 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 5 Oct 2005 21:14:58 +1000 Subject: [PATCH 3/7] ppc64: EEH Add event/internal state statistics In-Reply-To: <20050930005451.GC6173@austin.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930005451.GC6173@austin.ibm.com> Message-ID: <17219.46514.903283.21680@cargo.ozlabs.ibm.com> Linas writes: > 03-eeh-statistics.patch > + if (!dn) { > + __get_cpu_var(no_dn)++; We have to make sure we are not preemptible when we use __get_cpu_var, since it uses smp_processor_id(). It's not clear to me that we have ensured that in every case where we use __get_cpu_var. Are you sure that we hold a spinlock, or are at interrupt level, or have explicitly disabled preemption at every point where we use __get_cpu_var? Regards, Paul. From arnd at arndb.de Wed Oct 5 23:07:41 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Wed, 5 Oct 2005 15:07:41 +0200 Subject: spufs: User space thread library In-Reply-To: <4341D861.2050306@am.sony.com> References: <200509160840.31071.arnd@arndb.de> <200509300014.21756.arnd@arndb.de> <4341D861.2050306@am.sony.com> Message-ID: <200510051507.42252.arnd@arndb.de> On Dinsdag 04 Oktober 2005 03:18, Geoff Levand wrote: > > OK, I set up an autoconf based build system. I moved some things > around to make coding the makefiles easier. I also found a conflict > with the system's spe.h, so renamed that file. We have found the same problem independently and already changed to 'libspe.h'. The contents of the current 'mfc.h' will be merged into that as well.
> I put the results here: > > http://tree.celinuxforum.org/downloads/libspe-0.9-autoconf-05.10.03.18.01.19.tar.bz2 > > Its just a first cut. I think more work is needed in setting up > the spu compiler. Also, more work is needed to make the scripts > in the tools directory use the host's cross toolchain. Actually, I thought of using only autoconf, but not automake and libtool. We don't really have a complicated setup, so I'd prefer to avoid the extra overhead from those tools. Do you see a strong reason to use them? Arnd <>< From geoffrey.levand at am.sony.com Thu Oct 6 01:03:44 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Wed, 05 Oct 2005 08:03:44 -0700 Subject: spufs: User space thread library In-Reply-To: <200510051507.42252.arnd@arndb.de> References: <200510051507.42252.arnd@arndb.de> Message-ID: <4343EB50.1080202@am.sony.com> Arnd Bergmann wrote: > On Dinsdag 04 Oktober 2005 03:18, Geoff Levand wrote: > >>OK, I set up an autoconf based build system. I moved some things >>around to make coding the makefiles easier. I also found a conflict >>with the system's spe.h, so renamed that file. > > > We have found the same problem independently and already changed to > 'libspe.h'. The contents of the current 'mfc.h' will be merged into > that as well. > > >>I put the results here: >> >> > > http://tree.celinuxforum.org/downloads/libspe-0.9-autoconf-05.10.03.18.0 > 1.19.tar.bz2 > >>Its just a first cut. I think more work is needed in setting up >>the spu compiler. Also, more work is needed to make the scripts >>in the tools directory use the host's cross toolchain. > > > Actually, I thought of using only autoconf, but not automake and > libtool. > We don't really have a complicated setup, so I'd prefer to avoid the > extra overhead from those tools. > > Do you see a strong reason to use them? > It makes maintenance of both the build system and the distribution packaging easier by providing higher level abstractions and standard behavior.
There are weaknesses with automake and libtool, but those are well known. At any rate, its your package, just use what you find useful. What 'extra overhead' are you concerned about? -Geoff From vatsa at in.ibm.com Thu Oct 6 03:52:44 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Wed, 5 Oct 2005 23:22:44 +0530 Subject: [PATCH] Let any CPU update xtime/jiffies Message-ID: <20051005175244.GA3844@in.ibm.com> Currently, only boot CPU updates xtime/jiffies _and_ invokes ppc_adjtimex during decrementer interrupt. This makes it difficult for the boot cpu to skip ticks when idle. Following patch changes that behavior so that any CPU can now update xtime/jiffies. Notes: - This was tested against 2.6.14-rc1 on a 4way Power4 (p630) box alongwith the NO_IDLE_HZ patch. - This patch does not serialize ppc_adjtimex. Do we need a spinlock to serialize it? Signed-off-by : Srivatsa Vaddagiri --- linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c | 24 ++++++++++++------------ 1 files changed, 12 insertions(+), 12 deletions(-) diff -puN arch/ppc64/kernel/time.c~boot_cpu_fix arch/ppc64/kernel/time.c --- linux-2.6.14-rc1/arch/ppc64/kernel/time.c~boot_cpu_fix 2005-10-05 15:12:37.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c 2005-10-05 16:37:16.000000000 +0530 @@ -342,21 +342,21 @@ int timer_interrupt(struct pt_regs * reg */ if (!cpu_is_offline(cpu)) update_process_times(user_mode(regs)); - /* - * No need to check whether cpu is offline here; boot_cpuid - * should have been fixed up by now. 
- */ - if (cpu == boot_cpuid) { - write_seqlock(&xtime_lock); - tb_last_stamp = lpaca->next_jiffy_update_tb; - timer_recalc_offset(lpaca->next_jiffy_update_tb); + + write_seqlock(&xtime_lock); + cur_tb = get_tb(); + if (cur_tb - tb_last_stamp > tb_ticks_per_jiffy) { + tb_last_stamp += tb_ticks_per_jiffy; + timer_recalc_offset(cur_tb); do_timer(regs); - timer_sync_xtime(lpaca->next_jiffy_update_tb); + timer_sync_xtime(cur_tb); timer_check_rtc(); - write_sequnlock(&xtime_lock); - if ( adjusting_time && (time_adjust == 0) ) - ppc_adjtimex(); } + write_sequnlock(&xtime_lock); + /* Fixme: This needs to be serialized as well */ + if ( adjusting_time && (time_adjust == 0) ) + ppc_adjtimex(); + lpaca->next_jiffy_update_tb += tb_ticks_per_jiffy; } _ -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From vatsa at in.ibm.com Thu Oct 6 04:01:42 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Wed, 5 Oct 2005 23:31:42 +0530 Subject: [PATCH] NO_IDLE_HZ implementation for ppc64 - v2 Message-ID: <20051005180142.GB3844@in.ibm.com> Ben, Here's the revised version, taking into account some of the comments you had. Changes since last time: - native_idle also converted over - Fixed a bug in calculation of next_dec in stop_hz_timer - Removed call to start_hz_timer from head.S - Added a call to start_hz_timer in performance_monitor_exception This has been tested against 2.6.14-rc1 on a 4way Power4 box (p630) with some additional patch (the same test patch I had sent earlier which showed decrementer statistics in /proc). I will rebase this patch against latest -mm if you think this is in the right direction. 
Signed-off-by: Srivatsa Vaddagiri --- linux-2.6.14-rc1-root/arch/ppc64/Kconfig | 6 linux-2.6.14-rc1-root/arch/ppc64/kernel/idle.c | 6 linux-2.6.14-rc1-root/arch/ppc64/kernel/irq.c | 3 linux-2.6.14-rc1-root/arch/ppc64/kernel/pSeries_setup.c | 10 - linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c | 112 ++++++++++++++-- linux-2.6.14-rc1-root/arch/ppc64/kernel/traps.c | 2 linux-2.6.14-rc1-root/include/asm-ppc64/time.h | 8 + linux-2.6.14-rc1-root/kernel/sysctl.c | 20 +- 8 files changed, 141 insertions(+), 26 deletions(-) diff -puN arch/ppc64/kernel/time.c~ppc64 arch/ppc64/kernel/time.c --- linux-2.6.14-rc1/arch/ppc64/kernel/time.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c 2005-10-05 16:34:51.000000000 +0530 @@ -315,23 +315,13 @@ static void iSeries_tb_recal(void) unsigned long tb_last_stamp __cacheline_aligned_in_smp; -/* - * timer_interrupt - gets called when the decrementer overflows, - * with interrupts disabled. - */ -int timer_interrupt(struct pt_regs * regs) +static void account_ticks(struct pt_regs *regs) { int next_dec; unsigned long cur_tb; struct paca_struct *lpaca = get_paca(); unsigned long cpu = smp_processor_id(); - irq_enter(); - - profile_tick(CPU_PROFILING, regs); - - lpaca->lppaca.int_dword.fields.decr_int = 0; - while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) { /* * We cannot disable the decrementer, so in the period @@ -364,6 +354,43 @@ int timer_interrupt(struct pt_regs * reg if (next_dec > lpaca->default_decr) next_dec = lpaca->default_decr; set_dec(next_dec); +} + +#ifdef CONFIG_NO_IDLE_HZ +/* Returns 1 if this CPU was set in the mask */ +static inline int clear_hzless_mask(void) +{ + unsigned long cpu = smp_processor_id(); + int rc = 0; + + if (unlikely(cpu_isset(cpu, nohz_cpu_mask))) { + cpu_clear(cpu, nohz_cpu_mask); + rc = 1; + } + + return rc; +} +#else +static inline int clear_hzless_mask(void) { return 0;} +#endif + +/* + * timer_interrupt - gets called when the decrementer 
overflows, + * with interrupts disabled. + */ +int timer_interrupt(struct pt_regs * regs) +{ + struct paca_struct *lpaca = get_paca(); + + irq_enter(); + + clear_hzless_mask(); + + profile_tick(CPU_PROFILING, regs); + + lpaca->lppaca.int_dword.fields.decr_int = 0; + + account_ticks(regs); #ifdef CONFIG_PPC_ISERIES if (hvlpevent_is_pending()) @@ -381,6 +408,69 @@ int timer_interrupt(struct pt_regs * reg return 1; } +#ifdef CONFIG_NO_IDLE_HZ + +#define MAX_DEC_COUNT (UINT_MAX) /* Decrementer is 32-bit */ +#define MIN_SKIP 2 +#define MAX_SKIP (MAX_DEC_COUNT/tb_ticks_per_jiffy) + +int sysctl_hz_timer = 1; + +/* Avoid the HZ timer (decrementer) interrupt on this CPU for "some" time. + * Has to be called with interrupts disabled. + * + * The HZ timer frequency is restored upon the occurence of an interrupt or + * exception on this CPU. Caller has to ensure that the CPU doesnt exit + * idle mode via other means. + */ +void stop_hz_timer(void) +{ + unsigned long cpu = smp_processor_id(), seq, delta; + int next_dec; + + if (sysctl_hz_timer != 0) + return; + + cpu_set(cpu, nohz_cpu_mask); + mb(); + if (rcu_pending(cpu) || local_softirq_pending()) { + cpu_clear(cpu, nohz_cpu_mask); + return; + } + + do { + seq = read_seqbegin(&xtime_lock); + + delta = next_timer_interrupt() - jiffies; + + if (delta < MIN_SKIP) { + cpu_clear(cpu, nohz_cpu_mask); + return; + } + + if (delta > MAX_SKIP) + delta = MAX_SKIP; + + next_dec = tb_last_stamp + delta * tb_ticks_per_jiffy; + + } while (read_seqretry(&xtime_lock, seq)); + + next_dec -= get_tb(); + set_dec(next_dec); + + return; +} + +/* Take into account skipped ticks and restore the HZ timer frequency */ +void start_hz_timer(struct pt_regs *regs) +{ + if (clear_hzless_mask()) + account_ticks(regs); +} + +#endif /* CONFIG_NO_IDLE_HZ */ + + /* * Scheduler clock - returns current time in nanosec units. 
* diff -puN arch/ppc64/kernel/irq.c~ppc64 arch/ppc64/kernel/irq.c --- linux-2.6.14-rc1/arch/ppc64/kernel/irq.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/irq.c 2005-10-05 16:33:06.000000000 +0530 @@ -55,6 +55,7 @@ #include #include #include +#include #ifdef CONFIG_SMP extern void iSeries_smp_message_recv( struct pt_regs * ); @@ -313,6 +314,8 @@ void do_IRQ(struct pt_regs *regs) irq_enter(); + start_hz_timer(regs); + #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 2KB free? */ { diff -puN include/asm-ppc64/time.h~ppc64 include/asm-ppc64/time.h --- linux-2.6.14-rc1/include/asm-ppc64/time.h~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/include/asm-ppc64/time.h 2005-10-05 16:33:06.000000000 +0530 @@ -102,6 +102,14 @@ static inline unsigned long tb_ticks_sin return get_tb() - tstamp; } +#ifdef CONFIG_NO_IDLE_HZ +extern void stop_hz_timer(void); +extern void start_hz_timer(struct pt_regs *); +#else +static inline void stop_hz_timer(void) { } +static inline void start_hz_timer(struct pt_regs *regs) { } +#endif + #define mulhwu(x,y) \ ({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;}) #define mulhdu(x,y) \ diff -puN arch/ppc64/Kconfig~ppc64 arch/ppc64/Kconfig --- linux-2.6.14-rc1/arch/ppc64/Kconfig~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/Kconfig 2005-10-05 16:33:06.000000000 +0530 @@ -146,6 +146,12 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. +config NO_IDLE_HZ + depends on EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_MAPLE) + bool "No HZ timer ticks in idle" + help + Switches the HZ timer interrupts off when a CPU is idle. 
+ config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on PPC_MULTIPLATFORM && EXPERIMENTAL diff -puN kernel/sysctl.c~ppc64 kernel/sysctl.c --- linux-2.6.14-rc1/kernel/sysctl.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/kernel/sysctl.c 2005-10-05 16:33:06.000000000 +0530 @@ -544,6 +544,16 @@ static ctl_table kern_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, +#ifdef CONFIG_NO_IDLE_HZ + { + .ctl_name = KERN_HZ_TIMER, + .procname = "hz_timer", + .data = &sysctl_hz_timer, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_ARCH_S390 #ifdef CONFIG_MATHEMU { @@ -555,16 +565,6 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_NO_IDLE_HZ - { - .ctl_name = KERN_HZ_TIMER, - .procname = "hz_timer", - .data = &sysctl_hz_timer, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif { .ctl_name = KERN_S390_USER_DEBUG_LOGGING, .procname = "userprocess_debug", diff -puN arch/ppc64/kernel/pSeries_setup.c~ppc64 arch/ppc64/kernel/pSeries_setup.c --- linux-2.6.14-rc1/arch/ppc64/kernel/pSeries_setup.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/pSeries_setup.c 2005-10-05 16:33:06.000000000 +0530 @@ -475,9 +475,10 @@ static inline void dedicated_idle_sleep( * a prod occurs. Returning from the cede enables external * interrupts. */ - if (!need_resched()) + if (!need_resched()) { + stop_hz_timer(); cede_processor(); - else + } else local_irq_enable(); } else { /* @@ -570,9 +571,10 @@ static int pseries_shared_idle(void) * Check need_resched() again with interrupts disabled * to avoid a race. 
*/ - if (!need_resched()) + if (!need_resched()) { + stop_hz_timer(); cede_processor(); - else + } else local_irq_enable(); HMT_medium(); diff -puN arch/ppc64/kernel/traps.c~ppc64 arch/ppc64/kernel/traps.c --- linux-2.6.14-rc1/arch/ppc64/kernel/traps.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/traps.c 2005-10-05 16:33:06.000000000 +0530 @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUGGER int (*__debugger)(struct pt_regs *regs); @@ -470,6 +471,7 @@ extern perf_irq_t perf_irq; void performance_monitor_exception(struct pt_regs *regs) { + start_hz_timer(regs); perf_irq(regs); } diff -puN arch/ppc64/kernel/idle.c~ppc64 arch/ppc64/kernel/idle.c --- linux-2.6.14-rc1/arch/ppc64/kernel/idle.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/idle.c 2005-10-05 16:33:06.000000000 +0530 @@ -73,8 +73,12 @@ int native_idle(void) while (1) { ppc64_runlatch_off(); - if (!need_resched()) + local_irq_disable(); + if (!need_resched()) { + stop_hz_timer(); + local_irq_enable(); power4_idle(); + } if (need_resched()) { ppc64_runlatch_on(); _ -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From paubert at iram.es Thu Oct 6 04:20:31 2005 From: paubert at iram.es (Gabriel Paubert) Date: Wed, 5 Oct 2005 20:20:31 +0200 Subject: [PATCH] powerpc: improved byte swapping functions In-Reply-To: <20050927211534.GA32173@iram.es> References: <20050927211534.GA32173@iram.es> Message-ID: <20051005182031.GA15359@iram.es> From: Gabriel Paubert The previous versions of ___arch__swab16 and ___arch__swab32 were not optimal. In most cases the code can be made shorter and faster with this patch. Signed-off-by: Gabriel Paubert --- Additional notes: 1) for ___arch__swab16, the trick is to let the compiler generate a single rlwinm instruction for the final right shift and cast. 
2) For ___arch_swab32, the rotated value passed as a parameter already has 2 bytes at the right place, so only 2 rlwimi instructions are necessary to complete the byte swap. 3) edit if you don't like the formatting of the result. 4) I've been reading the thread about how to format patches and I hope that I got it right. But I believe that the diffstat output is overkill for such a small patch. Regards, Gabriel diff --git a/include/asm-powerpc/byteorder.h b/include/asm-powerpc/byteorder.h --- a/include/asm-powerpc/byteorder.h +++ b/include/asm-powerpc/byteorder.h @@ -42,23 +42,22 @@ static __inline__ void st_le32(volatile static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 value) { - __u16 result; + __u32 tmp; - __asm__("rlwimi %0,%1,8,16,23" - : "=r" (result) - : "r" (value), "0" (value >> 8)); - return result; + __asm__("rlwimi %0,%0,16,8,15" + : "=r" (tmp) : "0" (value)); + return (__u16)(tmp>>8); } static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 value) { __u32 result; - __asm__("rlwimi %0,%1,24,16,23\n\t" - "rlwimi %0,%1,8,8,15\n\t" - "rlwimi %0,%1,24,0,7" + __asm__( +" rlwimi %0,%1,24,16,23\n" +" rlwimi %0,%1,24,0,7\n" : "=r" (result) - : "r" (value), "0" (value >> 24)); + : "r" (value), "0" ((value >> 24)|(value<<8))); return result; } From geoffrey.levand at am.sony.com Thu Oct 6 07:06:15 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Wed, 05 Oct 2005 14:06:15 -0700 Subject: kgdb for ppc64 now available In-Reply-To: <20050930063234.GA4763@krispykreme> References: <433C22EF.8050007@am.sony.com> <20050930063234.GA4763@krispykreme> Message-ID: <43444047.2030008@am.sony.com> Anton Blanchard wrote: > Hi, > >>For those interested, kgdb now supports ppc64 SMP. On powermac only >>an Ethernet connection is supported currently, but generic 8250 UART >>PCI cards will be supported soon. > > > Nice! 
A few comments: > > + { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ > + { 0x0400, 0x0a /* SIGBUS */ }, /* instruction access */ > > 0x380 data segment miss is not in the list of traps. > OK, I put it in as such: { 0x0380, 0x0b /* SIGSEGV */ }, /* data SLB access */ I guess this is better than the default SIGHUP, unless you think there's a better one to map to. > + return kgdb_handle_exception(0, computeSignal(regs->trap), 0, regs); > > We should use TRAP(regs) instead of regs->trap since the bottom bits may > not be zero for an exception that only saves a partial register set. Thanks. > @@ -2,7 +2,7 @@ > # Makefile for the linux ppc64 kernel. > # > > -EXTRA_CFLAGS += -mno-minimal-toc > +#EXTRA_CFLAGS += -mno-minimal-toc > extra-y := head.o vmlinux.lds > > obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ > > Is there a problem with compiling arch/ppc64/kernel -mno-minimal-toc? There doesn't seem to be any reason for this. I think it just slipped in when someone was hunting for a bug... > I notice x86-64 is using the new die hooks, I'm about to convert xmon > over to it and once that is done it will make sense to move kgdb and kdb > across. Please let me know when you've done this so I can see what's needed. > In regs_to_gdb_regs it would be nice to send out the dar, dsisr and > perhaps softe (used on iseries and will be useful once we go to soft > interrupt disable on pseries). Even though we already get the trap > number delivered to gdb via the signal number, it would be useful to dump > it in the regs since the bottom bits are important (to work out if > only a partial register set has been saved). That leaves orig_gpr3 and > result as the only pt_regs not dumped. gdb as of version 6.3 doesn't support these regs. There would need to be changes there to support these. If there is a real demand, I don't think it would be much effort to do.
> > + /* vr registers not used by kernel, leave zero */ > + ptr += 64; > > Now that we use altivec in the kernel a bit, it might make sense to dump > these registers. Not sure yet. > > Index: linux-2.6.13/arch/ppc64/mm/fault.c > @@ -306,6 +307,13 @@ void bad_page_fault(struct pt_regs *regs > regs->nip = entry->fixup; > return; > } > +#ifdef CONFIG_KGDB > + if (atomic_read(&debugger_active) && kgdb_may_fault) { > + /* Restore our previous state. */ > + kgdb_fault_longjmp(kgdb_fault_jmp_regs); > + /* Not reached. */ > + } > +#endif > > We might need a new notify_die hook to cover this case. > > We could use hardware data breakpoint support in kgdb. > > Eventually it would be nice to be able to select debuggers at > boot/runtime. I have no current plans for these last items. Thanks for the comments, all very helpful. -Geoff From arnd at arndb.de Thu Oct 6 08:23:23 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Thu, 6 Oct 2005 00:23:23 +0200 Subject: spufs: User space thread library In-Reply-To: <4343EB50.1080202@am.sony.com> References: <200510051507.42252.arnd@arndb.de> <4343EB50.1080202@am.sony.com> Message-ID: <200510060023.23780.arnd@arndb.de> On Middeweken 05 Oktober 2005 17:03, Geoff Levand wrote: > > It makes maintenance of both the build system and the distribution > packaging easier by providing higher level abstractions and > standard behavior. There are weaknesses with automake and > libtool, but those are well known. At any rate, its your > package, just use what you find useful. > > What 'extra overhead' are you concerned about? > The overhead I see is in debugging the stuff when it doesn't work. Autoconf can be hard to debug already, but I think that's worth it because most users know how to run ./configure rather than having to look at the Makefiles. I definitely like your configure.ac file and the reordering of the files.
For the other autotools, I think they make work harder instead of easier for small projects, so I'd rather do simple Makefile.in files in the places that need configuration. BTW: Are you ok with licensing your configure.ac under LGPL instead of GPL or did you copy parts from some other GPL package? IMHO, the files in the package all need to have the same license for simplicity. Arnd <>< From geoffrey.levand at am.sony.com Thu Oct 6 08:48:22 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Wed, 05 Oct 2005 15:48:22 -0700 Subject: spufs: User space thread library In-Reply-To: <200510060023.23780.arnd@arndb.de> References: <200510051507.42252.arnd@arndb.de> <4343EB50.1080202@am.sony.com> <200510060023.23780.arnd@arndb.de> Message-ID: <43445836.2040205@am.sony.com> Arnd Bergmann wrote: > On Middeweken 05 Oktober 2005 17:03, Geoff Levand wrote: > >>It makes maintenance of both the build system and the distribution >>packaging easier by providing higher level abstractions and >>standard behavior. There are weaknesses with automake and >>libtool, but those are well known. At any rate, its your >>package, just use what you find useful. >> >>What 'extra overhead' are you concerned about? >> > > The overhead I see is in debugging the stuff when it doesn't work. > Autoconf can be hard to debug already, but I think that's worth it > because most users know how to run ./configure rather than having > to look at the Makefiles. I definitely like your configure.ac > file and the reordering of the files. > > For the other autotools, I think they make work harder instead of > easier for small projects, so I'd rather do simple Makefile.in > files in the places that need configuration. Sounds reasonable. > BTW: Are you ok with licensing your configure.ac under LGPL > instead of GPL or did you copy parts from some other GPL package? > IMHO, the files in the package all need to have the same license > for simplicity.
> LGPL is OK, I coded it from scratch, so feel free to change it to this license announcement. You can also then remove the file COPYING. ## configure.ac -- Process this file with autoconf to produce configure # # Copyright 2005 Sony Corp. # # This file is free software; you can redistribute it and/or # modify it under the terms of the GNU Library General Public # License as published by the Free Software Foundation; # version 2 of the License. # # This file is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Library General Public License for more details. # # You should have received a copy of the GNU Library General Public # License along with this file; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -Geoff From geoffrey.levand at am.sony.com Thu Oct 6 09:25:33 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Wed, 05 Oct 2005 16:25:33 -0700 Subject: install BOOTIMAGE Message-ID: <434460ED.9070605@am.sony.com> With the way the current arch/ppc64/boot/Makefile (2.6.13) calls install.sh, it seems 'make install' will always install vmlinux. It seems the proper behavior is for install.sh to install BOOTIMAGE, which for all but iSeries is a kind of zImage. Does this change below cause problems for anyone?
-Geoff # Arguments: # $1 - kernel version # $2 - kernel image file # $3 - kernel map file # $4 - default install path (blank if root directory) # $5 - kernel boot file, the zImage Index: linux-2.6.13/arch/ppc64/boot/install.sh =================================================================== --- linux-2.6.13.orig/arch/ppc64/boot/install.sh 2005-08-28 16:41:01.000000000 -0700 +++ linux-2.6.13/arch/ppc64/boot/install.sh 2005-10-05 15:26:52.000000000 -0700 @@ -38,5 +38,5 @@ mv $4/System.map $4/System.old fi -cat $2 > $4/$image_name +cat $5 > $4/$image_name cp $3 $4/System.map From olh at suse.de Thu Oct 6 16:38:23 2005 From: olh at suse.de (Olaf Hering) Date: Thu, 6 Oct 2005 08:38:23 +0200 Subject: install BOOTIMAGE In-Reply-To: <434460ED.9070605@am.sony.com> References: <434460ED.9070605@am.sony.com> Message-ID: <20051006063823.GA13103@suse.de> On Wed, Oct 05, Geoff Levand wrote: > With the way the current arch/ppc64/boot/Makefile (2.6.13) calls > install.sh, it seems 'make install' will always install vmlinux. > > It seems the proper behavior is for install.sh to install > BOOTIMAGE, which for all but iSeries is a kind of zImage. Does > this change below cause problems for anyone? I dont think booting the zimage from the local hard disk should be prefered over the plain vmlinux. -- short story of a lazy sysadmin: alias appserv=wotan From vatsa at in.ibm.com Thu Oct 6 20:30:59 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Thu, 6 Oct 2005 16:00:59 +0530 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 In-Reply-To: <1128378206.8267.102.camel@gaston> References: <20051002174630.GA6786@in.ibm.com> <1128326892.8267.89.camel@gaston> <20051003161851.GA4244@in.ibm.com> <1128378206.8267.102.camel@gaston> Message-ID: <20051006103058.GA12620@in.ibm.com> On Tue, Oct 04, 2005 at 08:23:26AM +1000, Benjamin Herrenschmidt wrote: > Why not just call the decrementer interrupt ? It should replay > already... 
The only concern about that is wrt update_process_times - it may account all skipped ticks towards irq context rather than to idle context. That is possible if an external irq (do_IRQ) wakes the CPU up (leading to irq_enter being called twice ..) This could lead to, say, wrong idle statistics in vmstat? -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From olh at suse.de Thu Oct 6 21:47:07 2005 From: olh at suse.de (Olaf Hering) Date: Thu, 6 Oct 2005 13:47:07 +0200 Subject: R_PPC64_TOC16 not handled in apply_relocate_add Message-ID: <20051006114707.GA23913@suse.de> The current toolchain (in opensuse) creates unhandled relocations for rtas_flash and scanlog: rtas_flash: Unknown ADD relocation: 47 I think it is type R_PPC64_TOC16. gcc version 4.0.2 20050901 (prerelease) (SUSE Linux) GNU ld version 2.16.91.0.2 20050720 (SuSE Linux) Any ideas how to handle them? -- short story of a lazy sysadmin: alias appserv=wotan From anton at samba.org Fri Oct 7 00:16:14 2005 From: anton at samba.org (Anton Blanchard) Date: Fri, 7 Oct 2005 00:16:1