From sfr at canb.auug.org.au Sat Oct 1 00:05:16 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 00:05:16 +1000 Subject: [PATCH 7/9] ppc64: simplify the build a little In-Reply-To: <20050930233602.138b6e27.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> Message-ID: <20051001000516.1d444d51.sfr@canb.auug.org.au> This adds arch/powerpc/kernel/ to core-y in arch/ppc64/Makefile so that we don't have to put in a special line in arch/ppc64/kernel/Makefile for each file we merge. We should be able to use a similar technique for other directories as we get to them. Signed-off-by: Stephen Rothwell --- arch/powerpc/Makefile | 1 - arch/powerpc/kernel/Makefile | 13 +++++++++---- arch/ppc64/Makefile | 2 +- arch/ppc64/kernel/Makefile | 11 ++--------- 4 files changed, 12 insertions(+), 15 deletions(-) -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ 28163804fe2135701522671bd8c3828e1aa0ce62 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -121,7 +121,6 @@ head-$(CONFIG_FSL_BOOKE) := arch/powerpc ifeq ($(CONFIG_PPC32),y) head-$(CONFIG_6xx) += arch/powerpc/kernel/idle_6xx.o -head-$(CONFIG_POWER4) += arch/powerpc/kernel/idle_power4.o head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,6 +2,10 @@ # Makefile for the linux kernel. 
# +ifeq ($(CONFIG_PPC64),y) +EXTRA_CFLAGS += -mno-minimal-toc +endif + extra-$(CONFIG_PPC_STD_MMU) := head.o extra_$(CONFIG_PPC64) := head_64.o extra-$(CONFIG_40x) := head_4xx.o @@ -9,12 +13,13 @@ extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-$(CONFIG_6xx) += idle_6xx.o -extra-$(CONFIG_POWER4) += idle_power4.o extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds -obj-y := semaphore.o process.o -obj-$(CONFIG_PPC32) += traps32.c -obj-$(CONFIG_PPC64) += traps64.c +obj-$(CONFIG_PPC32) := semaphore.o process.o +obj-$(CONFIG_PPC32) += traps32.o +obj-$(CONFIG_PPC64) += traps64.o idle_power4.o +ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_MODULES) += ppc_ksyms.o +endif obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -82,7 +82,7 @@ CFLAGS += $(call cc-option,-funit-at-a-t head-y := arch/ppc64/kernel/head.o libs-y += arch/ppc64/lib/ -core-y += arch/ppc64/kernel/ +core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ core-y += arch/ppc64/mm/ core-y += arch/powerpc/platforms/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -7,12 +7,12 @@ ifneq ($(CONFIG_PPC_MERGE),y) EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds -obj-y := setup.o entry.o traps64.o irq.o idle.o dma.o \ +obj-y := setup.o entry.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ align.o semaphore.o bitops.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ - lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ + lmb.o cputable.o cpu_setup_power4.o \ iommu.o sysfs.o vdso.o pmc.o firmware.o prom.o obj-y += vdso32/ vdso64/ @@ -66,7 +66,6 @@ obj-$(CONFIG_PPC_BPA) += pSeries_smp.o obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o endif 
-obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_KPROBES) += kprobes.o CFLAGS_ioctl32.o += -Ifs/ @@ -76,12 +75,6 @@ arch/ppc64/kernel/head.o: arch/powerpc/p AFLAGS_head.o += -Iarch/powerpc/platforms/iseries endif -# These are here while we do the architecture merge -vecemu-y += ../../powerpc/kernel/vecemu.o -vector-y += ../../powerpc/kernel/vector.o -idle_power4-y += ../../powerpc/kernel/idle_power4.o -traps64-y += ../../powerpc/kernel/traps64.o - else endif From sfr at canb.auug.org.au Sat Oct 1 00:10:05 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 00:10:05 +1000 Subject: [PATCH 8/9] powerpc: make iSeries build In-Reply-To: <20050930233602.138b6e27.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> Message-ID: <20051001001005.348d7798.sfr@canb.auug.org.au> Also Merge vmlinux.lds.S and remove arch/powerpc/kernel/vmlinux.lds which is a generated file. The merge of vmlinux.lds.S would be much cleaner if it is clear that putting the ..start/end symbols inside the section definitions is OK on ppc32. 
Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 6 + arch/powerpc/Makefile | 27 ++-- arch/powerpc/kernel/Makefile | 9 + arch/powerpc/kernel/vmlinux.lds | 174 --------------------------- arch/powerpc/kernel/vmlinux.lds.S | 190 +++++++++++++++++++++++++++-- arch/powerpc/platforms/iseries/lpevents.c | 2 arch/ppc64/kernel/Makefile | 8 - include/asm-powerpc/system.h | 4 - 8 files changed, 214 insertions(+), 206 deletions(-) delete mode 100644 arch/powerpc/kernel/vmlinux.lds -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ f0c094f719829a7a15cbbea72a33093b9a7dec5d diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -833,6 +833,12 @@ config PIN_TLB depends on ADVANCED_OPTIONS && 8xx endmenu +if PPC64 +config KERNEL_START + hex + default "0xc0000000" +endif + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -124,12 +124,14 @@ head-$(CONFIG_6xx) += arch/powerpc/kern head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o endif -core-y += arch/powerpc/kernel/ \ - arch/$(OLDARCH)/kernel/ \ - arch/powerpc/mm/ \ - arch/powerpc/lib/ \ - arch/powerpc/sysdev/ \ - arch/powerpc/platforms/ +core-y += arch/powerpc/kernel/ +core-y += arch/$(OLDARCH)/kernel/ +core-$(CONFIG_PPC32) += arch/powerpc/mm/ +core-$(CONFIG_PPC64) += arch/$(OLDARCH)/mm/ +core-$(CONFIG_PPC32) += arch/powerpc/lib/ +libs-$(CONFIG_PPC64) += arch/$(OLDARCH)/lib/ +core-y += arch/powerpc/sysdev/ +core-y += arch/powerpc/platforms/ core-$(CONFIG_PPC32) += arch/ppc/syslib/ core-$(CONFIG_MATH_EMULATION) += arch/ppc/math-emu/ core-$(CONFIG_XMON) += arch/powerpc/xmon/ @@ -140,17 +142,20 @@ drivers-$(CONFIG_CPM2) += arch/ppc/8260 drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ -BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm - -.PHONY: $(BOOT_TARGETS) - -all: uImage zImage 
+defaultimage-$(CONFIG_PPC32) := uImage zImage +defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux +KBUILD_IMAGE := $(defaultimage-y) +all: $(KBUILD_IMAGE) CPPFLAGS_vmlinux.lds := -Upowerpc # All the instructions talk about "make bzImage". bzImage: zImage +BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm + +.PHONY: $(BOOT_TARGETS) + boot := arch/$(OLDARCH)/boot $(BOOT_TARGETS): vmlinux diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -6,8 +6,10 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif +ifeq ($(CONFIG_PPC32),y) extra-$(CONFIG_PPC_STD_MMU) := head.o -extra_$(CONFIG_PPC64) := head_64.o +endif +extra-$(CONFIG_PPC64) := head_64.o extra-$(CONFIG_40x) := head_4xx.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o @@ -23,3 +25,8 @@ ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_MODULES) += ppc_ksyms.o endif obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o + +ifeq ($(CONFIG_PPC_ISERIES),y) +arch/powerpc/kernel/head_64.o: arch/powerpc/platforms/iseries/lparmap.s +AFLAGS_head_64.o += -Iarch/powerpc/platforms/iseries +endif diff --git a/arch/powerpc/kernel/vmlinux.lds b/arch/powerpc/kernel/vmlinux.lds deleted file mode 100644 --- a/arch/powerpc/kernel/vmlinux.lds +++ /dev/null @@ -1,174 +0,0 @@ -/* Align . to a 8 byte boundary equals to maximum function alignment. */ -/* sched.text is aling to function alignment to secure we have same - * address even at second ld pass when generating System.map */ -/* spinlock.text is aling to function alignment to secure we have same - * address even at second ld pass when generating System.map */ - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to - the beginning of the section so we begin them at 0. */ - /* Stabs debugging sections. 
*/ -OUTPUT_ARCH(powerpc:common) -jiffies = jiffies_64 + 4; -SECTIONS -{ - /* Read-only sections, merged into text segment: */ - . = + SIZEOF_HEADERS; - .interp : { *(.interp) } - .hash : { *(.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .rel.text : { *(.rel.text) } - .rela.text : { *(.rela.text) } - .rel.data : { *(.rel.data) } - .rela.data : { *(.rela.data) } - .rel.rodata : { *(.rel.rodata) } - .rela.rodata : { *(.rela.rodata) } - .rel.got : { *(.rel.got) } - .rela.got : { *(.rela.got) } - .rel.ctors : { *(.rel.ctors) } - .rela.ctors : { *(.rela.ctors) } - .rel.dtors : { *(.rel.dtors) } - .rela.dtors : { *(.rela.dtors) } - .rel.bss : { *(.rel.bss) } - .rela.bss : { *(.rela.bss) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } -/* .init : { *(.init) } =0*/ - .plt : { *(.plt) } - .text : - { - *(.text) - . = ALIGN(8); __sched_text_start = .; *(.sched.text) __sched_text_end = .; - . = ALIGN(8); __lock_text_start = .; *(.spinlock.text) __lock_text_end = .; - *(.fixup) - *(.got1) - __got2_start = .; - *(.got2) - __got2_end = .; - } - _etext = .; - PROVIDE (etext = .); - .rodata : AT(ADDR(.rodata) - 0) { *(.rodata) *(.rodata.*) *(__vermagic) } .rodata1 : AT(ADDR(.rodata1) - 0) { *(.rodata1) } .pci_fixup : AT(ADDR(.pci_fixup) - 0) { __start_pci_fixups_early = .; *(.pci_fixup_early) __end_pci_fixups_early = .; __start_pci_fixups_header = .; *(.pci_fixup_header) __end_pci_fixups_header = .; __start_pci_fixups_final = .; *(.pci_fixup_final) __end_pci_fixups_final = .; __start_pci_fixups_enable = .; *(.pci_fixup_enable) __end_pci_fixups_enable = .; } __ksymtab : AT(ADDR(__ksymtab) - 0) { __start___ksymtab = .; *(__ksymtab) __stop___ksymtab = .; } __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - 0) { __start___ksymtab_gpl = .; *(__ksymtab_gpl) __stop___ksymtab_gpl = .; } __kcrctab : AT(ADDR(__kcrctab) - 0) { __start___kcrctab = .; *(__kcrctab) __stop___kcrctab = .; } __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - 0) { __start___kcrctab_gpl = .; 
*(__kcrctab_gpl) __stop___kcrctab_gpl = .; } __ksymtab_strings : AT(ADDR(__ksymtab_strings) - 0) { *(__ksymtab_strings) } __param : AT(ADDR(__param) - 0) { __start___param = .; *(__param) __stop___param = .; } - .fini : { *(.fini) } =0 - .ctors : { *(.ctors) } - .dtors : { *(.dtors) } - .fixup : { *(.fixup) } - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - __bug_table : { - __start___bug_table = .; - *(__bug_table) - __stop___bug_table = .; - } - /* Read-write section, merged into data segment: */ - . = ALIGN(4096); - .data : - { - *(.data) - *(.data1) - *(.sdata) - *(.sdata2) - *(.got.plt) *(.got) - *(.dynamic) - CONSTRUCTORS - } - - . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - _edata = .; - PROVIDE (edata = .); - - . = ALIGN(8192); - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); - __init_begin = .; - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } - /* .exit.text is discarded at runtime, not link time, - to deal with references from __bug_table */ - .exit.text : { *(.exit.text) } - .init.data : { - *(.init.data); - __vtop_table_begin = .; - *(.vtop_fixup); - __vtop_table_end = .; - __ptov_table_begin = .; - *(.ptov_fixup); - __ptov_table_end = .; - } - . 
= ALIGN(16); - __setup_start = .; - .init.setup : { *(.init.setup) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) - } - __initcall_end = .; - - __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } - __con_initcall_end = .; - - .security_initcall.init : AT(ADDR(.security_initcall.init) - 0) { __security_initcall_start = .; *(.security_initcall.init) __security_initcall_end = .; } - - __start___ftr_fixup = .; - __ftr_fixup : { *(__ftr_fixup) } - __stop___ftr_fixup = .; - - . = ALIGN(32); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; - - . = ALIGN(4096); - __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } - __initramfs_end = .; - - . = ALIGN(4096); - __init_end = .; - - . = ALIGN(4096); - _sextratext = .; - _eextratext = .; - - __bss_start = .; - .bss : - { - *(.sbss) *(.scommon) - *(.dynbss) - *(.bss) - *(COMMON) - } - __bss_stop = .; - - _end = . ; - PROVIDE (end = .); - - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.exit.data) - } -} diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -1,10 +1,29 @@ +#include +#ifdef CONFIG_PPC64 +#include +#endif #include +#ifdef CONFIG_PPC64 +OUTPUT_ARCH(powerpc:common64) +jiffies = jiffies_64; +#else OUTPUT_ARCH(powerpc:common) jiffies = jiffies_64 + 4; +#endif SECTIONS { + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) +#ifdef CONFIG_PPC32 + *(.exit.data) +#endif + } + + /* Read-only sections, merged into text segment: */ +#ifdef CONFIG_PPC32 . 
= + SIZEOF_HEADERS; .interp : { *(.interp) } .hash : { *(.hash) } @@ -28,17 +47,30 @@ SECTIONS .rela.plt : { *(.rela.plt) } /* .init : { *(.init) } =0*/ .plt : { *(.plt) } - .text : - { +#endif + .text : { +#ifdef CONFIG_PPC64 + *(.text .text.*) +#else *(.text) +#endif SCHED_TEXT LOCK_TEXT +#ifdef CONFIG_PPC64 + KPROBES_TEXT +#endif *(.fixup) +#ifdef CONFIG_PPC32 *(.got1) __got2_start = .; *(.got2) __got2_end = .; +#else + . = ALIGN(PAGE_SIZE); + _etext = .; +#endif } +#ifdef CONFIG_PPC32 _etext = .; PROVIDE (etext = .); @@ -48,6 +80,7 @@ SECTIONS .dtors : { *(.dtors) } .fixup : { *(.fixup) } +#endif __ex_table : { __start___ex_table = .; @@ -61,6 +94,17 @@ SECTIONS __stop___bug_table = .; } +#ifdef CONFIG_PPC64 + __ftr_fixup : { + __start___ftr_fixup = .; + *(__ftr_fixup) + __stop___ftr_fixup = .; + } + + RODATA +#endif + +#ifdef CONFIG_PPC32 /* Read-write section, merged into data segment: */ . = ALIGN(4096); .data : @@ -90,16 +134,25 @@ SECTIONS .data.init_task : { *(.data.init_task) } . = ALIGN(4096); +#else + /* will be freed after init */ + . = ALIGN(PAGE_SIZE); +#endif __init_begin = .; .init.text : { _sinittext = .; *(.init.text) _einittext = .; } +#ifdef CONFIG_PPC32 /* .exit.text is discarded at runtime, not link time, to deal with references from __bug_table */ .exit.text : { *(.exit.text) } +#endif .init.data : { +#ifdef CONFIG_PPC64 + *(.init.data) +#else *(.init.data); __vtop_table_begin = .; *(.vtop_fixup); @@ -107,13 +160,31 @@ SECTIONS __ptov_table_begin = .; *(.ptov_fixup); __ptov_table_end = .; +#endif } + . 
= ALIGN(16); +#ifdef CONFIG_PPC32 __setup_start = .; - .init.setup : { *(.init.setup) } +#endif + .init.setup : { +#ifdef CONFIG_PPC64 + __setup_start = .; +#endif + *(.init.setup) +#ifdef CONFIG_PPC64 + __setup_end = .; +#endif + } +#ifdef CONFIG_PPC32 __setup_end = .; + __initcall_start = .; +#endif .initcall.init : { +#ifdef CONFIG_PPC64 + __initcall_start = .; +#endif *(.initcall1.init) *(.initcall2.init) *(.initcall3.init) @@ -121,27 +192,109 @@ SECTIONS *(.initcall5.init) *(.initcall6.init) *(.initcall7.init) +#ifdef CONFIG_PPC64 + __initcall_end = .; +#endif } +#ifdef CONFIG_PPC32 __initcall_end = .; __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } +#endif + .con_initcall.init : { +#ifdef CONFIG_PPC64 + __con_initcall_start = .; +#endif + *(.con_initcall.init) +#ifdef CONFIG_PPC64 + __con_initcall_end = .; +#endif + } +#ifdef CONFIG_PPC32 __con_initcall_end = .; +#endif SECURITY_INIT +#ifdef CONFIG_PPC32 __start___ftr_fixup = .; __ftr_fixup : { *(__ftr_fixup) } __stop___ftr_fixup = .; +#else + . = ALIGN(PAGE_SIZE); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } +#endif +#ifdef CONFIG_PPC32 . = ALIGN(32); __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } +#endif + .data.percpu : { +#ifdef CONFIG_PPC64 + __per_cpu_start = .; +#endif + *(.data.percpu) +#ifdef CONFIG_PPC64 + __per_cpu_end = .; +#endif + } +#ifdef CONFIG_PPC32 __per_cpu_end = .; +#endif + +#ifdef CONFIG_PPC64 + . = ALIGN(PAGE_SIZE); + . = ALIGN(16384); + __init_end = .; + /* freed after init ends here */ + + + /* Read/write sections */ + . = ALIGN(PAGE_SIZE); + . = ALIGN(16384); + /* The initial task and kernel stack */ + .data.init_task : { + *(.data.init_task) + } + + . 
= ALIGN(PAGE_SIZE); + .data.page_aligned : { + *(.data.page_aligned) + } + + .data.cacheline_aligned : { + *(.data.cacheline_aligned) + } + + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) + } + + .opd : { + *(.opd) + } + + .got : { + __toc_start = .; + *(.got) + *(.toc) + . = ALIGN(PAGE_SIZE); + _edata = .; + } + + . = ALIGN(PAGE_SIZE); +#else . = ALIGN(4096); __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } + .init.ramfs : { + *(.init.ramfs) + } __initramfs_end = .; . = ALIGN(4096); @@ -152,21 +305,30 @@ SECTIONS _eextratext = .; __bss_start = .; - .bss : - { +#endif + .bss : { +#ifdef CONFIG_PPC64 + __bss_start = .; +#else *(.sbss) *(.scommon) *(.dynbss) +#endif *(.bss) +#ifdef CONFIG_PPC32 *(COMMON) +#else + __bss_stop = .; +#endif } +#ifdef CONFIG_PPC32 __bss_stop = .; +#endif +#ifdef CONFIG_PPC64 + . = ALIGN(PAGE_SIZE); +#endif _end = . ; +#ifdef CONFIG_PPC32 PROVIDE (end = .); - - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.exit.data) - } +#endif } diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c --- a/arch/powerpc/platforms/iseries/lpevents.c +++ b/arch/powerpc/platforms/iseries/lpevents.c @@ -13,6 +13,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -2,10 +2,10 @@ # Makefile for the linux ppc64 kernel. 
# -ifneq ($(CONFIG_PPC_MERGE),y) - EXTRA_CFLAGS += -mno-minimal-toc +ifneq ($(CONFIG_PPC_MERGE),y) extra-y := head.o vmlinux.lds +endif obj-y := setup.o entry.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ @@ -70,11 +70,9 @@ obj-$(CONFIG_KPROBES) += kprobes.o CFLAGS_ioctl32.o += -Ifs/ +ifneq ($(CONFIG_PPC_MERGE),y) ifeq ($(CONFIG_PPC_ISERIES),y) arch/ppc64/kernel/head.o: arch/powerpc/platforms/iseries/lparmap.s AFLAGS_head.o += -Iarch/powerpc/platforms/iseries endif - -else - endif diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -118,10 +118,10 @@ extern void _set_L3CR(unsigned long); #endif extern void via_cuda_init(void); -extern void pmac_nvram_init(void); extern void read_rtc_time(void); extern void pmac_find_display(void); extern void giveup_fpu(struct task_struct *); +extern void disable_kernel_fp(void); extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); extern void enable_kernel_altivec(void); @@ -346,5 +346,7 @@ __cmpxchg(volatile void *ptr, unsigned l #define arch_align_stack(x) (x) +extern unsigned long reloc_offset(void); + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ From sfr at canb.auug.org.au Sat Oct 1 00:00:01 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 00:00:01 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and fixup traps.c In-Reply-To: <20050930233602.138b6e27.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> Message-ID: <20051001000001.1f1d8c48.sfr@canb.auug.org.au> Use idle_power4.S from ppc64 as we are not going to support 32 bit power4 in the merged tree. create traps{32,64}.c as these are hard to merge. 
Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 4 arch/powerpc/kernel/Makefile | 4 arch/powerpc/kernel/idle_power4.S | 78 +++ arch/powerpc/kernel/traps.c | 1047 ------------------------------------- arch/powerpc/kernel/traps32.c | 1047 +++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/traps64.c | 568 ++++++++++++++++++++ arch/ppc64/kernel/Makefile | 10 arch/ppc64/kernel/idle_power4.S | 79 --- arch/ppc64/kernel/traps.c | 568 -------------------- 9 files changed, 1707 insertions(+), 1698 deletions(-) create mode 100644 arch/powerpc/kernel/idle_power4.S delete mode 100644 arch/powerpc/kernel/traps.c create mode 100644 arch/powerpc/kernel/traps32.c create mode 100644 arch/powerpc/kernel/traps64.c delete mode 100644 arch/ppc64/kernel/idle_power4.S delete mode 100644 arch/ppc64/kernel/traps.c -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ bbc83a78c1c417cc6bb44e5a1bdcd5a56e625bc5 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -142,8 +142,8 @@ config POWER4 def_bool y config PPC_FPU - bool - default y if PPC64 + depends on PPC32 + def_bool y config BOOKE bool diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -13,6 +13,8 @@ extra-$(CONFIG_POWER4) += idle_power4.o extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds -obj-y := semaphore.o traps.o process.o +obj-y := semaphore.o process.o +obj-$(CONFIG_PPC32) += traps32.c +obj-$(CONFIG_PPC64) += traps64.c obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S new file mode 100644 --- /dev/null +++ b/arch/powerpc/kernel/idle_power4.S @@ -0,0 +1,78 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! 
This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + + .text + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(power4_idle) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + LOADBASE(r3,cur_cpu_spec) + ld r4,cur_cpu_spec@l(r3) + ld r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beqlr + /* Now check if user or arch enabled NAP mode */ + LOADBASE(r3,powersave_nap) + lwz r4,powersave_nap@l(r3) + cmpwi 0,r4,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + li r4,0 + ori r4,r4,MSR_EE + andc r0,r7,r4 + mtmsrd r0 + + /* Check current_thread_info()->flags */ + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsrd r7 /* out of line this ? 
*/ + blr +1: + /* Go to NAP now */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + oris r7,r7,MSR_POW@h + sync + isync + mtmsrd r7 + isync + sync + blr diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c deleted file mode 100644 --- a/arch/powerpc/kernel/traps.c +++ /dev/null @@ -1,1047 +0,0 @@ -/* - * arch/powerpc/kernel/traps.c - * - * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Modified by Cort Dougan (cort at cs.nmt.edu) - * and Paul Mackerras (paulus at samba.org) - */ - -/* - * This file handles the architecture-dependent parts of hardware exceptions - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_PMAC_BACKLIGHT -#include -#endif -#include - -#ifdef CONFIG_DEBUGGER -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); - -EXPORT_SYMBOL(__debugger); -EXPORT_SYMBOL(__debugger_ipi); -EXPORT_SYMBOL(__debugger_bpt); -EXPORT_SYMBOL(__debugger_sstep); -EXPORT_SYMBOL(__debugger_iabr_match); -EXPORT_SYMBOL(__debugger_dabr_match); -EXPORT_SYMBOL(__debugger_fault_handler); -#endif - -struct notifier_block *powerpc_die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); - -int register_die_notifier(struct notifier_block *nb) -{ - int err = 
0; - unsigned long flags; - - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&powerpc_die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; -} - -/* - * Trap & Exception support - */ - -static DEFINE_SPINLOCK(die_lock); - -int die(const char *str, struct pt_regs *regs, long err) -{ - static int die_counter; - int nl = 0; - - if (debugger(regs)) - return 1; - - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); -#ifdef CONFIG_PMAC_BACKLIGHT - if (_machine == _MACH_Pmac) { - set_backlight_enable(1); - set_backlight_level(BACKLIGHT_MAX); - } -#endif - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); - nl = 1; -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); - nl = 1; -#endif -#ifdef CONFIG_PPC64 - switch (systemcfg->platform) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_BPA: - printk("BPA "); - nl = 1; - break; - } -#endif - if (nl) - printk("\n"); - print_modules(); - show_regs(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - - if (in_interrupt()) - panic("Fatal exception in interrupt"); - - if (panic_on_oops) { - panic("Fatal exception"); - } - do_exit(err); - - return 0; -} - -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) -{ - siginfo_t info; - - if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } - - memset(&info, 0, sizeof(info)); - info.si_signo = signr; - info.si_code = code; - info.si_addr = (void __user *) addr; - force_sig_info(signr, &info, 
current); - - /* - * Init gets no signals that it doesn't have a handler for. - * That's all very well, but if it has caused a synchronous - * exception and we ignore the resulting signal, it will just - * generate the same exception over and over again and we get - * nowhere. Better to kill it and let the kernel panic. - */ - if (current->pid == 1) { - __sighandler_t handler; - - spin_lock_irq(&current->sighand->siglock); - handler = current->sighand->action[signr-1].sa.sa_handler; - spin_unlock_irq(&current->sighand->siglock); - if (handler == SIG_DFL) { - /* init has generated a synchronous exception - and it doesn't have a handler for the signal */ - printk(KERN_CRIT "init has generated signal %d " - "but has no handler for it\n", signr); - do_exit(signr); - } - } -} - -#ifdef CONFIG_PPC64 -void system_reset_exception(struct pt_regs *regs) -{ - /* See if any machine dependent calls */ - if (ppc_md.system_reset_exception) - ppc_md.system_reset_exception(regs); - - die("System Reset", regs, SIGABRT); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable System Reset"); - - /* What should we do here? We could issue a shutdown or hard reset. */ -} -#endif - -/* - * I/O accesses can cause machine checks on powermacs. - * Check if the NIP corresponds to the address of a sync - * instruction for which there is an entry in the exception - * table. - * Note that the 601 only takes a machine check on TEA - * (transfer error ack) signal assertion, and does not - * set any of the top 16 bits of SRR1. - * -- paulus. 
- */ -static inline int check_io_access(struct pt_regs *regs) -{ -#ifdef CONFIG_PPC_PMAC - unsigned long msr = regs->msr; - const struct exception_table_entry *entry; - unsigned int *nip = (unsigned int *)regs->nip; - - if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000))) - && (entry = search_exception_tables(regs->nip)) != NULL) { - /* - * Check that it's a sync instruction, or somewhere - * in the twi; isync; nop sequence that inb/inw/inl uses. - * As the address is in the exception table - * we should be able to read the instr there. - * For the debug message, we look at the preceding - * load or store. - */ - if (*nip == 0x60000000) /* nop */ - nip -= 2; - else if (*nip == 0x4c00012c) /* isync */ - --nip; - if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { - /* sync or twi */ - unsigned int rb; - - --nip; - rb = (*nip >> 11) & 0x1f; - printk(KERN_DEBUG "%s bad port %lx at %p\n", - (*nip & 0x100)? "OUT to": "IN from", - regs->gpr[rb] - _IO_BASE, nip); - regs->msr |= MSR_RI; - regs->nip = entry->fixup; - return 1; - } - } -#endif /* CONFIG_PPC_PMAC */ - return 0; -} - -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) -/* On 4xx, the reason for the machine check or program exception - is in the ESR. */ -#define get_reason(regs) ((regs)->dsisr) -#ifndef CONFIG_FSL_BOOKE -#define get_mc_reason(regs) ((regs)->dsisr) -#else -#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) -#endif -#define REASON_FP ESR_FP -#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) -#define REASON_PRIVILEGED ESR_PPR -#define REASON_TRAP ESR_PTR - -/* single-step stuff */ -#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) -#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) - -#else -/* On non-4xx, the reason for the machine check or program - exception is in the MSR. 
*/ -#define get_reason(regs) ((regs)->msr) -#define get_mc_reason(regs) ((regs)->msr) -#define REASON_FP 0x100000 -#define REASON_ILLEGAL 0x80000 -#define REASON_PRIVILEGED 0x40000 -#define REASON_TRAP 0x20000 - -#define single_stepping(regs) ((regs)->msr & MSR_SE) -#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) -#endif - -/* - * This is "fall-back" implementation for configurations - * which don't provide platform-specific machine check info - */ -void __attribute__ ((weak)) -platform_machine_check(struct pt_regs *regs) -{ -} - -void MachineCheckException(struct pt_regs *regs) -{ -#ifdef CONFIG_PPC64 - int recover = 0; - - /* See if any machine dependent calls */ - if (ppc_md.machine_check_exception) - recover = ppc_md.machine_check_exception(regs); - - if (recover) - return; -#else - unsigned long reason = get_mc_reason(regs); - - if (user_mode(regs)) { - regs->msr |= MSR_RI; - _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); - return; - } - -#if defined(CONFIG_8xx) && defined(CONFIG_PCI) - /* the qspan pci read routines can cause machine checks -- Cort */ - bad_page_fault(regs, regs->dar, SIGBUS); - return; -#endif - - if (debugger_fault_handler(regs)) { - regs->msr |= MSR_RI; - return; - } - - if (check_io_access(regs)) - return; - -#if defined(CONFIG_4xx) && !defined(CONFIG_440A) - if (reason & ESR_IMCP) { - printk("Instruction"); - mtspr(SPRN_ESR, reason & ~ESR_IMCP); - } else - printk("Data"); - printk(" machine check in kernel mode.\n"); -#elif defined(CONFIG_440A) - printk("Machine check in kernel mode.\n"); - if (reason & ESR_IMCP){ - printk("Instruction Synchronous Machine Check exception\n"); - mtspr(SPRN_ESR, reason & ~ESR_IMCP); - } - else { - u32 mcsr = mfspr(SPRN_MCSR); - if (mcsr & MCSR_IB) - printk("Instruction Read PLB Error\n"); - if (mcsr & MCSR_DRB) - printk("Data Read PLB Error\n"); - if (mcsr & MCSR_DWB) - printk("Data Write PLB Error\n"); - if (mcsr & MCSR_TLBP) - printk("TLB Parity Error\n"); - if (mcsr & MCSR_ICP){ - 
flush_instruction_cache(); - printk("I-Cache Parity Error\n"); - } - if (mcsr & MCSR_DCSP) - printk("D-Cache Search Parity Error\n"); - if (mcsr & MCSR_DCFP) - printk("D-Cache Flush Parity Error\n"); - if (mcsr & MCSR_IMPE) - printk("Machine Check exception is imprecise\n"); - - /* Clear MCSR */ - mtspr(SPRN_MCSR, mcsr); - } -#elif defined (CONFIG_E500) - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - printk("Machine Check Signal\n"); - if (reason & MCSR_ICPERR) - printk("Instruction Cache Parity Error\n"); - if (reason & MCSR_DCP_PERR) - printk("Data Cache Push Parity Error\n"); - if (reason & MCSR_DCPERR) - printk("Data Cache Parity Error\n"); - if (reason & MCSR_GL_CI) - printk("Guarded Load or Cache-Inhibited stwcx.\n"); - if (reason & MCSR_BUS_IAERR) - printk("Bus - Instruction Address Error\n"); - if (reason & MCSR_BUS_RAERR) - printk("Bus - Read Address Error\n"); - if (reason & MCSR_BUS_WAERR) - printk("Bus - Write Address Error\n"); - if (reason & MCSR_BUS_IBERR) - printk("Bus - Instruction Data Error\n"); - if (reason & MCSR_BUS_RBERR) - printk("Bus - Read Data Bus Error\n"); - if (reason & MCSR_BUS_WBERR) - printk("Bus - Read Data Bus Error\n"); - if (reason & MCSR_BUS_IPERR) - printk("Bus - Instruction Parity Error\n"); - if (reason & MCSR_BUS_RPERR) - printk("Bus - Read Parity Error\n"); -#elif defined (CONFIG_E200) - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - printk("Machine Check Signal\n"); - if (reason & MCSR_CP_PERR) - printk("Cache Push Parity Error\n"); - if (reason & MCSR_CPERR) - printk("Cache Parity Error\n"); - if (reason & MCSR_EXCP_ERR) - printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); - if (reason & MCSR_BUS_IRERR) - printk("Bus - Read Bus Error on instruction fetch\n"); - if (reason & MCSR_BUS_DRERR) - printk("Bus - Read Bus Error on data load\n"); 
- if (reason & MCSR_BUS_WRERR) - printk("Bus - Write Bus Error on buffered store or cache line push\n"); -#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */ - printk("Machine check in kernel mode.\n"); - printk("Caused by (from SRR1=%lx): ", reason); - switch (reason & 0x601F0000) { - case 0x80000: - printk("Machine check signal\n"); - break; - case 0: /* for 601 */ - case 0x40000: - case 0x140000: /* 7450 MSS error and TEA */ - printk("Transfer error ack signal\n"); - break; - case 0x20000: - printk("Data parity error signal\n"); - break; - case 0x10000: - printk("Address parity error signal\n"); - break; - case 0x20000000: - printk("L1 Data Cache error\n"); - break; - case 0x40000000: - printk("L1 Instruction Cache error\n"); - break; - case 0x00100000: - printk("L2 data cache parity error\n"); - break; - default: - printk("Unknown values in msr\n"); - } -#endif /* CONFIG_4xx */ - - /* - * Optional platform-provided routine to print out - * additional info, e.g. bus error registers. 
- */ - platform_machine_check(regs); -#endif /* CONFIG_PPC64 */ - - if (debugger_fault_handler(regs)) - return; - die("Machine check", regs, SIGBUS); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable Machine check"); -} - -void SMIException(struct pt_regs *regs) -{ - die("System Management Interrupt", regs, SIGABRT); -} - -void UnknownException(struct pt_regs *regs) -{ - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", - regs->nip, regs->msr, regs->trap); - - _exception(SIGTRAP, regs, 0, 0); -} - -void InstructionBreakpoint(struct pt_regs *regs) -{ - if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_iabr_match(regs)) - return; - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); -} - -void RunModeException(struct pt_regs *regs) -{ - _exception(SIGTRAP, regs, 0, 0); -} - -void SingleStepException(struct pt_regs *regs) -{ - regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ - - if (notify_die(DIE_SSTEP, "single_step", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_sstep(regs)) - return; - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); -} - -/* - * After we have successfully emulated an instruction, we have to - * check if the instruction was being single-stepped, and if so, - * pretend we got a single-step exception. This was pointed out - * by Kumar Gala. -- paulus - */ -static void emulate_single_step(struct pt_regs *regs) -{ - if (single_stepping(regs)) { - clear_single_step(regs); - _exception(SIGTRAP, regs, TRAP_TRACE, 0); - } -} - -/* Illegal instruction emulation support. Originally written to - * provide the PVR to user applications using the mfspr rd, PVR. - * Return non-zero if we can't emulate, or -EFAULT if the associated - * memory access caused an access fault. Return zero on success. - * - * There are a couple of ways to do this, either "decode" the instruction - * or directly match lots of bits. 
In this case, matching lots of - * bits is faster and easier. - * - */ -#define INST_MFSPR_PVR 0x7c1f42a6 -#define INST_MFSPR_PVR_MASK 0xfc1fffff - -#define INST_DCBA 0x7c0005ec -#define INST_DCBA_MASK 0x7c0007fe - -#define INST_MCRXR 0x7c000400 -#define INST_MCRXR_MASK 0x7c0007fe - -#define INST_STRING 0x7c00042a -#define INST_STRING_MASK 0x7c0007fe -#define INST_STRING_GEN_MASK 0x7c00067e -#define INST_LSWI 0x7c0004aa -#define INST_LSWX 0x7c00042a -#define INST_STSWI 0x7c0005aa -#define INST_STSWX 0x7c00052a - -static int emulate_string_inst(struct pt_regs *regs, u32 instword) -{ - u8 rT = (instword >> 21) & 0x1f; - u8 rA = (instword >> 16) & 0x1f; - u8 NB_RB = (instword >> 11) & 0x1f; - u32 num_bytes; - unsigned long EA; - int pos = 0; - - /* Early out if we are an invalid form of lswx */ - if ((instword & INST_STRING_MASK) == INST_LSWX) - if ((rT == rA) || (rT == NB_RB)) - return -EINVAL; - - EA = (rA == 0) ? 0 : regs->gpr[rA]; - - switch (instword & INST_STRING_MASK) { - case INST_LSWX: - case INST_STSWX: - EA += NB_RB; - num_bytes = regs->xer & 0x7f; - break; - case INST_LSWI: - case INST_STSWI: - num_bytes = (NB_RB == 0) ? 
32 : NB_RB; - break; - default: - return -EINVAL; - } - - while (num_bytes != 0) - { - u8 val; - u32 shift = 8 * (3 - (pos & 0x3)); - - switch ((instword & INST_STRING_MASK)) { - case INST_LSWX: - case INST_LSWI: - if (get_user(val, (u8 __user *)EA)) - return -EFAULT; - /* first time updating this reg, - * zero it out */ - if (pos == 0) - regs->gpr[rT] = 0; - regs->gpr[rT] |= val << shift; - break; - case INST_STSWI: - case INST_STSWX: - val = regs->gpr[rT] >> shift; - if (put_user(val, (u8 __user *)EA)) - return -EFAULT; - break; - } - /* move EA to next address */ - EA += 1; - num_bytes--; - - /* manage our position within the register */ - if (++pos == 4) { - pos = 0; - if (++rT == 32) - rT = 0; - } - } - - return 0; -} - -static int emulate_instruction(struct pt_regs *regs) -{ - u32 instword; - u32 rd; - - if (!user_mode(regs)) - return -EINVAL; - CHECK_FULL_REGS(regs); - - if (get_user(instword, (u32 __user *)(regs->nip))) - return -EFAULT; - - /* Emulate the mfspr rD, PVR. */ - if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { - rd = (instword >> 21) & 0x1f; - regs->gpr[rd] = mfspr(SPRN_PVR); - return 0; - } - - /* Emulating the dcba insn is just a no-op. */ - if ((instword & INST_DCBA_MASK) == INST_DCBA) - return 0; - - /* Emulate the mcrxr insn. */ - if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { - int shift = (instword >> 21) & 0x1c; - unsigned long msk = 0xf0000000UL >> shift; - - regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); - regs->xer &= ~0xf0000000UL; - return 0; - } - - /* Emulate load/store string insn. */ - if ((instword & INST_STRING_GEN_MASK) == INST_STRING) - return emulate_string_inst(regs, instword); - - return -EINVAL; -} - -/* - * Look through the list of trap instructions that are used for BUG(), - * BUG_ON() and WARN_ON() and see if we hit one. At this point we know - * that the exception was caused by a trap instruction of some kind. - * Returns 1 if we should continue (i.e. 
it was a WARN_ON) or 0 - * otherwise. - */ -extern struct bug_entry __start___bug_table[], __stop___bug_table[]; - -#ifndef CONFIG_MODULES -#define module_find_bug(x) NULL -#endif - -struct bug_entry *find_bug(unsigned long bugaddr) -{ - struct bug_entry *bug; - - for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) - if (bugaddr == bug->bug_addr) - return bug; - return module_find_bug(bugaddr); -} - -int check_bug_trap(struct pt_regs *regs) -{ - struct bug_entry *bug; - unsigned long addr; - - if (regs->msr & MSR_PR) - return 0; /* not in kernel */ - addr = regs->nip; /* address of trap instruction */ - if (addr < PAGE_OFFSET) - return 0; - bug = find_bug(regs->nip); - if (bug == NULL) - return 0; - if (bug->line & BUG_WARNING_TRAP) { - /* this is a WARN_ON rather than BUG/BUG_ON */ -#ifdef CONFIG_XMON - xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); -#endif /* CONFIG_XMON */ - printk(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); - dump_stack(); - return 1; - } -#ifdef CONFIG_XMON - xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - xmon(regs); -#endif /* CONFIG_XMON */ - printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - - return 0; -} - -void ProgramCheckException(struct pt_regs *regs) -{ - unsigned int reason = get_reason(regs); - extern int do_mathemu(struct pt_regs *regs); - -#ifdef CONFIG_MATH_EMULATION - /* (reason & REASON_ILLEGAL) would be the obvious thing here, - * but there seems to be a hardware bug on the 405GP (RevD) - * that means ESR is sometimes set incorrectly - either to - * ESR_DST (!?) or 0. In the process of chasing this with the - * hardware people - not sure if it can happen on any illegal - * instruction or only on FP instructions, whether there is a - * pattern to occurences etc. 
-dgibson 31/Mar/2003 */ - if (!(reason & REASON_TRAP) && do_mathemu(regs) == 0) { - emulate_single_step(regs); - return; - } -#endif /* CONFIG_MATH_EMULATION */ - - if (reason & REASON_FP) { - /* IEEE FP exception */ - int code = 0; - u32 fpscr; - - /* We must make sure the FP state is consistent with - * our MSR_FP in regs - */ - preempt_disable(); - if (regs->msr & MSR_FP) - giveup_fpu(current); - preempt_enable(); - - fpscr = current->thread.fpscr; - fpscr &= fpscr << 22; /* mask summary bits with enables */ - if (fpscr & FPSCR_VX) - code = FPE_FLTINV; - else if (fpscr & FPSCR_OX) - code = FPE_FLTOVF; - else if (fpscr & FPSCR_UX) - code = FPE_FLTUND; - else if (fpscr & FPSCR_ZX) - code = FPE_FLTDIV; - else if (fpscr & FPSCR_XX) - code = FPE_FLTRES; - _exception(SIGFPE, regs, code, regs->nip); - return; - } - - if (reason & REASON_TRAP) { - /* trap exception */ - if (debugger_bpt(regs)) - return; - if (check_bug_trap(regs)) { - regs->nip += 4; - return; - } - _exception(SIGTRAP, regs, TRAP_BRKPT, 0); - return; - } - - /* Try to emulate it if we should. 
*/ - if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { - switch (emulate_instruction(regs)) { - case 0: - regs->nip += 4; - emulate_single_step(regs); - return; - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - } - } - - if (reason & REASON_PRIVILEGED) - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - else - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -} - -void AlignmentException(struct pt_regs *regs) -{ - int fixed; - - fixed = fix_alignment(regs); - - if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ - emulate_single_step(regs); - return; - } - - /* Operand address was bad */ - if (fixed == -EFAULT) { - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar); - else - /* Search exception table */ - bad_page_fault(regs, regs->dar, SIGSEGV); - return; - } - _exception(SIGBUS, regs, BUS_ADRALN, regs->dar); -} - -void StackOverflow(struct pt_regs *regs) -{ - printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", - current, regs->gpr[1]); - debugger(regs); - show_regs(regs); - panic("kernel stack overflow"); -} - -void nonrecoverable_exception(struct pt_regs *regs) -{ - printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", - regs->nip, regs->msr); - debugger(regs); - die("nonrecoverable exception", regs, SIGKILL); -} - -void trace_syscall(struct pt_regs *regs) -{ - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, current->pid, regs->nip, regs->link, regs->gpr[0], - regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); -} - -#ifdef CONFIG_8xx -void SoftwareEmulation(struct pt_regs *regs) -{ - extern int do_mathemu(struct pt_regs *); - extern int Soft_emulate_8xx(struct pt_regs *); - int errcode; - - CHECK_FULL_REGS(regs); - - if (!user_mode(regs)) { - debugger(regs); - die("Kernel Mode Software FPU Emulation", regs, SIGFPE); - } - -#ifdef CONFIG_MATH_EMULATION - errcode = do_mathemu(regs); -#else - errcode = 
Soft_emulate_8xx(regs); -#endif - if (errcode) { - if (errcode > 0) - _exception(SIGFPE, regs, 0, 0); - else if (errcode == -EFAULT) - _exception(SIGSEGV, regs, 0, 0); - else - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - } else - emulate_single_step(regs); -} -#endif /* CONFIG_8xx */ - -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - -void DebugException(struct pt_regs *regs, unsigned long debug_status) -{ - if (debug_status & DBSR_IC) { /* instruction completion */ - regs->msr &= ~MSR_DE; - if (user_mode(regs)) { - current->thread.dbcr0 &= ~DBCR0_IC; - } else { - /* Disable instruction completion */ - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); - /* Clear the instruction completion event */ - mtspr(SPRN_DBSR, DBSR_IC); - if (debugger_sstep(regs)) - return; - } - _exception(SIGTRAP, regs, TRAP_TRACE, 0); - } -} -#endif /* CONFIG_4xx || CONFIG_BOOKE */ - -#if !defined(CONFIG_TAU_INT) -void TAUException(struct pt_regs *regs) -{ - printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", - regs->nip, regs->msr, regs->trap, print_tainted()); -} -#endif /* CONFIG_INT_TAU */ - -void AltivecUnavailException(struct pt_regs *regs) -{ - static int kernel_altivec_count; - -#ifndef CONFIG_ALTIVEC - if (user_mode(regs)) { - /* A user program has executed an altivec instruction, - but this kernel doesn't support altivec. */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } -#endif - /* The kernel has executed an altivec instruction without - first enabling altivec. Whinge but let it do it. 
*/ - if (++kernel_altivec_count < 10) - printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", - current, regs->nip); - regs->msr |= MSR_VEC; -} - -#ifdef CONFIG_ALTIVEC -void AltivecAssistException(struct pt_regs *regs) -{ - int err; - - preempt_disable(); - if (regs->msr & MSR_VEC) - giveup_altivec(current); - preempt_enable(); - if (!user_mode(regs)) { - printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" - " at %lx\n", regs->nip); - die("Kernel Altivec assist exception", regs, SIGILL); - } - - err = emulate_altivec(regs); - if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ - emulate_single_step(regs); - return; - } - - if (err == -EFAULT) { - /* got an error reading the instruction */ - _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); - } else { - /* didn't recognize the instruction */ - /* XXX quick hack for now: set the non-Java bit in the VSCR */ - if (printk_ratelimit()) - printk(KERN_ERR "Unrecognized altivec instruction " - "in %s at %lx\n", current->comm, regs->nip); - current->thread.vscr.u[3] |= 0x10000; - } -} -#endif /* CONFIG_ALTIVEC */ - -#ifdef CONFIG_E500 -void PerformanceMonitorException(struct pt_regs *regs) -{ - perf_irq(regs); -} -#endif - -#ifdef CONFIG_FSL_BOOKE -void CacheLockingException(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - /* We treat cache locking instructions from the user - * as priv ops, in the future we could try to do - * something smarter - */ - if (error_code & (ESR_DLK|ESR_ILK)) - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - return; -} -#endif /* CONFIG_FSL_BOOKE */ - -#ifdef CONFIG_SPE -void SPEFloatingPointException(struct pt_regs *regs) -{ - unsigned long spefscr; - int fpexc_mode; - int code = 0; - - spefscr = current->thread.spefscr; - fpexc_mode = current->thread.fpexc_mode; - - /* Hardware does not neccessarily set sticky - * underflow/overflow/invalid flags */ - if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) { - code = 
FPE_FLTOVF; - spefscr |= SPEFSCR_FOVFS; - } - else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) { - code = FPE_FLTUND; - spefscr |= SPEFSCR_FUNFS; - } - else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV)) - code = FPE_FLTDIV; - else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) { - code = FPE_FLTINV; - spefscr |= SPEFSCR_FINVS; - } - else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES)) - code = FPE_FLTRES; - - current->thread.spefscr = spefscr; - - _exception(SIGFPE, regs, code, regs->nip); - return; -} -#endif - -#ifdef CONFIG_BOOKE_WDT -/* - * Default handler for a Watchdog exception, - * spins until a reboot occurs - */ -void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) -{ - /* Generic WatchdogHandler, implement your own */ - mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); - return; -} - -void WatchdogException(struct pt_regs *regs) -{ - printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); - WatchdogHandler(regs); -} -#endif - -void __init trap_init(void) -{ -} diff --git a/arch/powerpc/kernel/traps32.c b/arch/powerpc/kernel/traps32.c new file mode 100644 --- /dev/null +++ b/arch/powerpc/kernel/traps32.c @@ -0,0 +1,1047 @@ +/* + * arch/powerpc/kernel/traps.c + * + * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Modified by Cort Dougan (cort at cs.nmt.edu) + * and Paul Mackerras (paulus at samba.org) + */ + +/* + * This file handles the architecture-dependent parts of hardware exceptions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PMAC_BACKLIGHT +#include +#endif +#include + +#ifdef CONFIG_DEBUGGER +int (*__debugger)(struct pt_regs *regs); +int (*__debugger_ipi)(struct pt_regs *regs); +int (*__debugger_bpt)(struct pt_regs *regs); +int (*__debugger_sstep)(struct pt_regs *regs); +int (*__debugger_iabr_match)(struct pt_regs *regs); +int (*__debugger_dabr_match)(struct pt_regs *regs); +int (*__debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(__debugger); +EXPORT_SYMBOL(__debugger_ipi); +EXPORT_SYMBOL(__debugger_bpt); +EXPORT_SYMBOL(__debugger_sstep); +EXPORT_SYMBOL(__debugger_iabr_match); +EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_fault_handler); +#endif + +struct notifier_block *powerpc_die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&powerpc_die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + +/* + * Trap & Exception support + */ + +static DEFINE_SPINLOCK(die_lock); + +int die(const char *str, struct pt_regs *regs, long err) +{ + static int die_counter; + int nl = 0; + + if (debugger(regs)) + return 1; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); +#ifdef CONFIG_PMAC_BACKLIGHT + if (_machine == _MACH_Pmac) { + set_backlight_enable(1); + set_backlight_level(BACKLIGHT_MAX); + } +#endif + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef 
CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); + nl = 1; +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); + nl = 1; +#endif +#ifdef CONFIG_PPC64 + switch (systemcfg->platform) { + case PLATFORM_PSERIES: + printk("PSERIES "); + nl = 1; + break; + case PLATFORM_PSERIES_LPAR: + printk("PSERIES LPAR "); + nl = 1; + break; + case PLATFORM_ISERIES_LPAR: + printk("ISERIES LPAR "); + nl = 1; + break; + case PLATFORM_POWERMAC: + printk("POWERMAC "); + nl = 1; + break; + case PLATFORM_BPA: + printk("BPA "); + nl = 1; + break; + } +#endif + if (nl) + printk("\n"); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) { + panic("Fatal exception"); + } + do_exit(err); + + return 0; +} + +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + siginfo_t info; + + if (!user_mode(regs)) { + if (die("Exception in kernel mode", regs, signr)) + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = signr; + info.si_code = code; + info.si_addr = (void __user *) addr; + force_sig_info(signr, &info, current); + + /* + * Init gets no signals that it doesn't have a handler for. + * That's all very well, but if it has caused a synchronous + * exception and we ignore the resulting signal, it will just + * generate the same exception over and over again and we get + * nowhere. Better to kill it and let the kernel panic. 
+ */ + if (current->pid == 1) { + __sighandler_t handler; + + spin_lock_irq(&current->sighand->siglock); + handler = current->sighand->action[signr-1].sa.sa_handler; + spin_unlock_irq(&current->sighand->siglock); + if (handler == SIG_DFL) { + /* init has generated a synchronous exception + and it doesn't have a handler for the signal */ + printk(KERN_CRIT "init has generated signal %d " + "but has no handler for it\n", signr); + do_exit(signr); + } + } +} + +#ifdef CONFIG_PPC64 +void system_reset_exception(struct pt_regs *regs) +{ + /* See if any machine dependent calls */ + if (ppc_md.system_reset_exception) + ppc_md.system_reset_exception(regs); + + die("System Reset", regs, SIGABRT); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable System Reset"); + + /* What should we do here? We could issue a shutdown or hard reset. */ +} +#endif + +/* + * I/O accesses can cause machine checks on powermacs. + * Check if the NIP corresponds to the address of a sync + * instruction for which there is an entry in the exception + * table. + * Note that the 601 only takes a machine check on TEA + * (transfer error ack) signal assertion, and does not + * set any of the top 16 bits of SRR1. + * -- paulus. + */ +static inline int check_io_access(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_PMAC + unsigned long msr = regs->msr; + const struct exception_table_entry *entry; + unsigned int *nip = (unsigned int *)regs->nip; + + if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000))) + && (entry = search_exception_tables(regs->nip)) != NULL) { + /* + * Check that it's a sync instruction, or somewhere + * in the twi; isync; nop sequence that inb/inw/inl uses. + * As the address is in the exception table + * we should be able to read the instr there. + * For the debug message, we look at the preceding + * load or store. 
+ */ + if (*nip == 0x60000000) /* nop */ + nip -= 2; + else if (*nip == 0x4c00012c) /* isync */ + --nip; + if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { + /* sync or twi */ + unsigned int rb; + + --nip; + rb = (*nip >> 11) & 0x1f; + printk(KERN_DEBUG "%s bad port %lx at %p\n", + (*nip & 0x100)? "OUT to": "IN from", + regs->gpr[rb] - _IO_BASE, nip); + regs->msr |= MSR_RI; + regs->nip = entry->fixup; + return 1; + } + } +#endif /* CONFIG_PPC_PMAC */ + return 0; +} + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +/* On 4xx, the reason for the machine check or program exception + is in the ESR. */ +#define get_reason(regs) ((regs)->dsisr) +#ifndef CONFIG_FSL_BOOKE +#define get_mc_reason(regs) ((regs)->dsisr) +#else +#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) +#endif +#define REASON_FP ESR_FP +#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) +#define REASON_PRIVILEGED ESR_PPR +#define REASON_TRAP ESR_PTR + +/* single-step stuff */ +#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) +#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) + +#else +/* On non-4xx, the reason for the machine check or program + exception is in the MSR. 
*/ +#define get_reason(regs) ((regs)->msr) +#define get_mc_reason(regs) ((regs)->msr) +#define REASON_FP 0x100000 +#define REASON_ILLEGAL 0x80000 +#define REASON_PRIVILEGED 0x40000 +#define REASON_TRAP 0x20000 + +#define single_stepping(regs) ((regs)->msr & MSR_SE) +#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) +#endif + +/* + * This is "fall-back" implementation for configurations + * which don't provide platform-specific machine check info + */ +void __attribute__ ((weak)) +platform_machine_check(struct pt_regs *regs) +{ +} + +void MachineCheckException(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC64 + int recover = 0; + + /* See if any machine dependent calls */ + if (ppc_md.machine_check_exception) + recover = ppc_md.machine_check_exception(regs); + + if (recover) + return; +#else + unsigned long reason = get_mc_reason(regs); + + if (user_mode(regs)) { + regs->msr |= MSR_RI; + _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); + return; + } + +#if defined(CONFIG_8xx) && defined(CONFIG_PCI) + /* the qspan pci read routines can cause machine checks -- Cort */ + bad_page_fault(regs, regs->dar, SIGBUS); + return; +#endif + + if (debugger_fault_handler(regs)) { + regs->msr |= MSR_RI; + return; + } + + if (check_io_access(regs)) + return; + +#if defined(CONFIG_4xx) && !defined(CONFIG_440A) + if (reason & ESR_IMCP) { + printk("Instruction"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } else + printk("Data"); + printk(" machine check in kernel mode.\n"); +#elif defined(CONFIG_440A) + printk("Machine check in kernel mode.\n"); + if (reason & ESR_IMCP){ + printk("Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } + else { + u32 mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk("Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk("Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk("Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk("TLB Parity Error\n"); + if (mcsr & MCSR_ICP){ + 
flush_instruction_cache(); + printk("I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk("D-Cache Search Parity Error\n"); + if (mcsr & MCSR_DCFP) + printk("D-Cache Flush Parity Error\n"); + if (mcsr & MCSR_IMPE) + printk("Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + } +#elif defined (CONFIG_E500) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_ICPERR) + printk("Instruction Cache Parity Error\n"); + if (reason & MCSR_DCP_PERR) + printk("Data Cache Push Parity Error\n"); + if (reason & MCSR_DCPERR) + printk("Data Cache Parity Error\n"); + if (reason & MCSR_GL_CI) + printk("Guarded Load or Cache-Inhibited stwcx.\n"); + if (reason & MCSR_BUS_IAERR) + printk("Bus - Instruction Address Error\n"); + if (reason & MCSR_BUS_RAERR) + printk("Bus - Read Address Error\n"); + if (reason & MCSR_BUS_WAERR) + printk("Bus - Write Address Error\n"); + if (reason & MCSR_BUS_IBERR) + printk("Bus - Instruction Data Error\n"); + if (reason & MCSR_BUS_RBERR) + printk("Bus - Read Data Bus Error\n"); + if (reason & MCSR_BUS_WBERR) + printk("Bus - Read Data Bus Error\n"); + if (reason & MCSR_BUS_IPERR) + printk("Bus - Instruction Parity Error\n"); + if (reason & MCSR_BUS_RPERR) + printk("Bus - Read Parity Error\n"); +#elif defined (CONFIG_E200) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_CP_PERR) + printk("Cache Push Parity Error\n"); + if (reason & MCSR_CPERR) + printk("Cache Parity Error\n"); + if (reason & MCSR_EXCP_ERR) + printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); + if (reason & MCSR_BUS_IRERR) + printk("Bus - Read Bus Error on instruction fetch\n"); + if (reason & MCSR_BUS_DRERR) + printk("Bus - Read Bus Error on data load\n"); 
+ if (reason & MCSR_BUS_WRERR) + printk("Bus - Write Bus Error on buffered store or cache line push\n"); +#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */ + printk("Machine check in kernel mode.\n"); + printk("Caused by (from SRR1=%lx): ", reason); + switch (reason & 0x601F0000) { + case 0x80000: + printk("Machine check signal\n"); + break; + case 0: /* for 601 */ + case 0x40000: + case 0x140000: /* 7450 MSS error and TEA */ + printk("Transfer error ack signal\n"); + break; + case 0x20000: + printk("Data parity error signal\n"); + break; + case 0x10000: + printk("Address parity error signal\n"); + break; + case 0x20000000: + printk("L1 Data Cache error\n"); + break; + case 0x40000000: + printk("L1 Instruction Cache error\n"); + break; + case 0x00100000: + printk("L2 data cache parity error\n"); + break; + default: + printk("Unknown values in msr\n"); + } +#endif /* CONFIG_4xx */ + + /* + * Optional platform-provided routine to print out + * additional info, e.g. bus error registers. 
+ */ + platform_machine_check(regs); +#endif /* CONFIG_PPC64 */ + + if (debugger_fault_handler(regs)) + return; + die("Machine check", regs, SIGBUS); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable Machine check"); +} + +void SMIException(struct pt_regs *regs) +{ + die("System Management Interrupt", regs, SIGABRT); +} + +void UnknownException(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, 0, 0); +} + +void InstructionBreakpoint(struct pt_regs *regs) +{ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_iabr_match(regs)) + return; + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); +} + +void RunModeException(struct pt_regs *regs) +{ + _exception(SIGTRAP, regs, 0, 0); +} + +void SingleStepException(struct pt_regs *regs) +{ + regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_sstep(regs)) + return; + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); +} + +/* + * After we have successfully emulated an instruction, we have to + * check if the instruction was being single-stepped, and if so, + * pretend we got a single-step exception. This was pointed out + * by Kumar Gala. -- paulus + */ +static void emulate_single_step(struct pt_regs *regs) +{ + if (single_stepping(regs)) { + clear_single_step(regs); + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } +} + +/* Illegal instruction emulation support. Originally written to + * provide the PVR to user applications using the mfspr rd, PVR. + * Return non-zero if we can't emulate, or -EFAULT if the associated + * memory access caused an access fault. Return zero on success. + * + * There are a couple of ways to do this, either "decode" the instruction + * or directly match lots of bits. 
In this case, matching lots of + * bits is faster and easier. + * + */ +#define INST_MFSPR_PVR 0x7c1f42a6 +#define INST_MFSPR_PVR_MASK 0xfc1fffff + +#define INST_DCBA 0x7c0005ec +#define INST_DCBA_MASK 0x7c0007fe + +#define INST_MCRXR 0x7c000400 +#define INST_MCRXR_MASK 0x7c0007fe + +#define INST_STRING 0x7c00042a +#define INST_STRING_MASK 0x7c0007fe +#define INST_STRING_GEN_MASK 0x7c00067e +#define INST_LSWI 0x7c0004aa +#define INST_LSWX 0x7c00042a +#define INST_STSWI 0x7c0005aa +#define INST_STSWX 0x7c00052a + +static int emulate_string_inst(struct pt_regs *regs, u32 instword) +{ + u8 rT = (instword >> 21) & 0x1f; + u8 rA = (instword >> 16) & 0x1f; + u8 NB_RB = (instword >> 11) & 0x1f; + u32 num_bytes; + unsigned long EA; + int pos = 0; + + /* Early out if we are an invalid form of lswx */ + if ((instword & INST_STRING_MASK) == INST_LSWX) + if ((rT == rA) || (rT == NB_RB)) + return -EINVAL; + + EA = (rA == 0) ? 0 : regs->gpr[rA]; + + switch (instword & INST_STRING_MASK) { + case INST_LSWX: + case INST_STSWX: + EA += NB_RB; + num_bytes = regs->xer & 0x7f; + break; + case INST_LSWI: + case INST_STSWI: + num_bytes = (NB_RB == 0) ? 
32 : NB_RB; + break; + default: + return -EINVAL; + } + + while (num_bytes != 0) + { + u8 val; + u32 shift = 8 * (3 - (pos & 0x3)); + + switch ((instword & INST_STRING_MASK)) { + case INST_LSWX: + case INST_LSWI: + if (get_user(val, (u8 __user *)EA)) + return -EFAULT; + /* first time updating this reg, + * zero it out */ + if (pos == 0) + regs->gpr[rT] = 0; + regs->gpr[rT] |= val << shift; + break; + case INST_STSWI: + case INST_STSWX: + val = regs->gpr[rT] >> shift; + if (put_user(val, (u8 __user *)EA)) + return -EFAULT; + break; + } + /* move EA to next address */ + EA += 1; + num_bytes--; + + /* manage our position within the register */ + if (++pos == 4) { + pos = 0; + if (++rT == 32) + rT = 0; + } + } + + return 0; +} + +static int emulate_instruction(struct pt_regs *regs) +{ + u32 instword; + u32 rd; + + if (!user_mode(regs)) + return -EINVAL; + CHECK_FULL_REGS(regs); + + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EFAULT; + + /* Emulate the mfspr rD, PVR. */ + if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_PVR); + return 0; + } + + /* Emulating the dcba insn is just a no-op. */ + if ((instword & INST_DCBA_MASK) == INST_DCBA) + return 0; + + /* Emulate the mcrxr insn. */ + if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { + int shift = (instword >> 21) & 0x1c; + unsigned long msk = 0xf0000000UL >> shift; + + regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); + regs->xer &= ~0xf0000000UL; + return 0; + } + + /* Emulate load/store string insn. */ + if ((instword & INST_STRING_GEN_MASK) == INST_STRING) + return emulate_string_inst(regs, instword); + + return -EINVAL; +} + +/* + * Look through the list of trap instructions that are used for BUG(), + * BUG_ON() and WARN_ON() and see if we hit one. At this point we know + * that the exception was caused by a trap instruction of some kind. + * Returns 1 if we should continue (i.e. 
it was a WARN_ON) or 0 + * otherwise. + */ +extern struct bug_entry __start___bug_table[], __stop___bug_table[]; + +#ifndef CONFIG_MODULES +#define module_find_bug(x) NULL +#endif + +struct bug_entry *find_bug(unsigned long bugaddr) +{ + struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug->bug_addr) + return bug; + return module_find_bug(bugaddr); +} + +int check_bug_trap(struct pt_regs *regs) +{ + struct bug_entry *bug; + unsigned long addr; + + if (regs->msr & MSR_PR) + return 0; /* not in kernel */ + addr = regs->nip; /* address of trap instruction */ + if (addr < PAGE_OFFSET) + return 0; + bug = find_bug(regs->nip); + if (bug == NULL) + return 0; + if (bug->line & BUG_WARNING_TRAP) { + /* this is a WARN_ON rather than BUG/BUG_ON */ +#ifdef CONFIG_XMON + xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); +#endif /* CONFIG_XMON */ + printk(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); + dump_stack(); + return 1; + } +#ifdef CONFIG_XMON + xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + xmon(regs); +#endif /* CONFIG_XMON */ + printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + + return 0; +} + +void ProgramCheckException(struct pt_regs *regs) +{ + unsigned int reason = get_reason(regs); + extern int do_mathemu(struct pt_regs *regs); + +#ifdef CONFIG_MATH_EMULATION + /* (reason & REASON_ILLEGAL) would be the obvious thing here, + * but there seems to be a hardware bug on the 405GP (RevD) + * that means ESR is sometimes set incorrectly - either to + * ESR_DST (!?) or 0. In the process of chasing this with the + * hardware people - not sure if it can happen on any illegal + * instruction or only on FP instructions, whether there is a + * pattern to occurences etc. 
-dgibson 31/Mar/2003 */ + if (!(reason & REASON_TRAP) && do_mathemu(regs) == 0) { + emulate_single_step(regs); + return; + } +#endif /* CONFIG_MATH_EMULATION */ + + if (reason & REASON_FP) { + /* IEEE FP exception */ + int code = 0; + u32 fpscr; + + /* We must make sure the FP state is consistent with + * our MSR_FP in regs + */ + preempt_disable(); + if (regs->msr & MSR_FP) + giveup_fpu(current); + preempt_enable(); + + fpscr = current->thread.fpscr; + fpscr &= fpscr << 22; /* mask summary bits with enables */ + if (fpscr & FPSCR_VX) + code = FPE_FLTINV; + else if (fpscr & FPSCR_OX) + code = FPE_FLTOVF; + else if (fpscr & FPSCR_UX) + code = FPE_FLTUND; + else if (fpscr & FPSCR_ZX) + code = FPE_FLTDIV; + else if (fpscr & FPSCR_XX) + code = FPE_FLTRES; + _exception(SIGFPE, regs, code, regs->nip); + return; + } + + if (reason & REASON_TRAP) { + /* trap exception */ + if (debugger_bpt(regs)) + return; + if (check_bug_trap(regs)) { + regs->nip += 4; + return; + } + _exception(SIGTRAP, regs, TRAP_BRKPT, 0); + return; + } + + /* Try to emulate it if we should. 
*/ + if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { + switch (emulate_instruction(regs)) { + case 0: + regs->nip += 4; + emulate_single_step(regs); + return; + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + } + + if (reason & REASON_PRIVILEGED) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); +} + +void AlignmentException(struct pt_regs *regs) +{ + int fixed; + + fixed = fix_alignment(regs); + + if (fixed == 1) { + regs->nip += 4; /* skip over emulated instruction */ + emulate_single_step(regs); + return; + } + + /* Operand address was bad */ + if (fixed == -EFAULT) { + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar); + else + /* Search exception table */ + bad_page_fault(regs, regs->dar, SIGSEGV); + return; + } + _exception(SIGBUS, regs, BUS_ADRALN, regs->dar); +} + +void StackOverflow(struct pt_regs *regs) +{ + printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", + current, regs->gpr[1]); + debugger(regs); + show_regs(regs); + panic("kernel stack overflow"); +} + +void nonrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", + regs->nip, regs->msr); + debugger(regs); + die("nonrecoverable exception", regs, SIGKILL); +} + +void trace_syscall(struct pt_regs *regs) +{ + printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, current->pid, regs->nip, regs->link, regs->gpr[0], + regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); +} + +#ifdef CONFIG_8xx +void SoftwareEmulation(struct pt_regs *regs) +{ + extern int do_mathemu(struct pt_regs *); + extern int Soft_emulate_8xx(struct pt_regs *); + int errcode; + + CHECK_FULL_REGS(regs); + + if (!user_mode(regs)) { + debugger(regs); + die("Kernel Mode Software FPU Emulation", regs, SIGFPE); + } + +#ifdef CONFIG_MATH_EMULATION + errcode = do_mathemu(regs); +#else + errcode = 
Soft_emulate_8xx(regs); +#endif + if (errcode) { + if (errcode > 0) + _exception(SIGFPE, regs, 0, 0); + else if (errcode == -EFAULT) + _exception(SIGSEGV, regs, 0, 0); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + } else + emulate_single_step(regs); +} +#endif /* CONFIG_8xx */ + +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + +void DebugException(struct pt_regs *regs, unsigned long debug_status) +{ + if (debug_status & DBSR_IC) { /* instruction completion */ + regs->msr &= ~MSR_DE; + if (user_mode(regs)) { + current->thread.dbcr0 &= ~DBCR0_IC; + } else { + /* Disable instruction completion */ + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); + /* Clear the instruction completion event */ + mtspr(SPRN_DBSR, DBSR_IC); + if (debugger_sstep(regs)) + return; + } + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } +} +#endif /* CONFIG_4xx || CONFIG_BOOKE */ + +#if !defined(CONFIG_TAU_INT) +void TAUException(struct pt_regs *regs) +{ + printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", + regs->nip, regs->msr, regs->trap, print_tainted()); +} +#endif /* CONFIG_INT_TAU */ + +void AltivecUnavailException(struct pt_regs *regs) +{ + static int kernel_altivec_count; + +#ifndef CONFIG_ALTIVEC + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't support altivec. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } +#endif + /* The kernel has executed an altivec instruction without + first enabling altivec. Whinge but let it do it. 
*/ + if (++kernel_altivec_count < 10) + printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", + current, regs->nip); + regs->msr |= MSR_VEC; +} + +#ifdef CONFIG_ALTIVEC +void AltivecAssistException(struct pt_regs *regs) +{ + int err; + + preempt_disable(); + if (regs->msr & MSR_VEC) + giveup_altivec(current); + preempt_enable(); + if (!user_mode(regs)) { + printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" + " at %lx\n", regs->nip); + die("Kernel Altivec assist exception", regs, SIGILL); + } + + err = emulate_altivec(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); + } else { + /* didn't recognize the instruction */ + /* XXX quick hack for now: set the non-Java bit in the VSCR */ + if (printk_ratelimit()) + printk(KERN_ERR "Unrecognized altivec instruction " + "in %s at %lx\n", current->comm, regs->nip); + current->thread.vscr.u[3] |= 0x10000; + } +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_E500 +void PerformanceMonitorException(struct pt_regs *regs) +{ + perf_irq(regs); +} +#endif + +#ifdef CONFIG_FSL_BOOKE +void CacheLockingException(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + /* We treat cache locking instructions from the user + * as priv ops, in the future we could try to do + * something smarter + */ + if (error_code & (ESR_DLK|ESR_ILK)) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + return; +} +#endif /* CONFIG_FSL_BOOKE */ + +#ifdef CONFIG_SPE +void SPEFloatingPointException(struct pt_regs *regs) +{ + unsigned long spefscr; + int fpexc_mode; + int code = 0; + + spefscr = current->thread.spefscr; + fpexc_mode = current->thread.fpexc_mode; + + /* Hardware does not neccessarily set sticky + * underflow/overflow/invalid flags */ + if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) { + code = 
FPE_FLTOVF; + spefscr |= SPEFSCR_FOVFS; + } + else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) { + code = FPE_FLTUND; + spefscr |= SPEFSCR_FUNFS; + } + else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV)) + code = FPE_FLTDIV; + else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) { + code = FPE_FLTINV; + spefscr |= SPEFSCR_FINVS; + } + else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES)) + code = FPE_FLTRES; + + current->thread.spefscr = spefscr; + + _exception(SIGFPE, regs, code, regs->nip); + return; +} +#endif + +#ifdef CONFIG_BOOKE_WDT +/* + * Default handler for a Watchdog exception, + * spins until a reboot occurs + */ +void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) +{ + /* Generic WatchdogHandler, implement your own */ + mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); + return; +} + +void WatchdogException(struct pt_regs *regs) +{ + printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); + WatchdogHandler(regs); +} +#endif + +void __init trap_init(void) +{ +} diff --git a/arch/powerpc/kernel/traps64.c b/arch/powerpc/kernel/traps64.c new file mode 100644 --- /dev/null +++ b/arch/powerpc/kernel/traps64.c @@ -0,0 +1,568 @@ +/* + * linux/arch/ppc64/kernel/traps.c + * + * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Modified by Cort Dougan (cort at cs.nmt.edu) + * and Paul Mackerras (paulus at cs.anu.edu.au) + */ + +/* + * This file handles the architecture-dependent parts of hardware exceptions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUGGER +int (*__debugger)(struct pt_regs *regs); +int (*__debugger_ipi)(struct pt_regs *regs); +int (*__debugger_bpt)(struct pt_regs *regs); +int (*__debugger_sstep)(struct pt_regs *regs); +int (*__debugger_iabr_match)(struct pt_regs *regs); +int (*__debugger_dabr_match)(struct pt_regs *regs); +int (*__debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(__debugger); +EXPORT_SYMBOL(__debugger_ipi); +EXPORT_SYMBOL(__debugger_bpt); +EXPORT_SYMBOL(__debugger_sstep); +EXPORT_SYMBOL(__debugger_iabr_match); +EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_fault_handler); +#endif + +struct notifier_block *powerpc_die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&powerpc_die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + +/* + * Trap & Exception support + */ + +static DEFINE_SPINLOCK(die_lock); + +int die(const char *str, struct pt_regs *regs, long err) +{ + static int die_counter; + int nl = 0; + + if (debugger(regs)) + return 1; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC 
"); + nl = 1; +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); + nl = 1; +#endif + switch(systemcfg->platform) { + case PLATFORM_PSERIES: + printk("PSERIES "); + nl = 1; + break; + case PLATFORM_PSERIES_LPAR: + printk("PSERIES LPAR "); + nl = 1; + break; + case PLATFORM_ISERIES_LPAR: + printk("ISERIES LPAR "); + nl = 1; + break; + case PLATFORM_POWERMAC: + printk("POWERMAC "); + nl = 1; + break; + case PLATFORM_BPA: + printk("BPA "); + nl = 1; + break; + } + if (nl) + printk("\n"); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) { + printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); + ssleep(5); + panic("Fatal exception"); + } + do_exit(SIGSEGV); + + return 0; +} + +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + siginfo_t info; + + if (!user_mode(regs)) { + if (die("Exception in kernel mode", regs, signr)) + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = signr; + info.si_code = code; + info.si_addr = (void __user *) addr; + force_sig_info(signr, &info, current); +} + +void system_reset_exception(struct pt_regs *regs) +{ + /* See if any machine dependent calls */ + if (ppc_md.system_reset_exception) + ppc_md.system_reset_exception(regs); + + die("System Reset", regs, 0); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable System Reset"); + + /* What should we do here? We could issue a shutdown or hard reset. 
*/ +} + +void machine_check_exception(struct pt_regs *regs) +{ + int recover = 0; + + /* See if any machine dependent calls */ + if (ppc_md.machine_check_exception) + recover = ppc_md.machine_check_exception(regs); + + if (recover) + return; + + if (debugger_fault_handler(regs)) + return; + die("Machine check", regs, 0); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable Machine check"); +} + +void unknown_exception(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, 0, 0); +} + +void instruction_breakpoint_exception(struct pt_regs *regs) +{ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_iabr_match(regs)) + return; + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); +} + +void __kprobes single_step_exception(struct pt_regs *regs) +{ + regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_sstep(regs)) + return; + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); +} + +/* + * After we have successfully emulated an instruction, we have to + * check if the instruction was being single-stepped, and if so, + * pretend we got a single-step exception. This was pointed out + * by Kumar Gala. 
-- paulus + */ +static inline void emulate_single_step(struct pt_regs *regs) +{ + if (regs->msr & MSR_SE) + single_step_exception(regs); +} + +static void parse_fpe(struct pt_regs *regs) +{ + int code = 0; + unsigned long fpscr; + + flush_fp_to_thread(current); + + fpscr = current->thread.fpscr; + + /* Invalid operation */ + if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) + code = FPE_FLTINV; + + /* Overflow */ + else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) + code = FPE_FLTOVF; + + /* Underflow */ + else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) + code = FPE_FLTUND; + + /* Divide by zero */ + else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) + code = FPE_FLTDIV; + + /* Inexact result */ + else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) + code = FPE_FLTRES; + + _exception(SIGFPE, regs, code, regs->nip); +} + +/* + * Illegal instruction emulation support. Return non-zero if we can't + * emulate, or -EFAULT if the associated memory access caused an access + * fault. Return zero on success. + */ + +#define INST_MFSPR_PVR 0x7c1f42a6 +#define INST_MFSPR_PVR_MASK 0xfc1fffff + +#define INST_DCBA 0x7c0005ec +#define INST_DCBA_MASK 0x7c0007fe + +#define INST_MCRXR 0x7c000400 +#define INST_MCRXR_MASK 0x7c0007fe + +static int emulate_instruction(struct pt_regs *regs) +{ + unsigned int instword; + + if (!user_mode(regs)) + return -EINVAL; + + CHECK_FULL_REGS(regs); + + if (get_user(instword, (unsigned int __user *)(regs->nip))) + return -EFAULT; + + /* Emulate the mfspr rD, PVR. */ + if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { + unsigned int rd; + + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_PVR); + return 0; + } + + /* Emulating the dcba insn is just a no-op. */ + if ((instword & INST_DCBA_MASK) == INST_DCBA) { + static int warned; + + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'dcba' insn\n", + current->pid, current->comm); + warned = 1; + } + return 0; + } + + /* Emulate the mcrxr insn. 
*/ + if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { + static int warned; + unsigned int shift; + + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'mcrxr' insn\n", + current->pid, current->comm); + warned = 1; + } + + shift = (instword >> 21) & 0x1c; + regs->ccr &= ~(0xf0000000 >> shift); + regs->ccr |= (regs->xer & 0xf0000000) >> shift; + regs->xer &= ~0xf0000000; + return 0; + } + + return -EINVAL; +} + +/* + * Look through the list of trap instructions that are used for BUG(), + * BUG_ON() and WARN_ON() and see if we hit one. At this point we know + * that the exception was caused by a trap instruction of some kind. + * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 + * otherwise. + */ +extern struct bug_entry __start___bug_table[], __stop___bug_table[]; + +#ifndef CONFIG_MODULES +#define module_find_bug(x) NULL +#endif + +struct bug_entry *find_bug(unsigned long bugaddr) +{ + struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug->bug_addr) + return bug; + return module_find_bug(bugaddr); +} + +static int +check_bug_trap(struct pt_regs *regs) +{ + struct bug_entry *bug; + unsigned long addr; + + if (regs->msr & MSR_PR) + return 0; /* not in kernel */ + addr = regs->nip; /* address of trap instruction */ + if (addr < PAGE_OFFSET) + return 0; + bug = find_bug(regs->nip); + if (bug == NULL) + return 0; + if (bug->line & BUG_WARNING_TRAP) { + /* this is a WARN_ON rather than BUG/BUG_ON */ + printk(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); + show_stack(current, (void *)regs->gpr[1]); + return 1; + } + printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + return 0; +} + +void __kprobes program_check_exception(struct pt_regs *regs) +{ + if (debugger_fault_handler(regs)) + return; + + if (regs->msr & 0x100000) { + /* IEEE FP exception */ + parse_fpe(regs); + } else if (regs->msr & 
0x20000) { + /* trap exception */ + + if (notify_die(DIE_BPT, "breakpoint", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_bpt(regs)) + return; + + if (check_bug_trap(regs)) { + regs->nip += 4; + return; + } + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + + } else { + /* Privileged or illegal instruction; try to emulate it. */ + switch (emulate_instruction(regs)) { + case 0: + regs->nip += 4; + emulate_single_step(regs); + break; + + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + break; + + default: + if (regs->msr & 0x40000) + /* priveleged */ + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + /* illegal */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + break; + } + } +} + +void kernel_fp_unavailable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); +} + +void altivec_unavailable_exception(struct pt_regs *regs) +{ + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't support altivec. 
*/ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } + printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); +} + +extern perf_irq_t perf_irq; + +void performance_monitor_exception(struct pt_regs *regs) +{ + perf_irq(regs); +} + +void alignment_exception(struct pt_regs *regs) +{ + int fixed; + + fixed = fix_alignment(regs); + + if (fixed == 1) { + regs->nip += 4; /* skip over emulated instruction */ + emulate_single_step(regs); + return; + } + + /* Operand address was bad */ + if (fixed == -EFAULT) { + if (user_mode(regs)) { + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar); + } else { + /* Search exception table */ + bad_page_fault(regs, regs->dar, SIGSEGV); + } + + return; + } + + _exception(SIGBUS, regs, BUS_ADRALN, regs->nip); +} + +#ifdef CONFIG_ALTIVEC +void altivec_assist_exception(struct pt_regs *regs) +{ + int err; + siginfo_t info; + + if (!user_mode(regs)) { + printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" + " at %lx\n", regs->nip); + die("Kernel VMX/Altivec assist exception", regs, SIGILL); + } + + flush_altivec_to_thread(current); + + err = emulate_altivec(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) regs->nip; + force_sig_info(SIGSEGV, &info, current); + } else { + /* didn't recognize the instruction */ + /* XXX quick hack for now: set the non-Java bit in the VSCR */ + if (printk_ratelimit()) + printk(KERN_ERR "Unrecognized altivec instruction " + "in %s at %lx\n", current->comm, regs->nip); + current->thread.vscr.u[3] |= 0x10000; + } +} +#endif /* CONFIG_ALTIVEC */ + +/* + * We enter here if we get an unrecoverable exception, that is, one 
+ * that happened at a point where the RI (recoverable interrupt) bit + * in the MSR is 0. This indicates that SRR0/1 are live, and that + * we therefore lost state by taking this exception. + */ +void unrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", + regs->trap, regs->nip); + die("Unrecoverable exception", regs, SIGABRT); +} + +/* + * We enter here if we discover during exception entry that we are + * running in supervisor mode with a userspace value in the stack pointer. + */ +void kernel_bad_stack(struct pt_regs *regs) +{ + printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", + regs->gpr[1], regs->nip); + die("Bad kernel stack pointer", regs, SIGABRT); +} + +void __init trap_init(void) +{ +} diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -2,10 +2,12 @@ # Makefile for the linux ppc64 kernel. # +ifneq ($(CONFIG_PPC_MERGE),y) + EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds -obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ +obj-y := setup.o entry.o traps64.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ align.o semaphore.o bitops.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ @@ -77,3 +79,9 @@ endif # These are here while we do the architecture merge vecemu-y += ../../powerpc/kernel/vecemu.o vector-y += ../../powerpc/kernel/vector.o +idle_power4-y += ../../powerpc/kernel/idle_power4.o +traps64-y += ../../powerpc/kernel/traps64.o + +else + +endif diff --git a/arch/ppc64/kernel/idle_power4.S b/arch/ppc64/kernel/idle_power4.S deleted file mode 100644 --- a/arch/ppc64/kernel/idle_power4.S +++ /dev/null @@ -1,79 +0,0 @@ -/* - * This file contains the power_save function for 6xx & 7xxx CPUs - * rewritten in assembler - * - * Warning ! This code assumes that if your machine has a 750fx - * it will have PLL 1 set to low speed mode (used during NAP/DOZE). 
- * if this is not the case some additional changes will have to - * be done to check a runtime var (a bit like powersave-nap) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG - - .text - -/* - * Here is the power_save_6xx function. This could eventually be - * split into several functions & changing the function pointer - * depending on the various features. - */ -_GLOBAL(power4_idle) -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) - /* We must dynamically check for the NAP feature as it - * can be cleared by CPU init after the fixups are done - */ - LOADBASE(r3,cur_cpu_spec) - ld r4,cur_cpu_spec@l(r3) - ld r4,CPU_SPEC_FEATURES(r4) - andi. r0,r4,CPU_FTR_CAN_NAP - beqlr - /* Now check if user or arch enabled NAP mode */ - LOADBASE(r3,powersave_nap) - lwz r4,powersave_nap@l(r3) - cmpwi 0,r4,0 - beqlr - - /* Clear MSR:EE */ - mfmsr r7 - li r4,0 - ori r4,r4,MSR_EE - andc r0,r7,r4 - mtmsrd r0 - - /* Check current_thread_info()->flags */ - clrrdi r4,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r4) - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - mtmsrd r7 /* out of line this ? 
*/ - blr -1: - /* Go to NAP now */ -BEGIN_FTR_SECTION - DSSALL - sync -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - oris r7,r7,MSR_POW@h - sync - isync - mtmsrd r7 - isync - sync - blr - diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c deleted file mode 100644 --- a/arch/ppc64/kernel/traps.c +++ /dev/null @@ -1,568 +0,0 @@ -/* - * linux/arch/ppc64/kernel/traps.c - * - * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Modified by Cort Dougan (cort at cs.nmt.edu) - * and Paul Mackerras (paulus at cs.anu.edu.au) - */ - -/* - * This file handles the architecture-dependent parts of hardware exceptions - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_DEBUGGER -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); - -EXPORT_SYMBOL(__debugger); -EXPORT_SYMBOL(__debugger_ipi); -EXPORT_SYMBOL(__debugger_bpt); -EXPORT_SYMBOL(__debugger_sstep); -EXPORT_SYMBOL(__debugger_iabr_match); -EXPORT_SYMBOL(__debugger_dabr_match); -EXPORT_SYMBOL(__debugger_fault_handler); -#endif - -struct notifier_block *powerpc_die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); - -int register_die_notifier(struct notifier_block *nb) -{ - int err = 0; - unsigned long flags; - - 
spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&powerpc_die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; -} - -/* - * Trap & Exception support - */ - -static DEFINE_SPINLOCK(die_lock); - -int die(const char *str, struct pt_regs *regs, long err) -{ - static int die_counter; - int nl = 0; - - if (debugger(regs)) - return 1; - - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); - nl = 1; -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); - nl = 1; -#endif - switch(systemcfg->platform) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_BPA: - printk("BPA "); - nl = 1; - break; - } - if (nl) - printk("\n"); - print_modules(); - show_regs(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - - if (in_interrupt()) - panic("Fatal exception in interrupt"); - - if (panic_on_oops) { - printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); - ssleep(5); - panic("Fatal exception"); - } - do_exit(SIGSEGV); - - return 0; -} - -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) -{ - siginfo_t info; - - if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } - - memset(&info, 0, sizeof(info)); - info.si_signo = signr; - info.si_code = code; - info.si_addr = (void __user *) addr; - force_sig_info(signr, &info, current); -} - -void system_reset_exception(struct pt_regs *regs) -{ - /* See if any machine dependent calls */ - if 
(ppc_md.system_reset_exception) - ppc_md.system_reset_exception(regs); - - die("System Reset", regs, 0); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable System Reset"); - - /* What should we do here? We could issue a shutdown or hard reset. */ -} - -void machine_check_exception(struct pt_regs *regs) -{ - int recover = 0; - - /* See if any machine dependent calls */ - if (ppc_md.machine_check_exception) - recover = ppc_md.machine_check_exception(regs); - - if (recover) - return; - - if (debugger_fault_handler(regs)) - return; - die("Machine check", regs, 0); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable Machine check"); -} - -void unknown_exception(struct pt_regs *regs) -{ - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", - regs->nip, regs->msr, regs->trap); - - _exception(SIGTRAP, regs, 0, 0); -} - -void instruction_breakpoint_exception(struct pt_regs *regs) -{ - if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_iabr_match(regs)) - return; - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); -} - -void __kprobes single_step_exception(struct pt_regs *regs) -{ - regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ - - if (notify_die(DIE_SSTEP, "single_step", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_sstep(regs)) - return; - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); -} - -/* - * After we have successfully emulated an instruction, we have to - * check if the instruction was being single-stepped, and if so, - * pretend we got a single-step exception. This was pointed out - * by Kumar Gala. 
-- paulus - */ -static inline void emulate_single_step(struct pt_regs *regs) -{ - if (regs->msr & MSR_SE) - single_step_exception(regs); -} - -static void parse_fpe(struct pt_regs *regs) -{ - int code = 0; - unsigned long fpscr; - - flush_fp_to_thread(current); - - fpscr = current->thread.fpscr; - - /* Invalid operation */ - if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) - code = FPE_FLTINV; - - /* Overflow */ - else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) - code = FPE_FLTOVF; - - /* Underflow */ - else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) - code = FPE_FLTUND; - - /* Divide by zero */ - else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) - code = FPE_FLTDIV; - - /* Inexact result */ - else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) - code = FPE_FLTRES; - - _exception(SIGFPE, regs, code, regs->nip); -} - -/* - * Illegal instruction emulation support. Return non-zero if we can't - * emulate, or -EFAULT if the associated memory access caused an access - * fault. Return zero on success. - */ - -#define INST_MFSPR_PVR 0x7c1f42a6 -#define INST_MFSPR_PVR_MASK 0xfc1fffff - -#define INST_DCBA 0x7c0005ec -#define INST_DCBA_MASK 0x7c0007fe - -#define INST_MCRXR 0x7c000400 -#define INST_MCRXR_MASK 0x7c0007fe - -static int emulate_instruction(struct pt_regs *regs) -{ - unsigned int instword; - - if (!user_mode(regs)) - return -EINVAL; - - CHECK_FULL_REGS(regs); - - if (get_user(instword, (unsigned int __user *)(regs->nip))) - return -EFAULT; - - /* Emulate the mfspr rD, PVR. */ - if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { - unsigned int rd; - - rd = (instword >> 21) & 0x1f; - regs->gpr[rd] = mfspr(SPRN_PVR); - return 0; - } - - /* Emulating the dcba insn is just a no-op. */ - if ((instword & INST_DCBA_MASK) == INST_DCBA) { - static int warned; - - if (!warned) { - printk(KERN_WARNING - "process %d (%s) uses obsolete 'dcba' insn\n", - current->pid, current->comm); - warned = 1; - } - return 0; - } - - /* Emulate the mcrxr insn. 
*/ - if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { - static int warned; - unsigned int shift; - - if (!warned) { - printk(KERN_WARNING - "process %d (%s) uses obsolete 'mcrxr' insn\n", - current->pid, current->comm); - warned = 1; - } - - shift = (instword >> 21) & 0x1c; - regs->ccr &= ~(0xf0000000 >> shift); - regs->ccr |= (regs->xer & 0xf0000000) >> shift; - regs->xer &= ~0xf0000000; - return 0; - } - - return -EINVAL; -} - -/* - * Look through the list of trap instructions that are used for BUG(), - * BUG_ON() and WARN_ON() and see if we hit one. At this point we know - * that the exception was caused by a trap instruction of some kind. - * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 - * otherwise. - */ -extern struct bug_entry __start___bug_table[], __stop___bug_table[]; - -#ifndef CONFIG_MODULES -#define module_find_bug(x) NULL -#endif - -struct bug_entry *find_bug(unsigned long bugaddr) -{ - struct bug_entry *bug; - - for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) - if (bugaddr == bug->bug_addr) - return bug; - return module_find_bug(bugaddr); -} - -static int -check_bug_trap(struct pt_regs *regs) -{ - struct bug_entry *bug; - unsigned long addr; - - if (regs->msr & MSR_PR) - return 0; /* not in kernel */ - addr = regs->nip; /* address of trap instruction */ - if (addr < PAGE_OFFSET) - return 0; - bug = find_bug(regs->nip); - if (bug == NULL) - return 0; - if (bug->line & BUG_WARNING_TRAP) { - /* this is a WARN_ON rather than BUG/BUG_ON */ - printk(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); - show_stack(current, (void *)regs->gpr[1]); - return 1; - } - printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - return 0; -} - -void __kprobes program_check_exception(struct pt_regs *regs) -{ - if (debugger_fault_handler(regs)) - return; - - if (regs->msr & 0x100000) { - /* IEEE FP exception */ - parse_fpe(regs); - } else if (regs->msr & 
0x20000) { - /* trap exception */ - - if (notify_die(DIE_BPT, "breakpoint", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_bpt(regs)) - return; - - if (check_bug_trap(regs)) { - regs->nip += 4; - return; - } - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - - } else { - /* Privileged or illegal instruction; try to emulate it. */ - switch (emulate_instruction(regs)) { - case 0: - regs->nip += 4; - emulate_single_step(regs); - break; - - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - break; - - default: - if (regs->msr & 0x40000) - /* priveleged */ - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - else - /* illegal */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - break; - } - } -} - -void kernel_fp_unavailable_exception(struct pt_regs *regs) -{ - printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " - "%lx at %lx\n", regs->trap, regs->nip); - die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); -} - -void altivec_unavailable_exception(struct pt_regs *regs) -{ - if (user_mode(regs)) { - /* A user program has executed an altivec instruction, - but this kernel doesn't support altivec. 
*/ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } - printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " - "%lx at %lx\n", regs->trap, regs->nip); - die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); -} - -extern perf_irq_t perf_irq; - -void performance_monitor_exception(struct pt_regs *regs) -{ - perf_irq(regs); -} - -void alignment_exception(struct pt_regs *regs) -{ - int fixed; - - fixed = fix_alignment(regs); - - if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ - emulate_single_step(regs); - return; - } - - /* Operand address was bad */ - if (fixed == -EFAULT) { - if (user_mode(regs)) { - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar); - } else { - /* Search exception table */ - bad_page_fault(regs, regs->dar, SIGSEGV); - } - - return; - } - - _exception(SIGBUS, regs, BUS_ADRALN, regs->nip); -} - -#ifdef CONFIG_ALTIVEC -void altivec_assist_exception(struct pt_regs *regs) -{ - int err; - siginfo_t info; - - if (!user_mode(regs)) { - printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" - " at %lx\n", regs->nip); - die("Kernel VMX/Altivec assist exception", regs, SIGILL); - } - - flush_altivec_to_thread(current); - - err = emulate_altivec(regs); - if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ - emulate_single_step(regs); - return; - } - - if (err == -EFAULT) { - /* got an error reading the instruction */ - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) regs->nip; - force_sig_info(SIGSEGV, &info, current); - } else { - /* didn't recognize the instruction */ - /* XXX quick hack for now: set the non-Java bit in the VSCR */ - if (printk_ratelimit()) - printk(KERN_ERR "Unrecognized altivec instruction " - "in %s at %lx\n", current->comm, regs->nip); - current->thread.vscr.u[3] |= 0x10000; - } -} -#endif /* CONFIG_ALTIVEC */ - -/* - * We enter here if we get an unrecoverable exception, that is, one 
- * that happened at a point where the RI (recoverable interrupt) bit - * in the MSR is 0. This indicates that SRR0/1 are live, and that - * we therefore lost state by taking this exception. - */ -void unrecoverable_exception(struct pt_regs *regs) -{ - printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", - regs->trap, regs->nip); - die("Unrecoverable exception", regs, SIGABRT); -} - -/* - * We enter here if we discover during exception entry that we are - * running in supervisor mode with a userspace value in the stack pointer. - */ -void kernel_bad_stack(struct pt_regs *regs) -{ - printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", - regs->gpr[1], regs->nip); - die("Bad kernel stack pointer", regs, SIGABRT); -} - -void __init trap_init(void) -{ -} From sfr at canb.auug.org.au Sat Oct 1 00:14:39 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 00:14:39 +1000 Subject: [PATCH 9/9] powerpc: make iSeries boot In-Reply-To: <20050930233602.138b6e27.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> Message-ID: <20051001001439.2f04423d.sfr@canb.auug.org.au> Now that we use the device tree, it helps to build it in. It also helps to link the kernel at the correct address. 
Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 4 ++-- arch/powerpc/Kconfig.debug | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ 1b4c416e0cf237dce004392122db45eb22dbc416 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -317,7 +317,7 @@ config PPC_BPA config PPC_OF bool - depends on PPC_MULTIPLATFORM # for now + depends on PPC_MULTIPLATFORM || PPC_ISERIES default y config XICS @@ -836,7 +836,7 @@ endmenu if PPC64 config KERNEL_START hex - default "0xc0000000" + default "0xc000000000000000" endif source "net/Kconfig" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -55,7 +55,7 @@ config BDI_SWITCH config BOOTX_TEXT bool "Support for early boot text console (BootX or OpenFirmware only)" - depends PPC_OF + depends PPC_OF && !PPC_ISERIES help Say Y here to see progress messages from the boot firmware in text mode. Requires either BootX or Open Firmware. From arnd at arndb.de Sat Oct 1 00:28:48 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Fri, 30 Sep 2005 16:28:48 +0200 Subject: libspe for 2.6.13 spufs In-Reply-To: <20050929220009.146368000@localhost> References: <20050929220009.146368000@localhost> Message-ID: <200509301628.49277.arnd@arndb.de> As a companion to the spufs release posted yesterday, this is the user space libspe library from Dirk Herrendoerfer, together with the extracted interface documentation. This library gives an operating system independent abstraction on top of the spufs to work with asynchronous SPU threads. Arnd <>< -------------- next part -------------- A non-text attachment was scrubbed... 
Name: libspe-0.9.tar.gz Type: application/x-tgz Size: 21092 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20050930/91c930ab/attachment.bin -------------- next part -------------- A non-text attachment was scrubbed... Name: libspe-0.9.pdf.gz Type: application/x-gzip Size: 25758 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20050930/91c930ab/attachment-0001.bin From linas at austin.ibm.com Sat Oct 1 00:58:23 2005 From: linas at austin.ibm.com (linas) Date: Fri, 30 Sep 2005 09:58:23 -0500 Subject: [PATCH 7/7] ppc64: EEH Halt if bad drivers spin in error condition In-Reply-To: <200509300449.j8U4n94d014765@falcon30.maxeymade.com> References: <20050930010228.GG6173@austin.ibm.com> <200509300449.j8U4n94d014765@falcon30.maxeymade.com> Message-ID: <20050930145822.GM29826@austin.ibm.com> On Thu, Sep 29, 2005 at 11:49:09PM -0500, Doug Maxey was heard to remark: > > On Thu, 29 Sep 2005 20:02:28 CDT, linas wrote: > > > >07-eeh-spin-counter.patch > > > >One an EEH event is triggers, all further I/O to a device is blocked (until > >reset). Bad device drivers may end up spinning in their interrupt handlers, > >trying to read an interrupt status register that will never change state. > >This patch moves that spin counter to a per-device structure, and adds > >some diagnostic prints to help locate the bad driver. > > > > Which struct gets the element? struct pci_dn, which Paulus recently introduced; it splits off the pci parts from struct device_node. Think of it as holding all the firmware and arch-specific pieces that can't be jammed in the generic struct pci_dev. --linas From haveblue at us.ibm.com Sat Oct 1 04:23:05 2005 From: haveblue at us.ibm.com (Dave Hansen) Date: Fri, 30 Sep 2005 11:23:05 -0700 Subject: [PATCH] fix 2.6.14-rc2-git8 compile errors Message-ID: <1128104585.8123.21.camel@localhost> I've been having some problems compiling the latest git snapshot.
Seems to be some missing includes, which keeps it from finding stuff like boot_cpuid. But, this is after applying Anton's discontig removal patches as well, so it might be just a side-effect of those. Here's my config: http://www.sr71.net/patches/2.6.14/2.6.14-rc1-mhp1/configs/config-ppc64-abat --- arch/ppc64/kernel/pSeries_lpar.c | 0 memhotplug-dave/arch/ppc64/kernel/pSeries_setup.c | 1 + memhotplug-dave/arch/ppc64/kernel/time.c | 1 + 3 files changed, 2 insertions(+) diff -puN arch/ppc64/kernel/time.c~no-found-boot_cpuid arch/ppc64/kernel/time.c --- memhotplug/arch/ppc64/kernel/time.c~no-found-boot_cpuid 2005-09-30 11:00:40.000000000 -0700 +++ memhotplug-dave/arch/ppc64/kernel/time.c 2005-09-30 11:00:40.000000000 -0700 @@ -65,6 +65,7 @@ #include #include #include +#include #include #include diff -puN arch/ppc64/kernel/pSeries_setup.c~no-found-boot_cpuid arch/ppc64/kernel/pSeries_setup.c --- memhotplug/arch/ppc64/kernel/pSeries_setup.c~no-found-boot_cpuid 2005-09-30 11:00:40.000000000 -0700 +++ memhotplug-dave/arch/ppc64/kernel/pSeries_setup.c 2005-09-30 11:00:40.000000000 -0700 @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include From kumar.gala at freescale.com Sat Oct 1 06:52:40 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Fri, 30 Sep 2005 15:52:40 -0500 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and fixup traps.c In-Reply-To: <20051001000001.1f1d8c48.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> Message-ID: <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> (My first attempt at posting to the list failed due to size) I really don't like the idea of splitting up traps.c into traps32.c and traps64.c. This defeats the purpose of the merge. I expect that a significant portion of traps.c is common (or can be made to be) between all powerpc's.
- kumar On Sep 30, 2005, at 9:00 AM, Stephen Rothwell wrote: > Use idle_power4.S from ppc64 as we are not going to support > 32 bit power4 in the merged tree. > > create traps{32,64}.c as these are hard to merge. > > Signed-off-by: Stephen Rothwell > --- > > arch/powerpc/Kconfig | 4 > arch/powerpc/kernel/Makefile | 4 > arch/powerpc/kernel/idle_power4.S | 78 +++ > arch/powerpc/kernel/traps.c | 1047 > ------------------------------------- > arch/powerpc/kernel/traps32.c | 1047 > +++++++++++++++++++++++++++++++++++++ > arch/powerpc/kernel/traps64.c | 568 ++++++++++++++++++++ > arch/ppc64/kernel/Makefile | 10 > arch/ppc64/kernel/idle_power4.S | 79 --- > arch/ppc64/kernel/traps.c | 568 -------------------- > 9 files changed, 1707 insertions(+), 1698 deletions(-) > create mode 100644 arch/powerpc/kernel/idle_power4.S > delete mode 100644 arch/powerpc/kernel/traps.c > create mode 100644 arch/powerpc/kernel/traps32.c > create mode 100644 arch/powerpc/kernel/traps64.c > delete mode 100644 arch/ppc64/kernel/idle_power4.S > delete mode 100644 arch/ppc64/kernel/traps.c > From linas at austin.ibm.com Sat Oct 1 08:29:18 2005 From: linas at austin.ibm.com (linas) Date: Fri, 30 Sep 2005 17:29:18 -0500 Subject: [PATCH 0/7] ppc64: Assorted minor EEH cleanups In-Reply-To: <20050930004800.GL29826@austin.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> Message-ID: <20050930222918.GN29826@austin.ibm.com> On Thu, Sep 29, 2005 at 07:48:00PM -0500, linas was heard to remark: > > They compile but (ahem) are not tested, They are now tested. They work (I had a corrupted initrd yesterday). Please apply and forward as soon as possible. During testing I found two unrelated bugs; wasn't able to squeeze out patches for today; maybe Monday. Paul, these are: 1) You added an eeh_capable flag that is never initialized, and so this blocks operation. I don't think this flag is needed, as it duplicates a bitflag in eeh_mode.
(Unless your plan is to use bitfields; do you want to use C language bitfields?) 2) PCI hotplug is broken because the flag phb->is_dynamic is never set to one. As a result, hotplug add calls __alloc_bootmem instead of kmalloc(), and crashes. I was testing a potential patch just now, but the clock ran out. --linas p.s. I hope to spit out the rest of the patches, including the kthread handling, early next week. I've got things mostly ported, and am testing. Let me know how to best coordinate on this. From jimix at watson.ibm.com Sat Oct 1 10:43:55 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Fri, 30 Sep 2005 20:43:55 -0400 Subject: To large page or not to large page Message-ID: <17213.56267.826654.651632@kitch0.watson.ibm.com> It seems as though Linux will map the kernel with large pages if the processor allows it, regardless of whether the lmb is sufficient to hold a large page, correct? Is there some runtime option to force the use of 4K pages? Ultimately, my desire is to define a 256 MB segment that, using a hypervisor, can be populated by shared pages (which can physically belong to the hypervisor or to other partitions/domains) and to restrict the mappings to 4K. I have some ideas, but am willing to hear any suggestions. -JX -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about."
-- Peter Griffin (Family Guy) From sfr at canb.auug.org.au Sat Oct 1 12:17:14 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 12:17:14 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and fixup traps.c In-Reply-To: <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> Message-ID: <20051001121714.1b5886aa.sfr@canb.auug.org.au> On Fri, 30 Sep 2005 15:52:40 -0500 Kumar Gala wrote: > > (My first attempt at posting to the list failed due to size) Yes, quoting the whole patch was probably not necessary :-) > I really dont like the ideal of splitting up traps.c into traps32.c > and traps64.c. This defeats the purpose of the merge. I expect that > a significant portion of traps.c is common (or can be made to be) > between all powerpc's. My first attempt at the merge was a real mess and a right pain, so I put the two files in as a compromise. However, I have made another attempt and, although it took a while, it seems to be going ok. How about we put the two in for now (as that will allow the merge of more platforms to continue) and I will supply a further patch in a few days that combines the two trapsxx.c files? -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ -------------- next part -------------- A non-text attachment was scrubbed...
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051001/faf277cd/attachment.pgp From paulus at samba.org Sat Oct 1 13:28:56 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 1 Oct 2005 13:28:56 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and fixup traps.c In-Reply-To: <20051001121714.1b5886aa.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> <20051001121714.1b5886aa.sfr@canb.auug.org.au> Message-ID: <17214.632.655003.750819@cargo.ozlabs.ibm.com> Stephen Rothwell writes: > My first attempt at the merge was a real mess and a right pain, so I put > the two files in as a compromise. However, I have made another attempt > and ,although it took a while, it seems to be going ok. How about we put > the two in for now (as that will allow the merge of more platforms to > continue) and I will supply a further patch in a few days that combines > the two trapsxx.c files? In that case, what is the advantage of having two traps*.c files in arch/powerpc/kernel instead of having them in arch/ppc*/kernel? Paul. 
From sfr at canb.auug.org.au Sat Oct 1 21:37:53 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 21:37:53 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and traps.c In-Reply-To: <17214.632.655003.750819@cargo.ozlabs.ibm.com> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> <20051001121714.1b5886aa.sfr@canb.auug.org.au> <17214.632.655003.750819@cargo.ozlabs.ibm.com> Message-ID: <20051001213753.52d5d5c3.sfr@canb.auug.org.au> On Sat, 1 Oct 2005 13:28:56 +1000 Paul Mackerras wrote: > > Stephen Rothwell writes: > > > My first attempt at the merge was a real mess and a right pain, so I put > > the two files in as a compromise. However, I have made another attempt > > and ,although it took a while, it seems to be going ok. How about we put > > the two in for now (as that will allow the merge of more platforms to > > continue) and I will supply a further patch in a few days that combines > > the two trapsxx.c files? > > In that case, what is the advantage of having two traps*.c files in > arch/powerpc/kernel instead of having them in arch/ppc*/kernel? OK, thanks for keeping me honest :-) Here are new versions of patches 6 and 7 (all the rest are the same as before). --------------- Use idle_power4.S from ppc64 as we are not going to support 32 bit power4 in the merged tree.
Merge ppc64 traps.c into powerpc traps.c: use ppc64 versions of exception routine names (as they don't have StudlyCaps) make all the versions if die() have the same prototype Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 4 arch/powerpc/kernel/head.S | 84 +++-- arch/powerpc/kernel/head_44x.S | 14 - arch/powerpc/kernel/head_4xx.S | 52 ++- arch/powerpc/kernel/head_8xx.S | 42 +-- arch/powerpc/kernel/head_fsl_booke.S | 24 + arch/powerpc/kernel/idle_power4.S | 78 +++++ arch/powerpc/kernel/ppc_ksyms.c | 16 - arch/powerpc/kernel/traps.c | 407 ++++++++++++++++++------ arch/ppc/kernel/head.S | 84 +++-- arch/ppc/kernel/head_44x.S | 14 - arch/ppc/kernel/head_4xx.S | 52 ++- arch/ppc/kernel/head_8xx.S | 42 +-- arch/ppc/kernel/head_booke.h | 4 arch/ppc/kernel/head_fsl_booke.S | 24 + arch/ppc/kernel/ppc_ksyms.c | 16 - arch/ppc/kernel/traps.c | 20 + arch/ppc/syslib/ibm44x_common.c | 2 arch/ppc/syslib/ppc4xx_setup.c | 2 arch/ppc64/kernel/Makefile | 8 arch/ppc64/kernel/idle_power4.S | 79 ----- arch/ppc64/kernel/traps.c | 568 ---------------------------------- include/asm-ppc/system.h | 2 23 files changed, 640 insertions(+), 998 deletions(-) create mode 100644 arch/powerpc/kernel/idle_power4.S delete mode 100644 arch/ppc64/kernel/idle_power4.S delete mode 100644 arch/ppc64/kernel/traps.c -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ dc1c1ca3dcd94c545c5e01d7c06b46824d43f4d0 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -142,8 +142,8 @@ config POWER4 def_bool y config PPC_FPU - bool - default y if PPC64 + depends on PPC32 + def_bool y config BOOKE bool diff --git a/arch/powerpc/kernel/head.S b/arch/powerpc/kernel/head.S --- a/arch/powerpc/kernel/head.S +++ b/arch/powerpc/kernel/head.S @@ -349,12 +349,12 @@ i##n: \ /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (UnknownException) when done. 
*/ + putting it back to what it was (unknown_exception) when done. */ #if defined(CONFIG_GEMINI) && defined(CONFIG_SMP) . = 0x100 b __secondary_start_gemini #else - EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) #endif /* Machine check */ @@ -389,7 +389,7 @@ i##n: \ cmpwi cr1,r4,0 bne cr1,1f #endif - EXC_XFER_STD(0x200, MachineCheckException) + EXC_XFER_STD(0x200, machine_check_exception) #ifdef CONFIG_PPC_CHRP 1: b machine_check_in_rtas #endif @@ -456,10 +456,10 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* Floating-point unavailable */ . = 0x800 @@ -472,8 +472,8 @@ FPUnavailable: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) /* System call */ . = 0xc00 @@ -482,8 +482,8 @@ SystemCall: EXC_XFER_EE_LITE(0xc00, DoSyscall) /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -502,7 +502,7 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0xf00, UnknownException) + EXC_XFER_EE(0xf00, unknown_exception) /* * Handle TLB miss for instruction on 603/603e. 
@@ -702,44 +702,44 @@ DataStoreTLBMiss: rfi #ifndef CONFIG_ALTIVEC -#define AltivecAssistException UnknownException +#define altivec_assist_exception unknown_exception #endif - EXCEPTION(0x1300, Trap_13, InstructionBreakpoint, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) #ifdef CONFIG_POWER4 - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, altivec_assist_exception, EXC_XFER_EE) EXCEPTION(0x1800, Trap_18, TAUException, EXC_XFER_STD) #else /* !CONFIG_POWER4 */ - EXCEPTION(0x1600, Trap_16, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_POWER4 */ - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, 
unknown_exception, EXC_XFER_EE) EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2d00, Trap_2d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2f00, MOLTrampoline, UnknownException, EXC_XFER_EE_LITE) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) .globl mol_trampoline .set mol_trampoline, i0x2f00 @@ -751,7 +751,7 @@ 
AltiVecUnavailable: #ifdef CONFIG_ALTIVEC bne load_up_altivec /* if from user, just load it up */ #endif /* CONFIG_ALTIVEC */ - EXC_XFER_EE_LITE(0xf20, AltivecUnavailException) + EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) #ifdef CONFIG_PPC64BRIDGE DataAccess: @@ -767,12 +767,12 @@ DataSegment: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_DAR stw r4,_DAR(r11) - EXC_XFER_STD(0x380, UnknownException) + EXC_XFER_STD(0x380, unknown_exception) InstructionSegment: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x480, UnknownException) + EXC_XFER_STD(0x480, unknown_exception) #endif /* CONFIG_PPC64BRIDGE */ #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -309,13 +309,13 @@ skpinv: addi r4,r4,1 /* Increment */ interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_440A - MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #else - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ @@ -442,7 +442,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x2010, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif /* System Call Interrupt */ @@ -451,21 +451,21 @@ interrupt_base: EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxillary Processor Unavailable Interrupt */ - EXCEPTION(0x2020, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ 
DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, UnknownException) + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ diff --git a/arch/powerpc/kernel/head_4xx.S b/arch/powerpc/kernel/head_4xx.S --- a/arch/powerpc/kernel/head_4xx.S +++ b/arch/powerpc/kernel/head_4xx.S @@ -245,12 +245,12 @@ label: /* * 0x0100 - Critical Interrupt Exception */ - CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) /* * 0x0200 - Machine Check Exception */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) /* * 0x0300 - Data Storage Exception @@ -405,7 +405,7 @@ label: mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ stw r4,_DEAR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) @@ -413,21 +413,21 @@ label: mfspr r4,SPRN_ESR /* Grab the ESR and save it */ stw r4,_ESR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x700, ProgramCheckException) + EXC_XFER_STD(0x700, program_check_exception) - EXCEPTION(0x0800, Trap_08, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0900, Trap_09, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0A00, Trap_0A, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0B00, Trap_0B, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) + 
EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) NORMAL_EXCEPTION_PROLOG EXC_XFER_EE_LITE(0xc00, DoSyscall) - EXCEPTION(0x0D00, Trap_0D, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0E00, Trap_0E, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0F00, Trap_0F, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ START_EXCEPTION(0x1000, Decrementer) @@ -444,14 +444,14 @@ label: /* 0x1010 - Fixed Interval Timer (FIT) Exception */ - STND_EXCEPTION(0x1010, FITException, UnknownException) + STND_EXCEPTION(0x1010, FITException, unknown_exception) /* 0x1020 - Watchdog Timer (WDT) Exception */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WDTException, UnknownException) + CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception) #endif #endif @@ -656,25 +656,25 @@ label: mfspr r10, SPRN_SPRG0 b InstructionAccess - EXCEPTION(0x1300, Trap_13, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1400, Trap_14, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) #ifdef CONFIG_IBM405_ERR51 /* 405GP errata 51 */ START_EXCEPTION(0x1700, Trap_17) b DTLBMiss #else - EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) #endif - EXCEPTION(0x1800, Trap_18, 
UnknownException, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1A00, Trap_1A, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1B00, Trap_1B, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1C00, Trap_1C, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1D00, Trap_1D, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1E00, Trap_1E, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1F00, Trap_1F, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -203,7 +203,7 @@ i##n: \ ret_from_except) /* System reset */ - EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) /* Machine check */ . = 0x200 @@ -214,7 +214,7 @@ MachineCheck: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x200, MachineCheckException) + EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. * This is "never generated" by the MPC8xx. 
We jump to it for other @@ -252,20 +252,20 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* No FPU on MPC8xx. This exception is not supposed to happen. */ - EXCEPTION(0x800, FPUnavailable, UnknownException, EXC_XFER_STD) + EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) /* System call */ . = 0xc00 @@ -274,9 +274,9 @@ SystemCall: EXC_XFER_EE_LITE(0xc00, DoSyscall) /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) - EXCEPTION(0xf00, Trap_0f, UnknownException, EXC_XFER_EE) + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. 
@@ -540,22 +540,22 @@ DataTLBError: #endif b DataAccess - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to * using them. */ - EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) . 
= 0x2000 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -426,14 +426,14 @@ skpinv: addi r6,r6,1 /* Increment */ interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_E200 /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #else - MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ @@ -542,9 +542,9 @@ interrupt_base: #else #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FloatingPointUnavailable, ProgramCheckException, EXC_XFER_EE) + EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) #else - EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif #endif @@ -554,20 +554,20 @@ interrupt_base: EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxillary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, 
UnknownException) + CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ @@ -696,21 +696,21 @@ interrupt_base: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else - EXCEPTION(0x2020, SPEUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); #else - EXCEPTION(0x2040, SPEFloatingPointData, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPEFloatingPointRound, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) /* Performance Monitor */ - EXCEPTION(0x2060, PerformanceMonitor, PerformanceMonitorException, EXC_XFER_STD) + EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) /* Debug Interrupt */ diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S new file mode 100644 --- /dev/null +++ b/arch/powerpc/kernel/idle_power4.S @@ -0,0 +1,78 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + + .text + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(power4_idle) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + LOADBASE(r3,cur_cpu_spec) + ld r4,cur_cpu_spec@l(r3) + ld r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beqlr + /* Now check if user or arch enabled NAP mode */ + LOADBASE(r3,powersave_nap) + lwz r4,powersave_nap@l(r3) + cmpwi 0,r4,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + li r4,0 + ori r4,r4,MSR_EE + andc r0,r7,r4 + mtmsrd r0 + + /* Check current_thread_info()->flags */ + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsrd r7 /* out of line this ? 
*/ + blr +1: + /* Go to NAP now */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + oris r7,r7,MSR_POW@h + sync + isync + mtmsrd r7 + isync + sync + blr diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -53,10 +53,10 @@ extern void transfer_to_handler(void); extern void do_IRQ(struct pt_regs *regs); -extern void MachineCheckException(struct pt_regs *regs); -extern void AlignmentException(struct pt_regs *regs); -extern void ProgramCheckException(struct pt_regs *regs); -extern void SingleStepException(struct pt_regs *regs); +extern void machine_check_exception(struct pt_regs *regs); +extern void alignment_exception(struct pt_regs *regs); +extern void program_check_exception(struct pt_regs *regs); +extern void single_step_exception(struct pt_regs *regs); extern int do_signal(sigset_t *, struct pt_regs *); extern int pmac_newworld; extern int sys_sigreturn(struct pt_regs *regs); @@ -72,10 +72,10 @@ EXPORT_SYMBOL(clear_user_page); EXPORT_SYMBOL(do_signal); EXPORT_SYMBOL(transfer_to_handler); EXPORT_SYMBOL(do_IRQ); -EXPORT_SYMBOL(MachineCheckException); -EXPORT_SYMBOL(AlignmentException); -EXPORT_SYMBOL(ProgramCheckException); -EXPORT_SYMBOL(SingleStepException); +EXPORT_SYMBOL(machine_check_exception); +EXPORT_SYMBOL(alignment_exception); +EXPORT_SYMBOL(program_check_exception); +EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); EXPORT_SYMBOL(ppc_n_lost_interrupts); EXPORT_SYMBOL(ppc_lost_interrupts); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1,6 +1,4 @@ /* - * arch/powerpc/kernel/traps.c - * * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) * * This program is free software; you can redistribute it and/or @@ -23,29 +21,46 @@ #include #include #include -#include #include #include #include #include -#include 
#include #include -#include #include #include #include +#ifdef CONFIG_PPC32 +#include +#include +#endif #include #include #include #include +#ifdef CONFIG_PPC32 #include #include #ifdef CONFIG_PMAC_BACKLIGHT #include #endif #include +#endif +#ifdef CONFIG_PPC64 +#include +#include +#include +#include +#include +#include +#endif + +#ifdef CONFIG_PPC64 +#define __KPROBES __kprobes +#else +#define __KPROBES +#endif #ifdef CONFIG_DEBUGGER int (*__debugger)(struct pt_regs *regs); @@ -96,7 +111,7 @@ int die(const char *str, struct pt_regs console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); -#ifdef CONFIG_PMAC_BACKLIGHT +#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC_BACKLIGHT) if (_machine == _MACH_Pmac) { set_backlight_enable(1); set_backlight_level(BACKLIGHT_MAX); @@ -154,9 +169,17 @@ int die(const char *str, struct pt_regs panic("Fatal exception in interrupt"); if (panic_on_oops) { +#ifdef CONFIG_PPC64 + printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); + ssleep(5); +#endif panic("Fatal exception"); } +#ifdef CONFIG_PPC32 do_exit(err); +#else + do_exit(SIGSEGV); +#endif return 0; } @@ -176,6 +199,7 @@ void _exception(int signr, struct pt_reg info.si_addr = (void __user *) addr; force_sig_info(signr, &info, current); +#ifdef CONFIG_PPC32 /* * Init gets no signals that it doesn't have a handler for. * That's all very well, but if it has caused a synchronous @@ -197,6 +221,7 @@ void _exception(int signr, struct pt_reg do_exit(signr); } } +#endif } #ifdef CONFIG_PPC64 @@ -206,7 +231,7 @@ void system_reset_exception(struct pt_re if (ppc_md.system_reset_exception) ppc_md.system_reset_exception(regs); - die("System Reset", regs, SIGABRT); + die("System Reset", regs, 0); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) @@ -216,6 +241,7 @@ void system_reset_exception(struct pt_re } #endif +#ifdef CONFIG_PPC32 /* * I/O accesses can cause machine checks on powermacs. 
* Check if the NIP corresponds to the address of a sync @@ -264,8 +290,10 @@ static inline int check_io_access(struct #endif /* CONFIG_PPC_PMAC */ return 0; } +#endif /* CONFIG_PPC32 */ #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + /* On 4xx, the reason for the machine check or program exception is in the ESR. */ #define get_reason(regs) ((regs)->dsisr) @@ -284,6 +312,7 @@ static inline int check_io_access(struct #define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) #else + /* On non-4xx, the reason for the machine check or program exception is in the MSR. */ #define get_reason(regs) ((regs)->msr) @@ -297,6 +326,7 @@ static inline int check_io_access(struct #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) #endif +#ifdef CONFIG_PPC32 /* * This is "fall-back" implementation for configurations * which don't provide platform-specific machine check info @@ -305,8 +335,9 @@ void __attribute__ ((weak)) platform_machine_check(struct pt_regs *regs) { } +#endif -void MachineCheckException(struct pt_regs *regs) +void machine_check_exception(struct pt_regs *regs) { #ifdef CONFIG_PPC64 int recover = 0; @@ -462,23 +493,31 @@ void MachineCheckException(struct pt_reg * additional info, e.g. bus error registers. 
*/ platform_machine_check(regs); -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC32 */ if (debugger_fault_handler(regs)) return; - die("Machine check", regs, SIGBUS); + die("Machine check", regs, +#ifdef CONFIG_PPC32 + SIGBUS +#else + 0 +#endif + ); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable Machine check"); } +#ifdef CONFIG_PPC32 void SMIException(struct pt_regs *regs) { die("System Management Interrupt", regs, SIGABRT); } +#endif -void UnknownException(struct pt_regs *regs) +void unknown_exception(struct pt_regs *regs) { printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); @@ -486,7 +525,7 @@ void UnknownException(struct pt_regs *re _exception(SIGTRAP, regs, 0, 0); } -void InstructionBreakpoint(struct pt_regs *regs) +void instruction_breakpoint_exception(struct pt_regs *regs) { if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -496,14 +535,20 @@ void InstructionBreakpoint(struct pt_reg _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); } +#ifdef CONFIG_PPC32 void RunModeException(struct pt_regs *regs) { _exception(SIGTRAP, regs, 0, 0); } +#endif -void SingleStepException(struct pt_regs *regs) +void __KPROBES single_step_exception(struct pt_regs *regs) { +#ifdef CONFIG_PPC32 regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ +#else + regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ +#endif if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -520,15 +565,62 @@ void SingleStepException(struct pt_regs * pretend we got a single-step exception. This was pointed out * by Kumar Gala. 
-- paulus */ -static void emulate_single_step(struct pt_regs *regs) +static inline void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) { +#ifdef CONFIG_PPC32 clear_single_step(regs); _exception(SIGTRAP, regs, TRAP_TRACE, 0); +#else + single_step_exception(regs); +#endif } } -/* Illegal instruction emulation support. Originally written to +static void parse_fpe(struct pt_regs *regs) +{ + int code = 0; + unsigned long fpscr; + +#ifdef CONFIG_PPC32 + /* We must make sure the FP state is consistent with + * our MSR_FP in regs + */ + preempt_disable(); + if (regs->msr & MSR_FP) + giveup_fpu(current); + preempt_enable(); +#else + flush_fp_to_thread(current); +#endif + + fpscr = current->thread.fpscr; + + /* Invalid operation */ + if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) + code = FPE_FLTINV; + + /* Overflow */ + else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) + code = FPE_FLTOVF; + + /* Underflow */ + else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) + code = FPE_FLTUND; + + /* Divide by zero */ + else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) + code = FPE_FLTDIV; + + /* Inexact result */ + else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) + code = FPE_FLTRES; + + _exception(SIGFPE, regs, code, regs->nip); +} + +/* + * Illegal instruction emulation support. Originally written to * provide the PVR to user applications using the mfspr rd, PVR. * Return non-zero if we can't emulate, or -EFAULT if the associated * memory access caused an access fault. Return zero on success. @@ -536,7 +628,6 @@ static void emulate_single_step(struct p * There are a couple of ways to do this, either "decode" the instruction * or directly match lots of bits. In this case, matching lots of * bits is faster and easier. 
- * */ #define INST_MFSPR_PVR 0x7c1f42a6 #define INST_MFSPR_PVR_MASK 0xfc1fffff @@ -547,6 +638,8 @@ static void emulate_single_step(struct p #define INST_MCRXR 0x7c000400 #define INST_MCRXR_MASK 0x7c0007fe +#ifdef CONFIG_PPC32 + #define INST_STRING 0x7c00042a #define INST_STRING_MASK 0x7c0007fe #define INST_STRING_GEN_MASK 0x7c00067e @@ -622,6 +715,7 @@ static int emulate_string_inst(struct pt return 0; } +#endif /* CONFIG_PPC32 */ static int emulate_instruction(struct pt_regs *regs) { @@ -643,22 +737,44 @@ static int emulate_instruction(struct pt } /* Emulating the dcba insn is just a no-op. */ - if ((instword & INST_DCBA_MASK) == INST_DCBA) + if ((instword & INST_DCBA_MASK) == INST_DCBA) { +#ifdef CONFIG_PPC64 + static int warned; + + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'dcba' insn\n", + current->pid, current->comm); + warned = 1; + } +#endif /* CONFIG_PPC64 */ return 0; + } /* Emulate the mcrxr insn. */ if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { - int shift = (instword >> 21) & 0x1c; + unsigned int shift = (instword >> 21) & 0x1c; unsigned long msk = 0xf0000000UL >> shift; +#ifdef CONFIG_PPC64 + static int warned; + if (!warned) { + printk(KERN_WARNING + "process %d (%s) uses obsolete 'mcrxr' insn\n", + current->pid, current->comm); + warned = 1; + } +#endif regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); regs->xer &= ~0xf0000000UL; return 0; } +#ifdef CONFIG_PPC32 /* Emulate load/store string insn. 
*/ if ((instword & INST_STRING_GEN_MASK) == INST_STRING) return emulate_string_inst(regs, instword); +#endif return -EINVAL; } @@ -686,7 +802,7 @@ struct bug_entry *find_bug(unsigned long return module_find_bug(bugaddr); } -int check_bug_trap(struct pt_regs *regs) +static int check_bug_trap(struct pt_regs *regs) { struct bug_entry *bug; unsigned long addr; @@ -701,34 +817,38 @@ int check_bug_trap(struct pt_regs *regs) return 0; if (bug->line & BUG_WARNING_TRAP) { /* this is a WARN_ON rather than BUG/BUG_ON */ -#ifdef CONFIG_XMON +#if defined(CONFIG_PPC32) && defined(CONFIG_XMON) xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", bug->function, bug->file, bug->line & ~BUG_WARNING_TRAP); -#endif /* CONFIG_XMON */ +#endif printk(KERN_ERR "Badness in %s at %s:%d\n", bug->function, bug->file, bug->line & ~BUG_WARNING_TRAP); +#ifdef CONFIG_PPC32 dump_stack(); +#else + show_stack(current, (void *)regs->gpr[1]); +#endif return 1; } -#ifdef CONFIG_XMON +#if defined(CONFIG_PPC32) && defined(CONFIG_XMON) xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", bug->function, bug->file, bug->line); xmon(regs); -#endif /* CONFIG_XMON */ +#endif printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", bug->function, bug->file, bug->line); return 0; } -void ProgramCheckException(struct pt_regs *regs) +void __KPROBES program_check_exception(struct pt_regs *regs) { unsigned int reason = get_reason(regs); +#if defined(CONFIG_PPC32) && defined(CONFIG_MATH_EMULATION) extern int do_mathemu(struct pt_regs *regs); -#ifdef CONFIG_MATH_EMULATION /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) * that means ESR is sometimes set incorrectly - either to @@ -740,69 +860,61 @@ void ProgramCheckException(struct pt_reg emulate_single_step(regs); return; } -#endif /* CONFIG_MATH_EMULATION */ - - if (reason & REASON_FP) { - /* IEEE FP exception */ - int code = 0; - u32 fpscr; +#endif - /* We must make sure the FP state is consistent with - 
* our MSR_FP in regs - */ - preempt_disable(); - if (regs->msr & MSR_FP) - giveup_fpu(current); - preempt_enable(); - - fpscr = current->thread.fpscr; - fpscr &= fpscr << 22; /* mask summary bits with enables */ - if (fpscr & FPSCR_VX) - code = FPE_FLTINV; - else if (fpscr & FPSCR_OX) - code = FPE_FLTOVF; - else if (fpscr & FPSCR_UX) - code = FPE_FLTUND; - else if (fpscr & FPSCR_ZX) - code = FPE_FLTDIV; - else if (fpscr & FPSCR_XX) - code = FPE_FLTRES; - _exception(SIGFPE, regs, code, regs->nip); +#ifdef CONFIG_PPC64 + if (debugger_fault_handler(regs)) return; - } +#endif - if (reason & REASON_TRAP) { + if (reason & REASON_FP) { + /* IEEE FP exception */ + parse_fpe(regs); + } else if (reason & REASON_TRAP) { /* trap exception */ +#ifdef CONFIG_PPC64 + if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) + == NOTIFY_STOP) + return; +#endif if (debugger_bpt(regs)) return; if (check_bug_trap(regs)) { regs->nip += 4; return; } - _exception(SIGTRAP, regs, TRAP_BRKPT, 0); - return; - } - - /* Try to emulate it if we should. */ - if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { + _exception(SIGTRAP, regs, TRAP_BRKPT, +#ifdef CONFIG_PPC32 + 0 +#else + regs->nip +#endif + ); + } else +#ifdef CONFIG_PPC32 + if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) +#endif + { + /* Privileged or illegal instruction; try to emulate it. 
*/ switch (emulate_instruction(regs)) { case 0: regs->nip += 4; emulate_single_step(regs); - return; + break; case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; + break; + default: + if (reason & REASON_PRIVILEGED) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + break; } } - - if (reason & REASON_PRIVILEGED) - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - else - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); } -void AlignmentException(struct pt_regs *regs) +void alignment_exception(struct pt_regs *regs) { int fixed; @@ -814,18 +926,31 @@ void AlignmentException(struct pt_regs * return; } - /* Operand address was bad */ + /* Operand address was bad */ if (fixed == -EFAULT) { if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar); + _exception(SIGSEGV, regs, +#ifdef CONFIG_PPC32 + SEGV_ACCERR, +#else + SEGV_MAPERR, +#endif + regs->dar); else /* Search exception table */ bad_page_fault(regs, regs->dar, SIGSEGV); return; } - _exception(SIGBUS, regs, BUS_ADRALN, regs->dar); + _exception(SIGBUS, regs, BUS_ADRALN, +#ifdef CONFIG_PPC32 + regs->dar +#else + regs->nip +#endif + ); } +#ifdef CONFIG_PPC32 void StackOverflow(struct pt_regs *regs) { printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", @@ -849,8 +974,58 @@ void trace_syscall(struct pt_regs *regs) current, current->pid, regs->nip, regs->link, regs->gpr[0], regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); } +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +void kernel_fp_unavailable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); +} +#endif + +void altivec_unavailable_exception(struct pt_regs *regs) +{ +#if !defined(CONFIG_ALTIVEC) || defined(CONFIG_PPC64) + if (user_mode(regs)) { + /* A user program has executed an 
altivec instruction, + but this kernel doesn't support altivec. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } +#endif +#ifdef CONFIG_PPC32 + { + static int kernel_altivec_count; + + /* The kernel has executed an altivec instruction without + first enabling altivec. Whinge but let it do it. */ + if (++kernel_altivec_count < 10) + printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", + current, regs->nip); + regs->msr |= MSR_VEC; + } +#else + printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); +#endif +} + +#ifdef CONFIG_PPC64 +extern perf_irq_t perf_irq; +#endif + +#if defined(CONFIG_PPC64) || defined(CONFIG_E500) +void performance_monitor_exception(struct pt_regs *regs) +{ + perf_irq(regs); +} +#endif + -#ifdef CONFIG_8xx +#if defined(CONFIG_PPC32) && defined(CONFIG_8xx) void SoftwareEmulation(struct pt_regs *regs) { extern int do_mathemu(struct pt_regs *); @@ -879,8 +1054,9 @@ void SoftwareEmulation(struct pt_regs *r } else emulate_single_step(regs); } -#endif /* CONFIG_8xx */ +#endif /* defined(CONFIG_PPC32) && defined(CONFIG_8xx) */ +#ifdef CONFIG_PPC32 #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) void DebugException(struct pt_regs *regs, unsigned long debug_status) @@ -909,42 +1085,36 @@ void TAUException(struct pt_regs *regs) regs->nip, regs->msr, regs->trap, print_tainted()); } #endif /* CONFIG_INT_TAU */ - -void AltivecUnavailException(struct pt_regs *regs) -{ - static int kernel_altivec_count; - -#ifndef CONFIG_ALTIVEC - if (user_mode(regs)) { - /* A user program has executed an altivec instruction, - but this kernel doesn't support altivec. */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } -#endif - /* The kernel has executed an altivec instruction without - first enabling altivec. Whinge but let it do it. 
*/ - if (++kernel_altivec_count < 10) - printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", - current, regs->nip); - regs->msr |= MSR_VEC; -} +#endif /* CONFIG_PPC32*/ #ifdef CONFIG_ALTIVEC -void AltivecAssistException(struct pt_regs *regs) +void altivec_assist_exception(struct pt_regs *regs) { int err; +#ifdef CONFIG_PPC64 + siginfo_t info; +#endif +#ifdef CONFIG_PPC32 preempt_disable(); if (regs->msr & MSR_VEC) giveup_altivec(current); preempt_enable(); +#endif if (!user_mode(regs)) { printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" " at %lx\n", regs->nip); - die("Kernel Altivec assist exception", regs, SIGILL); + die("Kernel " +#ifdef CONFIG_PPC64 + "VMX/" +#endif + "Altivec assist exception", regs, SIGILL); } +#ifdef CONFIG_PPC64 + flush_altivec_to_thread(current); +#endif /* CONFIG_PPC64 */ + err = emulate_altivec(regs); if (err == 0) { regs->nip += 4; /* skip emulated instruction */ @@ -954,7 +1124,15 @@ void AltivecAssistException(struct pt_re if (err == -EFAULT) { /* got an error reading the instruction */ +#ifdef CONFIG_PPC32 _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); +#else + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) regs->nip; + force_sig_info(SIGSEGV, &info, current); +#endif } else { /* didn't recognize the instruction */ /* XXX quick hack for now: set the non-Java bit in the VSCR */ @@ -966,13 +1144,7 @@ void AltivecAssistException(struct pt_re } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_E500 -void PerformanceMonitorException(struct pt_regs *regs) -{ - perf_irq(regs); -} -#endif - +#ifdef CONFIG_PPC32 #ifdef CONFIG_FSL_BOOKE void CacheLockingException(struct pt_regs *regs, unsigned long address, unsigned long error_code) @@ -1022,7 +1194,24 @@ void SPEFloatingPointException(struct pt return; } #endif +#endif /* CONFIG_PPC32 */ +#ifdef CONFIG_PPC64 +/* + * We enter here if we get an unrecoverable exception, that is, one + * that happened at a point 
where the RI (recoverable interrupt) bit + * in the MSR is 0. This indicates that SRR0/1 are live, and that + * we therefore lost state by taking this exception. + */ +void unrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", + regs->trap, regs->nip); + die("Unrecoverable exception", regs, SIGABRT); +} +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC32 #ifdef CONFIG_BOOKE_WDT /* * Default handler for a Watchdog exception, @@ -1041,6 +1230,20 @@ void WatchdogException(struct pt_regs *r WatchdogHandler(regs); } #endif +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +/* + * We enter here if we discover during exception entry that we are + * running in supervisor mode with a userspace value in the stack pointer. + */ +void kernel_bad_stack(struct pt_regs *regs) +{ + printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", + regs->gpr[1], regs->nip); + die("Bad kernel stack pointer", regs, SIGABRT); +} +#endif void __init trap_init(void) { diff --git a/arch/ppc/kernel/head.S b/arch/ppc/kernel/head.S --- a/arch/ppc/kernel/head.S +++ b/arch/ppc/kernel/head.S @@ -349,12 +349,12 @@ i##n: \ /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (UnknownException) when done. */ + putting it back to what it was (unknown_exception) when done. */ #if defined(CONFIG_GEMINI) && defined(CONFIG_SMP) . 
= 0x100 b __secondary_start_gemini #else - EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) #endif /* Machine check */ @@ -389,7 +389,7 @@ i##n: \ cmpwi cr1,r4,0 bne cr1,1f #endif - EXC_XFER_STD(0x200, MachineCheckException) + EXC_XFER_STD(0x200, machine_check_exception) #ifdef CONFIG_PPC_CHRP 1: b machine_check_in_rtas #endif @@ -456,10 +456,10 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* Floating-point unavailable */ . = 0x800 @@ -472,8 +472,8 @@ FPUnavailable: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) /* System call */ . = 0xc00 @@ -482,8 +482,8 @@ SystemCall: EXC_XFER_EE_LITE(0xc00, DoSyscall) /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -502,7 +502,7 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0xf00, UnknownException) + EXC_XFER_EE(0xf00, unknown_exception) /* * Handle TLB miss for instruction on 603/603e. 
@@ -702,44 +702,44 @@ DataStoreTLBMiss: rfi #ifndef CONFIG_ALTIVEC -#define AltivecAssistException UnknownException +#define altivec_assist_exception unknown_exception #endif - EXCEPTION(0x1300, Trap_13, InstructionBreakpoint, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) #ifdef CONFIG_POWER4 - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, altivec_assist_exception, EXC_XFER_EE) EXCEPTION(0x1800, Trap_18, TAUException, EXC_XFER_STD) #else /* !CONFIG_POWER4 */ - EXCEPTION(0x1600, Trap_16, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_POWER4 */ - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, 
unknown_exception, EXC_XFER_EE) EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2d00, Trap_2d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x2f00, MOLTrampoline, UnknownException, EXC_XFER_EE_LITE) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) .globl mol_trampoline .set mol_trampoline, i0x2f00 @@ -751,7 +751,7 @@ 
AltiVecUnavailable: #ifdef CONFIG_ALTIVEC bne load_up_altivec /* if from user, just load it up */ #endif /* CONFIG_ALTIVEC */ - EXC_XFER_EE_LITE(0xf20, AltivecUnavailException) + EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) #ifdef CONFIG_PPC64BRIDGE DataAccess: @@ -767,12 +767,12 @@ DataSegment: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_DAR stw r4,_DAR(r11) - EXC_XFER_STD(0x380, UnknownException) + EXC_XFER_STD(0x380, unknown_exception) InstructionSegment: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x480, UnknownException) + EXC_XFER_STD(0x480, unknown_exception) #endif /* CONFIG_PPC64BRIDGE */ #ifdef CONFIG_ALTIVEC diff --git a/arch/ppc/kernel/head_44x.S b/arch/ppc/kernel/head_44x.S --- a/arch/ppc/kernel/head_44x.S +++ b/arch/ppc/kernel/head_44x.S @@ -309,13 +309,13 @@ skpinv: addi r4,r4,1 /* Increment */ interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_440A - MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #else - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ @@ -442,7 +442,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x2010, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif /* System Call Interrupt */ @@ -451,21 +451,21 @@ interrupt_base: EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxillary Processor Unavailable Interrupt */ - EXCEPTION(0x2020, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* 
Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, UnknownException) + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ diff --git a/arch/ppc/kernel/head_4xx.S b/arch/ppc/kernel/head_4xx.S --- a/arch/ppc/kernel/head_4xx.S +++ b/arch/ppc/kernel/head_4xx.S @@ -245,12 +245,12 @@ label: /* * 0x0100 - Critical Interrupt Exception */ - CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) /* * 0x0200 - Machine Check Exception */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) /* * 0x0300 - Data Storage Exception @@ -405,7 +405,7 @@ label: mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ stw r4,_DEAR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) @@ -413,21 +413,21 @@ label: mfspr r4,SPRN_ESR /* Grab the ESR and save it */ stw r4,_ESR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x700, ProgramCheckException) + EXC_XFER_STD(0x700, program_check_exception) - EXCEPTION(0x0800, Trap_08, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0900, Trap_09, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0A00, Trap_0A, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0B00, Trap_0B, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0A00, Trap_0A, unknown_exception, 
EXC_XFER_EE) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) NORMAL_EXCEPTION_PROLOG EXC_XFER_EE_LITE(0xc00, DoSyscall) - EXCEPTION(0x0D00, Trap_0D, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0E00, Trap_0E, UnknownException, EXC_XFER_EE) - EXCEPTION(0x0F00, Trap_0F, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ START_EXCEPTION(0x1000, Decrementer) @@ -444,14 +444,14 @@ label: /* 0x1010 - Fixed Interval Timer (FIT) Exception */ - STND_EXCEPTION(0x1010, FITException, UnknownException) + STND_EXCEPTION(0x1010, FITException, unknown_exception) /* 0x1020 - Watchdog Timer (WDT) Exception */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WDTException, UnknownException) + CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception) #endif #endif @@ -656,25 +656,25 @@ label: mfspr r10, SPRN_SPRG0 b InstructionAccess - EXCEPTION(0x1300, Trap_13, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1400, Trap_14, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) #ifdef CONFIG_IBM405_ERR51 /* 405GP errata 51 */ START_EXCEPTION(0x1700, Trap_17) b DTLBMiss #else - EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) #endif - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1900, 
Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1A00, Trap_1A, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1B00, Trap_1B, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1C00, Trap_1C, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1D00, Trap_1D, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1E00, Trap_1E, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1F00, Trap_1F, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case diff --git a/arch/ppc/kernel/head_8xx.S b/arch/ppc/kernel/head_8xx.S --- a/arch/ppc/kernel/head_8xx.S +++ b/arch/ppc/kernel/head_8xx.S @@ -203,7 +203,7 @@ i##n: \ ret_from_except) /* System reset */ - EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) /* Machine check */ . = 0x200 @@ -214,7 +214,7 @@ MachineCheck: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x200, MachineCheckException) + EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. * This is "never generated" by the MPC8xx. 
We jump to it for other @@ -252,20 +252,20 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, AlignmentException) + EXC_XFER_EE(0x600, alignment_exception) /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) /* No FPU on MPC8xx. This exception is not supposed to happen. */ - EXCEPTION(0x800, FPUnavailable, UnknownException, EXC_XFER_STD) + EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) /* System call */ . = 0xc00 @@ -274,9 +274,9 @@ SystemCall: EXC_XFER_EE_LITE(0xc00, DoSyscall) /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) - EXCEPTION(0xf00, Trap_0f, UnknownException, EXC_XFER_EE) + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. 
@@ -540,22 +540,22 @@ DataTLBError: #endif b DataAccess - EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to * using them. */ - EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) . 
= 0x2000 diff --git a/arch/ppc/kernel/head_booke.h b/arch/ppc/kernel/head_booke.h --- a/arch/ppc/kernel/head_booke.h +++ b/arch/ppc/kernel/head_booke.h @@ -335,7 +335,7 @@ label: mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_EE(0x0600, AlignmentException) + EXC_XFER_EE(0x0600, alignment_exception) #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ @@ -343,7 +343,7 @@ label: mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x0700, ProgramCheckException) + EXC_XFER_STD(0x0700, program_check_exception) #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ diff --git a/arch/ppc/kernel/head_fsl_booke.S b/arch/ppc/kernel/head_fsl_booke.S --- a/arch/ppc/kernel/head_fsl_booke.S +++ b/arch/ppc/kernel/head_fsl_booke.S @@ -426,14 +426,14 @@ skpinv: addi r6,r6,1 /* Increment */ interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_E200 /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #else - MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ @@ -542,9 +542,9 @@ interrupt_base: #else #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FloatingPointUnavailable, ProgramCheckException, EXC_XFER_EE) + EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) #else - EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif #endif @@ -554,20 +554,20 @@ 
interrupt_base: EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxillary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, UnknownException) + CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ @@ -696,21 +696,21 @@ interrupt_base: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else - EXCEPTION(0x2020, SPEUnavailable, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); #else - EXCEPTION(0x2040, SPEFloatingPointData, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPEFloatingPointRound, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) /* Performance Monitor */ - EXCEPTION(0x2060, PerformanceMonitor, PerformanceMonitorException, EXC_XFER_STD) + EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) /* Debug Interrupt */ diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -53,10 +53,10 @@ extern void transfer_to_handler(void); extern void 
do_IRQ(struct pt_regs *regs); -extern void MachineCheckException(struct pt_regs *regs); -extern void AlignmentException(struct pt_regs *regs); -extern void ProgramCheckException(struct pt_regs *regs); -extern void SingleStepException(struct pt_regs *regs); +extern void machine_check_exception(struct pt_regs *regs); +extern void alignment_exception(struct pt_regs *regs); +extern void program_check_exception(struct pt_regs *regs); +extern void single_step_exception(struct pt_regs *regs); extern int do_signal(sigset_t *, struct pt_regs *); extern int pmac_newworld; extern int sys_sigreturn(struct pt_regs *regs); @@ -72,10 +72,10 @@ EXPORT_SYMBOL(clear_user_page); EXPORT_SYMBOL(do_signal); EXPORT_SYMBOL(transfer_to_handler); EXPORT_SYMBOL(do_IRQ); -EXPORT_SYMBOL(MachineCheckException); -EXPORT_SYMBOL(AlignmentException); -EXPORT_SYMBOL(ProgramCheckException); -EXPORT_SYMBOL(SingleStepException); +EXPORT_SYMBOL(machine_check_exception); +EXPORT_SYMBOL(alignment_exception); +EXPORT_SYMBOL(program_check_exception); +EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); EXPORT_SYMBOL(ppc_n_lost_interrupts); EXPORT_SYMBOL(ppc_lost_interrupts); diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -74,7 +74,7 @@ void (*debugger_fault_handler)(struct pt DEFINE_SPINLOCK(die_lock); -void die(const char * str, struct pt_regs * fp, long err) +int die(const char * str, struct pt_regs * fp, long err) { static int die_counter; int nl = 0; @@ -232,7 +232,7 @@ platform_machine_check(struct pt_regs *r { } -void MachineCheckException(struct pt_regs *regs) +void machine_check_exception(struct pt_regs *regs) { unsigned long reason = get_mc_reason(regs); @@ -393,14 +393,14 @@ void SMIException(struct pt_regs *regs) #endif } -void UnknownException(struct pt_regs *regs) +void unknown_exception(struct pt_regs *regs) { printk("Bad trap at PC: %lx, MSR: %lx, vector=%lx %s\n", regs->nip, regs->msr, regs->trap, 
print_tainted()); _exception(SIGTRAP, regs, 0, 0); } -void InstructionBreakpoint(struct pt_regs *regs) +void instruction_breakpoint_exception(struct pt_regs *regs) { if (debugger_iabr_match(regs)) return; @@ -622,7 +622,7 @@ int check_bug_trap(struct pt_regs *regs) return 0; } -void ProgramCheckException(struct pt_regs *regs) +void program_check_exception(struct pt_regs *regs) { unsigned int reason = get_reason(regs); extern int do_mathemu(struct pt_regs *regs); @@ -701,7 +701,7 @@ void ProgramCheckException(struct pt_reg _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); } -void SingleStepException(struct pt_regs *regs) +void single_step_exception(struct pt_regs *regs) { regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ if (debugger_sstep(regs)) @@ -709,7 +709,7 @@ void SingleStepException(struct pt_regs _exception(SIGTRAP, regs, TRAP_TRACE, 0); } -void AlignmentException(struct pt_regs *regs) +void alignment_exception(struct pt_regs *regs) { int fixed; @@ -814,7 +814,7 @@ void TAUException(struct pt_regs *regs) } #endif /* CONFIG_INT_TAU */ -void AltivecUnavailException(struct pt_regs *regs) +void altivec_unavailable_exception(struct pt_regs *regs) { static int kernel_altivec_count; @@ -835,7 +835,7 @@ void AltivecUnavailException(struct pt_r } #ifdef CONFIG_ALTIVEC -void AltivecAssistException(struct pt_regs *regs) +void altivec_assist_exception(struct pt_regs *regs) { int err; @@ -872,7 +872,7 @@ void AltivecAssistException(struct pt_re #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_E500 -void PerformanceMonitorException(struct pt_regs *regs) +void performance_monitor_exception(struct pt_regs *regs) { perf_irq(regs); } diff --git a/arch/ppc/syslib/ibm44x_common.c b/arch/ppc/syslib/ibm44x_common.c --- a/arch/ppc/syslib/ibm44x_common.c +++ b/arch/ppc/syslib/ibm44x_common.c @@ -178,7 +178,7 @@ void __init ibm44x_platform_init(void) #endif } -/* Called from MachineCheckException */ +/* Called from machine_check_exception */ void platform_machine_check(struct 
pt_regs *regs) { printk("PLB0: BEAR=0x%08x%08x ACR= 0x%08x BESR= 0x%08x\n", diff --git a/arch/ppc/syslib/ppc4xx_setup.c b/arch/ppc/syslib/ppc4xx_setup.c --- a/arch/ppc/syslib/ppc4xx_setup.c +++ b/arch/ppc/syslib/ppc4xx_setup.c @@ -279,7 +279,7 @@ ppc4xx_init(unsigned long r3, unsigned l #endif /* defined(CONFIG_PCI) && defined(CONFIG_IDE) */ } -/* Called from MachineCheckException */ +/* Called from machine_check_exception */ void platform_machine_check(struct pt_regs *regs) { #if defined(DCRN_PLB0_BEAR) diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -2,6 +2,8 @@ # Makefile for the linux ppc64 kernel. # +ifneq ($(CONFIG_PPC_MERGE),y) + EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds @@ -77,3 +79,9 @@ endif # These are here while we do the architecture merge vecemu-y += ../../powerpc/kernel/vecemu.o vector-y += ../../powerpc/kernel/vector.o +idle_power4-y += ../../powerpc/kernel/idle_power4.o +traps-y += ../../powerpc/kernel/traps.o + +else + +endif diff --git a/arch/ppc64/kernel/idle_power4.S b/arch/ppc64/kernel/idle_power4.S deleted file mode 100644 --- a/arch/ppc64/kernel/idle_power4.S +++ /dev/null @@ -1,79 +0,0 @@ -/* - * This file contains the power_save function for 6xx & 7xxx CPUs - * rewritten in assembler - * - * Warning ! This code assumes that if your machine has a 750fx - * it will have PLL 1 set to low speed mode (used during NAP/DOZE). - * if this is not the case some additional changes will have to - * be done to check a runtime var (a bit like powersave-nap) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG - - .text - -/* - * Here is the power_save_6xx function. This could eventually be - * split into several functions & changing the function pointer - * depending on the various features. - */ -_GLOBAL(power4_idle) -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) - /* We must dynamically check for the NAP feature as it - * can be cleared by CPU init after the fixups are done - */ - LOADBASE(r3,cur_cpu_spec) - ld r4,cur_cpu_spec@l(r3) - ld r4,CPU_SPEC_FEATURES(r4) - andi. r0,r4,CPU_FTR_CAN_NAP - beqlr - /* Now check if user or arch enabled NAP mode */ - LOADBASE(r3,powersave_nap) - lwz r4,powersave_nap@l(r3) - cmpwi 0,r4,0 - beqlr - - /* Clear MSR:EE */ - mfmsr r7 - li r4,0 - ori r4,r4,MSR_EE - andc r0,r7,r4 - mtmsrd r0 - - /* Check current_thread_info()->flags */ - clrrdi r4,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r4) - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - mtmsrd r7 /* out of line this ? */ - blr -1: - /* Go to NAP now */ -BEGIN_FTR_SECTION - DSSALL - sync -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - oris r7,r7,MSR_POW@h - sync - isync - mtmsrd r7 - isync - sync - blr - diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c deleted file mode 100644 --- a/arch/ppc64/kernel/traps.c +++ /dev/null @@ -1,568 +0,0 @@ -/* - * linux/arch/ppc64/kernel/traps.c - * - * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version.
- * - * Modified by Cort Dougan (cort at cs.nmt.edu) - * and Paul Mackerras (paulus at cs.anu.edu.au) - */ - -/* - * This file handles the architecture-dependent parts of hardware exceptions - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_DEBUGGER -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); - -EXPORT_SYMBOL(__debugger); -EXPORT_SYMBOL(__debugger_ipi); -EXPORT_SYMBOL(__debugger_bpt); -EXPORT_SYMBOL(__debugger_sstep); -EXPORT_SYMBOL(__debugger_iabr_match); -EXPORT_SYMBOL(__debugger_dabr_match); -EXPORT_SYMBOL(__debugger_fault_handler); -#endif - -struct notifier_block *powerpc_die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); - -int register_die_notifier(struct notifier_block *nb) -{ - int err = 0; - unsigned long flags; - - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&powerpc_die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; -} - -/* - * Trap & Exception support - */ - -static DEFINE_SPINLOCK(die_lock); - -int die(const char *str, struct pt_regs *regs, long err) -{ - static int die_counter; - int nl = 0; - - if (debugger(regs)) - return 1; - - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC 
"); - nl = 1; -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); - nl = 1; -#endif - switch(systemcfg->platform) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_BPA: - printk("BPA "); - nl = 1; - break; - } - if (nl) - printk("\n"); - print_modules(); - show_regs(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - - if (in_interrupt()) - panic("Fatal exception in interrupt"); - - if (panic_on_oops) { - printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); - ssleep(5); - panic("Fatal exception"); - } - do_exit(SIGSEGV); - - return 0; -} - -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) -{ - siginfo_t info; - - if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } - - memset(&info, 0, sizeof(info)); - info.si_signo = signr; - info.si_code = code; - info.si_addr = (void __user *) addr; - force_sig_info(signr, &info, current); -} - -void system_reset_exception(struct pt_regs *regs) -{ - /* See if any machine dependent calls */ - if (ppc_md.system_reset_exception) - ppc_md.system_reset_exception(regs); - - die("System Reset", regs, 0); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable System Reset"); - - /* What should we do here? We could issue a shutdown or hard reset. 
*/ -} - -void machine_check_exception(struct pt_regs *regs) -{ - int recover = 0; - - /* See if any machine dependent calls */ - if (ppc_md.machine_check_exception) - recover = ppc_md.machine_check_exception(regs); - - if (recover) - return; - - if (debugger_fault_handler(regs)) - return; - die("Machine check", regs, 0); - - /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - panic("Unrecoverable Machine check"); -} - -void unknown_exception(struct pt_regs *regs) -{ - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", - regs->nip, regs->msr, regs->trap); - - _exception(SIGTRAP, regs, 0, 0); -} - -void instruction_breakpoint_exception(struct pt_regs *regs) -{ - if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_iabr_match(regs)) - return; - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); -} - -void __kprobes single_step_exception(struct pt_regs *regs) -{ - regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */ - - if (notify_die(DIE_SSTEP, "single_step", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_sstep(regs)) - return; - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); -} - -/* - * After we have successfully emulated an instruction, we have to - * check if the instruction was being single-stepped, and if so, - * pretend we got a single-step exception. This was pointed out - * by Kumar Gala. 
-- paulus - */ -static inline void emulate_single_step(struct pt_regs *regs) -{ - if (regs->msr & MSR_SE) - single_step_exception(regs); -} - -static void parse_fpe(struct pt_regs *regs) -{ - int code = 0; - unsigned long fpscr; - - flush_fp_to_thread(current); - - fpscr = current->thread.fpscr; - - /* Invalid operation */ - if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) - code = FPE_FLTINV; - - /* Overflow */ - else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX)) - code = FPE_FLTOVF; - - /* Underflow */ - else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX)) - code = FPE_FLTUND; - - /* Divide by zero */ - else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX)) - code = FPE_FLTDIV; - - /* Inexact result */ - else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX)) - code = FPE_FLTRES; - - _exception(SIGFPE, regs, code, regs->nip); -} - -/* - * Illegal instruction emulation support. Return non-zero if we can't - * emulate, or -EFAULT if the associated memory access caused an access - * fault. Return zero on success. - */ - -#define INST_MFSPR_PVR 0x7c1f42a6 -#define INST_MFSPR_PVR_MASK 0xfc1fffff - -#define INST_DCBA 0x7c0005ec -#define INST_DCBA_MASK 0x7c0007fe - -#define INST_MCRXR 0x7c000400 -#define INST_MCRXR_MASK 0x7c0007fe - -static int emulate_instruction(struct pt_regs *regs) -{ - unsigned int instword; - - if (!user_mode(regs)) - return -EINVAL; - - CHECK_FULL_REGS(regs); - - if (get_user(instword, (unsigned int __user *)(regs->nip))) - return -EFAULT; - - /* Emulate the mfspr rD, PVR. */ - if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { - unsigned int rd; - - rd = (instword >> 21) & 0x1f; - regs->gpr[rd] = mfspr(SPRN_PVR); - return 0; - } - - /* Emulating the dcba insn is just a no-op. */ - if ((instword & INST_DCBA_MASK) == INST_DCBA) { - static int warned; - - if (!warned) { - printk(KERN_WARNING - "process %d (%s) uses obsolete 'dcba' insn\n", - current->pid, current->comm); - warned = 1; - } - return 0; - } - - /* Emulate the mcrxr insn. 
*/ - if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { - static int warned; - unsigned int shift; - - if (!warned) { - printk(KERN_WARNING - "process %d (%s) uses obsolete 'mcrxr' insn\n", - current->pid, current->comm); - warned = 1; - } - - shift = (instword >> 21) & 0x1c; - regs->ccr &= ~(0xf0000000 >> shift); - regs->ccr |= (regs->xer & 0xf0000000) >> shift; - regs->xer &= ~0xf0000000; - return 0; - } - - return -EINVAL; -} - -/* - * Look through the list of trap instructions that are used for BUG(), - * BUG_ON() and WARN_ON() and see if we hit one. At this point we know - * that the exception was caused by a trap instruction of some kind. - * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 - * otherwise. - */ -extern struct bug_entry __start___bug_table[], __stop___bug_table[]; - -#ifndef CONFIG_MODULES -#define module_find_bug(x) NULL -#endif - -struct bug_entry *find_bug(unsigned long bugaddr) -{ - struct bug_entry *bug; - - for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) - if (bugaddr == bug->bug_addr) - return bug; - return module_find_bug(bugaddr); -} - -static int -check_bug_trap(struct pt_regs *regs) -{ - struct bug_entry *bug; - unsigned long addr; - - if (regs->msr & MSR_PR) - return 0; /* not in kernel */ - addr = regs->nip; /* address of trap instruction */ - if (addr < PAGE_OFFSET) - return 0; - bug = find_bug(regs->nip); - if (bug == NULL) - return 0; - if (bug->line & BUG_WARNING_TRAP) { - /* this is a WARN_ON rather than BUG/BUG_ON */ - printk(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); - show_stack(current, (void *)regs->gpr[1]); - return 1; - } - printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - return 0; -} - -void __kprobes program_check_exception(struct pt_regs *regs) -{ - if (debugger_fault_handler(regs)) - return; - - if (regs->msr & 0x100000) { - /* IEEE FP exception */ - parse_fpe(regs); - } else if (regs->msr & 
0x20000) { - /* trap exception */ - - if (notify_die(DIE_BPT, "breakpoint", regs, 5, - 5, SIGTRAP) == NOTIFY_STOP) - return; - if (debugger_bpt(regs)) - return; - - if (check_bug_trap(regs)) { - regs->nip += 4; - return; - } - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - - } else { - /* Privileged or illegal instruction; try to emulate it. */ - switch (emulate_instruction(regs)) { - case 0: - regs->nip += 4; - emulate_single_step(regs); - break; - - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - break; - - default: - if (regs->msr & 0x40000) - /* priveleged */ - _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); - else - /* illegal */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - break; - } - } -} - -void kernel_fp_unavailable_exception(struct pt_regs *regs) -{ - printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " - "%lx at %lx\n", regs->trap, regs->nip); - die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); -} - -void altivec_unavailable_exception(struct pt_regs *regs) -{ - if (user_mode(regs)) { - /* A user program has executed an altivec instruction, - but this kernel doesn't support altivec. 
*/ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } - printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " - "%lx at %lx\n", regs->trap, regs->nip); - die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); -} - -extern perf_irq_t perf_irq; - -void performance_monitor_exception(struct pt_regs *regs) -{ - perf_irq(regs); -} - -void alignment_exception(struct pt_regs *regs) -{ - int fixed; - - fixed = fix_alignment(regs); - - if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ - emulate_single_step(regs); - return; - } - - /* Operand address was bad */ - if (fixed == -EFAULT) { - if (user_mode(regs)) { - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar); - } else { - /* Search exception table */ - bad_page_fault(regs, regs->dar, SIGSEGV); - } - - return; - } - - _exception(SIGBUS, regs, BUS_ADRALN, regs->nip); -} - -#ifdef CONFIG_ALTIVEC -void altivec_assist_exception(struct pt_regs *regs) -{ - int err; - siginfo_t info; - - if (!user_mode(regs)) { - printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" - " at %lx\n", regs->nip); - die("Kernel VMX/Altivec assist exception", regs, SIGILL); - } - - flush_altivec_to_thread(current); - - err = emulate_altivec(regs); - if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ - emulate_single_step(regs); - return; - } - - if (err == -EFAULT) { - /* got an error reading the instruction */ - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) regs->nip; - force_sig_info(SIGSEGV, &info, current); - } else { - /* didn't recognize the instruction */ - /* XXX quick hack for now: set the non-Java bit in the VSCR */ - if (printk_ratelimit()) - printk(KERN_ERR "Unrecognized altivec instruction " - "in %s at %lx\n", current->comm, regs->nip); - current->thread.vscr.u[3] |= 0x10000; - } -} -#endif /* CONFIG_ALTIVEC */ - -/* - * We enter here if we get an unrecoverable exception, that is, one 
- * that happened at a point where the RI (recoverable interrupt) bit - * in the MSR is 0. This indicates that SRR0/1 are live, and that - * we therefore lost state by taking this exception. - */ -void unrecoverable_exception(struct pt_regs *regs) -{ - printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", - regs->trap, regs->nip); - die("Unrecoverable exception", regs, SIGABRT); -} - -/* - * We enter here if we discover during exception entry that we are - * running in supervisor mode with a userspace value in the stack pointer. - */ -void kernel_bad_stack(struct pt_regs *regs) -{ - printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", - regs->gpr[1], regs->nip); - die("Bad kernel stack pointer", regs, SIGABRT); -} - -void __init trap_init(void) -{ -} diff --git a/include/asm-ppc/system.h b/include/asm-ppc/system.h --- a/include/asm-ppc/system.h +++ b/include/asm-ppc/system.h @@ -87,7 +87,7 @@ extern void cacheable_memzero(void *p, u extern void *cacheable_memcpy(void *, const void *, unsigned int); extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); -extern void die(const char *, struct pt_regs *, long); +extern int die(const char *, struct pt_regs *, long); extern void _exception(int, struct pt_regs *, int, unsigned long); #ifdef CONFIG_BOOKE_WDT extern u32 booke_wdt_enabled; From sfr at canb.auug.org.au Sat Oct 1 21:40:29 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 21:40:29 +1000 Subject: [PATCH 7/9] ppc64: simplify the build a little In-Reply-To: <20051001000516.1d444d51.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000516.1d444d51.sfr@canb.auug.org.au> Message-ID: <20051001214029.5d66fdb4.sfr@canb.auug.org.au> New version to because of changes in 6/9 Signed-off-by: Stephen Rothwell --- arch/powerpc/Makefile | 1 - arch/powerpc/kernel/Makefile | 11 +++++++++-- arch/ppc64/Makefile | 2 +- 
arch/ppc64/kernel/Makefile | 11 ++--------- 4 files changed, 12 insertions(+), 13 deletions(-) -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ bd142b70a6bd5522f7d95f0cec06091b93bb0715 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -121,7 +121,6 @@ head-$(CONFIG_FSL_BOOKE) := arch/powerpc ifeq ($(CONFIG_PPC32),y) head-$(CONFIG_6xx) += arch/powerpc/kernel/idle_6xx.o -head-$(CONFIG_POWER4) += arch/powerpc/kernel/idle_power4.o head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,6 +2,10 @@ # Makefile for the linux kernel. # +ifeq ($(CONFIG_PPC64),y) +EXTRA_CFLAGS += -mno-minimal-toc +endif + extra-$(CONFIG_PPC_STD_MMU) := head.o extra_$(CONFIG_PPC64) := head_64.o extra-$(CONFIG_40x) := head_4xx.o @@ -9,10 +13,13 @@ extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-$(CONFIG_6xx) += idle_6xx.o -extra-$(CONFIG_POWER4) += idle_power4.o extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds -obj-y := semaphore.o traps.o process.o +obj-y := traps.o +obj-$(CONFIG_PPC32) += semaphore.o process.o +obj-$(CONFIG_PPC64) += idle_power4.o +ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_MODULES) += ppc_ksyms.o +endif obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -82,7 +82,7 @@ CFLAGS += $(call cc-option,-funit-at-a-t head-y := arch/ppc64/kernel/head.o libs-y += arch/ppc64/lib/ -core-y += arch/ppc64/kernel/ +core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ core-y += arch/ppc64/mm/ core-y += arch/powerpc/platforms/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ 
b/arch/ppc64/kernel/Makefile @@ -7,12 +7,12 @@ ifneq ($(CONFIG_PPC_MERGE),y) EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds -obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ +obj-y := setup.o entry.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ align.o semaphore.o bitops.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ - lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ + lmb.o cputable.o cpu_setup_power4.o \ iommu.o sysfs.o vdso.o pmc.o firmware.o prom.o obj-y += vdso32/ vdso64/ @@ -66,7 +66,6 @@ obj-$(CONFIG_PPC_BPA) += pSeries_smp.o obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o endif -obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_KPROBES) += kprobes.o CFLAGS_ioctl32.o += -Ifs/ @@ -76,12 +75,6 @@ arch/ppc64/kernel/head.o: arch/powerpc/p AFLAGS_head.o += -Iarch/powerpc/platforms/iseries endif -# These are here while we do the architecture merge -vecemu-y += ../../powerpc/kernel/vecemu.o -vector-y += ../../powerpc/kernel/vector.o -idle_power4-y += ../../powerpc/kernel/idle_power4.o -traps-y += ../../powerpc/kernel/traps.o - else endif From sfr at canb.auug.org.au Sat Oct 1 22:30:28 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 1 Oct 2005 22:30:28 +1000 Subject: [PATCH 6/9] powerpc: merge idle_power4.S and traps.c In-Reply-To: <20051001213753.52d5d5c3.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001000001.1f1d8c48.sfr@canb.auug.org.au> <52E29A49-AAF2-4E61-AAB2-CD02ABCBE447@freescale.com> <20051001121714.1b5886aa.sfr@canb.auug.org.au> <17214.632.655003.750819@cargo.ozlabs.ibm.com> <20051001213753.52d5d5c3.sfr@canb.auug.org.au> Message-ID: <20051001223028.56ef9bfd.sfr@canb.auug.org.au> On Sat, 1 Oct 2005 21:37:53 +1000 Stephen Rothwell wrote: > > OK, thanks for keeping me honest :-) Here is new versions of patches 6 > and 7 (all the rest are the same as before). 
Just in case anyone is wondering, the new patchset has been built for (my configs) pSeries, iSeries, g5, ARCH=ppc, ARCH=powerpc ppc32 pmac, ARCH=powerpc iSeries and I have booted the ARCH=powerpc iSeries kernel. -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051001/1d938420/attachment.pgp From benh at kernel.crashing.org Sun Oct 2 18:45:59 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Sun, 02 Oct 2005 18:45:59 +1000 Subject: To large page or not to large page In-Reply-To: <17213.56267.826654.651632@kitch0.watson.ibm.com> References: <17213.56267.826654.651632@kitch0.watson.ibm.com> Message-ID: <1128242759.8267.45.camel@gaston> On Fri, 2005-09-30 at 20:43 -0400, Jimi Xenidis wrote: > It seems as tho Linux will map the kernel with large pages if the > processor allows it regardless if the lmb is sufficient to hold a > large page, correct? > > Is there some runtime option to force the use of 4K pages. > > Ultimately, my desire is to define a 256Mig segment that, using a > Hypervisor, that can be populated by shared pages that can physically > belong to the hypervisor or other partions/domains) and restrict the > mappings to 4k. I have some ideas, but am willing to hear any suggestions. Does that segment have to be part of the linear mapping ? Can't it just be mapped afterward using a kernel virtual mapping ? Also, don't forget that the 64k pages patch won't support 4k pages at all for performance reasons when CONFIG_PPC_64K_PAGES is enabled (at least on processors that have HW support for 64k pages). Ben.
From jimix at watson.ibm.com Sun Oct 2 22:19:09 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Sun, 2 Oct 2005 08:19:09 -0400 Subject: To large page or not to large page In-Reply-To: <1128242759.8267.45.camel@gaston> References: <17213.56267.826654.651632@kitch0.watson.ibm.com> <1128242759.8267.45.camel@gaston> Message-ID: <17215.53309.488586.919028@kitch0.watson.ibm.com> >>>>> "BH" == Benjamin Herrenschmidt writes: BH> On Fri, 2005-09-30 at 20:43 -0400, Jimi Xenidis wrote: >> It seems as tho Linux will map the kernel with large pages if the >> processor allows it regardless if the lmb is sufficient to hold a >> large page, correct? >> >> Is there some runtime option to force the use of 4K pages. >> >> Ultimately, my desire is to define a 256Mig segment that, using a >> Hypervisor, that can be populated by shared pages that can physically >> belong to the hypervisor or other partions/domains) and restrict the >> mappings to 4k. I have some ideas, but am willing to hear any suggestions. BH> Does that segment has to be part of the linear mapping ? Not sure what _you_ mean by "linear mapping". More specifically I would like for: #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) to work, and the segment to be managed. I think that is the linear map. BH> Can't it just be mapped afterward using a kernel virtual mapping BH> ? Can it? given the use of __pa()? Should I consider a new REGION_ID()? BH> Also, don't forget that the 64k pages patch won't support 4k BH> pages at all for performances reasons when CONFIG_PPC_64K_PAGES BH> is enabled (at least on processors that have HW support for 64k BH> pages) . "at all", surely, IO space will require and continue to use 4k pages. -JX -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about." 
-- Peter Griffin (Family Guy) From schwab at suse.de Sun Oct 2 23:25:16 2005 From: schwab at suse.de (Andreas Schwab) Date: Sun, 02 Oct 2005 15:25:16 +0200 Subject: PMac motherboard information missing from /proc/cpuinfo Message-ID: With 2.6.14-rc3 I no longer get the motherboard information in /proc/cpuinfo. With 2.6.13 I got this: machine : PowerMac7,3 motherboard : PowerMac7,3 MacRISC4 Power Macintosh Now I only get this: machine : PowerMac The information in /proc/device-tree is still complete. Andreas. -- Andreas Schwab, SuSE Labs, schwab at suse.de SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 "And now for something completely different." From vatsa at in.ibm.com Mon Oct 3 03:46:30 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Sun, 2 Oct 2005 23:16:30 +0530 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 Message-ID: <20051002174630.GA6786@in.ibm.com> Hello, The patch below implements NO_IDLE_HZ support for pSeries/PPC64. It basically lets idle CPUs cut off their timer ticks for as long as they can. Some notes about the patch: - Patch is against 2.6.14-rc1 and has undergone some basic testing (with an additional patch - also in the mail) on a Power4 box. I intend to test on a Power5 box also sometime soon. - Only pseries_shared_idle and pseries_dedicated_idle routines have been converted over to use this support, since I felt cutting off ticks doesn't make sense if the idle routine is poll-based. - Boot CPU cannot skip ticks. This is because of the current design wherein only boot CPU updates wall-clock/jiffies. I didn't see any particular reason why it has been designed like that (maybe to reduce lock contention on xtime_lock?). If we have to allow boot CPU also to skip ticks (which IMO we should), then this design needs to change, i.e. we should allow xtime/jiffies to be updated from any CPU (like S390 allows).
If people agree that this is the right direction, then I can give it a shot next. - By default the feature is disabled at bootup and has to be enabled by writing 0 to /proc/sys/kernel/hz_timer. This can be modifed later after the patch has undergone sufficient test. Also we can introduce a boottime argument to control this behaviour. - One requirement is that a call to start_hz_timer should be inserted in every possible interrupt path. Towards this end, have I missed some interrupt paths? Or have I included some exception path which I shouldn't be! Below are both the patches - actual patch and the patch which I used to test on Power4 box. First the actual NO_IDLE_HZ patch: --- linux-2.6.14-rc1-root/arch/ppc64/Kconfig | 6 + linux-2.6.14-rc1-root/arch/ppc64/kernel/head.S | 9 + linux-2.6.14-rc1-root/arch/ppc64/kernel/irq.c | 3 linux-2.6.14-rc1-root/arch/ppc64/kernel/pSeries_setup.c | 10 + linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c | 96 ++++++++++++++-- linux-2.6.14-rc1-root/include/asm-ppc64/time.h | 8 + linux-2.6.14-rc1-root/kernel/sysctl.c | 20 +-- 7 files changed, 127 insertions(+), 25 deletions(-) diff -puN arch/ppc64/kernel/time.c~ppc64 arch/ppc64/kernel/time.c --- linux-2.6.14-rc1/arch/ppc64/kernel/time.c~ppc64 2005-09-28 19:35:36.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c 2005-10-02 22:53:44.000000000 +0530 @@ -315,23 +315,13 @@ static void iSeries_tb_recal(void) unsigned long tb_last_stamp __cacheline_aligned_in_smp; -/* - * timer_interrupt - gets called when the decrementer overflows, - * with interrupts disabled. 
- */ -int timer_interrupt(struct pt_regs * regs) +static void account_ticks(struct pt_regs *regs) { int next_dec; unsigned long cur_tb; struct paca_struct *lpaca = get_paca(); unsigned long cpu = smp_processor_id(); - irq_enter(); - - profile_tick(CPU_PROFILING, regs); - - lpaca->lppaca.int_dword.fields.decr_int = 0; - while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) { /* * We cannot disable the decrementer, so in the period @@ -364,6 +354,23 @@ int timer_interrupt(struct pt_regs * reg if (next_dec > lpaca->default_decr) next_dec = lpaca->default_decr; set_dec(next_dec); +} + +/* + * timer_interrupt - gets called when the decrementer overflows, + * with interrupts disabled. + */ +int timer_interrupt(struct pt_regs * regs) +{ + struct paca_struct *lpaca = get_paca(); + + irq_enter(); + + profile_tick(CPU_PROFILING, regs); + + lpaca->lppaca.int_dword.fields.decr_int = 0; + + account_ticks(regs); #ifdef CONFIG_PPC_ISERIES if (hvlpevent_is_pending()) @@ -381,6 +388,73 @@ int timer_interrupt(struct pt_regs * reg return 1; } +#ifdef CONFIG_NO_IDLE_HZ + +#define MAX_DEC_COUNT (UINT_MAX) /* Decrementer is 32-bit */ +#define MIN_SKIP 2 +#define MAX_SKIP (MAX_DEC_COUNT/tb_ticks_per_jiffy) + +int sysctl_hz_timer = 1; + +/* Avoid the HZ timer (decrementer) exception on this CPU for "some" time. + * Has to be called with interrupts disabled. + * + * The HZ timer frequency is restored upon the occurence of an interrupt or + * exception on this CPU. 
+ */ +void stop_hz_timer(void) +{ + unsigned long cpu = smp_processor_id(), seq, delta; + int next_dec; + + if (sysctl_hz_timer != 0 || cpu == boot_cpuid) + return; + + cpu_set(cpu, nohz_cpu_mask); + mb(); + if (rcu_pending(cpu) || local_softirq_pending()) { + cpu_clear(cpu, nohz_cpu_mask); + return; + } + + do { + seq = read_seqbegin(&xtime_lock); + + delta = next_timer_interrupt() - jiffies; + + if (delta < MIN_SKIP) { + cpu_clear(cpu, nohz_cpu_mask); + return; + } + + if (delta > MAX_SKIP) + delta = MAX_SKIP; + + next_dec = tb_last_stamp + (delta-1) * tb_ticks_per_jiffy; + + } while (read_seqretry(&xtime_lock, seq)); + + next_dec -= get_tb(); + set_dec(next_dec); + + return; +} + +/* Take into account skipped ticks and restore the HZ timer frequency */ +void start_hz_timer(struct pt_regs *regs) +{ + unsigned long cpu = smp_processor_id(); + + if (!cpu_isset(cpu, nohz_cpu_mask)) + return; + + cpu_clear(cpu, nohz_cpu_mask); + account_ticks(regs); +} + +#endif /* CONFIG_NO_IDLE_HZ */ + + /* * Scheduler clock - returns current time in nanosec units. * diff -puN arch/ppc64/kernel/irq.c~ppc64 arch/ppc64/kernel/irq.c --- linux-2.6.14-rc1/arch/ppc64/kernel/irq.c~ppc64 2005-09-28 19:35:36.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/irq.c 2005-10-02 21:42:03.000000000 +0530 @@ -55,6 +55,7 @@ #include #include #include +#include #ifdef CONFIG_SMP extern void iSeries_smp_message_recv( struct pt_regs * ); @@ -313,6 +314,8 @@ void do_IRQ(struct pt_regs *regs) irq_enter(); + start_hz_timer(regs); + #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 2KB free? 
*/ { diff -puN arch/ppc64/kernel/head.S~ppc64 arch/ppc64/kernel/head.S --- linux-2.6.14-rc1/arch/ppc64/kernel/head.S~ppc64 2005-09-28 19:35:36.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/head.S 2005-10-02 22:45:44.000000000 +0530 @@ -355,6 +355,13 @@ label##_iSeries: \ #endif +#ifdef CONFIG_NO_IDLE_HZ +#define START_HZ_TIMER \ + bl .start_hz_timer +#else +#define START_HZ_TIMER +#endif + #define STD_EXCEPTION_COMMON(trap, label, hdlr) \ .align 7; \ .globl label##_common; \ @@ -363,6 +370,7 @@ label##_common: \ DISABLE_INTS; \ bl .save_nvgprs; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ + START_HZ_TIMER; \ bl hdlr; \ b .ret_from_except @@ -373,6 +381,7 @@ label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ DISABLE_INTS; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ + START_HZ_TIMER; \ bl hdlr; \ b .ret_from_except_lite diff -puN include/asm-ppc64/time.h~ppc64 include/asm-ppc64/time.h --- linux-2.6.14-rc1/include/asm-ppc64/time.h~ppc64 2005-09-28 19:43:54.000000000 +0530 +++ linux-2.6.14-rc1-root/include/asm-ppc64/time.h 2005-10-02 21:32:08.000000000 +0530 @@ -102,6 +102,14 @@ static inline unsigned long tb_ticks_sin return get_tb() - tstamp; } +#ifdef CONFIG_NO_IDLE_HZ +extern void stop_hz_timer(void); +extern void start_hz_timer(struct pt_regs *); +#else +static inline void stop_hz_timer(void) { } +static inline void start_hz_timer(struct pt_regs *regs) { } +#endif + #define mulhwu(x,y) \ ({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;}) #define mulhdu(x,y) \ diff -puN arch/ppc64/Kconfig~ppc64 arch/ppc64/Kconfig --- linux-2.6.14-rc1/arch/ppc64/Kconfig~ppc64 2005-09-28 20:08:39.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/Kconfig 2005-10-01 15:45:06.000000000 +0530 @@ -146,6 +146,12 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. 
+config NO_IDLE_HZ + depends on EXPERIMENTAL && PPC_PSERIES + bool "No HZ timer ticks in idle" + help + Switches the HZ timer interrupts off when a CPU is idle. + config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on PPC_MULTIPLATFORM && EXPERIMENTAL diff -puN kernel/sysctl.c~ppc64 kernel/sysctl.c --- linux-2.6.14-rc1/kernel/sysctl.c~ppc64 2005-09-28 21:08:05.000000000 +0530 +++ linux-2.6.14-rc1-root/kernel/sysctl.c 2005-10-01 10:55:32.000000000 +0530 @@ -544,6 +544,16 @@ static ctl_table kern_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, +#ifdef CONFIG_NO_IDLE_HZ + { + .ctl_name = KERN_HZ_TIMER, + .procname = "hz_timer", + .data = &sysctl_hz_timer, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_ARCH_S390 #ifdef CONFIG_MATHEMU { @@ -555,16 +565,6 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_NO_IDLE_HZ - { - .ctl_name = KERN_HZ_TIMER, - .procname = "hz_timer", - .data = &sysctl_hz_timer, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif { .ctl_name = KERN_S390_USER_DEBUG_LOGGING, .procname = "userprocess_debug", diff -puN arch/ppc64/kernel/pSeries_setup.c~ppc64 arch/ppc64/kernel/pSeries_setup.c --- linux-2.6.14-rc1/arch/ppc64/kernel/pSeries_setup.c~ppc64 2005-10-01 11:02:18.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/pSeries_setup.c 2005-10-01 11:10:50.000000000 +0530 @@ -475,9 +475,10 @@ static inline void dedicated_idle_sleep( * a prod occurs. Returning from the cede enables external * interrupts. */ - if (!need_resched()) + if (!need_resched()) { + stop_hz_timer(); cede_processor(); - else + } else local_irq_enable(); } else { /* @@ -570,9 +571,10 @@ static int pseries_shared_idle(void) * Check need_resched() again with interrupts disabled * to avoid a race. 
*/ - if (!need_resched()) + if (!need_resched()) { + stop_hz_timer(); cede_processor(); - else + } else local_irq_enable(); HMT_medium(); _ Now the test patch. It was something quick that I wrote to get the data I needed. Are the decrementer exception statistics available somewhere already? Also I assume that there are 4 CPUs in the m/c! --- linux-2.6.14-rc1-root/arch/ppc64/kernel/idle.c | 5 +++++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c | 4 ++++ linux-2.6.14-rc1-root/fs/proc/proc_misc.c | 12 ++++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff -puN arch/ppc64/kernel/time.c~debug arch/ppc64/kernel/time.c --- linux-2.6.14-rc1/arch/ppc64/kernel/time.c~debug 2005-10-02 22:56:58.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c 2005-10-02 22:56:58.000000000 +0530 @@ -315,6 +315,8 @@ static void iSeries_tb_recal(void) unsigned long tb_last_stamp __cacheline_aligned_in_smp; +DEFINE_PER_CPU(int, dec_ticks); + static void account_ticks(struct pt_regs *regs) { int next_dec; @@ -366,6 +368,8 @@ int timer_interrupt(struct pt_regs * reg irq_enter(); + __get_cpu_var(dec_ticks) += 1; + profile_tick(CPU_PROFILING, regs); lpaca->lppaca.int_dword.fields.decr_int = 0; diff -puN fs/proc/proc_misc.c~debug fs/proc/proc_misc.c --- linux-2.6.14-rc1/fs/proc/proc_misc.c~debug 2005-10-02 22:56:58.000000000 +0530 +++ linux-2.6.14-rc1-root/fs/proc/proc_misc.c 2005-10-02 22:56:58.000000000 +0530 @@ -233,13 +233,21 @@ static struct file_operations proc_zonei .release = seq_release, }; +DECLARE_PER_CPU(int, dec_ticks); + static int version_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { int len; + char *cp = page; - strcpy(page, linux_banner); - len = strlen(page); + cp += sprintf(cp, "%s\n", linux_banner); + cp += sprintf (cp, "%d %d %d %d \n", + per_cpu(dec_ticks, 0), + per_cpu(dec_ticks, 1), + per_cpu(dec_ticks, 2), + per_cpu(dec_ticks, 3)); + len = cp - page; return proc_calc_metrics(page, start, off, count, eof, len); } 
diff -puN arch/ppc64/kernel/idle.c~debug arch/ppc64/kernel/idle.c --- linux-2.6.14-rc1/arch/ppc64/kernel/idle.c~debug 2005-10-02 22:56:58.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/idle.c 2005-10-02 22:56:58.000000000 +0530 @@ -45,6 +45,11 @@ int default_idle(void) while (!need_resched() && !cpu_is_offline(cpu)) { ppc64_runlatch_off(); + local_irq_disable(); + if (!need_resched()) + stop_hz_timer(); + local_irq_enable(); + /* * Go into low thread priority and possibly * low power mode. _ -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From benh at kernel.crashing.org Mon Oct 3 18:08:12 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Mon, 03 Oct 2005 18:08:12 +1000 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 In-Reply-To: <20051002174630.GA6786@in.ibm.com> References: <20051002174630.GA6786@in.ibm.com> Message-ID: <1128326892.8267.89.camel@gaston> On Sun, 2005-10-02 at 23:16 +0530, Srivatsa Vaddagiri wrote: > Hello, > The patch below implements NO_IDLE_HZ support for pSeries/PPC64. It > basically lets idle CPUs to cut off their timer ticks until they can. Hi ! I haven't looked at the patch itself yet, but I have a few comments on your notes: > Some notes about the patch: > > - Only pseries_shared_idle and pseries_dedicated_idle routines > have been converted over to use this support, since I felt > cutting off ticks doesnt make sense if the idle routine is > poll-based. It should be fine to do that on native_idle as well (for things like G5 machines). > - Boot CPU cannot skip ticks. This is because of the current design wherein > only boot CPU updates wall-clock/jiffies. That should be fairly easy to fix. > I didn't see any particular reason why it has been designed like that > (maybe to reduce lock contention on xtime_lock?). 
If we have to allow > boot CPU also to skip ticks (which IMO we should), then this design > needs to change, i.e we should allow xtime/jiffies to be updated > from any CPU (like S390 allows). If people agree that this is the > right direction, then I can give it a shot next. I don't think we care that much which CPU updates xtime and jiffies. I would love xtime to die anyway, kernel should get do_gettimeofday() internally. On ppc64, we only seldom update xtime. Doing it on CPU0 was, I suppose, a matter of simplicity of the implementation. > - One requirement is that a call to start_hz_timer should be inserted > in every possible interrupt path. Towards this end, have I missed > some interrupt paths? Or have I included some exception path which > I shouldn't be! I think C code is good enough, you probably only need to add a call to do_IRQ(). The decrementer interrupt should naturally playback lost ticks. Ben. From jimix at watson.ibm.com Tue Oct 4 00:59:40 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Mon, 3 Oct 2005 10:59:40 -0400 Subject: To large page or not to large page In-Reply-To: <1128242759.8267.45.camel@gaston> References: <17213.56267.826654.651632@kitch0.watson.ibm.com> <1128242759.8267.45.camel@gaston> Message-ID: <17217.18268.120919.459499@kitch0.watson.ibm.com> >>>>> "BH" == Benjamin Herrenschmidt writes: BH> Does that segment has to be part of the linear mapping ? Can't it just BH> be mapped afterward using a kernel virtual mapping ? Thanks for the kick, I see now that I can use __ioremap() without forcing CI. thanks. -JX -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about."
-- Peter Griffin (Family Guy) From vatsa at in.ibm.com Tue Oct 4 02:18:51 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Mon, 3 Oct 2005 21:48:51 +0530 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 In-Reply-To: <1128326892.8267.89.camel@gaston> References: <20051002174630.GA6786@in.ibm.com> <1128326892.8267.89.camel@gaston> Message-ID: <20051003161851.GA4244@in.ibm.com> On Mon, Oct 03, 2005 at 06:08:12PM +1000, Benjamin Herrenschmidt wrote: > I don't think we care that much which CPU updates xtime and jiffies. I > would love xtime to die anyway, kernel should get do_gettimeofday() > internally. On ppc64, we only seldomly update xtime. Doing it on CPU0 > was, I suppose, a matter of simplicity of the implementation. Ok, in that case I will fix that as well in the next version of the patch. > I think C code is good enough, you probably only need need to add a call > to do_IRQ(). The decrementer interrupt should naturally playback lost > ticks. Aren't other exceptions possible when a CPU is in its idle loop? If not, then yes we can insert a call to start_hz_timer only in do_IRQ. BTW, in the patch that I had sent, I had got the calculation of next_dec (in stop_hz_timer) wrong. I had considered that tb_last_stamp is ahead of jiffies, which is not correct. 
So the modified code should be something like below: do { seq = read_seqbegin(&xtime_lock); delta = next_timer_interrupt() - jiffies; if (delta < MIN_SKIP) { cpu_clear(cpu, nohz_cpu_mask); return; } if (delta > MAX_SKIP) delta = MAX_SKIP; next_dec = tb_last_stamp + delta * tb_ticks_per_jiffy; } while (read_seqretry(&xtime_lock, seq)); next_dec -= get_tb(); set_dec(next_dec); -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From linas at austin.ibm.com Tue Oct 4 04:57:39 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 13:57:39 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add Message-ID: <20051003185739.GR29826@austin.ibm.com> 08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that the phb was not marked "dynamic", and so instead of kmalloc() being called, the __init __alloc_bootmem() was called, resulting in access of garbage data. The patch below fixes this crash, and adds some docs to clarify the code.
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.011393833 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:52:26.421786761 -0500 @@ -121,6 +121,12 @@ return NULL; } +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,17 +207,19 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** pci_devs_phb_init -- Initialize phbs and pci devs under them. + * + * When this is called, the buswalk of PHB's has not happened yet. */ void __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { pci_devs_phb_init_dynamic(phb); + phb->is_dynamic = 1; + } pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); } From brking at us.ibm.com Tue Oct 4 06:26:30 2005 From: brking at us.ibm.com (brking at us.ibm.com) Date: Mon, 03 Oct 2005 15:26:30 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option Message-ID: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Add a .config option to default the scsi scan inquiry timeout. Due to a broken device (SCSI/ATA converter card) that is very common on IBM iSeries/pSeries machines, these architectures need a longer default inquiry timeout. 
Signed-off-by: Brian King --- linux-2.6-bjking1/arch/ppc64/configs/iSeries_defconfig | 1 + linux-2.6-bjking1/arch/ppc64/configs/pSeries_defconfig | 1 + linux-2.6-bjking1/drivers/scsi/Kconfig | 8 ++++++++ linux-2.6-bjking1/drivers/scsi/scsi_scan.c | 2 +- 4 files changed, 11 insertions(+), 1 deletion(-) diff -puN drivers/scsi/Kconfig~scsi_inq_timeout_config drivers/scsi/Kconfig --- linux-2.6/drivers/scsi/Kconfig~scsi_inq_timeout_config 2005-10-03 11:15:08.000000000 -0500 +++ linux-2.6-bjking1/drivers/scsi/Kconfig 2005-10-03 11:17:37.000000000 -0500 @@ -209,6 +209,14 @@ config SCSI_LOGGING there should be no noticeable performance impact as long as you have logging turned off. +config SCSI_INQUIRY_TIMEOUT + int "default timeout in seconds for INQUIRY scan" + depends on SCSI + default "5" + ---help--- + Timeout (in seconds) waiting for devices to answer INQUIRY. + Default is 5. Some non-compliant devices need more. + menu "SCSI Transport Attributes" depends on SCSI diff -puN drivers/scsi/scsi_scan.c~scsi_inq_timeout_config drivers/scsi/scsi_scan.c --- linux-2.6/drivers/scsi/scsi_scan.c~scsi_inq_timeout_config 2005-10-03 11:17:44.000000000 -0500 +++ linux-2.6-bjking1/drivers/scsi/scsi_scan.c 2005-10-03 11:18:58.000000000 -0500 @@ -102,7 +102,7 @@ MODULE_PARM_DESC(max_report_luns, "REPORT LUNS maximum number of LUNS received (should be" " between 1 and 16384)"); -static unsigned int scsi_inq_timeout = SCSI_TIMEOUT/HZ+3; +static unsigned int scsi_inq_timeout = CONFIG_SCSI_INQUIRY_TIMEOUT; module_param_named(inq_timeout, scsi_inq_timeout, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(inq_timeout, diff -puN arch/ppc64/configs/pSeries_defconfig~scsi_inq_timeout_config arch/ppc64/configs/pSeries_defconfig --- linux-2.6/arch/ppc64/configs/pSeries_defconfig~scsi_inq_timeout_config 2005-10-03 11:19:18.000000000 -0500 +++ linux-2.6-bjking1/arch/ppc64/configs/pSeries_defconfig 2005-10-03 11:21:18.000000000 -0500 @@ -436,6 +436,7 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_MULTI_LUN=y 
CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set +CONFIG_SCSI_INQUIRY_TIMEOUT=30 # # SCSI Transport Attributes diff -puN arch/ppc64/configs/iSeries_defconfig~scsi_inq_timeout_config arch/ppc64/configs/iSeries_defconfig --- linux-2.6/arch/ppc64/configs/iSeries_defconfig~scsi_inq_timeout_config 2005-10-03 11:24:14.000000000 -0500 +++ linux-2.6-bjking1/arch/ppc64/configs/iSeries_defconfig 2005-10-03 11:24:29.000000000 -0500 @@ -343,6 +343,7 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set +CONFIG_SCSI_INQUIRY_TIMEOUT=30 # # SCSI Transport Attributes _ From linas at austin.ibm.com Tue Oct 4 06:39:24 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 15:39:24 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Message-ID: <20051003203924.GS29826@austin.ibm.com> On Mon, Oct 03, 2005 at 03:26:30PM -0500, brking at us.ibm.com was heard to remark: > > Add a .config option to default the scsi scan inquiry timeout. > Due to a broken device (SCSI/ATA converter card) that is very > common on IBM iSeries/pSeries machines, these architectures > need a longer default inquiry timeout. Hmm, I thought I fixed this several yeas ago. I guess it didn't stay fixed? --linas From brking at us.ibm.com Tue Oct 4 06:44:28 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 15:44:28 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <20051003203924.GS29826@austin.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <20051003203924.GS29826@austin.ibm.com> Message-ID: <4341982C.8070702@us.ibm.com> linas wrote: > On Mon, Oct 03, 2005 at 03:26:30PM -0500, brking at us.ibm.com was heard to remark: > >>Add a .config option to default the scsi scan inquiry timeout. 
>>Due to a broken device (SCSI/ATA converter card) that is very >>common on IBM iSeries/pSeries machines, these architectures >>need a longer default inquiry timeout. > > > Hmm, I thought I fixed this several yeas ago. I guess it didn't > stay fixed? Some distro kernels have the default inquiry timeout changed for ppc64, but mainline only has the module parameter. -- Brian King eServer Storage I/O IBM Linux Technology Center From James.Bottomley at SteelEye.com Tue Oct 4 07:01:45 2005 From: James.Bottomley at SteelEye.com (James Bottomley) Date: Mon, 03 Oct 2005 17:01:45 -0400 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Message-ID: <1128373305.5825.3.camel@mulgrave> On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > Add a .config option to default the scsi scan inquiry timeout. > Due to a broken device (SCSI/ATA converter card) that is very > common on IBM iSeries/pSeries machines, these architectures > need a longer default inquiry timeout. The inquiry timeout has already been changed a while ago to be a module parameter (or kernel parameter) for this very case. Why does it now need to be a config option as well? James From jdl at freescale.com Tue Oct 4 07:19:38 2005 From: jdl at freescale.com (Jon Loeliger) Date: Mon, 03 Oct 2005 16:19:38 -0500 Subject: EXPORT_SYMBOL(foo) Trend? Message-ID: <1128374378.22452.16.camel@cashmere.sps.mot.com> Guys, Is the trend these days towards EXPORT_SYMBOL(foo) near its definition, or in the one large mondo ppc_ksym.c file? Thanks, jdl From arnd at arndb.de Tue Oct 4 07:23:48 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Mon, 3 Oct 2005 23:23:48 +0200 Subject: EXPORT_SYMBOL(foo) Trend? 
In-Reply-To: <1128374378.22452.16.camel@cashmere.sps.mot.com> References: <1128374378.22452.16.camel@cashmere.sps.mot.com> Message-ID: <200510032323.48967.arnd@arndb.de> On Maandag 03 Oktober 2005 23:19, Jon Loeliger wrote: > Is the trend these days towards EXPORT_SYMBOL(foo) near > its definition, or in the one large mondo ppc_ksym.c file? > Near the definition. We're trying to phase out ppc_ksym.c for anything coming from C files, though it's still needed for assembly files. Arnd <>< From brking at us.ibm.com Tue Oct 4 07:31:17 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 16:31:17 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <1128373305.5825.3.camel@mulgrave> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> Message-ID: <4341A325.5070406@us.ibm.com> James Bottomley wrote: > On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > >>Add a .config option to default the scsi scan inquiry timeout. >>Due to a broken device (SCSI/ATA converter card) that is very >>common on IBM iSeries/pSeries machines, these architectures >>need a longer default inquiry timeout. > > > The inquiry timeout has already been changed a while ago to be a module > parameter (or kernel parameter) for this very case. Why does it now > need to be a config option as well? So that when a distro goes off and builds a PPC64 install kernel, it can boot on one of these broken DVD-ROMs without requiring each distro to add special module options inside their initrd. 
-- Brian King eServer Storage I/O IBM Linux Technology Center From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. */ From brking at us.ibm.com Tue Oct 4 07:42:54 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 16:42:54 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> Message-ID: <4341A5DE.5030804@us.ibm.com> Randy.Dunlap wrote: > On Mon, 3 Oct 2005, Brian King wrote: > > >>James Bottomley wrote: >> >>>On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: >>> >>> >>>>Add a .config option to default the scsi scan inquiry timeout. >>>>Due to a broken device (SCSI/ATA converter card) that is very >>>>common on IBM iSeries/pSeries machines, these architectures >>>>need a longer default inquiry timeout. 
>>> >>> >>>The inquiry timeout has already been changed a while ago to be a module >>>parameter (or kernel parameter) for this very case. Why does it now >>>need to be a config option as well? >> >>So that when a distro goes off and builds a PPC64 install kernel, >>it can boot on one of these broken DVD-ROMs without requiring each >>distro to add special module options inside their initrd. > > > My reading on mailing lists is that distros prefer > module or kernel parameters more than they do kernel config > options (in general) because they are more dynamic -- it > allows them to build one kernel instead of many kernels > with various config options. I agree with that statement, however.... Right now I am sitting with an install ISO, trying to boot off this broken DVD-ROM, with scsi compiled as a module in the install kernel, so I can't pass any boot parameters to it in order to make it boot... Additionally, this kernel config option does not decrease this dynamic ability. The config option simply alters the default inquiry timeout. If a module parm is passed at module load time, it will override the default. -- Brian King eServer Storage I/O IBM Linux Technology Center From rdunlap at xenotime.net Tue Oct 4 07:34:26 2005 From: rdunlap at xenotime.net (Randy.Dunlap) Date: Mon, 3 Oct 2005 14:34:26 -0700 (PDT) Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <4341A325.5070406@us.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> Message-ID: On Mon, 3 Oct 2005, Brian King wrote: > James Bottomley wrote: > > On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > > > >>Add a .config option to default the scsi scan inquiry timeout. > >>Due to a broken device (SCSI/ATA converter card) that is very > >>common on IBM iSeries/pSeries machines, these architectures > >>need a longer default inquiry timeout. 
> > > > > > The inquiry timeout has already been changed a while ago to be a module > > parameter (or kernel parameter) for this very case. Why does it now > > need to be a config option as well? > > So that when a distro goes off and builds a PPC64 install kernel, > it can boot on one of these broken DVD-ROMs without requiring each > distro to add special module options inside their initrd. My reading on mailing lists is that distros prefer module or kernel parameters more than they do kernel config options (in general) because they are more dynamic -- it allows them to build one kernel instead of many kernels with various config options. -- ~Randy From rdunlap at xenotime.net Tue Oct 4 07:47:22 2005 From: rdunlap at xenotime.net (Randy.Dunlap) Date: Mon, 3 Oct 2005 14:47:22 -0700 (PDT) Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <4341A5DE.5030804@us.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> <4341A5DE.5030804@us.ibm.com> Message-ID: On Mon, 3 Oct 2005, Brian King wrote: > Randy.Dunlap wrote: > > On Mon, 3 Oct 2005, Brian King wrote: > > > > > >>James Bottomley wrote: > >> > >>>On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > >>> > >>> > >>>>Add a .config option to default the scsi scan inquiry timeout. > >>>>Due to a broken device (SCSI/ATA converter card) that is very > >>>>common on IBM iSeries/pSeries machines, these architectures > >>>>need a longer default inquiry timeout. > >>> > >>> > >>>The inquiry timeout has already been changed a while ago to be a module > >>>parameter (or kernel parameter) for this very case. Why does it now > >>>need to be a config option as well? > >> > >>So that when a distro goes off and builds a PPC64 install kernel, > >>it can boot on one of these broken DVD-ROMs without requiring each > >>distro to add special module options inside their initrd. 
> > > > > > My reading on mailing lists is that distros prefer > > module or kernel parameters more than they do kernel config > > options (in general) because they are more dynamic -- it > > allows them to build one kernel instead of many kernels > > with various config options. > > I agree with that statement, however.... Right now I am sitting with > an install ISO, trying to boot off this broken DVD-ROM, with scsi > compiled as a module in the install kernel, so I can't pass any > boot parameters to it in order to make it boot... and you can't modify the module parameters (on the DVD)... Yes, I've seen that kind of problem recently too (not the same problem, however). > Additionally, this kernel config option does not decrease this dynamic > ability. The config option simply alters the default inquiry timeout. > If a module parm is passed at module load time, it will override the > default. True. and we have precedent(s) for some options living in both .config and as kernel/module parameters/options. -- ~Randy From linas at austin.ibm.com Tue Oct 4 07:49:40 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 16:49:40 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003213430.GD7554@mipter.zuzino.mipt.ru> References: <20051003185739.GR29826@austin.ibm.com> <20051003213430.GD7554@mipter.zuzino.mipt.ru> Message-ID: <20051003214940.GT29826@austin.ibm.com> On Tue, Oct 04, 2005 at 01:34:30AM +0400, Alexey Dobriyan was heard to remark: > > Please, add docs in a proper way: Done, new patch attached. 
--linas 08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that the phb was not marked "dynamic", and so instead of having kmalloc() being called, the __init __alloc_bootmem() was called, resulting in access of garage data. The patch below fixes this crash, and adds some docs to clarify the code. Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 16:46:33.816658976 -0500 @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,17 +209,24 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. 
+ * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { pci_devs_phb_init_dynamic(phb); + phb->is_dynamic = 1; + } pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); } From benh at kernel.crashing.org Tue Oct 4 08:23:26 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 08:23:26 +1000 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 In-Reply-To: <20051003161851.GA4244@in.ibm.com> References: <20051002174630.GA6786@in.ibm.com> <1128326892.8267.89.camel@gaston> <20051003161851.GA4244@in.ibm.com> Message-ID: <1128378206.8267.102.camel@gaston> > Aren't other exceptions possible when a CPU is in its idle loop? If not, > then yes we can insert a call to start_hz_timer only in do_IRQ. Nothing that matters imho.. You can probably get machine checks or system reset, but I wouldn't even try to replay ticks in those as they aren't synchronized with the rest (not blocked by MSR:EE). No, you really only should care about External Interrupts, Decrementer Interrupts and Performance Monitor interrupts (yah, forgot about those). > BTW, in the patch that I had sent, I had got the calculation of next_dec > (in stop_hz_timer) wrong. I had considered that tb_last_stamp is ahead of > jiffies, which is not correct. So the modified code should be something like > below: Why not just call the decrementer interrupt ? It should replay already... Ben. 
From paulus at samba.org Tue Oct 4 10:06:56 2005 From: paulus at samba.org (Paul Mackerras) Date: Tue, 4 Oct 2005 10:06:56 +1000 Subject: EXPORT_SYMBOL(foo) Trend? In-Reply-To: <1128374378.22452.16.camel@cashmere.sps.mot.com> References: <1128374378.22452.16.camel@cashmere.sps.mot.com> Message-ID: <17217.51104.316654.114287@cargo.ozlabs.ibm.com> Jon Loeliger writes: > Is the trend these days towards EXPORT_SYMBOL(foo) near > its definition, or in the one large mondo ppc_ksym.c file? The former. We should only need to have exports from assembly code in ppc_ksyms.c. Paul. From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From geoffrey.levand at am.sony.com Tue Oct 4 11:18:25 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Mon, 03 Oct 2005 18:18:25 -0700 Subject: spufs: User space thread library In-Reply-To: <200509300014.21756.arnd@arndb.de> References: <200509160840.31071.arnd@arndb.de> <433C314C.80409@am.sony.com> <200509300014.21756.arnd@arndb.de> Message-ID: <4341D861.2050306@am.sony.com> Arnd Bergmann wrote: > On Dunnersdag 29 September 2005 20:24, Geoff Levand wrote: > >>I noticed some parts of the package are not very cross-build >>friendly. Have you thought about using an autoconf based >>build system? >> > > I think we now have some very rudimentary cross build support > in our internal version, basically just setting CROSS and > DESTDIR to some predefined values when not building on a > ppc64 system. > > A patch to add a configure.ac is certainly welcome ;-) > OK, I set up an autoconf based build system. I moved some things around to make coding the makefiles easier. I also found a conflict with the system's spe.h, so renamed that file. I put the results here: http://tree.celinuxforum.org/downloads/libspe-0.9-autoconf-05.10.03.18.01.19.tar.bz2 It's just a first cut. I think more work is needed in setting up the spu compiler. Also, more work is needed to make the scripts in the tools directory use the host's cross toolchain. 
-Geoff From rdunlap at xenotime.net Tue Oct 4 07:47:22 2005 From: rdunlap at xenotime.net (Randy.Dunlap) Date: Mon, 3 Oct 2005 14:47:22 -0700 (PDT) Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <4341A5DE.5030804@us.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> <4341A5DE.5030804@us.ibm.com> Message-ID: On Mon, 3 Oct 2005, Brian King wrote: > Randy.Dunlap wrote: > > On Mon, 3 Oct 2005, Brian King wrote: > > > > > >>James Bottomley wrote: > >> > >>>On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > >>> > >>> > >>>>Add a .config option to default the scsi scan inquiry timeout. > >>>>Due to a broken device (SCSI/ATA converter card) that is very > >>>>common on IBM iSeries/pSeries machines, these architectures > >>>>need a longer default inquiry timeout. > >>> > >>> > >>>The inquiry timeout has already been changed a while ago to be a module > >>>parameter (or kernel parameter) for this very case. Why does it now > >>>need to be a config option as well? > >> > >>So that when a distro goes off and builds a PPC64 install kernel, > >>it can boot on one of these broken DVD-ROMs without requiring each > >>distro to add special module options inside their initrd. > > > > > > My reading on mailing lists is that distros prefer > > module or kernel parameters more than they do kernel config > > options (in general) because they are more dynamic -- it > > allows them to build one kernel instead of many kernels > > with various config options. > > I agree with that statement, however.... Right now I am sitting with > an install ISO, trying to boot off this broken DVD-ROM, with scsi > compiled as a module in the install kernel, so I can't pass any > boot parameters to it in order to make it boot... and you can't modify the module parameters (on the DVD)... Yes, I've seen that kind of problem recently too (not the same problem, however). 
> Additionally, this kernel config option does not decrease this dynamic > ability. The config option simply alters the default inquiry timeout. > If a module parm is passed at module load time, it will override the > default. True. and we have precedent(s) for some options living in both .config and as kernel/module parameters/options. -- ~Randy - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From brking at us.ibm.com Tue Oct 4 06:44:28 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 15:44:28 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <20051003203924.GS29826@austin.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <20051003203924.GS29826@austin.ibm.com> Message-ID: <4341982C.8070702@us.ibm.com> linas wrote: > On Mon, Oct 03, 2005 at 03:26:30PM -0500, brking at us.ibm.com was heard to remark: > >>Add a .config option to default the scsi scan inquiry timeout. >>Due to a broken device (SCSI/ATA converter card) that is very >>common on IBM iSeries/pSeries machines, these architectures >>need a longer default inquiry timeout. > > > Hmm, I thought I fixed this several yeas ago. I guess it didn't > stay fixed? Some distro kernels have the default inquiry timeout changed for ppc64, but mainline only has the module parameter. 
-- Brian King eServer Storage I/O IBM Linux Technology Center - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From rdunlap at xenotime.net Tue Oct 4 07:34:26 2005 From: rdunlap at xenotime.net (Randy.Dunlap) Date: Mon, 3 Oct 2005 14:34:26 -0700 (PDT) Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <4341A325.5070406@us.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> <4341A325.5070406@us.ibm.com> Message-ID: On Mon, 3 Oct 2005, Brian King wrote: > James Bottomley wrote: > > On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > > > >>Add a .config option to default the scsi scan inquiry timeout. > >>Due to a broken device (SCSI/ATA converter card) that is very > >>common on IBM iSeries/pSeries machines, these architectures > >>need a longer default inquiry timeout. > > > > > > The inquiry timeout has already been changed a while ago to be a module > > parameter (or kernel parameter) for this very case. Why does it now > > need to be a config option as well? > > So that when a distro goes off and builds a PPC64 install kernel, > it can boot on one of these broken DVD-ROMs without requiring each > distro to add special module options inside their initrd. My reading on mailing lists is that distros prefer module or kernel parameters more than they do kernel config options (in general) because they are more dynamic -- it allows them to build one kernel instead of many kernels with various config options. 
-- ~Randy - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From linas at austin.ibm.com Tue Oct 4 07:49:40 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 16:49:40 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003213430.GD7554@mipter.zuzino.mipt.ru> References: <20051003185739.GR29826@austin.ibm.com> <20051003213430.GD7554@mipter.zuzino.mipt.ru> Message-ID: <20051003214940.GT29826@austin.ibm.com> On Tue, Oct 04, 2005 at 01:34:30AM +0400, Alexey Dobriyan was heard to remark: > > Please, add docs in a proper way: Done, new patch attached. --linas 08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that the phb was not marked "dynamic", and so instead of having kmalloc() being called, the __init __alloc_bootmem() was called, resulting in access of garage data. The patch below fixes this crash, and adds some docs to clarify the code. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 16:46:33.816658976 -0500 @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,17 +209,24 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. + * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! 
*/ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { pci_devs_phb_init_dynamic(phb); + phb->is_dynamic = 1; + } pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); } - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From adobriyan at gmail.com Tue Oct 4 07:34:30 2005 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Tue, 4 Oct 2005 01:34:30 +0400 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051003213430.GD7554@mipter.zuzino.mipt.ru> On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas wrote: > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garage data. The patch below fixes this crash, > and adds some docs to clarify the code. > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) Please, add docs in a proper way: /** * foo - bar * a: b * * Does bar. 
*/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From James.Bottomley at SteelEye.com Tue Oct 4 07:01:45 2005 From: James.Bottomley at SteelEye.com (James Bottomley) Date: Mon, 03 Oct 2005 17:01:45 -0400 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Message-ID: <1128373305.5825.3.camel@mulgrave> On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > Add a .config option to default the scsi scan inquiry timeout. > Due to a broken device (SCSI/ATA converter card) that is very > common on IBM iSeries/pSeries machines, these architectures > need a longer default inquiry timeout. The inquiry timeout has already been changed a while ago to be a module parameter (or kernel parameter) for this very case. Why does it now need to be a config option as well? James - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From brking at us.ibm.com Tue Oct 4 07:31:17 2005 From: brking at us.ibm.com (Brian King) Date: Mon, 03 Oct 2005 16:31:17 -0500 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <1128373305.5825.3.camel@mulgrave> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> <1128373305.5825.3.camel@mulgrave> Message-ID: <4341A325.5070406@us.ibm.com> James Bottomley wrote: > On Mon, 2005-10-03 at 15:26 -0500, brking at us.ibm.com wrote: > >>Add a .config option to default the scsi scan inquiry timeout. 
>>Due to a broken device (SCSI/ATA converter card) that is very >>common on IBM iSeries/pSeries machines, these architectures >>need a longer default inquiry timeout. > > > The inquiry timeout has already been changed a while ago to be a module > parameter (or kernel parameter) for this very case. Why does it now > need to be a config option as well? So that when a distro goes off and builds a PPC64 install kernel, it can boot on one of these broken DVD-ROMs without requiring each distro to add special module options inside their initrd. -- Brian King eServer Storage I/O IBM Linux Technology Center - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From linas at austin.ibm.com Tue Oct 4 07:49:40 2005 From: linas at austin.ibm.com (linas) Date: Mon, 3 Oct 2005 16:49:40 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003213430.GD7554@mipter.zuzino.mipt.ru> References: <20051003185739.GR29826@austin.ibm.com> <20051003213430.GD7554@mipter.zuzino.mipt.ru> Message-ID: <20051003214940.GT29826@austin.ibm.com> On Tue, Oct 04, 2005 at 01:34:30AM +0400, Alexey Dobriyan was heard to remark: > > Please, add docs in a proper way: Done, new patch attached. 
--linas 08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that the phb was not marked "dynamic", and so instead of having kmalloc() being called, the __init __alloc_bootmem() was called, resulting in access of garage data. The patch below fixes this crash, and adds some docs to clarify the code. Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 16:46:33.816658976 -0500 @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,17 +209,24 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. 
+ * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { pci_devs_phb_init_dynamic(phb); + phb->is_dynamic = 1; + } pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); } - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From benh at kernel.crashing.org Tue Oct 4 15:30:41 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 15:30:41 +1000 Subject: [PATCH] ppc64: Add cpufreq support for SMU based G5 Message-ID: <1128403842.31063.24.camel@gaston> iMac G5 and latest single CPU desktop G5 (SMU based machines) have a 970FX DD3 CPU that supports frequency & voltage switching. This patch adds support for simple dual frequency switch. It is required for the upcoming thermal control patch for these machines. 
Signed-off-by: Benjamin Herrenschmidt Index: linux-work/arch/ppc64/kernel/misc.S =================================================================== --- linux-work.orig/arch/ppc64/kernel/misc.S 2005-09-23 12:43:21.000000000 +1000 +++ linux-work/arch/ppc64/kernel/misc.S 2005-09-29 17:03:04.000000000 +1000 @@ -616,7 +616,7 @@ isync blr - /* +/* * Do an IO access in real mode */ _GLOBAL(real_writeb) @@ -649,6 +649,76 @@ #endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ /* + * SCOM access functions for 970 (FX only for now) + * + * unsigned long scom970_read(unsigned int address); + * void scom970_write(unsigned int address, unsigned long value); + * + * The address passed in is the 24 bits register address. This code + * is 970 specific and will not check the status bits, so you should + * know what you are doing. + */ +_GLOBAL(scom970_read) + /* interrupts off */ + mfmsr r4 + ori r0,r4,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). On current CPUs they must be 0'd, + * and finally or in RW bit + */ + rlwinm r3,r3,8,0,15 + ori r3,r3,0x8000 + + /* do the actual scom read */ + sync + mtspr SPRN_SCOMC,r3 + isync + mfspr r3,SPRN_SCOMD + isync + mfspr r0,SPRN_SCOMC + isync + + /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah + * that's the best we can do). Not implemented yet as we don't use + * the scom on any of the bogus CPUs yet, but may have to be done + * ultimately + */ + + /* restore interrupts */ + mtmsrd r4,1 + blr + + +_GLOBAL(scom970_write) + /* interrupts off */ + mfmsr r5 + ori r0,r5,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). On current CPUs they must be 0'd. 
+ */ + + rlwinm r3,r3,8,0,15 + + sync + mtspr SPRN_SCOMD,r4 /* write data */ + isync + mtspr SPRN_SCOMC,r3 /* write command */ + isync + mfspr 3,SPRN_SCOMC + isync + + /* restore interrupts */ + mtmsrd r5,1 + blr + + +/* * Create a kernel thread * kernel_thread(fn, arg, flags) */ Index: linux-work/include/asm-ppc64/processor.h =================================================================== --- linux-work.orig/include/asm-ppc64/processor.h 2005-09-23 12:44:12.000000000 +1000 +++ linux-work/include/asm-ppc64/processor.h 2005-09-27 11:42:50.000000000 +1000 @@ -177,6 +177,9 @@ #define SPRN_CTRLT 0x098 #define CTRL_RUNLATCH 0x1 +#define SPRN_SCOMC 0x114 +#define SPRN_SCOMD 0x115 + /* Performance monitor SPRs */ #define SPRN_SIAR 780 #define SPRN_SDAR 781 @@ -536,6 +539,9 @@ } } +extern unsigned long scom970_read(unsigned int address); +extern void scom970_write(unsigned int address, unsigned long value); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ Index: linux-work/arch/ppc64/Kconfig =================================================================== --- linux-work.orig/arch/ppc64/Kconfig 2005-09-23 12:43:21.000000000 +1000 +++ linux-work/arch/ppc64/Kconfig 2005-09-28 10:41:27.000000000 +1000 @@ -159,6 +159,17 @@ support. As of this writing the exact hardware interface is strongly in flux, so no good recommendation can be made. +source "drivers/cpufreq/Kconfig" + +config CPU_FREQ_PMAC + bool "Support for Apple G5" + depends on CPU_FREQ && PPC_PMAC64 + select CPU_FREQ_TABLE + help + This adds support for frequency switching on some Apple G5 + machine. This is currently very experimental and works only + on some iMac G5. 
+ config IBMVIO depends on PPC_PSERIES || PPC_ISERIES bool Index: linux-work/arch/ppc64/kernel/Makefile =================================================================== --- linux-work.orig/arch/ppc64/kernel/Makefile 2005-09-23 12:43:21.000000000 +1000 +++ linux-work/arch/ppc64/kernel/Makefile 2005-09-27 11:42:50.000000000 +1000 @@ -60,6 +60,7 @@ obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \ pmac_time.o pmac_nvram.o pmac_low_i2c.o \ udbg_scc.o +obj-$(CONFIG_CPU_FREQ_PMAC) += pmac_cpufreq.o obj-$(CONFIG_PPC_MAPLE) += maple_setup.o maple_pci.o maple_time.o \ udbg_16550.o Index: linux-work/arch/ppc64/kernel/pmac_cpufreq.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/arch/ppc64/kernel/pmac_cpufreq.c 2005-09-27 11:42:50.000000000 +1000 @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt + * and Markus Demleitner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs, + * that is iMac G5 and latest single CPU desktop. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) 
+#endif + +/* see 970FX user manual */ + +#define SCOM_PCR 0x0aa001 /* PCR scom addr */ + +#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */ +#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */ +#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */ +#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */ +#define PCR_SPEED_MASK 0x000e0000U /* speed mask */ +#define PCR_SPEED_SHIFT 17 +#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */ +#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */ +#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */ +#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */ +#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */ +#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */ + +#define SCOM_PSR 0x408001 /* PSR scom addr */ +/* warning: PSR is a 64 bits register */ +#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */ +#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */ +#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */ +#define PSR_CUR_SPEED_SHIFT (56) + +/* + * The G5 only supports two frequencies (Quarter speed is not supported) + */ +#define CPUFREQ_HIGH 0 +#define CPUFREQ_LOW 1 + +static struct cpufreq_frequency_table g5_cpu_freqs[] = { + {CPUFREQ_HIGH, 0}, + {CPUFREQ_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +static struct freq_attr* g5_cpu_freqs_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +/* Power mode data is an array of the 32 bits PCR values to use for + * the various frequencies, retreived from the device-tree + */ +static u32 *g5_pmode_data; +static int g5_pmode_max; +static int g5_pmode_cur; + + +static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ +static int g5_fvt_count; /* number of op. points */ +static int g5_fvt_cur; /* current op. 
point */ + +/* ----------------- real hardware interface */ + +static void g5_switch_volt(int speed_mode) +{ + struct smu_simple_cmd cmd; + + DECLARE_COMPLETION(comp); + smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete, + &comp, 'V', 'S', 'L', 'E', 'W', + 0xff, g5_fvt_cur+1, speed_mode); + wait_for_completion(&comp); +} + +static int g5_switch_freq(int speed_mode) +{ + int to; + + if (g5_pmode_cur == speed_mode) + return 0; + + /* If frequency is going up, first ramp up the voltage */ + if (speed_mode < g5_pmode_cur) + g5_switch_volt(speed_mode); + + /* Clear PCR high */ + scom970_write(SCOM_PCR, 0); + /* Clear PCR low */ + scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0); + /* Set PCR low */ + scom970_write(SCOM_PCR, PCR_HILO_SELECT | + g5_pmode_data[speed_mode]); + + /* Wait for completion */ + for (to = 0; to < 10; to++) { + unsigned long psr = scom970_read(SCOM_PSR); + + if ((psr & PSR_CMD_RECEIVED) == 0 && + (((psr >> PSR_CUR_SPEED_SHIFT) ^ + (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3) + == 0) + break; + if (psr & PSR_CMD_COMPLETED) + break; + udelay(100); + } + + /* If frequency is going down, last ramp the voltage */ + if (speed_mode > g5_pmode_cur) + g5_switch_volt(speed_mode); + + g5_pmode_cur = speed_mode; + ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; + + return 0; +} + +static int g5_query_freq(void) +{ + unsigned long psr = scom970_read(SCOM_PSR); + int i; + + for (i = 0; i <= g5_pmode_max; i++) + if ((((psr >> PSR_CUR_SPEED_SHIFT) ^ + (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0) + break; + return i; +} + +/* ----------------- cpufreq bookkeeping */ +static int __pmac g5_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); +} + +static int __pmac g5_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, g5_cpu_freqs, + 
target_freq, relation, &newstate)) + return -EINVAL; + + return g5_switch_freq(newstate); +} + +static int __pmac g5_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + if (policy->cpu != 0) + return -ENODEV; + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = g5_cpu_freqs[g5_query_freq()].frequency; + cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu); + + return cpufreq_frequency_table_cpuinfo(policy, + g5_cpu_freqs); +} + + +static struct cpufreq_driver g5_cpufreq_driver = { + .name = "powermac", + .owner = THIS_MODULE, + .flags = CPUFREQ_CONST_LOOPS, + .init = g5_cpufreq_cpu_init, + .verify = g5_cpufreq_verify, + .target = g5_cpufreq_target, + .attr = g5_cpu_freqs_attr, +}; + + +static int __init g5_cpufreq_init(void) +{ + struct device_node *cpunode; + unsigned int psize, ssize; + struct smu_sdbp_header *shdr; + unsigned long max_freq; + u32 *valp; + int rc = -ENODEV; + + /* Look for CPU and SMU nodes */ + cpunode = of_find_node_by_type(NULL, "cpu"); + if (!cpunode) { + DBG("No CPU node !\n"); + return -ENODEV; + } + + /* Check 970FX for now */ + valp = (u32 *)get_property(cpunode, "cpu-version", NULL); + if (!valp) { + DBG("No cpu-version property !\n"); + goto bail_noprops; + } + if (((*valp) >> 16) != 0x3c) { + DBG("Wrong CPU version: %08x\n", *valp); + goto bail_noprops; + } + + /* Look for the powertune data in the device-tree */ + g5_pmode_data = (u32 *)get_property(cpunode, "power-mode-data",&psize); + if (!g5_pmode_data) { + DBG("No power-mode-data !\n"); + goto bail_noprops; + } + g5_pmode_max = psize / sizeof(u32) - 1; + + /* Look for the FVT table */ + shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); + if (!shdr) + goto bail_noprops; + g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1]; + ssize = (shdr->len * sizeof(u32)) - sizeof(struct smu_sdbp_header); + g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt); + g5_fvt_cur = 0; + + /* Sanity checking */ + if (g5_fvt_count < 
1 || g5_pmode_max < 1) + goto bail_noprops; + + /* + * From what I see, clock-frequency is always the maximal frequency. + * The current driver can not slew sysclk yet, so we really only deal + * with powertune steps for now. We also only implement full freq and + * half freq in this version. So far, I haven't yet seen a machine + * supporting anything else. + */ + valp = (u32 *)get_property(cpunode, "clock-frequency", NULL); + if (!valp) + return -ENODEV; + max_freq = (*valp)/1000; + g5_cpu_freqs[0].frequency = max_freq; + g5_cpu_freqs[1].frequency = max_freq/2; + + /* Check current frequency */ + g5_pmode_cur = g5_query_freq(); + if (g5_pmode_cur > 1) { + /* We don't support anything but 1:1 and 1:2, fixup ... */ + g5_switch_freq(1); + g5_pmode_cur = 1; + } + + printk(KERN_INFO "Registering G5 CPU frequency driver\n"); + printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", + g5_cpu_freqs[1].frequency/1000, + g5_cpu_freqs[0].frequency/1000, + g5_cpu_freqs[g5_pmode_cur].frequency/1000); + + rc = cpufreq_register_driver(&g5_cpufreq_driver); + + /* We keep the CPU node on hold... hopefully, Apple G5 don't have + * hotplug CPU with a dynamic device-tree ... 
+ */ + return rc; + + bail_noprops: + of_node_put(cpunode); + + return rc; +} + +module_init(g5_cpufreq_init); + + +MODULE_LICENSE("GPL"); Index: linux-work/drivers/macintosh/smu.c =================================================================== --- linux-work.orig/drivers/macintosh/smu.c 2005-09-26 11:48:36.000000000 +1000 +++ linux-work/drivers/macintosh/smu.c 2005-09-29 16:56:59.000000000 +1000 @@ -843,6 +843,18 @@ return 0; } +struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +{ + char pname[32]; + + if (!smu) + return NULL; + + sprintf(pname, "sdb-partition-%02x", id); + return (struct smu_sdbp_header *)get_property(smu->of_node, + pname, size); +} +EXPORT_SYMBOL(smu_get_sdb_partition); /* Index: linux-work/include/asm-ppc64/smu.h =================================================================== --- linux-work.orig/include/asm-ppc64/smu.h 2005-09-26 11:48:37.000000000 +1000 +++ linux-work/include/asm-ppc64/smu.h 2005-09-29 16:56:59.000000000 +1000 @@ -144,7 +144,11 @@ * - lenght 8 ("VSLEWxyz") has 3 additional bytes appended, and is * used to set the voltage slewing point. The SMU replies with "DONE" * I yet have to figure out their exact meaning of those 3 bytes in - * both cases. + * both cases. They seem to be: + * x = processor mask + * y = op. point index + * z = processor freq. step index + * I haven't yet decyphered result codes * */ #define SMU_CMD_POWER_COMMAND 0xaa @@ -244,6 +248,7 @@ */ extern void smu_done_complete(struct smu_cmd *cmd, void *misc); + /* * Synchronous helpers. 
Will spin-wait for completion of a command */ @@ -334,6 +339,59 @@ #endif /* __KERNEL__ */ /* + * - SMU "sdb" partitions informations - + */ + + +/* + * Partition header format + */ +struct smu_sdbp_header { + __u8 id; + __u8 len; + __u8 version; + __u8 flags; +}; + +/* + * 32 bits integers are usually encoded with 2x16 bits swapped, + * this demangles them + */ +#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) + +/* This is the definition of the SMU sdb-partition-0x12 table (called + * CPU F/V/T operating points in Darwin). The definition for all those + * SMU tables should be moved to some separate file + */ +#define SMU_SDB_FVT_ID 0x12 + +struct smu_sdbp_fvt { + __u32 sysclk; /* Base SysClk frequency in Hz for + * this operating point + */ + __u8 pad; + __u8 maxtemp; /* Max temp. supported by this + * operating point + */ + + __u16 volts[3]; /* CPU core voltage for the 3 + * PowerTune modes, a mode with + * 0V = not supported. + */ +}; + +#ifdef __KERNEL__ +/* + * This returns the pointer to an SMU "sdb" partition data or NULL + * if not found. The data format is described below + */ +extern struct smu_sdbp_header *smu_get_sdb_partition(int id, + unsigned int *size); + +#endif /* __KERNEL__ */ + + +/* * - Userland interface - */ @@ -376,4 +434,5 @@ __u32 reply_len; /* Lenght of data follwing */ }; + #endif /* _SMU_H */ From benh at kernel.crashing.org Tue Oct 4 15:34:33 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 15:34:33 +1000 Subject: [PATCH] ppc64: Support retreiving missing SMU partitions Message-ID: <1128404073.31063.28.camel@gaston> The SMU chip has an EEPROM that contains various informations about the motherboard, like thermal calibration infos, etc... This EEPROM is divided in "partitions", and the firmware only extracts some of these and publish them in the device-tree. 
This patch adds a mecanism to retreive the missing ones which is necessary for the upcoming thermal control patch. In order to make this accessible to userland as well, the patch adds the ability to the /proc/device-tree code to get new properties added at runtime and simplify the code. Signed-off-by: Benjamin Herrenschmidt Index: linux-work/drivers/macintosh/smu.c =================================================================== --- linux-work.orig/drivers/macintosh/smu.c 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/drivers/macintosh/smu.c 2005-09-29 17:06:05.000000000 +1000 @@ -47,13 +47,13 @@ #include #include -#define VERSION "0.6" +#define VERSION "0.7" #define AUTHOR "(c) 2005 Benjamin Herrenschmidt, IBM Corp." #undef DEBUG_SMU #ifdef DEBUG_SMU -#define DPRINTK(fmt, args...) do { printk(KERN_DEBUG fmt , ##args); } while (0) +#define DPRINTK(fmt, args...) do { udbg_printf(KERN_DEBUG fmt , ##args); } while (0) #else #define DPRINTK(fmt, args...) do { } while (0) #endif @@ -92,7 +92,7 @@ * for now, just hard code that */ static struct smu_device *smu; - +static DECLARE_MUTEX(smu_part_access); /* * SMU driver low level stuff @@ -113,9 +113,11 @@ DPRINTK("SMU: starting cmd %x, %d bytes data\n", cmd->cmd, cmd->data_len); - DPRINTK("SMU: data buffer: %02x %02x %02x %02x ...\n", + DPRINTK("SMU: data buffer: %02x %02x %02x %02x %02x %02x %02x %02x\n", ((u8 *)cmd->data_buf)[0], ((u8 *)cmd->data_buf)[1], - ((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3]); + ((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3], + ((u8 *)cmd->data_buf)[4], ((u8 *)cmd->data_buf)[5], + ((u8 *)cmd->data_buf)[6], ((u8 *)cmd->data_buf)[7]); /* Fill the SMU command buffer */ smu->cmd_buf->cmd = cmd->cmd; @@ -438,7 +440,7 @@ EXPORT_SYMBOL(smu_present); -int smu_init (void) +int __init smu_init (void) { struct device_node *np; u32 *data; @@ -843,16 +845,154 @@ return 0; } -struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +/* + * Handling of "partitions" + */ + 
+static int smu_read_datablock(u8 *dest, unsigned int addr, unsigned int len) +{ + DECLARE_COMPLETION(comp); + unsigned int chunk; + struct smu_cmd cmd; + int rc; + u8 params[8]; + + /* We currently use a chunk size of 0xe. We could check the + * SMU firmware version and use bigger sizes though + */ + chunk = 0xe; + + while (len) { + unsigned int clen = min(len, chunk); + + cmd.cmd = SMU_CMD_MISC_ee_COMMAND; + cmd.data_len = 7; + cmd.data_buf = params; + cmd.reply_len = chunk; + cmd.reply_buf = dest; + cmd.done = smu_done_complete; + cmd.misc = &comp; + params[0] = SMU_CMD_MISC_ee_GET_DATABLOCK_REC; + params[1] = 0x4; + *((u32 *)&params[2]) = addr; + params[6] = clen; + + rc = smu_queue_cmd(&cmd); + if (rc) + return rc; + wait_for_completion(&comp); + if (cmd.status != 0) + return rc; + if (cmd.reply_len != clen) { + printk(KERN_DEBUG "SMU: short read in " + "smu_read_datablock, got: %d, want: %d\n", + cmd.reply_len, clen); + return -EIO; + } + len -= clen; + addr += clen; + dest += clen; + } + return 0; +} + +static struct smu_sdbp_header *smu_create_sdb_partition(int id) +{ + DECLARE_COMPLETION(comp); + struct smu_simple_cmd cmd; + unsigned int addr, len, tlen; + struct smu_sdbp_header *hdr; + struct property *prop; + + /* First query the partition info */ + smu_queue_simple(&cmd, SMU_CMD_PARTITION_COMMAND, 2, + smu_done_complete, &comp, + SMU_CMD_PARTITION_LATEST, id); + wait_for_completion(&comp); + + /* Partition doesn't exist (or other error) */ + if (cmd.cmd.status != 0 || cmd.cmd.reply_len != 6) + return NULL; + + /* Fetch address and length from reply */ + addr = *((u16 *)cmd.buffer); + len = cmd.buffer[3] << 2; + /* Calucluate total length to allocate, including the 17 bytes + * for "sdb-partition-XX" that we append at the end of the buffer + */ + tlen = sizeof(struct property) + len + 18; + + prop = kcalloc(tlen, 1, GFP_KERNEL); + if (prop == NULL) + return NULL; + hdr = (struct smu_sdbp_header *)(prop + 1); + prop->name = ((char *)prop) + tlen - 18; + 
sprintf(prop->name, "sdb-partition-%02x", id); + prop->length = len; + prop->value = (unsigned char *)hdr; + prop->next = NULL; + + /* Read the datablock */ + if (smu_read_datablock((u8 *)hdr, addr, len)) { + printk(KERN_DEBUG "SMU: datablock read failed while reading " + "partition %02x !\n", id); + goto failure; + } + + /* Got it, check a few things and create the property */ + if (hdr->id != id) { + printk(KERN_DEBUG "SMU: Reading partition %02x and got " + "%02x !\n", id, hdr->id); + goto failure; + } + if (prom_add_property(smu->of_node, prop)) { + printk(KERN_DEBUG "SMU: Failed creating sdb-partition-%02x " + "property !\n", id); + goto failure; + } + + return hdr; + failure: + kfree(prop); + return NULL; +} + +/* Note: Only allowed to return error code in pointers (using ERR_PTR) + * when interruptible is 1 + */ +struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size, + int interruptible) { char pname[32]; + struct smu_sdbp_header *part; if (!smu) return NULL; sprintf(pname, "sdb-partition-%02x", id); - return (struct smu_sdbp_header *)get_property(smu->of_node, + + if (interruptible) { + int rc; + rc = down_interruptible(&smu_part_access); + if (rc) + return ERR_PTR(rc); + } else + down(&smu_part_access); + + part = (struct smu_sdbp_header *)get_property(smu->of_node, pname, size); + if (part == NULL) { + part = smu_create_sdb_partition(id); + if (part != NULL && size) + *size = part->len << 2; + } + up(&smu_part_access); + return part; +} + +struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +{ + return __smu_get_sdb_partition(id, size, 0); } EXPORT_SYMBOL(smu_get_sdb_partition); @@ -928,6 +1068,14 @@ else if (hdr.cmdtype == SMU_CMDTYPE_WANTS_EVENTS) { pp->mode = smu_file_events; return 0; + } else if (hdr.cmdtype == SMU_CMDTYPE_GET_PARTITION) { + struct smu_sdbp_header *part; + part = __smu_get_sdb_partition(hdr.cmd, NULL, 1); + if (part == NULL) + return -EINVAL; + else if (IS_ERR(part)) + return PTR_ERR(part); 
+ return 0; } else if (hdr.cmdtype != SMU_CMDTYPE_SMU) return -EINVAL; else if (pp->mode != smu_file_commands) Index: linux-work/include/asm-ppc64/smu.h =================================================================== --- linux-work.orig/include/asm-ppc64/smu.h 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/include/asm-ppc64/smu.h 2005-09-29 17:06:05.000000000 +1000 @@ -20,10 +20,23 @@ /* * Partition info commands * - * I do not know what those are for at this point + * These commands are used to retreive the sdb-partition-XX datas from + * the SMU. The lenght is always 2. First byte is the subcommand code + * and second byte is the partition ID. + * + * The reply is 6 bytes: + * + * - 0..1 : partition address + * - 2 : a byte containing the partition ID + * - 3 : length (maybe other bits are rest of header ?) + * + * The data must then be obtained with calls to another command: + * SMU_CMD_MISC_ee_GET_DATABLOCK_REC (described below). */ #define SMU_CMD_PARTITION_COMMAND 0x3e - +#define SMU_CMD_PARTITION_LATEST 0x01 +#define SMU_CMD_PARTITION_BASE 0x02 +#define SMU_CMD_PARTITION_UPDATE 0x03 /* * Fan control @@ -176,6 +189,25 @@ * Misc commands * * This command seem to be a grab bag of various things + * + * SMU_CMD_MISC_ee_GET_DATABLOCK_REC is used, among others, to + * transfer blocks of data from the SMU. So far, I've decrypted it's + * usage to retreive partition data. In order to do that, you have to + * break your transfer in "chunks" since that command cannot transfer + * more than a chunk at a time. The chunk size used by OF is 0xe bytes, + * but it seems that the darwin driver will let you do 0x1e bytes if + * your "PMU" version is >= 0x30. 
You can get the "PMU" version apparently + * either in the last 16 bits of property "smu-version-pmu" or as the 16 + * bytes at offset 1 of "smu-version-info" + * + * For each chunk, the command takes 7 bytes of arguments: + * byte 0: subcommand code (0x02) + * byte 1: 0x04 (always, I don't know what it means, maybe the address + * space to use or some other nicety. It's hard coded in OF) + * byte 2..5: SMU address of the chunk (big endian 32 bits) + * byte 6: size to transfer (up to max chunk size) + * + * The data is returned directly */ #define SMU_CMD_MISC_ee_COMMAND 0xee #define SMU_CMD_MISC_ee_GET_DATABLOCK_REC 0x02 @@ -357,13 +389,13 @@ * 32 bits integers are usually encoded with 2x16 bits swapped, * this demangles them */ -#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) +//#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) /* This is the definition of the SMU sdb-partition-0x12 table (called * CPU F/V/T operating points in Darwin). The definition for all those * SMU tables should be moved to some separate file */ -#define SMU_SDB_FVT_ID 0x12 +#define SMU_SDB_FVT_ID 0x12 struct smu_sdbp_fvt { __u32 sysclk; /* Base SysClk frequency in Hz for @@ -380,6 +412,9 @@ */ }; +/* Other partitions without known structures */ +#define SMU_SDB_DEBUG_SWITCHES_ID 0x05 + #ifdef __KERNEL__ /* * This returns the pointer to an SMU "sdb" partition data or NULL @@ -417,14 +452,22 @@ * It is illegal to send SMU commands through a file descriptor configured * for events reception * + * The special SMU_CMDTYPE_GET_PARTITION command can be used to retreive + * SMU sdb-partition's from the SMU when not available. The command will also + * cause the new partition to be added to the device-tree. That command has + * a data_len of 0, you pass the partition ID in the "cmd" field. It will + * not trigger any reply and is not asynchronous. Just fetch the partition + * from the device-tree after it's done. 
*/ struct smu_user_cmd_hdr { __u32 cmdtype; #define SMU_CMDTYPE_SMU 0 /* SMU command */ #define SMU_CMDTYPE_WANTS_EVENTS 1 /* switch fd to events mode */ +#define SMU_CMDTYPE_GET_PARTITION 2 /* retreive an sdb partition */ __u8 cmd; /* SMU command byte */ + __u8 pad[3]; /* padding */ __u32 data_len; /* Lenght of data following */ }; Index: linux-work/arch/ppc64/kernel/prom.c =================================================================== --- linux-work.orig/arch/ppc64/kernel/prom.c 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/arch/ppc64/kernel/prom.c 2005-09-29 17:06:05.000000000 +1000 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1893,17 +1894,32 @@ EXPORT_SYMBOL(get_property); /* - * Add a property to a node + * Add a property to a node. */ -void +int prom_add_property(struct device_node* np, struct property* prop) { - struct property **next = &np->properties; + struct property **next; prop->next = NULL; - while (*next) + write_lock(&devtree_lock); + next = &np->properties; + while (*next) { + if (strcmp(prop->name, (*next)->name) == 0) { + /* duplicate ! don't insert it */ + write_unlock(&devtree_lock); + return -1; + } next = &(*next)->next; + } *next = prop; + write_unlock(&devtree_lock); + + /* try to add to proc as well if it was initialized */ + if (np->pde) + proc_device_tree_add_prop(np->pde, prop); + + return 0; } #if 0 Index: linux-work/fs/proc/proc_devtree.c =================================================================== --- linux-work.orig/fs/proc/proc_devtree.c 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/fs/proc/proc_devtree.c 2005-09-29 17:06:05.000000000 +1000 @@ -49,6 +49,39 @@ */ /* + * Add a property to a node + */ +static struct proc_dir_entry * +__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp) +{ + struct proc_dir_entry *ent; + + /* + * Unfortunately proc_register puts each new entry + * at the beginning of the list. So we rearrange them. 
+ */ + ent = create_proc_read_entry(pp->name, + strncmp(pp->name, "security-", 9) + ? S_IRUGO : S_IRUSR, de, + property_read_proc, pp); + if (ent == NULL) + return NULL; + + if (!strncmp(pp->name, "security-", 9)) + ent->size = 0; /* don't leak number of password chars */ + else + ent->size = pp->length; + + return ent; +} + + +void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop) +{ + __proc_device_tree_add_prop(pde, prop); +} + +/* * Process a node, adding entries for its children and its properties. */ void proc_device_tree_add_node(struct device_node *np, @@ -57,11 +90,9 @@ struct property *pp; struct proc_dir_entry *ent; struct device_node *child; - struct proc_dir_entry *list = NULL, **lastp; const char *p; set_node_proc_entry(np, de); - lastp = &list; for (child = NULL; (child = of_get_next_child(np, child));) { p = strrchr(child->full_name, '/'); if (!p) @@ -71,9 +102,6 @@ ent = proc_mkdir(p, de); if (ent == 0) break; - *lastp = ent; - ent->next = NULL; - lastp = &ent->next; proc_device_tree_add_node(child, ent); } of_node_put(child); @@ -84,7 +112,7 @@ * properties are quite unimportant for us though, thus we * simply "skip" them here, but we do have to check. */ - for (ent = list; ent != NULL; ent = ent->next) + for (ent = de->subdir; ent != NULL; ent = ent->next) if (!strcmp(ent->name, pp->name)) break; if (ent != NULL) { @@ -94,25 +122,10 @@ continue; } - /* - * Unfortunately proc_register puts each new entry - * at the beginning of the list. So we rearrange them. - */ - ent = create_proc_read_entry(pp->name, - strncmp(pp->name, "security-", 9) - ? 
S_IRUGO : S_IRUSR, de, - property_read_proc, pp); + ent = __proc_device_tree_add_prop(de, pp); if (ent == 0) break; - if (!strncmp(pp->name, "security-", 9)) - ent->size = 0; /* don't leak number of password chars */ - else - ent->size = pp->length; - ent->next = NULL; - *lastp = ent; - lastp = &ent->next; } - de->subdir = list; } /* Index: linux-work/include/asm-ppc/prom.h =================================================================== --- linux-work.orig/include/asm-ppc/prom.h 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/include/asm-ppc/prom.h 2005-09-29 17:06:05.000000000 +1000 @@ -93,7 +93,7 @@ extern int machine_is_compatible(const char *compat); extern unsigned char *get_property(struct device_node *node, const char *name, int *lenp); -extern void prom_add_property(struct device_node* np, struct property* prop); +extern int prom_add_property(struct device_node* np, struct property* prop); extern void prom_get_irq_senses(unsigned char *, int, int); extern int prom_n_addr_cells(struct device_node* np); extern int prom_n_size_cells(struct device_node* np); Index: linux-work/include/asm-ppc64/prom.h =================================================================== --- linux-work.orig/include/asm-ppc64/prom.h 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/include/asm-ppc64/prom.h 2005-09-29 17:06:05.000000000 +1000 @@ -201,6 +201,6 @@ extern int prom_n_size_cells(struct device_node* np); extern int prom_n_intr_cells(struct device_node* np); extern void prom_get_irq_senses(unsigned char *senses, int off, int max); -extern void prom_add_property(struct device_node* np, struct property* prop); +extern int prom_add_property(struct device_node* np, struct property* prop); #endif /* _PPC64_PROM_H */ Index: linux-work/include/linux/proc_fs.h =================================================================== --- linux-work.orig/include/linux/proc_fs.h 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/include/linux/proc_fs.h 2005-09-29 17:06:05.000000000 
+1000 @@ -139,15 +139,12 @@ /* * proc_devtree.c */ +#ifdef CONFIG_PROC_DEVICETREE struct device_node; +struct property; extern void proc_device_tree_init(void); -#ifdef CONFIG_PROC_DEVICETREE extern void proc_device_tree_add_node(struct device_node *, struct proc_dir_entry *); -#else /* !CONFIG_PROC_DEVICETREE */ -static inline void proc_device_tree_add_node(struct device_node *np, struct proc_dir_entry *pde) -{ - return; -} +extern void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop); #endif /* CONFIG_PROC_DEVICETREE */ extern struct proc_dir_entry *proc_symlink(const char *, Index: linux-work/arch/ppc/syslib/prom.c =================================================================== --- linux-work.orig/arch/ppc/syslib/prom.c 2005-09-29 16:56:59.000000000 +1000 +++ linux-work/arch/ppc/syslib/prom.c 2005-09-29 17:06:05.000000000 +1000 @@ -1165,7 +1165,7 @@ /* * Add a property to a node */ -void __openfirmware +int __openfirmware prom_add_property(struct device_node* np, struct property* prop) { struct property **next = &np->properties; @@ -1174,6 +1174,8 @@ while (*next) next = &(*next)->next; *next = prop; + + return 0; } /* I quickly hacked that one, check against spec ! */ From benh at kernel.crashing.org Tue Oct 4 15:36:54 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 15:36:54 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines Message-ID: <1128404215.31063.32.camel@gaston> This is the actual thermal control support for PowerMac8,1, PowerMac8,2 and PowerMac9,1 machines (SMU based), that is iMac G5 and single CPU desktop. It requires CPUFREQ to be enabled to properly deal with overtemp conditions. The new thermal control code implements a new framework (nicknamed "windfarm") to which I expect to port the old G5 thermal control, and possibly some of the powerbook thermal control drivers as well in the future. 
Signed-off-by: Benjamin Herrenschmidt Index: linux-work/drivers/macintosh/smu.c =================================================================== --- linux-work.orig/drivers/macintosh/smu.c 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/drivers/macintosh/smu.c 2005-10-04 15:17:32.000000000 +1000 @@ -590,6 +590,8 @@ sprintf(name, "smu-i2c-%02x", *reg); of_platform_device_create(np, name, &smu->of_dev->dev); } + if (device_is_compatible(np, "smu-sensors")) + of_platform_device_create(np, "smu-sensors", &smu->of_dev->dev); } } Index: linux-work/drivers/macintosh/Kconfig =================================================================== --- linux-work.orig/drivers/macintosh/Kconfig 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/drivers/macintosh/Kconfig 2005-10-04 15:17:33.000000000 +1000 @@ -169,6 +169,16 @@ This driver provides thermostat and fan control for the desktop G5 machines. +config WINDFARM + tristate "New PowerMac thermal control infrastructure" + +config WINDFARM_SMU + tristate "Support for thermal management on SMU based PowerMacs" + depends on WINDFARM && I2C && CPU_FREQ_PMAC && PMAC_SMU + select I2C_PMAC_SMU + help + This driver provides thermal control for iMacG5 and newer + config ANSLCD tristate "Support for ANS LCD display" depends on ADB_CUDA && PPC_PMAC Index: linux-work/drivers/macintosh/Makefile =================================================================== --- linux-work.orig/drivers/macintosh/Makefile 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/drivers/macintosh/Makefile 2005-10-04 15:17:33.000000000 +1000 @@ -26,3 +26,8 @@ obj-$(CONFIG_THERM_PM72) += therm_pm72.o obj-$(CONFIG_THERM_WINDTUNNEL) += therm_windtunnel.o obj-$(CONFIG_THERM_ADT746X) += therm_adt746x.o +obj-$(CONFIG_WINDFARM) += windfarm_core.o +obj-$(CONFIG_WINDFARM_SMU) += windfarm_smu_controls.o \ + windfarm_smu_sensors.o \ + windfarm_lm75_sensor.o windfarm_pid.o \ + windfarm_cpufreq_clamp.o windfarm_smu.o Index: linux-work/drivers/macintosh/windfarm.h 
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm.h 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,122 @@ +#ifndef __WINDFARM_H__ +#define __WINDFARM_H__ + +#include +#include +#include +#include + +/* Display a 16.16 fixed point value */ +#define FIX32TOPRINT(f) ((f) >> 16),((((f) & 0xffff) * 1000) >> 16) + +/* + * Control objects + */ + +struct wf_control; + +struct wf_control_ops { + int (*set_value)(struct wf_control *ct, s32 val); + int (*get_value)(struct wf_control *ct, s32 *val); + s32 (*get_min)(struct wf_control *ct); + s32 (*get_max)(struct wf_control *ct); + void (*release)(struct wf_control *ct); + struct module *owner; +}; + +struct wf_control { + struct list_head link; + struct wf_control_ops *ops; + char *name; + int type; + struct kref ref; +}; + +#define WF_CONTROL_TYPE_GENERIC 0 +#define WF_CONTROL_RPM_FAN 1 +#define WF_CONTROL_PWM_FAN 2 + + +/* Note about lifetime rules: wf_register_control() will initialize + * the kref and wf_unregister_control will decrement it, thus the + * object creating/disposing a given control shouldn't assume it + * still exists after wf_unregister_control has been called. 
+ * wf_find_control will inc the refcount for you + */ +extern int wf_register_control(struct wf_control *ct); +extern void wf_unregister_control(struct wf_control *ct); +extern struct wf_control * wf_find_control(const char *name); +extern int wf_get_control(struct wf_control *ct); +extern void wf_put_control(struct wf_control *ct); + +static inline int wf_control_set_max(struct wf_control *ct) +{ + s32 vmax = ct->ops->get_max(ct); + return ct->ops->set_value(ct, vmax); +} + +static inline int wf_control_set_min(struct wf_control *ct) +{ + s32 vmin = ct->ops->get_min(ct); + return ct->ops->set_value(ct, vmin); +} + +/* + * Sensor objects + */ + +struct wf_sensor; + +struct wf_sensor_ops { + int (*get_value)(struct wf_sensor *sr, s32 *val); + void (*release)(struct wf_sensor *sr); + struct module *owner; +}; + +struct wf_sensor { + struct list_head link; + struct wf_sensor_ops *ops; + char *name; + struct kref ref; +}; + +/* Same lifetime rules as controls */ +extern int wf_register_sensor(struct wf_sensor *sr); +extern void wf_unregister_sensor(struct wf_sensor *sr); +extern struct wf_sensor * wf_find_sensor(const char *name); +extern int wf_get_sensor(struct wf_sensor *sr); +extern void wf_put_sensor(struct wf_sensor *sr); + +/* For use by clients. Note that we are a bit racy here since + * notifier_block doesn't have a module owner field. I may fix + * it one day ... + * + * LOCKING NOTE ! + * + * All "events" except WF_EVENT_TICK are called with an internal mutex + * held which will deadlock if you call basically any core routine. + * So don't ! Just take note of the event and do your actual operations + * from the ticker. + * + */ +extern int wf_register_client(struct notifier_block *nb); +extern int wf_unregister_client(struct notifier_block *nb); + +/* Overtemp conditions. 
Those are refcounted */ +extern void wf_set_overtemp(void); +extern void wf_clear_overtemp(void); +extern int wf_is_overtemp(void); + +#define WF_EVENT_NEW_CONTROL 0 /* param is wf_control * */ +#define WF_EVENT_NEW_SENSOR 1 /* param is wf_sensor * */ +#define WF_EVENT_OVERTEMP 2 /* no param */ +#define WF_EVENT_NORMALTEMP 3 /* overtemp condition cleared */ +#define WF_EVENT_TICK 4 /* 1 second tick */ + +/* Note: If that driver gets more broad use, we could replace the + * simplistic overtemp bits with "environmental conditions". That + * could then be used to also notify of things like fan failure, + * case open, battery conditions, ... + */ + +#endif /* __WINDFARM_H__ */ Index: linux-work/drivers/macintosh/windfarm_core.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_core.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,428 @@ +/* + * Windfarm PowerMac thermal control. Core + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + * + * This core code tracks the list of sensors & controls, register + * clients, and holds the kernel thread used for control. + * + * TODO: + * + * Add some information about sensor/control type and data format to + * sensors/controls, and have the sysfs attribute stuff be moved + * generically here instead of hard coded in the platform specific + * driver as it us currently + * + * This however requires solving some annoying lifetime issues with + * sysfs which doesn't seem to have lifetime rules for struct attribute, + * I may have to create full features kobjects for every sensor/control + * instead which is a bit of an overkill imho + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.2" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) 
printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +static LIST_HEAD(wf_controls); +static LIST_HEAD(wf_sensors); +static DECLARE_MUTEX(wf_lock); +static struct notifier_block *wf_client_list; +static int wf_client_count; +static unsigned int wf_overtemp; +static unsigned int wf_overtemp_counter; +struct task_struct *wf_thread; + +/* + * Utilities & tick thread + */ + +static inline void wf_notify(int event, void *param) +{ + notifier_call_chain(&wf_client_list, event, param); +} + +int wf_critical_overtemp(void) +{ + static char * critical_overtemp_path = "/sbin/critical_overtemp"; + char *argv[] = { critical_overtemp_path, NULL }; + static char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + + return call_usermodehelper(critical_overtemp_path, argv, envp, 0); +} +EXPORT_SYMBOL_GPL(wf_critical_overtemp); + +static int wf_thread_func(void *data) +{ + unsigned long next, delay; + + next = jiffies; + + DBG("wf: thread started\n"); + + while(!kthread_should_stop()) { + try_to_freeze(); + + if (time_after_eq(jiffies, next)) { + wf_notify(WF_EVENT_TICK, NULL); + if (wf_overtemp) { + wf_overtemp_counter++; + /* 10 seconds overtemp, notify userland */ + if (wf_overtemp_counter > 10) + wf_critical_overtemp(); + /* 30 seconds, shutdown */ + if (wf_overtemp_counter > 30) { + printk(KERN_ERR "windfarm: Overtemp " + "for more than 30" + " seconds, shutting down\n"); + machine_power_off(); + } + } + next += HZ; + } + + set_current_state(TASK_INTERRUPTIBLE); + delay = next - jiffies; + if (delay <= HZ) + schedule_timeout(delay); + set_current_state(TASK_RUNNING); + + /* there should be no signal, but oh well */ + if (signal_pending(current)) { + printk(KERN_WARNING "windfarm: thread got sigl !\n"); + break; + } + } + + DBG("wf: thread stopped\n"); + + return 0; +} + +static void wf_start_thread(void) +{ + wf_thread = kthread_run(wf_thread_func, NULL, "kwindfarm"); + if (IS_ERR(wf_thread)) { + printk(KERN_ERR "windfarm: 
failed to create thread,err %ld\n", + PTR_ERR(wf_thread)); + wf_thread = NULL; + } +} + + +static void wf_stop_thread(void) +{ + if (wf_thread) + kthread_stop(wf_thread); + wf_thread = NULL; +} + +/* + * Controls + */ + +static void wf_control_release(struct kref *kref) +{ + struct wf_control *ct = container_of(kref, struct wf_control, ref); + + DBG("wf: Deleting control %s\n", ct->name); + + if (ct->ops && ct->ops->release) + ct->ops->release(ct); + else + kfree(ct); +} + +int wf_register_control(struct wf_control *new_ct) +{ + struct wf_control *ct; + + down(&wf_lock); + list_for_each_entry(ct, &wf_controls, link) { + if (!strcmp(ct->name, new_ct->name)) { + printk(KERN_WARNING "windfarm: trying to register" + " duplicate control %s\n", ct->name); + up(&wf_lock); + return -EEXIST; + } + } + kref_init(&new_ct->ref); + list_add(&new_ct->link, &wf_controls); + + DBG("wf: Registered control %s\n", new_ct->name); + + wf_notify(WF_EVENT_NEW_CONTROL, new_ct); + up(&wf_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(wf_register_control); + +void wf_unregister_control(struct wf_control *ct) +{ + down(&wf_lock); + list_del(&ct->link); + up(&wf_lock); + + DBG("wf: Unregistered control %s\n", ct->name); + + kref_put(&ct->ref, wf_control_release); +} +EXPORT_SYMBOL_GPL(wf_unregister_control); + +struct wf_control * wf_find_control(const char *name) +{ + struct wf_control *ct; + + down(&wf_lock); + list_for_each_entry(ct, &wf_controls, link) { + if (!strcmp(ct->name, name)) { + if (wf_get_control(ct)) + ct = NULL; + up(&wf_lock); + return ct; + } + } + up(&wf_lock); + return NULL; +} +EXPORT_SYMBOL_GPL(wf_find_control); + +int wf_get_control(struct wf_control *ct) +{ + if (!try_module_get(ct->ops->owner)) + return -ENODEV; + kref_get(&ct->ref); + return 0; +} +EXPORT_SYMBOL_GPL(wf_get_control); + +void wf_put_control(struct wf_control *ct) +{ + struct module *mod = ct->ops->owner; + kref_put(&ct->ref, wf_control_release); + module_put(mod); +} +EXPORT_SYMBOL_GPL(wf_put_control); + 
+
+/*
+ * Sensors
+ */
+
+
+static void wf_sensor_release(struct kref *kref)
+{
+	struct wf_sensor *sr = container_of(kref, struct wf_sensor, ref);
+
+	DBG("wf: Deleting sensor %s\n", sr->name);
+
+	if (sr->ops && sr->ops->release)
+		sr->ops->release(sr);
+	else
+		kfree(sr);
+}
+
+int wf_register_sensor(struct wf_sensor *new_sr)
+{
+	struct wf_sensor *sr;
+
+	down(&wf_lock);
+	list_for_each_entry(sr, &wf_sensors, link) {
+		if (!strcmp(sr->name, new_sr->name)) {
+			printk(KERN_WARNING "windfarm: trying to register"
+			       " duplicate sensor %s\n", sr->name);
+			up(&wf_lock);
+			return -EEXIST;
+		}
+	}
+	kref_init(&new_sr->ref);
+	list_add(&new_sr->link, &wf_sensors);
+
+	DBG("wf: Registered sensor %s\n", new_sr->name);
+
+	wf_notify(WF_EVENT_NEW_SENSOR, new_sr);
+	up(&wf_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wf_register_sensor);
+
+void wf_unregister_sensor(struct wf_sensor *sr)
+{
+	down(&wf_lock);
+	list_del(&sr->link);
+	up(&wf_lock);
+
+	DBG("wf: Unregistered sensor %s\n", sr->name);
+
+	wf_put_sensor(sr);
+}
+EXPORT_SYMBOL_GPL(wf_unregister_sensor);
+
+struct wf_sensor * wf_find_sensor(const char *name)
+{
+	struct wf_sensor *sr;
+
+	down(&wf_lock);
+	list_for_each_entry(sr, &wf_sensors, link) {
+		if (!strcmp(sr->name, name)) {
+			if (wf_get_sensor(sr))
+				sr = NULL;
+			up(&wf_lock);
+			return sr;
+		}
+	}
+	up(&wf_lock);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(wf_find_sensor);
+
+int wf_get_sensor(struct wf_sensor *sr)
+{
+	if (!try_module_get(sr->ops->owner))
+		return -ENODEV;
+	kref_get(&sr->ref);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wf_get_sensor);
+
+void wf_put_sensor(struct wf_sensor *sr)
+{
+	struct module *mod = sr->ops->owner;
+	kref_put(&sr->ref, wf_sensor_release);
+	module_put(mod);
+}
+EXPORT_SYMBOL_GPL(wf_put_sensor);
+
+
+/*
+ * Client & notification
+ */
+
+int wf_register_client(struct notifier_block *nb)
+{
+	int rc;
+	struct wf_control *ct;
+	struct wf_sensor *sr;
+
+	down(&wf_lock);
+	rc = notifier_chain_register(&wf_client_list, nb);
+	if (rc != 0)
+		goto bail;	/* wf_lock is held here; bail must drop it */
+	wf_client_count++;
+	list_for_each_entry(ct, &wf_controls, link)
+		wf_notify(WF_EVENT_NEW_CONTROL, ct);
+	list_for_each_entry(sr, &wf_sensors, link)
+		wf_notify(WF_EVENT_NEW_SENSOR, sr);
+	if (wf_client_count == 1)
+		wf_start_thread();	/* first client: start the tick thread */
+ bail:	/* common exit: success and failure both release wf_lock */
+	up(&wf_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(wf_register_client);
+
+int wf_unregister_client(struct notifier_block *nb)
+{
+	down(&wf_lock);
+	notifier_chain_unregister(&wf_client_list, nb);
+	wf_client_count--;	/* balances the ++ in wf_register_client() */
+	if (wf_client_count == 0)
+		wf_stop_thread();	/* last client gone: stop the tick thread */
+	up(&wf_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wf_unregister_client);
+
+void wf_set_overtemp(void)
+{
+	down(&wf_lock);
+	wf_overtemp++;
+	if (wf_overtemp == 1) {
+		printk(KERN_WARNING "windfarm: Overtemp condition detected !\n");
+		wf_overtemp_counter = 0;
+		wf_notify(WF_EVENT_OVERTEMP, NULL);
+	}
+	up(&wf_lock);
+}
+EXPORT_SYMBOL_GPL(wf_set_overtemp);
+
+void wf_clear_overtemp(void)
+{
+	down(&wf_lock);
+	WARN_ON(wf_overtemp == 0);
+	if (wf_overtemp == 0) {
+		up(&wf_lock);
+		return;
+	}
+	wf_overtemp--;
+	if (wf_overtemp == 0) {
+		printk(KERN_WARNING "windfarm: Overtemp condition cleared !\n");
+		wf_notify(WF_EVENT_NORMALTEMP, NULL);
+	}
+	up(&wf_lock);
+}
+EXPORT_SYMBOL_GPL(wf_clear_overtemp);
+
+int wf_is_overtemp(void)
+{
+	return (wf_overtemp != 0);
+}
+EXPORT_SYMBOL_GPL(wf_is_overtemp);
+
+static struct platform_device wf_platform_device = {
+	.name	= "windfarm",
+};
+
+static int __init windfarm_core_init(void)
+{
+	DBG("wf: core loaded\n");
+
+	platform_device_register(&wf_platform_device);
+	return 0;
+}
+
+static void __exit windfarm_core_exit(void)
+{
+	BUG_ON(wf_client_count != 0);
+
+	DBG("wf: core unloaded\n");
+
+	platform_device_unregister(&wf_platform_device);
+}
+
+
+module_init(windfarm_core_init);
+module_exit(windfarm_core_exit);
+
+MODULE_AUTHOR("Benjamin Herrenschmidt ");
+MODULE_DESCRIPTION("Core component of PowerMac thermal control");
+MODULE_LICENSE("GPL");
+
Index: linux-work/drivers/macintosh/windfarm_smu_controls.c
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_smu_controls.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,274 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.3" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +/* + * SMU fans control object + */ + +static LIST_HEAD(smu_fans); + +struct smu_fan_control { + struct list_head link; + int fan_type; /* 0 = rpm, 1 = pwm */ + u32 reg; /* index in SMU */ + s32 value; /* current value */ + s32 min, max; /* min/max values */ + struct wf_control ctrl; +}; +#define to_smu_fan(c) container_of(c, struct smu_fan_control, ctrl) + +static int smu_set_fan(int pwm, u8 id, u16 value) +{ + struct smu_cmd cmd; + u8 buffer[16]; + DECLARE_COMPLETION(comp); + int rc; + + /* Fill SMU command structure */ + cmd.cmd = SMU_CMD_FAN_COMMAND; + cmd.data_len = 14; + cmd.reply_len = 16; + cmd.data_buf = cmd.reply_buf = buffer; + cmd.status = 0; + cmd.done = smu_done_complete; + cmd.misc = &comp; + + /* Fill argument buffer */ + memset(buffer, 0, 16); + buffer[0] = pwm ? 
0x10 : 0x00; + buffer[1] = 0x01 << id; + *((u16 *)&buffer[2 + id * 2]) = value; + + rc = smu_queue_cmd(&cmd); + if (rc) + return rc; + wait_for_completion(&comp); + return cmd.status; +} + +static void smu_fan_release(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + + kfree(fct); +} + +static int smu_fan_set(struct wf_control *ct, s32 value) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + + if (value < fct->min) + value = fct->min; + if (value > fct->max) + value = fct->max; + fct->value = value; + + return smu_set_fan(fct->fan_type, fct->reg, value); +} + +static int smu_fan_get(struct wf_control *ct, s32 *value) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + *value = fct->value; /* todo: read from SMU */ + return 0; +} + +static s32 smu_fan_min(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + return fct->min; +} + +static s32 smu_fan_max(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + return fct->max; +} + +static struct wf_control_ops smu_fan_ops = { + .set_value = smu_fan_set, + .get_value = smu_fan_get, + .get_min = smu_fan_min, + .get_max = smu_fan_max, + .release = smu_fan_release, + .owner = THIS_MODULE, +}; + +static struct smu_fan_control *smu_fan_create(struct device_node *node, + int pwm_fan) +{ + struct smu_fan_control *fct; + s32 *v; u32 *reg; + char *l; + + fct = kmalloc(sizeof(struct smu_fan_control), GFP_KERNEL); + if (fct == NULL) + return NULL; + fct->ctrl.ops = &smu_fan_ops; + l = (char *)get_property(node, "location", NULL); + if (l == NULL) + goto fail; + + fct->fan_type = pwm_fan; + fct->ctrl.type = pwm_fan ? WF_CONTROL_PWM_FAN : WF_CONTROL_RPM_FAN; + + /* We use the name & location here the same way we do for SMU sensors, + * see the comment in windfarm_smu_sensors.c. The locations are a bit + * less consistent here between the iMac and the desktop models, but + * that is good enough for our needs for now at least. 
+ * + * One problem though is that Apple seem to be inconsistent with case + * and the kernel doesn't have strcasecmp =P + */ + + fct->ctrl.name = NULL; + + /* Names used on desktop models */ + if (!strcmp(l, "Rear Fan 0") || !strcmp(l, "Rear Fan") || + !strcmp(l, "Rear fan 0") || !strcmp(l, "Rear fan")) + fct->ctrl.name = "cpu-rear-fan-0"; + else if (!strcmp(l, "Rear Fan 1") || !strcmp(l, "Rear fan 1")) + fct->ctrl.name = "cpu-rear-fan-1"; + else if (!strcmp(l, "Front Fan 0") || !strcmp(l, "Front Fan") || + !strcmp(l, "Front fan 0") || !strcmp(l, "Front fan")) + fct->ctrl.name = "cpu-front-fan-0"; + else if (!strcmp(l, "Front Fan 1") || !strcmp(l, "Front fan 1")) + fct->ctrl.name = "cpu-front-fan-1"; + else if (!strcmp(l, "Slots Fan") || !strcmp(l, "Slots fan")) + fct->ctrl.name = "slots-fan"; + else if (!strcmp(l, "Drive Bay") || !strcmp(l, "Drive bay")) + fct->ctrl.name = "drive-bay-fan"; + + /* Names used on iMac models */ + if (!strcmp(l, "System Fan") || !strcmp(l, "System fan")) + fct->ctrl.name = "system-fan"; + else if (!strcmp(l, "CPU Fan") || !strcmp(l, "CPU fan")) + fct->ctrl.name = "cpu-fan"; + else if (!strcmp(l, "Hard Drive") || !strcmp(l, "Hard drive")) + fct->ctrl.name = "drive-bay-fan"; + + /* Unrecognized fan, bail out */ + if (fct->ctrl.name == NULL) + goto fail; + + /* Get min & max values*/ + v = (s32 *)get_property(node, "min-value", NULL); + if (v == NULL) + goto fail; + fct->min = *v; + v = (s32 *)get_property(node, "max-value", NULL); + if (v == NULL) + goto fail; + fct->max = *v; + + /* Get "reg" value */ + reg = (u32 *)get_property(node, "reg", NULL); + if (reg == NULL) + goto fail; + fct->reg = *reg; + + if (wf_register_control(&fct->ctrl)) + goto fail; + + return fct; + fail: + kfree(fct); + return NULL; +} + + +static int __init smu_controls_init(void) +{ + struct device_node *smu, *fans, *fan; + + if (!smu_present()) + return -ENODEV; + + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return -ENODEV; + + /* Look for 
RPM fans */ + for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + if (!strcmp(fans->name, "rpm-fans")) + break; + for (fan = NULL; + fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + struct smu_fan_control *fct; + + fct = smu_fan_create(fan, 0); + if (fct == NULL) { + printk(KERN_WARNING "windfarm: Failed to create SMU " + "RPM fan %s\n", fan->name); + continue; + } + list_add(&fct->link, &smu_fans); + } + of_node_put(fans); + + + /* Look for PWM fans */ + for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + if (!strcmp(fans->name, "pwm-fans")) + break; + for (fan = NULL; + fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + struct smu_fan_control *fct; + + fct = smu_fan_create(fan, 1); + if (fct == NULL) { + printk(KERN_WARNING "windfarm: Failed to create SMU " + "PWM fan %s\n", fan->name); + continue; + } + list_add(&fct->link, &smu_fans); + } + of_node_put(fans); + of_node_put(smu); + + return 0; +} + +static void __exit smu_controls_exit(void) +{ + struct smu_fan_control *fct; + + while (!list_empty(&smu_fans)) { + fct = list_entry(smu_fans.next, struct smu_fan_control, link); + list_del(&fct->link); + wf_unregister_control(&fct->ctrl); + } +} + + +module_init(smu_controls_init); +module_exit(smu_controls_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("SMU control objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + Index: linux-work/drivers/macintosh/windfarm_smu_sensors.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_smu_sensors.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,471 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.2" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) 
printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +/* + * Various SMU "partitions" calibration objects for which we + * keep pointers here for use by bits & pieces of the driver + */ +static struct smu_sdbp_cpuvcp *cpuvcp; +static int cpuvcp_version; +static struct smu_sdbp_cpudiode *cpudiode; +static struct smu_sdbp_slotspow *slotspow; +static u8 *debugswitches; + +/* + * SMU basic sensors objects + */ + +static LIST_HEAD(smu_ads); + +struct smu_ad_sensor { + struct list_head link; + u32 reg; /* index in SMU */ + struct wf_sensor sens; +}; +#define to_smu_ads(c) container_of(c, struct smu_ad_sensor, sens) + +static void smu_ads_release(struct wf_sensor *sr) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + + kfree(ads); +} + +static int smu_read_adc(u8 id, s32 *value) +{ + struct smu_simple_cmd cmd; + DECLARE_COMPLETION(comp); + int rc; + + rc = smu_queue_simple(&cmd, SMU_CMD_READ_ADC, 1, + smu_done_complete, &comp, id); + if (rc) + return rc; + wait_for_completion(&comp); + if (cmd.cmd.status != 0) + return cmd.cmd.status; + if (cmd.cmd.reply_len != 2) { + printk(KERN_ERR "winfarm: read ADC 0x%x returned %d bytes !\n", + id, cmd.cmd.reply_len); + return -EIO; + } + *value = *((u16 *)cmd.buffer); + return 0; +} + +static int smu_cputemp_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + int rc; + s32 val; + s64 scaled; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU temp failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s64)(((u64)val) * (u64)cpudiode->m_value); + scaled >>= 3; + scaled += ((s64)cpudiode->b_value) << 9; + *value = (s32)(scaled << 1); + + return 0; +} + +static int smu_cpuamp_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU 
current failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)cpuvcp->curr_scale); + scaled += (s32)cpuvcp->curr_offset; + *value = scaled << 4; + + return 0; +} + +static int smu_cpuvolt_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU voltage failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)cpuvcp->volt_scale); + scaled += (s32)cpuvcp->volt_offset; + *value = scaled << 4; + + return 0; +} + +static int smu_slotspow_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read slots power failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)slotspow->pow_scale); + scaled += (s32)slotspow->pow_offset; + *value = scaled << 4; + + return 0; +} + + +static struct wf_sensor_ops smu_cputemp_ops = { + .get_value = smu_cputemp_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_cpuamp_ops = { + .get_value = smu_cpuamp_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_cpuvolt_ops = { + .get_value = smu_cpuvolt_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_slotspow_ops = { + .get_value = smu_slotspow_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; + + +static struct smu_ad_sensor *smu_ads_create(struct device_node *node) +{ + struct smu_ad_sensor *ads; + char *c, *l; + u32 *v; + + ads = kmalloc(sizeof(struct smu_ad_sensor), GFP_KERNEL); + if (ads == NULL) + return NULL; + c = 
(char *)get_property(node, "device_type", NULL); + l = (char *)get_property(node, "location", NULL); + if (c == NULL || l == NULL) + goto fail; + + /* We currently pick the sensors based on the OF name and location + * properties, while Darwin uses the sensor-id's. + * The problem with the IDs is that they are model specific while it + * looks like apple has been doing a reasonably good job at keeping + * the names and locations consistents so I'll stick with the names + * and locations for now. + */ + if (!strcmp(c, "temp-sensor") && + !strcmp(l, "CPU T-Diode")) { + ads->sens.ops = &smu_cputemp_ops; + ads->sens.name = "cpu-temp"; + } else if (!strcmp(c, "current-sensor") && + !strcmp(l, "CPU Current")) { + ads->sens.ops = &smu_cpuamp_ops; + ads->sens.name = "cpu-current"; + } else if (!strcmp(c, "voltage-sensor") && + !strcmp(l, "CPU Voltage")) { + ads->sens.ops = &smu_cpuvolt_ops; + ads->sens.name = "cpu-voltage"; + } else if (!strcmp(c, "power-sensor") && + !strcmp(l, "Slots Power")) { + ads->sens.ops = &smu_slotspow_ops; + ads->sens.name = "slots-power"; + if (slotspow == NULL) { + DBG("wf: slotspow partition (%02x) not found\n", + SMU_SDB_SLOTSPOW_ID); + goto fail; + } + } else + goto fail; + + v = (u32 *)get_property(node, "reg", NULL); + if (v == NULL) + goto fail; + ads->reg = *v; + + if (wf_register_sensor(&ads->sens)) + goto fail; + return ads; + fail: + kfree(ads); + return NULL; +} + +/* + * SMU Power combo sensor object + */ + +struct smu_cpu_power_sensor { + struct list_head link; + struct wf_sensor *volts; + struct wf_sensor *amps; + int fake_volts : 1; + int quadratic : 1; + struct wf_sensor sens; +}; +#define to_smu_cpu_power(c) container_of(c, struct smu_cpu_power_sensor, sens) + +static struct smu_cpu_power_sensor *smu_cpu_power; + +static void smu_cpu_power_release(struct wf_sensor *sr) +{ + struct smu_cpu_power_sensor *pow = to_smu_cpu_power(sr); + + if (pow->volts) + wf_put_sensor(pow->volts); + if (pow->amps) + wf_put_sensor(pow->amps); + 
kfree(pow); +} + +/* + * Compute the CPU power reading. The current sensor is read first; when + * fake_volts is set the result is derived from the current alone, otherwise + * the voltage sensor is read too and (volts * amps) >> 16 is used, optionally + * followed by the quadratic correction built from cpuvcp->power_quads. + * Returns 0 on success or the non-zero code of the failing sensor read. + */ +static int smu_cpu_power_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_cpu_power_sensor *pow = to_smu_cpu_power(sr); + s32 volts, amps, power; + u64 tmps, tmpa, tmpb; + int rc; + + /* The reading must land in 'amps': it is used by both paths below */ + rc = pow->amps->ops->get_value(pow->amps, &amps); + if (rc) + return rc; + + if (pow->fake_volts) { + *value = amps * 12 - 0x30000; + return 0; + } + + rc = pow->volts->ops->get_value(pow->volts, &volts); + if (rc) + return rc; + + power = (s32)((((u64)volts) * ((u64)amps)) >> 16); + if (!pow->quadratic) { + *value = power; + return 0; + } + tmps = (((u64)power) * ((u64)power)) >> 16; + tmpa = ((u64)cpuvcp->power_quads[0]) * tmps; + tmpb = ((u64)cpuvcp->power_quads[1]) * ((u64)power); + *value = (tmpa >> 28) + (tmpb >> 28) + (cpuvcp->power_quads[2] >> 12); + + return 0; +} + +static struct wf_sensor_ops smu_cpu_power_ops = { + .get_value = smu_cpu_power_get, + .release = smu_cpu_power_release, + .owner = THIS_MODULE, +}; + + +static struct smu_cpu_power_sensor * +smu_cpu_power_create(struct wf_sensor *volts, struct wf_sensor *amps) +{ + struct smu_cpu_power_sensor *pow; + + pow = kmalloc(sizeof(struct smu_cpu_power_sensor), GFP_KERNEL); + if (pow == NULL) + return NULL; + pow->sens.ops = &smu_cpu_power_ops; + pow->sens.name = "cpu-power"; + + wf_get_sensor(volts); + pow->volts = volts; + wf_get_sensor(amps); + pow->amps = amps; + + /* Some early machines need a faked voltage */ + if (debugswitches && ((*debugswitches) & 0x80)) { + printk(KERN_INFO "windfarm: CPU Power sensor using faked" + " voltage !\n"); + pow->fake_volts = 1; + } else + pow->fake_volts = 0; + + /* Try to use quadratic transforms on PowerMac8,1 and 9,1 for now, + * I yet have to figure out what's up with 8,2 and will have to + * adjust for later, unless we can 100% trust the SDB partition...
+ */ + if ((machine_is_compatible("PowerMac8,1") || + machine_is_compatible("PowerMac8,2") || + machine_is_compatible("PowerMac9,1")) && + cpuvcp_version >= 2) { + pow->quadratic = 1; + DBG("windfarm: CPU Power using quadratic transform\n"); + } else + pow->quadratic = 0; + + if (wf_register_sensor(&pow->sens)) + goto fail; + return pow; + fail: + kfree(pow); + return NULL; +} + +static int smu_fetch_param_partitions(void) +{ + struct smu_sdbp_header *hdr; + + /* Get CPU voltage/current/power calibration data */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUVCP_ID, NULL); + if (hdr == NULL) { + DBG("wf: cpuvcp partition (%02x) not found\n", + SMU_SDB_CPUVCP_ID); + return -ENODEV; + } + cpuvcp = (struct smu_sdbp_cpuvcp *)&hdr[1]; + /* Keep version around */ + cpuvcp_version = hdr->version; + + /* Get CPU diode calibration data */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUDIODE_ID, NULL); + if (hdr == NULL) { + DBG("wf: cpudiode partition (%02x) not found\n", + SMU_SDB_CPUDIODE_ID); + return -ENODEV; + } + cpudiode = (struct smu_sdbp_cpudiode *)&hdr[1]; + + /* Get slots power calibration data if any */ + hdr = smu_get_sdb_partition(SMU_SDB_SLOTSPOW_ID, NULL); + if (hdr != NULL) + slotspow = (struct smu_sdbp_slotspow *)&hdr[1]; + + /* Get debug switches if any */ + hdr = smu_get_sdb_partition(SMU_SDB_DEBUG_SWITCHES_ID, NULL); + if (hdr != NULL) + debugswitches = (u8 *)&hdr[1]; + + return 0; +} + +static int __init smu_sensors_init(void) +{ + struct device_node *smu, *sensors, *s; + struct smu_ad_sensor *volt_sensor = NULL, *curr_sensor = NULL; + int rc; + + if (!smu_present()) + return -ENODEV; + + /* Get parameters partitions */ + rc = smu_fetch_param_partitions(); + if (rc) + return rc; + + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return -ENODEV; + + /* Look for sensors subdir */ + for (sensors = NULL; + (sensors = of_get_next_child(smu, sensors)) != NULL;) + if (!strcmp(sensors->name, "sensors")) + break; + + of_node_put(smu); + + /* Create basic 
sensors */ + for (s = NULL; + sensors && (s = of_get_next_child(sensors, s)) != NULL;) { + struct smu_ad_sensor *ads; + + ads = smu_ads_create(s); + if (ads == NULL) + continue; + list_add(&ads->link, &smu_ads); + /* keep track of cpu voltage & current */ + if (!strcmp(ads->sens.name, "cpu-voltage")) + volt_sensor = ads; + else if (!strcmp(ads->sens.name, "cpu-current")) + curr_sensor = ads; + } + + of_node_put(sensors); + + /* Create CPU power sensor if possible */ + if (volt_sensor && curr_sensor) + smu_cpu_power = smu_cpu_power_create(&volt_sensor->sens, + &curr_sensor->sens); + + return 0; +} + +static void __exit smu_sensors_exit(void) +{ + struct smu_ad_sensor *ads; + + /* dispose of power sensor */ + if (smu_cpu_power) + wf_unregister_sensor(&smu_cpu_power->sens); + + /* dispose of basic sensors */ + while (!list_empty(&smu_ads)) { + ads = list_entry(smu_ads.next, struct smu_ad_sensor, link); + list_del(&ads->link); + wf_unregister_sensor(&ads->sens); + } +} + + +module_init(smu_sensors_init); +module_exit(smu_sensors_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("SMU sensor objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + Index: linux-work/drivers/macintosh/windfarm_smu.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_smu.c 2005-10-04 15:19:19.000000000 +1000 @@ -0,0 +1,1221 @@ +/* + * Windfarm PowerMac thermal control. SMU based machines control loops + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. 
+ * + * The algorithm used is the PID control algorithm, used the same + * way the published Darwin code does, using the same values that + * are present in the Darwin 8.2 snapshot property lists (note however + * that none of the code has been re-used, it's a complete re-implementation) + * + * The various control loops found in Darwin config file are: + * + * PowerMac8,1 and PowerMac8,2 + * =========================== + * + * System Fans control loop. Different based on models. In addition to the + * usual PID algorithm, the control loop gets 2 additional pairs of linear + * scaling factors (scale/offsets) expressed as 4.12 fixed point values + * (signed offset, unsigned scale) + * + * The targets are modified as follows: + * - the linked control (second control) gets the target value as-is + * (typically the drive fan) + * - the main control (first control) gets the target value scaled with + * the first pair of factors, and is then modified as below + * - the value of the target of the CPU Fan control loop is retrieved, + * scaled with the second pair of factors, and the max of that and + * the scaled target is applied to the main control.
+ * + * # model_id: 2 + * controls : system-fan, drive-bay-fan + * sensors : hd-temp + * PID params : G_d = 0x15400000 + * G_p = 0x00200000 + * G_r = 0x000002fd + * History = 2 entries + * Input target = 0x3a0000 + * Interval = 5s + * linear-factors : offset = 0xff38 scale = 0x0ccd + * offset = 0x0208 scale = 0x07ae + * + * # model_id: 3 + * controls : system-fan, drive-bay-fan + * sensors : hd-temp + * PID params : G_d = 0x08e00000 + * G_p = 0x00566666 + * G_r = 0x0000072b + * History = 2 entries + * Input target = 0x350000 + * Interval = 5s + * linear-factors : offset = 0xff38 scale = 0x0ccd + * offset = 0x0000 scale = 0x0000 + * + * # model_id: 5 + * controls : system-fan + * sensors : hd-temp + * PID params : G_d = 0x15400000 + * G_p = 0x00233333 + * G_r = 0x000002fd + * History = 2 entries + * Input target = 0x3a0000 + * Interval = 5s + * linear-factors : offset = 0x0000 scale = 0x1000 + * offset = 0x0091 scale = 0x0bae + * + * CPU Fan control loop. The loop is identical for all models. It + * has an additional pair of scaling factors. This is used to scale the + * system fans control loop target result (the one before it gets scaled + * by the System Fans control loop itself). Then, the max value of the + * calculated target value and system fan value is sent to the fans + * + * controls : cpu-fan + * sensors : cpu-temp cpu-power + * PID params : From SMU sdb partition + * linear-factors : offset = 0xfb50 scale = 0x1000 + * + * CPU Slew control loop. Not implemented. The cpufreq driver in linux is + * completely separate for now, though we could find a way to link it, either + * as a client reacting to overtemp notifications, or directly monitoring + * the CPU temperature + * + * WARNING ! The CPU control loop requires the CPU tmax for the current + * operating point. However, we currently are completely separated from + * the cpufreq driver and thus do not know what the current operating + * point is.
Fortunately, we also do not have any hardware supporting anything + * but operating point 0 at the moment, thus we just peek that value directly + * from the SDB partition. If we ever end up with actually slewing the system + * clock and thus changing operating points, we'll have to find a way to + * communicate with the CPU freq driver; + * + * PowerMac9,1 + * =========== + * + * Has 3 control loops: CPU fans is similar to PowerMac8,1 (though it doesn't + * try to play with other control loops fans). Drive bay is rather basic PID + * with one sensor and one fan. Slots area is a bit different as the Darwin + * driver is supposed to be capable of working in a special "AGP" mode which + * involves the presence of an AGP sensor and an AGP fan (possibly on the + * AGP card itself). I can't deal with that special mode as I don't have + * access to those additional sensor/fans for now (though ultimately, it would + * be possible to add sensor objects for them) so I'm only implementing the + * basic PCI slot control loop + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" +#include "windfarm_pid.h" + +#define VERSION "0.3" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) 
do { } while(0) +#endif + +/* define this to force CPU overtemp to 74 degree, useful for testing + * the overtemp code + */ +#undef HACKED_OVERTEMP + +/* Machine identification */ +#define MACHINE_PM81 81 /* PM81, PM82 */ +#define MACHINE_PM91 91 /* PM91 */ + +static int machine; /* machine */ +static int mach_model; /* machine model id */ + +static struct device *wf_dev; + +/* Controls & sensors */ +static struct wf_sensor *sensor_cpu_power; +static struct wf_sensor *sensor_cpu_temp; +static struct wf_sensor *sensor_hd_temp; +static struct wf_sensor *sensor_slots_power; +static struct wf_control *fan_cpu_main; +static struct wf_control *fan_cpu_second; +static struct wf_control *fan_cpu_third; +static struct wf_control *fan_hd; +static struct wf_control *fan_system; +static struct wf_control *fan_slots; +static struct wf_control *cpufreq_clamp; + +/* Set to kick the control loop into life */ +static int all_controls_ok, all_sensors_ok, started; + +/* Failure handling.. could be nicer */ +#define FAILURE_FAN 0x01 +#define FAILURE_SENSOR 0x02 +#define FAILURE_OVERTEMP 0x04 + +static unsigned int failure_state; +static int readjust, skipping; + +/* + * ****** System Fans Control Loop ****** + * + * (PowerMac8,1 and 8,2 only) + */ + +/* Parameters for the System Fans control loop. Parameters + * not in this table such as interval, history size, ... + * are common to all versions and thus hard coded for now. 
+ */ +struct wf_smu_sys_fans_param { + int model_id; + s32 itarget; + s32 gd, gp, gr; + + s16 offset0; + u16 scale0; + s16 offset1; + u16 scale1; +}; + +#define WF_SMU_SYS_FANS_INTERVAL 5 +#define WF_SMU_SYS_FANS_HISTORY_SIZE 2 + +/* State data used by the system fans control loop + * (MACHINE_PM81 only) + */ +struct wf_smu_sys_fans_state { + int ticks; + s32 sys_setpoint; + s32 hd_setpoint; + s16 offset0; + u16 scale0; + s16 offset1; + u16 scale1; + struct wf_pid_state pid; +}; + +/* Only 3 known configs */ +#define WF_SMU_SYS_FANS_NUM_CONFIGS 3 + +/* + * Configs for SMU System Fan control loop, one entry per known machine + * model ID. Each .model_id must be distinct and match the ID named in the + * entry's comment, otherwise the entry can never be selected. + */ +static struct wf_smu_sys_fans_param wf_smu_sys_all_params[] = { + /* Model ID 2 */ + { + .model_id = 2, + .itarget = 0x3a0000, + .gd = 0x15400000, + .gp = 0x00200000, + .gr = 0x000002fd, + .offset0 = 0xff38, + .scale0 = 0x0ccd, + .offset1 = 0x0208, + .scale1 = 0x07ae, + }, + /* Model ID 3 */ + { + .model_id = 3, + .itarget = 0x350000, + .gd = 0x08e00000, + .gp = 0x00566666, + .gr = 0x0000072b, + .offset0 = 0xff38, + .scale0 = 0x0ccd, + .offset1 = 0x0000, + .scale1 = 0x0000, + }, + /* Model ID 5 */ + { + .model_id = 5, + .itarget = 0x3a0000, + .gd = 0x15400000, + .gp = 0x00233333, + .gr = 0x000002fd, + .offset0 = 0x0000, + .scale0 = 0x1000, + .offset1 = 0x0091, + .scale1 = 0x0bae, + }, +}; + +static struct wf_smu_sys_fans_state *wf_smu_sys_fans; + +/* + * ****** CPU Fans Control Loop ****** + * + */ + + +#define WF_SMU_CPU_FANS_INTERVAL 1 +#define WF_SMU_CPU_FANS_MAX_HISTORY 16 +#define WF_SMU_CPU_FANS_SIBLING_SCALE 0x00001000 +#define WF_SMU_CPU_FANS_SIBLING_OFFSET 0xfffffb50 + +/* State data used by the cpu fans control loop + */ +struct wf_smu_cpu_fans_state { + int ticks; + s32 cpu_setpoint; + s32 scale; + s32 offset; + struct wf_cpu_pid_state pid; +}; + +static struct wf_smu_cpu_fans_state *wf_smu_cpu_fans; + + + +/* + * ****** Drive Fan Control Loop ****** + * + */ + +struct wf_smu_drive_fans_state { + int ticks; + s32 setpoint; + struct wf_pid_state pid; +}; +
+static struct wf_smu_drive_fans_state *wf_smu_drive_fans; + +/* + * ****** Slots Fan Control Loop ****** + * + */ + +struct wf_smu_slots_fans_state { + int ticks; + s32 setpoint; + struct wf_pid_state pid; +}; + +static struct wf_smu_slots_fans_state *wf_smu_slots_fans; + +/* + * ***** Implementation ***** + * + */ + +static void wf_smu_create_sys_fans(void) +{ + struct wf_smu_sys_fans_param *param = NULL; + struct wf_pid_param pid_param; + int i; + + /* First, locate the params for this model */ + for (i = 0; i < WF_SMU_SYS_FANS_NUM_CONFIGS; i++) + if (wf_smu_sys_all_params[i].model_id == mach_model) { + param = &wf_smu_sys_all_params[i]; + break; + } + /* No params found, put fans to max */ + if (param == NULL) { + printk(KERN_WARNING "windfarm: System fan config not found " + "for this machine model, max fan speed\n"); + goto fail; + } + + /* Alloc & initialize state */ + wf_smu_sys_fans = kmalloc(sizeof(struct wf_smu_sys_fans_state), + GFP_KERNEL); + if (wf_smu_sys_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_sys_fans->ticks = 1; + wf_smu_sys_fans->scale0 = param->scale0; + wf_smu_sys_fans->offset0 = param->offset0; + wf_smu_sys_fans->scale1 = param->scale1; + wf_smu_sys_fans->offset1 = param->offset1; + + /* Fill PID params */ + pid_param.gd = param->gd; + pid_param.gp = param->gp; + pid_param.gr = param->gr; + pid_param.interval = WF_SMU_SYS_FANS_INTERVAL; + pid_param.history_len = WF_SMU_SYS_FANS_HISTORY_SIZE; + pid_param.itarget = param->itarget; + pid_param.min = fan_system->ops->get_min(fan_system); + pid_param.max = fan_system->ops->get_max(fan_system); + if (fan_hd) { + pid_param.min =max(pid_param.min,fan_hd->ops->get_min(fan_hd)); + pid_param.max =min(pid_param.max,fan_hd->ops->get_max(fan_hd)); + } + wf_pid_init(&wf_smu_sys_fans->pid, &pid_param); + + DBG("wf: System Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + 
FIX32TOPRINT(pid_param.itarget), pid_param.min, pid_param.max); + return; + + fail: + + if (fan_system) + wf_control_set_max(fan_system); + if (fan_hd) + wf_control_set_max(fan_hd); +} + +static void wf_smu_sys_fans_tick(struct wf_smu_sys_fans_state *st) +{ + s32 new_setpoint, temp, scaled, cputarget; + int rc; + + if (--st->ticks != 0) { + if (readjust) + goto readjust; + return; + } + st->ticks = WF_SMU_SYS_FANS_INTERVAL; + + rc = sensor_hd_temp->ops->get_value(sensor_hd_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: HD temp sensor error %d\n", + rc); + failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: System Fans tick ! HD temp: %d.%03d\n", + FIX32TOPRINT(temp)); + + if (temp > (st->pid.param.itarget + 0x50000)) + failure_state |= FAILURE_OVERTEMP; + + new_setpoint = wf_pid_run(&st->pid, temp); + + DBG("wf_smu: new_setpoint: %d RPM\n", (int)new_setpoint); + + scaled = ((((s64)new_setpoint) * (s64)st->scale0) >> 12) + st->offset0; + + DBG("wf_smu: scaled setpoint: %d RPM\n", (int)scaled); + + cputarget = wf_smu_cpu_fans ? 
wf_smu_cpu_fans->pid.target : 0; + cputarget = ((((s64)cputarget) * (s64)st->scale1) >> 12) + st->offset1; + scaled = max(scaled, cputarget); + scaled = max(scaled, st->pid.param.min); + scaled = min(scaled, st->pid.param.max); + + DBG("wf_smu: adjusted setpoint: %d RPM\n", (int)scaled); + + if (st->sys_setpoint == scaled && new_setpoint == st->hd_setpoint) + return; + st->sys_setpoint = scaled; + st->hd_setpoint = new_setpoint; + readjust: + if (fan_system && failure_state == 0) { + rc = fan_system->ops->set_value(fan_system, st->sys_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: Sys fan error %d\n", + rc); + failure_state |= FAILURE_FAN; + } + } + if (fan_hd && failure_state == 0) { + rc = fan_hd->ops->set_value(fan_hd, st->hd_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: HD fan error %d\n", + rc); + failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_cpu_fans(void) +{ + struct wf_cpu_pid_param pid_param; + struct smu_sdbp_header *hdr; + struct smu_sdbp_cpupiddata *piddata; + struct smu_sdbp_fvt *fvt; + s32 tmax, tdelta, maxpow, powadj; + + /* First, locate the PID params in SMU SBD */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); + if (hdr == 0) { + printk(KERN_WARNING "windfarm: CPU PID fan config not found " + "max fan speed\n"); + goto fail; + } + piddata = (struct smu_sdbp_cpupiddata *)&hdr[1]; + + /* Get the FVT params for operating point 0 (the only supported one + * for now) in order to get tmax + */ + hdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); + if (hdr) { + fvt = (struct smu_sdbp_fvt *)&hdr[1]; + tmax = ((s32)fvt->maxtemp) << 16; + } else + tmax = 0x5e0000; /* 94 degree default */ + + /* Alloc & initialize state */ + wf_smu_cpu_fans = kmalloc(sizeof(struct wf_smu_cpu_fans_state), + GFP_KERNEL); + if (wf_smu_cpu_fans == NULL) + goto fail; + wf_smu_cpu_fans->ticks = 1; + + if (machine == MACHINE_PM81) { + wf_smu_cpu_fans->scale = WF_SMU_CPU_FANS_SIBLING_SCALE; + wf_smu_cpu_fans->offset = 
WF_SMU_CPU_FANS_SIBLING_OFFSET; + } + + /* Fill PID params */ + pid_param.interval = WF_SMU_CPU_FANS_INTERVAL; + pid_param.history_len = piddata->history_len; + if (pid_param.history_len > WF_CPU_PID_MAX_HISTORY) { + printk(KERN_WARNING "windfarm: History size overflow on " + "CPU control loop (%d)\n", piddata->history_len); + pid_param.history_len = WF_CPU_PID_MAX_HISTORY; + } + pid_param.gd = piddata->gd; + pid_param.gp = piddata->gp; + pid_param.gr = piddata->gr / pid_param.history_len; + + tdelta = ((s32)piddata->target_temp_delta) << 16; + maxpow = ((s32)piddata->max_power) << 16; + powadj = ((s32)piddata->power_adj) << 16; + + pid_param.tmax = tmax; + pid_param.ttarget = tmax - tdelta; + pid_param.pmaxadj = maxpow - powadj; + + pid_param.min = fan_cpu_main->ops->get_min(fan_cpu_main); + pid_param.max = fan_cpu_main->ops->get_max(fan_cpu_main); + + wf_cpu_pid_init(&wf_smu_cpu_fans->pid, &pid_param); + + DBG("wf: CPU Fan control initialized.\n"); + DBG(" ttarged=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(pid_param.ttarget), FIX32TOPRINT(pid_param.tmax), + pid_param.min, pid_param.max); + + return; + + fail: + printk(KERN_WARNING "windfarm: CPU fan config not found\n" + "for this machine model, max fan speed\n"); + + if (cpufreq_clamp) + wf_control_set_max(cpufreq_clamp); + if (fan_cpu_main) + wf_control_set_max(fan_cpu_main); +} + +static void wf_smu_cpu_fans_tick(struct wf_smu_cpu_fans_state *st) +{ + s32 new_setpoint, temp, power, systarget; + int rc; + + if (--st->ticks != 0) { + if (readjust) + goto readjust; + return; + } + st->ticks = WF_SMU_CPU_FANS_INTERVAL; + + rc = sensor_cpu_temp->ops->get_value(sensor_cpu_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: CPU temp sensor error %d\n", + rc); + failure_state |= FAILURE_SENSOR; + return; + } + + rc = sensor_cpu_power->ops->get_value(sensor_cpu_power, &power); + if (rc) { + printk(KERN_WARNING "windfarm: CPU power sensor error %d\n", + rc); + failure_state |= 
FAILURE_SENSOR; + return; + } + + DBG("wf_smu: CPU Fans tick ! CPU temp: %d.%03d, power: %d.%03d\n", + FIX32TOPRINT(temp), FIX32TOPRINT(power)); + +#ifdef HACKED_OVERTEMP + if (temp > 0x4a0000) + failure_state |= FAILURE_OVERTEMP; +#else + if (temp > st->pid.param.tmax) + failure_state |= FAILURE_OVERTEMP; +#endif + new_setpoint = wf_cpu_pid_run(&st->pid, power, temp); + + DBG("wf_smu: new_setpoint: %d RPM\n", (int)new_setpoint); + + if (machine == MACHINE_PM81) { + systarget = wf_smu_sys_fans ? wf_smu_sys_fans->pid.target : 0; + systarget = ((((s64)systarget) * (s64)st->scale) >> 12) + + st->offset; + new_setpoint = max(new_setpoint, systarget); + new_setpoint = max(new_setpoint, st->pid.param.min); + new_setpoint = min(new_setpoint, st->pid.param.max); + + DBG("wf_smu: adjusted setpoint: %d RPM\n", (int)new_setpoint); + } + if (st->cpu_setpoint == new_setpoint) + return; + st->cpu_setpoint = new_setpoint; + readjust: + if (fan_cpu_main && failure_state == 0) { + rc = fan_cpu_main->ops->set_value(fan_cpu_main, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU main fan" + " error %d\n", rc); + failure_state |= FAILURE_FAN; + } + } + if (fan_cpu_second && failure_state == 0) { + rc = fan_cpu_second->ops->set_value(fan_cpu_second, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU second fan" + " error %d\n", rc); + failure_state |= FAILURE_FAN; + } + } + if (fan_cpu_third && failure_state == 0) { + rc = fan_cpu_main->ops->set_value(fan_cpu_third, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU third fan" + " error %d\n", rc); + failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_drive_fans(void) +{ + struct wf_pid_param param = { + .interval = 5, + .history_len = 2, + .gd = 0x01e00000, + .gp = 0x00500000, + .gr = 0x00000000, + .itarget = 0x00200000, + }; + + /* Alloc & initialize state */ + wf_smu_drive_fans = kmalloc(sizeof(struct wf_smu_drive_fans_state), + GFP_KERNEL); + if 
(wf_smu_drive_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_drive_fans->ticks = 1; + + /* Fill PID params */ + param.additive = (fan_hd->type == WF_CONTROL_RPM_FAN); + param.min = fan_hd->ops->get_min(fan_hd); + param.max = fan_hd->ops->get_max(fan_hd); + wf_pid_init(&wf_smu_drive_fans->pid, ¶m); + + DBG("wf: Drive Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(param.itarget), param.min, param.max); + return; + + fail: + if (fan_hd) + wf_control_set_max(fan_hd); +} + +static void wf_smu_drive_fans_tick(struct wf_smu_drive_fans_state *st) +{ + s32 new_setpoint, temp; + int rc; + + if (--st->ticks != 0) { + if (readjust) + goto readjust; + return; + } + st->ticks = st->pid.param.interval; + + rc = sensor_hd_temp->ops->get_value(sensor_hd_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: HD temp sensor error %d\n", + rc); + failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: Drive Fans tick ! 
HD temp: %d.%03d\n", + FIX32TOPRINT(temp)); + + if (temp > (st->pid.param.itarget + 0x50000)) + failure_state |= FAILURE_OVERTEMP; + + new_setpoint = wf_pid_run(&st->pid, temp); + + DBG("wf_smu: new_setpoint: %d\n", (int)new_setpoint); + + if (st->setpoint == new_setpoint) + return; + st->setpoint = new_setpoint; + readjust: + if (fan_hd && failure_state == 0) { + rc = fan_hd->ops->set_value(fan_hd, st->setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: HD fan error %d\n", + rc); + failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_slots_fans(void) +{ + struct wf_pid_param param = { + .interval = 1, + .history_len = 8, + .gd = 0x00000000, + .gp = 0x00000000, + .gr = 0x00020000, + .itarget = 0x00000000 + }; + + /* Alloc & initialize state */ + wf_smu_slots_fans = kmalloc(sizeof(struct wf_smu_slots_fans_state), + GFP_KERNEL); + if (wf_smu_slots_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_slots_fans->ticks = 1; + + /* Fill PID params */ + param.additive = (fan_slots->type == WF_CONTROL_RPM_FAN); + param.min = fan_slots->ops->get_min(fan_slots); + param.max = fan_slots->ops->get_max(fan_slots); + wf_pid_init(&wf_smu_slots_fans->pid, ¶m); + + DBG("wf: Slots Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(param.itarget), param.min, param.max); + return; + + fail: + if (fan_slots) + wf_control_set_max(fan_slots); +} + +static void wf_smu_slots_fans_tick(struct wf_smu_slots_fans_state *st) +{ + s32 new_setpoint, power; + int rc; + + if (--st->ticks != 0) { + if (readjust) + goto readjust; + return; + } + st->ticks = st->pid.param.interval; + + rc = sensor_slots_power->ops->get_value(sensor_slots_power, &power); + if (rc) { + printk(KERN_WARNING "windfarm: Slots power sensor error %d\n", + rc); + failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: Slots Fans tick ! 
Slots power: %d.%03d\n", + FIX32TOPRINT(power)); + +#if 0 /* Check what makes a good overtemp condition */ + if (power > (st->pid.param.itarget + 0x50000)) + failure_state |= FAILURE_OVERTEMP; +#endif + + new_setpoint = wf_pid_run(&st->pid, power); + + DBG("wf_smu: new_setpoint: %d\n", (int)new_setpoint); + + if (st->setpoint == new_setpoint) + return; + st->setpoint = new_setpoint; + readjust: + if (fan_slots && failure_state == 0) { + rc = fan_slots->ops->set_value(fan_slots, st->setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: Slots fan error %d\n", + rc); + failure_state |= FAILURE_FAN; + } + } +} + + +/* + * ****** Attributes ****** + * + */ + +#define BUILD_SHOW_FUNC_FIX(name, data) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + ssize_t r; \ + s32 val = 0; \ + data->ops->get_value(data, &val); \ + r = sprintf(buf, "%d.%03d", FIX32TOPRINT(val)); \ + return r; \ +} \ +static DEVICE_ATTR(name,S_IRUGO,show_##name, NULL); + + +#define BUILD_SHOW_FUNC_INT(name, data) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + s32 val = 0; \ + data->ops->get_value(data, &val); \ + return sprintf(buf, "%d", val); \ +} \ +static DEVICE_ATTR(name,S_IRUGO,show_##name, NULL); + +BUILD_SHOW_FUNC_INT(cpu_fan, fan_cpu_main); +BUILD_SHOW_FUNC_INT(sys_fan, fan_system); +BUILD_SHOW_FUNC_INT(hd_fan, fan_hd); +BUILD_SHOW_FUNC_INT(slots_fan, fan_slots); + +BUILD_SHOW_FUNC_FIX(cpu_temp, sensor_cpu_temp); +BUILD_SHOW_FUNC_FIX(cpu_power, sensor_cpu_power); +BUILD_SHOW_FUNC_FIX(hd_temp, sensor_hd_temp); +BUILD_SHOW_FUNC_FIX(slots_power, sensor_slots_power); + +/* + * ****** Setup / Init / Misc ... 
****** + * + */ + +static void wf_smu_tick(void) +{ + unsigned int last_failure = failure_state; + unsigned int new_failure; + + if (!started) { + DBG("wf: creating control loops !\n"); + if (machine == MACHINE_PM81) { + wf_smu_create_sys_fans(); + wf_smu_create_cpu_fans(); + } else if (machine == MACHINE_PM91) { + wf_smu_create_drive_fans(); + wf_smu_create_slots_fans(); + wf_smu_create_cpu_fans(); + } + started = 1; + } + + /* Skipping ticks */ + if (skipping && --skipping) + return; + + failure_state = 0; + if (wf_smu_sys_fans) + wf_smu_sys_fans_tick(wf_smu_sys_fans); + if (wf_smu_drive_fans) + wf_smu_drive_fans_tick(wf_smu_drive_fans); + if (wf_smu_slots_fans) + wf_smu_slots_fans_tick(wf_smu_slots_fans); + if (wf_smu_cpu_fans) + wf_smu_cpu_fans_tick(wf_smu_cpu_fans); + + readjust = 0; + new_failure = failure_state & ~last_failure; + + /* If entering failure mode, clamp cpufreq and ramp all + * fans to full speed. + */ + if (failure_state && !last_failure) { + if (cpufreq_clamp) + wf_control_set_max(cpufreq_clamp); + if (fan_system) + wf_control_set_max(fan_system); + if (fan_cpu_main) + wf_control_set_max(fan_cpu_main); + if (fan_cpu_second) + wf_control_set_max(fan_cpu_second); + if (fan_cpu_third) + wf_control_set_max(fan_cpu_third); + if (fan_hd) + wf_control_set_max(fan_hd); + if (fan_slots) + wf_control_set_max(fan_slots); + } + + /* If leaving failure mode, unclamp cpufreq and readjust + * all fans on next iteration + */ + if (!failure_state && last_failure) { + if (cpufreq_clamp) + wf_control_set_min(cpufreq_clamp); + readjust = 1; + } + + /* Overtemp condition detected, notify and start skipping a couple + * ticks to let the temperature go down + */ + if (new_failure & FAILURE_OVERTEMP) { + wf_set_overtemp(); + skipping = 2; + } + + /* We only clear the overtemp condition if overtemp is cleared + * _and_ no other failure is present. 
Since a sensor error will + * clear the overtemp condition (can't measure temperature) at + * the control loop levels, but we don't want to keep it clear + * here in this case + */ + if (new_failure == 0 && last_failure & FAILURE_OVERTEMP) + wf_clear_overtemp(); +} + +static void wf_smu_new_control81(struct wf_control *ct) +{ + if (all_controls_ok) + return; + + if (fan_cpu_main == NULL && !strcmp(ct->name, "cpu-fan")) { + if (wf_get_control(ct) == 0) { + fan_cpu_main = ct; + device_create_file(wf_dev, &dev_attr_cpu_fan); + } + } + + if (fan_system == NULL && !strcmp(ct->name, "system-fan")) { + if (wf_get_control(ct) == 0) { + fan_system = ct; + device_create_file(wf_dev, &dev_attr_sys_fan); + } + } + + if (cpufreq_clamp == NULL && !strcmp(ct->name, "cpufreq-clamp")) { + if (wf_get_control(ct) == 0) + cpufreq_clamp = ct; + } + + /* Darwin property list says the HD fan is only for model ID + * 0, 1, 2 and 3 + */ + + if (mach_model > 3) { + if (fan_system && fan_cpu_main && cpufreq_clamp) + all_controls_ok = 1; + return; + } + + if (fan_hd == NULL && !strcmp(ct->name, "drive-bay-fan")) { + if (wf_get_control(ct) == 0) { + fan_hd = ct; + device_create_file(wf_dev, &dev_attr_hd_fan); + } + } + + if (fan_system && fan_hd && fan_cpu_main && cpufreq_clamp) + all_controls_ok = 1; +} + +static void wf_smu_new_control91(struct wf_control *ct) +{ + if (all_controls_ok) + return; + + if (fan_cpu_main == NULL && !strcmp(ct->name, "cpu-rear-fan-0")) { + if (wf_get_control(ct) == 0) { + fan_cpu_main = ct; + device_create_file(wf_dev, &dev_attr_cpu_fan); + } + } + + if (fan_cpu_second == NULL && !strcmp(ct->name, "cpu-rear-fan-1")) { + if (wf_get_control(ct) == 0) + fan_cpu_second = ct; + } + + if (fan_cpu_third == NULL && !strcmp(ct->name, "cpu-front-fan-0")) { + if (wf_get_control(ct) == 0) + fan_cpu_third = ct; + } + + if (cpufreq_clamp == NULL && !strcmp(ct->name, "cpufreq-clamp")) { + if (wf_get_control(ct) == 0) + cpufreq_clamp = ct; + } + + if (fan_hd == NULL && 
!strcmp(ct->name, "drive-bay-fan")) { + if (wf_get_control(ct) == 0) { + fan_hd = ct; + device_create_file(wf_dev, &dev_attr_hd_fan); + } + } + + if (fan_slots == NULL && !strcmp(ct->name, "slots-fan")) { + if (wf_get_control(ct) == 0) { + fan_slots = ct; + device_create_file(wf_dev, &dev_attr_slots_fan); + } + } + + if (fan_cpu_main && (fan_cpu_second || fan_cpu_third) && fan_hd && + fan_slots && cpufreq_clamp) + all_controls_ok = 1; +} + +static void wf_smu_new_sensor(struct wf_sensor *sr) +{ + if (all_sensors_ok) + return; + + if (sensor_cpu_power == NULL && !strcmp(sr->name, "cpu-power")) { + if (wf_get_sensor(sr) == 0) { + sensor_cpu_power = sr; + device_create_file(wf_dev, &dev_attr_cpu_power); + } + } + + if (sensor_cpu_temp == NULL && !strcmp(sr->name, "cpu-temp")) { + if (wf_get_sensor(sr) == 0) { + sensor_cpu_temp = sr; + device_create_file(wf_dev, &dev_attr_cpu_temp); + } + } + + if (sensor_hd_temp == NULL && !strcmp(sr->name, "hd-temp")) { + if (wf_get_sensor(sr) == 0) { + sensor_hd_temp = sr; + device_create_file(wf_dev, &dev_attr_hd_temp); + } + } + + if (sensor_slots_power == NULL && !strcmp(sr->name, "slots-power")) { + if (wf_get_sensor(sr) == 0) { + sensor_slots_power = sr; + device_create_file(wf_dev, &dev_attr_slots_power); + } + } + + if (machine == MACHINE_PM81 && sensor_cpu_power && + sensor_cpu_temp && sensor_hd_temp) + all_sensors_ok = 1; + + if (machine == MACHINE_PM91 && sensor_cpu_power && + sensor_cpu_temp && sensor_hd_temp && sensor_slots_power) + all_sensors_ok = 1; +} + + +static int wf_smu_notify(struct notifier_block *self, + unsigned long event, void *data) +{ + switch(event) { + case WF_EVENT_NEW_CONTROL: + DBG("wf: new control %s detected\n", + ((struct wf_control *)data)->name); + if (machine == MACHINE_PM81) + wf_smu_new_control81(data); + else + wf_smu_new_control91(data); + readjust = 1; + break; + case WF_EVENT_NEW_SENSOR: + DBG("wf: new sensor %s detected\n", + ((struct wf_sensor *)data)->name); + wf_smu_new_sensor(data); 
+ break; + case WF_EVENT_TICK: + if (all_controls_ok && all_sensors_ok) + wf_smu_tick(); + }; + + return 0; +} + +static struct notifier_block events = { + .notifier_call = wf_smu_notify, +}; + +static int wf_init_pm81(void) +{ + struct smu_sdbp_header *hdr; + + machine = MACHINE_PM81; + + hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); + if (hdr != 0) { + struct smu_sdbp_sensortree *st = + (struct smu_sdbp_sensortree *)&hdr[1]; + mach_model = st->model_id; + } + + printk(KERN_INFO "windfarm: Initializing for iMacG5 model ID %d\n", + mach_model); + + return 0; +} + +static int wf_init_pm91(void) +{ + machine = MACHINE_PM91; + + printk(KERN_INFO "windfarm: Initializing for Desktop G5 model\n"); + + return 0; +} + +static int wf_smu_probe(struct device *ddev) +{ + wf_dev = ddev; + + wf_register_client(&events); + + return 0; +} + +static int wf_smu_remove(struct device *ddev) +{ + wf_unregister_client(&events); + + /* XXX We don't have yet a guarantee that our callback isn't + * in progress when returning from wf_unregister_client, so + * we add an arbitrary delay. I'll have to fix that in the core + */ + msleep(1000); + + /* Release all sensors */ + /* One more crappy race: I don't think we have any guarantee here + * that the attribute callback won't race with the sensor beeing + * disposed of, and I'm not 100% certain what best way to deal + * with that except by adding locks all over... I'll do that + * eventually but heh, who ever rmmod this module anyway ? 
+ */ + if (sensor_cpu_power) { + device_remove_file(wf_dev, &dev_attr_cpu_power); + wf_put_sensor(sensor_cpu_power); + } + if (sensor_cpu_temp) { + device_remove_file(wf_dev, &dev_attr_cpu_temp); + wf_put_sensor(sensor_cpu_temp); + } + if (sensor_hd_temp) { + device_remove_file(wf_dev, &dev_attr_hd_temp); + wf_put_sensor(sensor_hd_temp); + } + if (sensor_slots_power) { + device_remove_file(wf_dev, &dev_attr_slots_power); + wf_put_sensor(sensor_slots_power); + } + + /* Release all controls */ + if (fan_cpu_main) { + device_remove_file(wf_dev, &dev_attr_cpu_fan); + wf_put_control(fan_cpu_main); + } + if (fan_cpu_second) + wf_put_control(fan_cpu_second); + if (fan_cpu_third) + wf_put_control(fan_cpu_third); + if (fan_hd) { + device_remove_file(wf_dev, &dev_attr_hd_fan); + wf_put_control(fan_hd); + } + if (fan_system) { + device_remove_file(wf_dev, &dev_attr_sys_fan); + wf_put_control(fan_system); + } + if (fan_slots) { + device_remove_file(wf_dev, &dev_attr_slots_fan); + wf_put_control(fan_slots); + } + if (cpufreq_clamp) + wf_put_control(cpufreq_clamp); + + /* Destroy control loops state structures */ + if (wf_smu_sys_fans) + kfree(wf_smu_sys_fans); + if (wf_smu_slots_fans) + kfree(wf_smu_cpu_fans); + if (wf_smu_drive_fans) + kfree(wf_smu_cpu_fans); + if (wf_smu_cpu_fans) + kfree(wf_smu_cpu_fans); + + wf_dev = NULL; + + return 0; +} + +static struct device_driver wf_smu_driver = { + .name = "windfarm", + .bus = &platform_bus_type, + .probe = wf_smu_probe, + .remove = wf_smu_remove, +}; + + +static int __init wf_smu_init(void) +{ + int rc = -ENODEV; + + if (machine_is_compatible("PowerMac8,1") || + machine_is_compatible("PowerMac8,2")) + rc = wf_init_pm81(); + else if (machine_is_compatible("PowerMac9,1")) + rc = wf_init_pm91(); + + if (rc == 0) { +#ifdef MODULE + request_module("windfarm_smu_controls"); + request_module("windfarm_smu_sensors"); + request_module("windfarm_lm75_sensor"); + +#endif /* MODULE */ + driver_register(&wf_smu_driver); + } + + return rc; +} + 
+static void __exit wf_smu_exit(void) +{ + + driver_unregister(&wf_smu_driver); +} + + +module_init(wf_smu_init); +module_exit(wf_smu_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("Thermal control logic for SMU based PowerMacs"); +MODULE_LICENSE("GPL"); + Index: linux-work/include/asm-ppc64/smu.h =================================================================== --- linux-work.orig/include/asm-ppc64/smu.h 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/include/asm-ppc64/smu.h 2005-10-04 15:17:33.000000000 +1000 @@ -41,8 +41,30 @@ /* * Fan control * - * This is a "mux" for fan control commands, first byte is the - * "sub" command. + * This is a "mux" for fan control commands. The command seem to + * act differently based on the number of arguments. With 1 byte + * of argument, this seem to be queries for fans status, setpoint, + * etc..., while with 0xe arguments, we will set the fans speeds. + * + * Queries (1 byte arg): + * --------------------- + * + * arg=0x01: read RPM fans status + * arg=0x02: read RPM fans setpoint + * arg=0x11: read PWM fans status + * arg=0x12: read PWM fans setpoint + * + * the "status" queries return the current speed while the "setpoint" ones + * return the programmed/target speed. It _seems_ that the result is a bit + * mask in the first byte of active/available fans, followed by 6 words (16 + * bits) containing the requested speed. + * + * Setpoint (14 bytes arg): + * ------------------------ + * + * first arg byte is 0 for RPM fans and 0x10 for PWM. Second arg byte is the + * mask of fans affected by the command. 
Followed by 6 words containing the + * setpoint value for selected fans in the mask (or 0 if mask value is 0) */ #define SMU_CMD_FAN_COMMAND 0x4a @@ -169,7 +191,16 @@ #define SMU_CMD_POWER_SHUTDOWN "SHUTDOWN" #define SMU_CMD_POWER_VOLTAGE_SLEW "VSLEW" -/* Misc commands +/* + * Read ADC sensors + * + * This command takes one byte of parameter: the sensor ID (or "reg" + * value in the device-tree) and returns a 16 bits value + */ +#define SMU_CMD_READ_ADC 0xd8 + +/* + * Misc commands * * This command seem to be a grab bag of various things */ @@ -386,10 +417,12 @@ }; /* - * 32 bits integers are usually encoded with 2x16 bits swapped, - * this demangles them + * demangle 16 and 32 bits integer in some SMU partitions + * (currently, afaik, this concerns only the FVT partition + * (0x12) */ -//#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) +#define SMU_U16_MIX(x) le16_to_cpu(x); +#define SMU_U32_MIX(x) ((((x) & 0xff00ff00u) >> 8)|(((x) & 0x00ff00ffu) << 8)) /* This is the definition of the SMU sdb-partition-0x12 table (called * CPU F/V/T operating points in Darwin). The definition for all those @@ -399,7 +432,8 @@ struct smu_sdbp_fvt { __u32 sysclk; /* Base SysClk frequency in Hz for - * this operating point + * this operating point. Value need to + * be unmixed with SMU_U32_MIX() */ __u8 pad; __u8 maxtemp; /* Max temp. supported by this @@ -408,10 +442,69 @@ __u16 volts[3]; /* CPU core voltage for the 3 * PowerTune modes, a mode with - * 0V = not supported. + * 0V = not supported. 
Value need + * to be unmixed with SMU_U16_MIX() */ }; +/* This partition contains voltage & current sensor calibration + * informations + */ +#define SMU_SDB_CPUVCP_ID 0x21 + +struct smu_sdbp_cpuvcp { + __u16 volt_scale; /* u4.12 fixed point */ + __s16 volt_offset; /* s4.12 fixed point */ + __u16 curr_scale; /* u4.12 fixed point */ + __s16 curr_offset; /* s4.12 fixed point */ + __s32 power_quads[3]; /* s4.28 fixed point */ +}; + +/* This partition contains CPU thermal diode calibration + */ +#define SMU_SDB_CPUDIODE_ID 0x18 + +struct smu_sdbp_cpudiode { + __u16 m_value; /* u1.15 fixed point */ + __s16 b_value; /* s10.6 fixed point */ + +}; + +/* This partition contains Slots power calibration + */ +#define SMU_SDB_SLOTSPOW_ID 0x78 + +struct smu_sdbp_slotspow { + __u16 pow_scale; /* u4.12 fixed point */ + __s16 pow_offset; /* s4.12 fixed point */ +}; + +/* This partition contains machine specific version information about + * the sensor/control layout + */ +#define SMU_SDB_SENSORTREE_ID 0x25 + +struct smu_sdbp_sensortree { + u8 model_id; + u8 unknown[3]; +}; + +/* This partition contains CPU thermal control PID informations. 
So far + * only single CPU machines have been seen with an SMU, so we assume this + * carries only informations for those + */ +#define SMU_SDB_CPUPIDDATA_ID 0x17 + +struct smu_sdbp_cpupiddata { + u8 unknown1; + u8 target_temp_delta; + u8 unknown2; + u8 history_len; + s16 power_adj; + u16 max_power; + s32 gp,gr,gd; +}; + /* Other partitions without known structures */ #define SMU_SDB_DEBUG_SWITCHES_ID 0x05 Index: linux-work/drivers/macintosh/windfarm_lm75_sensor.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_lm75_sensor.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,255 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.1" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +struct wf_lm75_sensor { + int ds1775 : 1; + int inited : 1; + struct i2c_client i2c; + struct wf_sensor sens; +}; +#define wf_to_lm75(c) container_of(c, struct wf_lm75_sensor, sens) +#define i2c_to_lm75(c) container_of(c, struct wf_lm75_sensor, i2c) + +static int wf_lm75_attach(struct i2c_adapter *adapter); +static int wf_lm75_detach(struct i2c_client *client); + +static struct i2c_driver wf_lm75_driver = { + .owner = THIS_MODULE, + .name = "wf_lm75", + .flags = I2C_DF_NOTIFY, + .attach_adapter = wf_lm75_attach, + .detach_client = wf_lm75_detach, +}; + +static int wf_lm75_get(struct wf_sensor *sr, s32 *value) +{ + struct wf_lm75_sensor *lm = wf_to_lm75(sr); + s32 data; + + if (lm->i2c.adapter == NULL) + return -ENODEV; + + /* Init chip if necessary */ + if (!lm->inited) { + u8 cfg_new, cfg = (u8)i2c_smbus_read_byte_data(&lm->i2c, 1); + + DBG("wf_lm75: Initializing %s, cfg was: %02x\n", + sr->name, cfg); + + /* clear shutdown bit, keep other settings as left by + * 
the firmware for now + */ + cfg_new = cfg & ~0x01; + i2c_smbus_write_byte_data(&lm->i2c, 1, cfg_new); + lm->inited = 1; + + /* If we just powered it up, let's wait 200 ms */ + msleep(200); + } + + /* Read temperature register */ + data = (s32)le16_to_cpu(i2c_smbus_read_word_data(&lm->i2c, 0)); + data <<= 8; + *value = data; + + return 0; +} + +static void wf_lm75_release(struct wf_sensor *sr) +{ + struct wf_lm75_sensor *lm = wf_to_lm75(sr); + + /* check if client is registered and detach from i2c */ + if (lm->i2c.adapter) { + i2c_detach_client(&lm->i2c); + lm->i2c.adapter = NULL; + } + + kfree(lm); +} + +static struct wf_sensor_ops wf_lm75_ops = { + .get_value = wf_lm75_get, + .release = wf_lm75_release, + .owner = THIS_MODULE, +}; + +static struct wf_lm75_sensor *wf_lm75_create(struct i2c_adapter *adapter, + u8 addr, int ds1775, + const char *loc) +{ + struct wf_lm75_sensor *lm; + + DBG("wf_lm75: creating %s device at address 0x%02x\n", + ds1775 ? "ds1775" : "lm75", addr); + + lm = kmalloc(sizeof(struct wf_lm75_sensor), GFP_KERNEL); + if (lm == NULL) + return NULL; + memset(lm, 0, sizeof(struct wf_lm75_sensor)); + + /* Usual rant about sensor names not beeing very consistent in + * the device-tree, oh well ... + * Add more entries below as you deal with more setups + */ + if (!strcmp(loc, "Hard drive") || !strcmp(loc, "DRIVE BAY")) + lm->sens.name = "hd-temp"; + else + goto fail; + + lm->inited = 0; + lm->sens.ops = &wf_lm75_ops; + lm->ds1775 = ds1775; + lm->i2c.addr = (addr >> 1) & 0x7f; + lm->i2c.adapter = adapter; + lm->i2c.driver = &wf_lm75_driver; + strncpy(lm->i2c.name, lm->sens.name, I2C_NAME_SIZE-1); + + if (i2c_attach_client(&lm->i2c)) { + printk(KERN_ERR "windfarm: failed to attach %s %s to i2c\n", + ds1775 ? 
"ds1775" : "lm75", lm->i2c.name); + goto fail; + } + + if (wf_register_sensor(&lm->sens)) { + i2c_detach_client(&lm->i2c); + goto fail; + } + + return lm; + fail: + kfree(lm); + return NULL; +} + +static int wf_lm75_attach(struct i2c_adapter *adapter) +{ + u8 bus_id; + struct device_node *smu, *bus, *dev; + + /* We currently only deal with LM75's hanging off the SMU + * i2c busses. If we extend that driver to other/older + * machines, we should split this function into SMU-i2c, + * keywest-i2c, PMU-i2c, ... + */ + + DBG("wf_lm75: adapter %s detected\n", adapter->name); + + if (strncmp(adapter->name, "smu-i2c-", 8) != 0) + return 0; + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return 0; + + /* Look for the bus in the device-tree */ + bus_id = (u8)simple_strtoul(adapter->name + 8, NULL, 16); + + DBG("wf_lm75: bus ID is %x\n", bus_id); + + /* Look for sensors subdir */ + for (bus = NULL; + (bus = of_get_next_child(smu, bus)) != NULL;) { + u32 *reg; + + if (strcmp(bus->name, "i2c")) + continue; + reg = (u32 *)get_property(bus, "reg", NULL); + if (reg == NULL) + continue; + if (bus_id == *reg) + break; + } + of_node_put(smu); + if (bus == NULL) { + printk(KERN_WARNING "windfarm: SMU i2c bus 0x%x not found" + " in device-tree !\n", bus_id); + return 0; + } + + DBG("wf_lm75: bus found, looking for device...\n"); + + /* Now look for lm75(s) in there */ + for (dev = NULL; + (dev = of_get_next_child(bus, dev)) != NULL;) { + const char *loc = + get_property(dev, "hwsensor-location", NULL); + u32 *reg = (u32 *)get_property(dev, "reg", NULL); + DBG(" dev: %s... 
(loc: %p, reg: %p)\n", dev->name, loc, reg); + if (loc == NULL || reg == NULL) + continue; + /* real lm75 */ + if (device_is_compatible(dev, "lm75")) + wf_lm75_create(adapter, *reg, 0, loc); + /* ds1775 (compatible, better resolution */ + else if (device_is_compatible(dev, "ds1775")) + wf_lm75_create(adapter, *reg, 1, loc); + } + + of_node_put(bus); + + return 0; +} + +static int wf_lm75_detach(struct i2c_client *client) +{ + struct wf_lm75_sensor *lm = i2c_to_lm75(client); + + DBG("wf_lm75: i2c detatch called for %s\n", lm->sens.name); + + /* Mark client detached */ + lm->i2c.adapter = NULL; + + /* release sensor */ + wf_unregister_sensor(&lm->sens); + + return 0; +} + +static int __init wf_lm75_sensor_init(void) +{ + int rc; + + rc = i2c_add_driver(&wf_lm75_driver); + if (rc < 0) + return rc; + return 0; +} + +static void __exit wf_lm75_sensor_exit(void) +{ + i2c_del_driver(&wf_lm75_driver); +} + + +module_init(wf_lm75_sensor_init); +module_exit(wf_lm75_sensor_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("LM75 sensor objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + Index: linux-work/drivers/macintosh/windfarm_pid.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_pid.c 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,146 @@ +/* + * Windfarm PowerMac thermal control. Generic PID helpers + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + */ + +#include +#include +#include +#include +#include +#include + +#include "windfarm_pid.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) 
do { } while(0) +#endif + +void wf_pid_init(struct wf_pid_state *st, struct wf_pid_param *param) +{ + memset(st, 0, sizeof(struct wf_pid_state)); + st->param = *param; + st->first = 1; +} +EXPORT_SYMBOL_GPL(wf_pid_init); + +s32 wf_pid_run(struct wf_pid_state *st, s32 new_sample) +{ + s64 error, integ, deriv; + s32 target; + int i, hlen = st->param.history_len; + + /* Calculate error term */ + error = new_sample - st->param.itarget; + + /* Get samples into our history buffer */ + if (st->first) { + for (i = 0; i < hlen; i++) { + st->samples[i] = new_sample; + st->errors[i] = error; + } + st->first = 0; + st->index = 0; + } else { + st->index = (st->index + 1) % hlen; + st->samples[st->index] = new_sample; + st->errors[st->index] = error; + } + + /* Calculate integral term */ + for (i = 0, integ = 0; i < hlen; i++) + integ += st->errors[(st->index + hlen - i) % hlen]; + integ *= st->param.interval; + + /* Calculate derivative term */ + deriv = st->errors[st->index] - + st->errors[(st->index + hlen - 1) % hlen]; + deriv /= st->param.interval; + + /* Calculate target */ + target = (s32)((integ * (s64)st->param.gr + deriv * (s64)st->param.gd + + error * (s64)st->param.gp) >> 36); + if (st->param.additive) + target += st->target; + target = max(target, st->param.min); + target = min(target, st->param.max); + st->target = target; + + return st->target; +} +EXPORT_SYMBOL_GPL(wf_pid_run); + +void wf_cpu_pid_init(struct wf_cpu_pid_state *st, + struct wf_cpu_pid_param *param) +{ + memset(st, 0, sizeof(struct wf_cpu_pid_state)); + st->param = *param; + st->first = 1; +} +EXPORT_SYMBOL_GPL(wf_cpu_pid_init); + +s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 new_power, s32 new_temp) +{ + s64 error, integ, deriv, prop; + s32 target, sval, adj; + int i, hlen = st->param.history_len; + + /* Calculate error term */ + error = st->param.pmaxadj - new_power; + + /* Get samples into our history buffer */ + if (st->first) { + for (i = 0; i < hlen; i++) { + st->powers[i] = new_power; 
+ st->errors[i] = error; + } + st->temps[0] = st->temps[1] = new_temp; + st->first = 0; + st->index = st->tindex = 0; + } else { + st->index = (st->index + 1) % hlen; + st->powers[st->index] = new_power; + st->errors[st->index] = error; + st->tindex = (st->tindex + 1) % 2; + st->temps[st->tindex] = new_temp; + } + + /* Calculate integral term */ + for (i = 0, integ = 0; i < hlen; i++) + integ += st->errors[(st->index + hlen - i) % hlen]; + integ *= st->param.interval; + integ *= st->param.gr; + sval = st->param.tmax - ((integ >> 20) & 0xffffffff); + adj = min(st->param.ttarget, sval); + + DBG("integ: %lx, sval: %lx, adj: %lx\n", integ, sval, adj); + + /* Calculate derivative term */ + deriv = st->temps[st->tindex] - + st->temps[(st->tindex + 2 - 1) % 2]; + deriv /= st->param.interval; + deriv *= st->param.gd; + + /* Calculate proportional term */ + prop = (new_temp - adj); + prop *= st->param.gp; + + DBG("deriv: %lx, prop: %lx\n", deriv, prop); + + /* Calculate target */ + target = st->target + (s32)((deriv + prop) >> 36); + target = max(target, st->param.min); + target = min(target, st->param.max); + st->target = target; + + return st->target; +} +EXPORT_SYMBOL_GPL(wf_cpu_pid_run); Index: linux-work/drivers/macintosh/windfarm_pid.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_pid.h 2005-10-04 15:17:33.000000000 +1000 @@ -0,0 +1,84 @@ +/* + * Windfarm PowerMac thermal control. Generic PID helpers + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + * + * This is a pair of generic PID helpers that can be used by + * control loops. 
One is the basic PID implementation, the + * other one is more specifically tailored to the loops used + * for CPU control with 2 input sample types (temp and power) + */ + +/* + * *** Simple PID *** + */ + +#define WF_PID_MAX_HISTORY 32 + +/* This parameter array is passed to the PID algorithm. Currently, + * we don't support changing parameters on the fly as it's not needed + * but could be implemented (with necessary adjustment of the history + * buffer + */ +struct wf_pid_param { + int interval; /* Interval between samples in seconds */ + int history_len; /* Size of history buffer */ + int additive; /* 1: target relative to previous value */ + s32 gd, gp, gr; /* PID gains */ + s32 itarget; /* PID input target */ + s32 min,max; /* min and max target values */ +}; + +struct wf_pid_state { + int first; /* first run of the loop */ + int index; /* index of current sample */ + s32 target; /* current target value */ + s32 samples[WF_PID_MAX_HISTORY]; /* samples history buffer */ + s32 errors[WF_PID_MAX_HISTORY]; /* error history buffer */ + + struct wf_pid_param param; +}; + +extern void wf_pid_init(struct wf_pid_state *st, struct wf_pid_param *param); +extern s32 wf_pid_run(struct wf_pid_state *st, s32 sample); + + +/* + * *** CPU PID *** + */ + +#define WF_CPU_PID_MAX_HISTORY 32 + +/* This parameter array is passed to the CPU PID algorithm. 
Currently, + * we don't support changing parameters on the fly as it's not needed + * but could be implemented (with necessary adjustment of the history + * buffer + */ +struct wf_cpu_pid_param { + int interval; /* Interval between samples in seconds */ + int history_len; /* Size of history buffer */ + s32 gd, gp, gr; /* PID gains */ + s32 pmaxadj; /* PID max power adjust */ + s32 ttarget; /* PID input target */ + s32 tmax; /* PID input max */ + s32 min,max; /* min and max target values */ +}; + +struct wf_cpu_pid_state { + int first; /* first run of the loop */ + int index; /* index of current power */ + int tindex; /* index of current temp */ + s32 target; /* current target value */ + s32 powers[WF_PID_MAX_HISTORY]; /* power history buffer */ + s32 errors[WF_PID_MAX_HISTORY]; /* error history buffer */ + s32 temps[2]; /* temp. history buffer */ + + struct wf_cpu_pid_param param; +}; + +extern void wf_cpu_pid_init(struct wf_cpu_pid_state *st, + struct wf_cpu_pid_param *param); +extern s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 power, s32 temp); Index: linux-work/arch/ppc64/kernel/pmac_cpufreq.c =================================================================== --- linux-work.orig/arch/ppc64/kernel/pmac_cpufreq.c 2005-10-04 15:17:21.000000000 +1000 +++ linux-work/arch/ppc64/kernel/pmac_cpufreq.c 2005-10-04 15:18:26.000000000 +1000 @@ -84,7 +84,8 @@ static u32 *g5_pmode_data; static int g5_pmode_max; static int g5_pmode_cur; - +static int g5_driver_active; +static DECLARE_MUTEX(g5_switch_mutex); static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ static int g5_fvt_count; /* number of op. 
points */ @@ -105,11 +106,20 @@ static int g5_switch_freq(int speed_mode) { + struct cpufreq_freqs freqs; int to; if (g5_pmode_cur == speed_mode) return 0; + down(&g5_switch_mutex); + + freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency; + freqs.new = g5_cpu_freqs[speed_mode].frequency; + freqs.cpu = 0; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + /* If frequency is going up, first ramp up the voltage */ if (speed_mode < g5_pmode_cur) g5_switch_volt(speed_mode); @@ -143,6 +153,10 @@ g5_pmode_cur = speed_mode; ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + up(&g5_switch_mutex); + return 0; } @@ -159,12 +173,12 @@ } /* ----------------- cpufreq bookkeeping */ -static int __pmac g5_cpufreq_verify(struct cpufreq_policy *policy) +static int g5_cpufreq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); } -static int __pmac g5_cpufreq_target(struct cpufreq_policy *policy, +static int g5_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int newstate = 0; @@ -173,10 +187,20 @@ target_freq, relation, &newstate)) return -EINVAL; + DBG("g5_cpufreq: Request to switch to %d state: %d\n", + target_freq, newstate); + return g5_switch_freq(newstate); } -static int __pmac g5_cpufreq_cpu_init(struct cpufreq_policy *policy) +static unsigned int g5_cpufreq_get_speed(unsigned int cpu) +{ + DBG("g5_cpufreq: Get speed %d\n", + g5_cpu_freqs[g5_pmode_cur].frequency); + return g5_cpu_freqs[g5_pmode_cur].frequency; +} + +static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy) { if (policy->cpu != 0) return -ENODEV; @@ -198,6 +222,7 @@ .init = g5_cpufreq_cpu_init, .verify = g5_cpufreq_verify, .target = g5_cpufreq_target, + .get = g5_cpufreq_get_speed, .attr = g5_cpu_freqs_attr, }; @@ -266,11 +291,14 @@ /* Check current frequency */ g5_pmode_cur = g5_query_freq(); - if (g5_pmode_cur > 1) { + if 
(g5_pmode_cur > 1) /* We don't support anything but 1:1 and 1:2, fixup ... */ - g5_switch_freq(1); g5_pmode_cur = 1; - } + + /* Force apply current frequency to make sure everything is in + * sync (voltage is right for example) + */ + g5_switch_freq(g5_pmode_cur); printk(KERN_INFO "Registering G5 CPU frequency driver\n"); printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", @@ -279,6 +307,8 @@ g5_cpu_freqs[g5_pmode_cur].frequency/1000); rc = cpufreq_register_driver(&g5_cpufreq_driver); + if (rc == 0) + g5_driver_active = 1; /* We keep the CPU node on hold... hopefully, Apple G5 don't have * hotplug CPU with a dynamic device-tree ... Index: linux-work/drivers/macintosh/windfarm_cpufreq_clamp.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-work/drivers/macintosh/windfarm_cpufreq_clamp.c 2005-10-04 15:19:48.000000000 +1000 @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.3" + +static int clamped; +static struct wf_control *clamp_control; + +static int clamp_notifier_call(struct notifier_block *self, + unsigned long event, void *data) +{ + struct cpufreq_policy *p = data; + unsigned long max_freq; + + if (event != CPUFREQ_ADJUST) + return 0; + + max_freq = clamped ? 
(p->cpuinfo.min_freq) : (p->cpuinfo.max_freq); + cpufreq_verify_within_limits(p, 0, max_freq); + + return 0; +} + +static struct notifier_block clamp_notifier = { + .notifier_call = clamp_notifier_call, +}; + +static int clamp_set(struct wf_control *ct, s32 value) +{ + if (value) + printk(KERN_INFO "windfarm: Clamping CPU frequency to " + "minimum !\n"); + else + printk(KERN_INFO "windfarm: CPU frequency unclamped !\n"); + clamped = value; + cpufreq_update_policy(0); + return 0; +} + +static int clamp_get(struct wf_control *ct, s32 *value) +{ + *value = clamped; + return 0; +} + +static s32 clamp_min(struct wf_control *ct) +{ + return 0; +} + +static s32 clamp_max(struct wf_control *ct) +{ + return 1; +} + +static struct wf_control_ops clamp_ops = { + .set_value = clamp_set, + .get_value = clamp_get, + .get_min = clamp_min, + .get_max = clamp_max, + .owner = THIS_MODULE, +}; + +static int __init wf_cpufreq_clamp_init(void) +{ + struct wf_control *clamp; + + clamp = kmalloc(sizeof(struct wf_control), GFP_KERNEL); + if (clamp == NULL) + return -ENOMEM; + cpufreq_register_notifier(&clamp_notifier, CPUFREQ_POLICY_NOTIFIER); + clamp->ops = &clamp_ops; + clamp->name = "cpufreq-clamp"; + if (wf_register_control(clamp)) + goto fail; + clamp_control = clamp; + return 0; + fail: + kfree(clamp); + return -ENODEV; +} + +static void __exit wf_cpufreq_clamp_exit(void) +{ + if (clamp_control) + wf_unregister_control(clamp_control); +} + + +module_init(wf_cpufreq_clamp_init); +module_exit(wf_cpufreq_clamp_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("CPU frequency clamp for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + From Eric.Piel at lifl.fr Tue Oct 4 18:49:06 2005 From: Eric.Piel at lifl.fr (Eric Piel) Date: Tue, 04 Oct 2005 10:49:06 +0200 Subject: [PATCH] ppc64: Add cpufreq support for SMU based G5 In-Reply-To: <1128403842.31063.24.camel@gaston> References: <1128403842.31063.24.camel@gaston> Message-ID: <43424202.7070600@lifl.fr> 10/04/2005 
07:30 AM, Benjamin Herrenschmidt wrote/a écrit: > iMac G5 and latest single CPU desktop G5 (SMU based machines) have a > 970FX DD3 CPU that supports frequency & voltage switching. This patch > adds support for simple dual frequency switch. It is required for the > upcoming thermal control patch for these machines. > Hello, I know only very little about cpufreq, probably you could post your patch to the cpufreq mailing list for better review : cpufreq at lists.linux.org.uk (you may have to subscribe before posting, don't remember). From what I have seen, your patch looks pretty good in general. However, is this kind of CPU only in one CPU machines? Your patch doesn't seem to support SMP, then it's probably safer to prevent compilation on an SMP kernel in the Makefile? Or you can add SMP support (shouldn't be so hard in theory, but with no hardware to test it might be pointless), you can have a look at other drivers that support it, like in arch/i386/kernel/cpu/cpufreq/p4-clockmod.c . Just one more little thing, concerning: + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; Could you have a look if you could find the real info about how long it takes to change the speed (put the worst case latency)? Maybe the info can be found in some parts of the ROM you read? I don't know if conservative or ondemand governors are supposed to be able to mix with your code (especially wrt Windfarm) but not putting this info will prevent them from ever working... 
Cheers, Eric From benh at kernel.crashing.org Tue Oct 4 19:12:24 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 04 Oct 2005 19:12:24 +1000 Subject: [PATCH] ppc64: Add cpufreq support for SMU based G5 In-Reply-To: <43424202.7070600@lifl.fr> References: <1128403842.31063.24.camel@gaston> <43424202.7070600@lifl.fr> Message-ID: <1128417145.6291.25.camel@gaston> > I know only very little about cpufreq, probably you could post your > patch to the cpufreq mailing list for better review : > cpufreq at lists.linux.org.uk (you may have to subscribe before posting, > don't remember). I should probably have CC'd it... oh well, this isn't terribly important at this point but I'll do so if I post a new release. It's powermac specific anyway. > From what I have seen, your patch looks pretty good in general. However, is > this kind of CPU only in one CPU machines? So far, only single CPU machines shipped with an SMU. > Your patch doesn't seem to > support SMP, then it's probably safer to prevent compilation on an SMP > kernel in the Makefile? Or you can add SMP support (shouldn't be so hard > in theory, but with no hardware to test it might be pointless), you can > have a look at other drivers that support it, like in > arch/i386/kernel/cpu/cpufreq/p4-clockmod.c . There are several problems (and that leads to problems in the cpufreq core too btw). The problem with the cpufreq core is that it disables adjusting of loops_per_jiffies when CONFIG_SMP is set. That can lead to pretty disastrous results when running an SMP kernel on a laptop... Fortunately, the driver provided by this patch doesn't need it as ppc64 has a constant loops_per_jiffies (it uses the HW timebase which doesn't change frequency). The other problem is that the 970FX "PowerTune" mechanism will actually broadcast messages to the bus that sync all CPUs to the same speed. 
That is, all CPUs in the machine will always change frequency simultaneously, thus the whole SMP stuff doesn't make that much sense, and I'm not sure how to "inform" the cpufreq core of that fact (that changing one CPU actually triggered a change of all of them). But as I wrote earlier, there is currently no PowerMac SMP machine that has an SMU chip and a 970FX to which this driver would apply. Finally, as for preventing build with CONFIG_SMP, I think distros would kill me as I don't know any of them who intends to ship a G5 kernel with CONFIG_SMP disabled :) > Just a little more thing, concerning: > + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; > Could you have a look if you could find the real info about how long it > takes to change the speed (put the worse case latency)? I didn't find. Apple didn't bother putting it in the OF device-tree afaik, and while it might be in one of the undocumented SMU data partitions, I have no way to know. The problem isn't the frequency switch per-se which is extremely fast (and I could know), but the voltage switch that goes with it. I suppose I could measure and put an overestimated value in there, but that isn't critical for now. userland powernowd & friends work fine and I need the reduced frequency mostly for the thermal control driver so it can clamp it down when the CPU overtemps. > Maybe the info > can be found in some parts of the ROM you read? I don't know if > conservative or ondemand governors are supposed to be able to mix with > your code (especially wrt Windfarm) but not putting this info will > prevent them from ever working... Which is what I want for now, until I find out more about how well I can make them to work with those machines :) Let's call that a "conservative" approach ;) Ben. 
From segher at kernel.crashing.org Sun Oct 2 11:06:04 2005 From: segher at kernel.crashing.org (Segher Boessenkool) Date: Sun, 2 Oct 2005 03:06:04 +0200 Subject: [PATCH 8/9] powerpc: make iSeries build In-Reply-To: <20051001001005.348d7798.sfr@canb.auug.org.au> References: <20050930233602.138b6e27.sfr@canb.auug.org.au> <20051001001005.348d7798.sfr@canb.auug.org.au> Message-ID: <27c25b49c3f8c5d3f563a0b9b39bda8c@kernel.crashing.org> > The merge of vmlinux.lds.S would be much cleaner if it is clear that > putting the ..start/end symbols inside the section definitions is OK on > ppc32. It is okay, and putting those symbol definitions outside the section definition (like the ppc32 linker script used to do) can be wrong even, for example, when the output sections need extra alignment. Segher From segher at kernel.crashing.org Sun Oct 2 11:27:11 2005 From: segher at kernel.crashing.org (Segher Boessenkool) Date: Sun, 2 Oct 2005 03:27:11 +0200 Subject: RFC on mem_pieces to LMB work In-Reply-To: <17211.25234.160085.163327@cargo.ozlabs.ibm.com> References: <17211.25234.160085.163327@cargo.ozlabs.ibm.com> Message-ID: > Of course, since > everybody has a device tree with at least one /memory node (right? :) > we can just populate the LMB struct from the device tree > unconditionally. Not everyone has at least one /memory node. Everyone has a "memory" property in /chosen though, which is the integer encoded ihandle of a package that handles memory allocation. To find _all_ random access memory, just walk the whole device tree looking for all nodes with "device_type" set to "memory". 
Segher From nish.aravamudan at gmail.com Wed Oct 5 02:44:10 2005 From: nish.aravamudan at gmail.com (Nish Aravamudan) Date: Tue, 4 Oct 2005 09:44:10 -0700 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <1128404215.31063.32.camel@gaston> References: <1128404215.31063.32.camel@gaston> Message-ID: <29495f1d0510040944i6d8eb36aud85b63ff12608e8a@mail.gmail.com> On 10/3/05, Benjamin Herrenschmidt wrote: > This is the actual thermal control support for PowerMac8,1, PowerMac8,2 > and PowerMac9,1 machines (SMU based), that is iMac G5 and single CPU desktop. > It requires CPUFREQ to be enabled to properly deal with overtemp conditions. > The new thermal control code implements a new framework (nicknamed "windfarm") > to which I expect to port the old G5 thermal control, and possibly some of the > powerbook thermal control drivers as well in the future. > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ linux-work/drivers/macintosh/windfarm_core.c 2005-10-04 15:17:33.000000000 +1000 > +static int wf_thread_func(void *data) > +{ > + unsigned long next, delay; > + > + next = jiffies; > + > + DBG("wf: thread started\n"); > + > + while(!kthread_should_stop()) { > + try_to_freeze(); > + > + if (time_after_eq(jiffies, next)) { > + wf_notify(WF_EVENT_TICK, NULL); > + if (wf_overtemp) { > + wf_overtemp_counter++; > + /* 10 seconds overtemp, notify userland */ > + if (wf_overtemp_counter > 10) > + wf_critical_overtemp(); > + /* 30 seconds, shutdown */ > + if (wf_overtemp_counter > 30) { > + printk(KERN_ERR "windfarm: Overtemp " > + "for more than 30" > + " seconds, shutting down\n"); > + machine_power_off(); > + } > + } > + next += HZ; > + } > + > + set_current_state(TASK_INTERRUPTIBLE); > + delay = next - jiffies; > + if (delay <= HZ) > + schedule_timeout(delay); > + set_current_state(TASK_RUNNING); This can be schedule_timeout_interruptible(delay); and then you can get rid of the set_current_state(TASK_RUNNING); Thanks, NIsh From hch at 
infradead.org Wed Oct 5 04:46:01 2005 From: hch at infradead.org (Christoph Hellwig) Date: Tue, 4 Oct 2005 19:46:01 +0100 Subject: [PATCH 1/1] scsi: scsi inquiry timeout config option In-Reply-To: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> References: <200510032026.j93KQViR015934@d01av04.pok.ibm.com> Message-ID: <20051004184601.GA30667@infradead.org> On Mon, Oct 03, 2005 at 03:26:30PM -0500, brking at us.ibm.com wrote: > > Add a .config option to default the scsi scan inquiry timeout. > Due to a broken device (SCSI/ATA converter card) that is very > common on IBM iSeries/pSeries machines, these architectures > need a longer default inquiry timeout. This is really not something that should be a CONFIG_ symbol. We have a perfectly fine working module option for it. From jimix at watson.ibm.com Wed Oct 5 05:35:00 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Tue, 4 Oct 2005 15:35:00 -0400 Subject: [PATCH] PCI dev node without an OF node Message-ID: <17218.55652.57402.465678@kitch0.watson.ibm.com> Handing the pSeries_iommu*LP logic the device tree from a Maple-D results in PCI dev nodes that do not have a corresponding OF node. I cannot be certain if this is a bug with the devtree in PIBS, or if this case is normally possible, I believe it is the latter. The check for dn==NULL used to happen in iommu_dev_setup_pSeries() but that is no longer called as of: http://kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blobdiff;h=d17f0108a03200c0437146f199acaab21ca6f678;hp=f0fd7fbd6531cd01fb8984d2c81e82a25825b484;hb=1635317facea3094ddf34082cd86797efb1d9f7e;f=arch/ppc64/kernel/pSeries_iommu.c so the following patch catches it. Signed-off-by: Jimi Xenidis diff -r fbe71a6b8d00 arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 19:14:08 2005 +++ b/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 15:15:32 2005 @@ -513,6 +513,11 @@ * already allocated. 
*/ dn = pci_device_to_OF_node(dev); + if (dn == NULL) { + DBG("%s, dev %p (%s) has no iommu table\n", + dev, pci_name(dev)); + return; + } for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; pdn = pdn->parent) { -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about." -- Peter Griffin (Family Guy) From jimix at watson.ibm.com Wed Oct 5 06:06:40 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Tue, 4 Oct 2005 16:06:40 -0400 Subject: [PATCH] PCI dev node without an OF node In-Reply-To: <17218.55652.57402.465678@kitch0.watson.ibm.com> References: <17218.55652.57402.465678@kitch0.watson.ibm.com> Message-ID: <17218.57552.497977.897826@kitch0.watson.ibm.com> >>>>> "JX" == Jimi Xenidis writes: oops sorry sent an early patch file and forgot the __func__ in the DBG statement. Signed-off-by: Jimi Xenidis diff -r 549d78c4d7ed arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Mon Oct 3 15:07:10 2005 +++ b/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 16:04:00 2005 @@ -513,6 +513,11 @@ * already allocated. */ dn = pci_device_to_OF_node(dev); + if (dn == NULL) { + DBG("%s, dev %p (%s) has no iommu table\n", __func__, + dev, pci_name(dev)); + return; + } for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; pdn = pdn->parent) { From linas at austin.ibm.com Wed Oct 5 06:30:19 2005 From: linas at austin.ibm.com (linas) Date: Tue, 4 Oct 2005 15:30:19 -0500 Subject: [PATCH] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051003185739.GR29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> Message-ID: <20051004203019.GV29826@austin.ibm.com> After discussion with John Rose, I relize that this patch breaks something else, and so its no good. I'll try to come up with a different patch, which will unfortunately be a bit more complex. 
--linas On Mon, Oct 03, 2005 at 01:57:39PM -0500, linas was heard to remark: > > 08-hotplug-bugfix.patch > > In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add > of a hotplug slot will crash the system, with the following (abbreviated) > stack trace: > > cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] > pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 > lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 > c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) > c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 > c000000000060754 .notifier_call_chain+0x68/0x9c > > The root cause was that the phb was not marked "dynamic", and so instead > of having kmalloc() being called, the __init __alloc_bootmem() was called, > resulting in access of garbage data. The patch below fixes this crash, > and adds some docs to clarify the code. > > Signed-off-by: Linas Vepstas > > > Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c > =================================================================== > --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.011393833 -0500 > +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:52:26.421786761 -0500 > @@ -121,6 +121,12 @@ > return NULL; > } > > +/** pci_devs_phb_init_dynamic -- setup pci devices under this PHB > + * > + * This routine is called both during boot, (before the memory > + * subsystem is set up, before kmalloc is valid) and during the > + * dynamic lpar operation of adding a PHB to a running system. > + */ > void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) > { > struct device_node * dn = (struct device_node *) phb->arch_data; > @@ -201,17 +207,19 @@ > .notifier_call = pci_dn_reconfig_notifier, > }; > > -/* > - * Actually initialize the phbs. > - * The buswalk on this phb has not happened yet. > +/** pci_devs_phb_init -- Initialize phbs and pci devs under them. 
> + * > + * When this is called, the buswalk of PHB's has not happened yet. > */ > void __init pci_devs_phb_init(void) > { > struct pci_controller *phb, *tmp; > > /* This must be done first so the device nodes have valid pci info! */ > - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) > + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { > pci_devs_phb_init_dynamic(phb); > + phb->is_dynamic = 1; > + } > > pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); > } > _______________________________________________ > Linuxppc64-dev mailing list > Linuxppc64-dev at ozlabs.org > https://ozlabs.org/mailman/listinfo/linuxppc64-dev > From johnrose at austin.ibm.com Wed Oct 5 06:47:12 2005 From: johnrose at austin.ibm.com (John Rose) Date: Tue, 04 Oct 2005 15:47:12 -0500 Subject: [PATCH] Separate pci bits out of struct device_node In-Reply-To: <17181.2658.910786.938698@cargo.ozlabs.ibm.com> References: <17181.2658.910786.938698@cargo.ozlabs.ibm.com> Message-ID: <1128458832.9315.17.camel@sinatra.austin.ibm.com> Hi Paul- > diff -urN linux-2.6/arch/ppc64/kernel/pci_dn.c pcidn/arch/ppc64/kernel/pci_dn.c > --- linux-2.6/arch/ppc64/kernel/pci_dn.c 2005-04-26 15:37:55.000000000 +1000 > +++ pcidn/arch/ppc64/kernel/pci_dn.c 2005-09-06 11:39:47.000000000 +1000 ... > @@ -40,16 +42,26 @@ > struct pci_controller *phb = data; > int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL); > u32 *regs; > + struct pci_dn *pdn; > > - dn->phb = phb; > + if (phb->is_dynamic) > + pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); > + else > + pdn = alloc_bootmem(sizeof(*pdn)); I didn't notice it at first, but this check seems incorrect. The phb->is_dynamic flag indicates whether a PHB was present at boot. Suppose I try to hotplug add a device to a slot with a parent PHB that was present at boot. This code path gets called for every dynamic device node add. With this as-is, we get a runtime call to alloc_bootmem() for the new device nodes - crash. 
Linas reported this: http://www.ussg.iu.edu/hypermail/linux/kernel/0510.0/0510.html It would seem that the check should be asking whether we are at boot or not, and this flag probably isn't the one for that. I don't know whether mem_init_done is a better idea. This goes back to the "global var vs. init/dynamic versions of allocs" debate, for which we have historically followed the latter. Thoughts? Thanks- John From benh at kernel.crashing.org Wed Oct 5 07:59:10 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 05 Oct 2005 07:59:10 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <29495f1d0510040944i6d8eb36aud85b63ff12608e8a@mail.gmail.com> References: <1128404215.31063.32.camel@gaston> <29495f1d0510040944i6d8eb36aud85b63ff12608e8a@mail.gmail.com> Message-ID: <1128463151.6417.18.camel@gaston> On Tue, 2005-10-04 at 09:44 -0700, Nish Aravamudan wrote: > > This can be schedule_timeout_interruptible(delay); and then you can > get rid of the set_current_state(TASK_RUNNING); Ah, those lovely new "do-it-all" helpers :) Thanks. Ben. From jdl at freescale.com Wed Oct 5 07:59:50 2005 From: jdl at freescale.com (Jon Loeliger) Date: Tue, 04 Oct 2005 16:59:50 -0500 Subject: PATCH powerpc Move LMB from ppc64 to powerpc Message-ID: <1128463190.22452.29.camel@cashmere.sps.mot.com> Move the LMB code from ppc64 to powerpc. Only compile ppc32's tlb.c code on "standard" mmu machines. 
Signed-off-by: Jon Loeliger --- arch/powerpc/mm/Makefile | 8 + arch/powerpc/mm/lmb.c | 303 ++++++++++++++++++++++++++++++++++++++++++++ arch/ppc64/kernel/Makefile | 1 arch/ppc64/kernel/lmb.c | 299 ------------------------------------------- include/asm-powerpc/lmb.h | 78 +++++++++++ include/asm-ppc/page.h | 6 + 6 files changed, 391 insertions(+), 304 deletions(-) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -2,11 +2,11 @@ # Makefile for the linux ppc-specific parts of the memory manager. # -obj-y := fault.o mem.o -obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ - mem_pieces.o tlb.o +obj-y := fault.o lmb.o mem.o + +obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o mem_pieces.o obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o tlb.o obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c new file mode 100644 --- /dev/null +++ b/arch/powerpc/mm/lmb.c @@ -0,0 +1,303 @@ +/* + * Procedures for interfacing to Open Firmware. + * + * Peter Bergner, IBM Corp. June 2001. + * Copyright (C) 2001 Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct lmb lmb; + +#undef DEBUG + +void lmb_dump_all(void) +{ +#ifdef DEBUG + unsigned long i; + + udbg_printf("lmb_dump_all:\n"); + udbg_printf(" memory.cnt = 0x%lx\n", + lmb.memory.cnt); + udbg_printf(" memory.size = 0x%lx\n", + lmb.memory.size); + for (i=0; i < lmb.memory.cnt ;i++) { + udbg_printf(" memory.region[0x%x].base = 0x%lx\n", + i, lmb.memory.region[i].base); + udbg_printf(" .size = 0x%lx\n", + lmb.memory.region[i].size); + } + + udbg_printf("\n reserved.cnt = 0x%lx\n", + lmb.reserved.cnt); + udbg_printf(" reserved.size = 0x%lx\n", + lmb.reserved.size); + for (i=0; i < lmb.reserved.cnt ;i++) { + udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", + i, lmb.reserved.region[i].base); + udbg_printf(" .size = 0x%lx\n", + lmb.reserved.region[i].size); + } +#endif /* DEBUG */ +} + +static unsigned long __init +lmb_addrs_overlap(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + return ((base1 < (base2+size2)) && (base2 < (base1+size1))); +} + +static long __init +lmb_addrs_adjacent(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + if (base2 == base1 + size1) + return 1; + else if (base1 == base2 + size2) + return -1; + + return 0; +} + +static long __init +lmb_regions_adjacent(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + unsigned long base1 = rgn->region[r1].base; + unsigned long size1 = rgn->region[r1].size; + unsigned long base2 = rgn->region[r2].base; + unsigned long size2 = rgn->region[r2].size; + + return lmb_addrs_adjacent(base1, size1, base2, size2); +} + +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void __init +lmb_coalesce_regions(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + unsigned long i; + + rgn->region[r1].size += rgn->region[r2].size; + for (i=r2; i < rgn->cnt-1; i++) { + rgn->region[i].base = 
rgn->region[i+1].base; + rgn->region[i].size = rgn->region[i+1].size; + } + rgn->cnt--; +} + +/* This routine called with relocation disabled. */ +void __init +lmb_init(void) +{ + /* Create a dummy zero size LMB which will get coalesced away later. + * This simplifies the lmb_add() code below... + */ + lmb.memory.region[0].base = 0; + lmb.memory.region[0].size = 0; + lmb.memory.cnt = 1; + + /* Ditto. */ + lmb.reserved.region[0].base = 0; + lmb.reserved.region[0].size = 0; + lmb.reserved.cnt = 1; +} + +/* This routine called with relocation disabled. */ +void __init +lmb_analyze(void) +{ + int i; + + lmb.memory.size = 0; + + for (i = 0; i < lmb.memory.cnt; i++) + lmb.memory.size += lmb.memory.region[i].size; +} + +/* This routine called with relocation disabled. */ +static long __init +lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) +{ + unsigned long i, coalesced = 0; + long adjacent; + + /* First try and coalesce this LMB with another. */ + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + + adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); + if ( adjacent > 0 ) { + rgn->region[i].base -= size; + rgn->region[i].size += size; + coalesced++; + break; + } + else if ( adjacent < 0 ) { + rgn->region[i].size += size; + coalesced++; + break; + } + } + + if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { + lmb_coalesce_regions(rgn, i, i+1); + coalesced++; + } + + if ( coalesced ) { + return coalesced; + } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { + return -1; + } + + /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ + for (i=rgn->cnt-1; i >= 0; i--) { + if (base < rgn->region[i].base) { + rgn->region[i+1].base = rgn->region[i].base; + rgn->region[i+1].size = rgn->region[i].size; + } else { + rgn->region[i+1].base = base; + rgn->region[i+1].size = size; + break; + } + } + rgn->cnt++; + + return 0; +} + +/* This routine called with relocation disabled. */ +long __init +lmb_add(unsigned long base, unsigned long size) +{ + struct lmb_region *_rgn = &(lmb.memory); + + /* On pSeries LPAR systems, the first LMB is our RMO region. */ + if ( base == 0 ) + lmb.rmo_size = size; + + return lmb_add_region(_rgn, base, size); + +} + +long __init +lmb_reserve(unsigned long base, unsigned long size) +{ + struct lmb_region *_rgn = &(lmb.reserved); + + return lmb_add_region(_rgn, base, size); +} + +long __init +lmb_overlaps_region(struct lmb_region *rgn, + unsigned long base, unsigned long size) +{ + unsigned long i; + + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { + break; + } + } + + return (i < rgn->cnt) ? 
i : -1; +} + +unsigned long __init +lmb_alloc(unsigned long size, unsigned long align) +{ + return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); +} + +unsigned long __init +lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) +{ + long i, j; + unsigned long base = 0; + + for (i=lmb.memory.cnt-1; i >= 0; i--) { + unsigned long lmbbase = lmb.memory.region[i].base; + unsigned long lmbsize = lmb.memory.region[i].size; + + if ( max_addr == LMB_ALLOC_ANYWHERE ) + base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); + else if ( lmbbase < max_addr ) + base = _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, + align); + else + continue; + + while ( (lmbbase <= base) && + ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { + base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, + align); + } + + if ( (base != 0) && (lmbbase <= base) ) + break; + } + + if ( i < 0 ) + return 0; + + lmb_add_region(&lmb.reserved, base, size); + + return base; +} + +/* You must call lmb_analyze() before this. */ +unsigned long __init +lmb_phys_mem_size(void) +{ + return lmb.memory.size; +} + +unsigned long __init +lmb_end_of_DRAM(void) +{ + int idx = lmb.memory.cnt - 1; + + return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); +} + +/* + * Truncate the lmb list to memory_limit if it's set + * You must call lmb_analyze() after this. + */ +void __init lmb_enforce_memory_limit(void) +{ + extern unsigned long memory_limit; + unsigned long i, limit; + + if (! 
memory_limit) + return; + + limit = memory_limit; + for (i = 0; i < lmb.memory.cnt; i++) { + if (limit > lmb.memory.region[i].size) { + limit -= lmb.memory.region[i].size; + continue; + } + + lmb.memory.region[i].size = limit; + lmb.memory.cnt = i + 1; + break; + } +} diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -76,3 +76,4 @@ endif # These are here while we do the architecture merge vecemu-y += ../../powerpc/kernel/vecemu.o +lmb-y += ../../powerpc/mm/lmb.o diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c deleted file mode 100644 --- a/arch/ppc64/kernel/lmb.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Procedures for interfacing to Open Firmware. - * - * Peter Bergner, IBM Corp. June 2001. - * Copyright (C) 2001 Peter Bergner. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct lmb lmb; - -#undef DEBUG - -void lmb_dump_all(void) -{ -#ifdef DEBUG - unsigned long i; - - udbg_printf("lmb_dump_all:\n"); - udbg_printf(" memory.cnt = 0x%lx\n", - lmb.memory.cnt); - udbg_printf(" memory.size = 0x%lx\n", - lmb.memory.size); - for (i=0; i < lmb.memory.cnt ;i++) { - udbg_printf(" memory.region[0x%x].base = 0x%lx\n", - i, lmb.memory.region[i].base); - udbg_printf(" .size = 0x%lx\n", - lmb.memory.region[i].size); - } - - udbg_printf("\n reserved.cnt = 0x%lx\n", - lmb.reserved.cnt); - udbg_printf(" reserved.size = 0x%lx\n", - lmb.reserved.size); - for (i=0; i < lmb.reserved.cnt ;i++) { - udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", - i, lmb.reserved.region[i].base); - udbg_printf(" .size = 0x%lx\n", - lmb.reserved.region[i].size); - } -#endif /* DEBUG */ -} - -static unsigned long __init -lmb_addrs_overlap(unsigned long base1, unsigned long size1, - unsigned long base2, unsigned long size2) -{ - return ((base1 < (base2+size2)) && (base2 < (base1+size1))); -} - -static long __init -lmb_addrs_adjacent(unsigned long base1, unsigned long size1, - unsigned long base2, unsigned long size2) -{ - if (base2 == base1 + size1) - return 1; - else if (base1 == base2 + size2) - return -1; - - return 0; -} - -static long __init -lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2) -{ - unsigned long base1 = rgn->region[r1].base; - unsigned long size1 = rgn->region[r1].size; - unsigned long base2 = rgn->region[r2].base; - unsigned long size2 = rgn->region[r2].size; - - return lmb_addrs_adjacent(base1, size1, base2, size2); -} - -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void __init -lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) -{ - unsigned long i; - - rgn->region[r1].size += rgn->region[r2].size; - for (i=r2; i < rgn->cnt-1; i++) { - rgn->region[i].base = 
rgn->region[i+1].base; - rgn->region[i].size = rgn->region[i+1].size; - } - rgn->cnt--; -} - -/* This routine called with relocation disabled. */ -void __init -lmb_init(void) -{ - /* Create a dummy zero size LMB which will get coalesced away later. - * This simplifies the lmb_add() code below... - */ - lmb.memory.region[0].base = 0; - lmb.memory.region[0].size = 0; - lmb.memory.cnt = 1; - - /* Ditto. */ - lmb.reserved.region[0].base = 0; - lmb.reserved.region[0].size = 0; - lmb.reserved.cnt = 1; -} - -/* This routine called with relocation disabled. */ -void __init -lmb_analyze(void) -{ - int i; - - lmb.memory.size = 0; - - for (i = 0; i < lmb.memory.cnt; i++) - lmb.memory.size += lmb.memory.region[i].size; -} - -/* This routine called with relocation disabled. */ -static long __init -lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) -{ - unsigned long i, coalesced = 0; - long adjacent; - - /* First try and coalesce this LMB with another. */ - for (i=0; i < rgn->cnt; i++) { - unsigned long rgnbase = rgn->region[i].base; - unsigned long rgnsize = rgn->region[i].size; - - adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); - if ( adjacent > 0 ) { - rgn->region[i].base -= size; - rgn->region[i].size += size; - coalesced++; - break; - } - else if ( adjacent < 0 ) { - rgn->region[i].size += size; - coalesced++; - break; - } - } - - if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { - lmb_coalesce_regions(rgn, i, i+1); - coalesced++; - } - - if ( coalesced ) { - return coalesced; - } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { - return -1; - } - - /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ - for (i=rgn->cnt-1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; - } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; - break; - } - } - rgn->cnt++; - - return 0; -} - -/* This routine called with relocation disabled. */ -long __init -lmb_add(unsigned long base, unsigned long size) -{ - struct lmb_region *_rgn = &(lmb.memory); - - /* On pSeries LPAR systems, the first LMB is our RMO region. */ - if ( base == 0 ) - lmb.rmo_size = size; - - return lmb_add_region(_rgn, base, size); - -} - -long __init -lmb_reserve(unsigned long base, unsigned long size) -{ - struct lmb_region *_rgn = &(lmb.reserved); - - return lmb_add_region(_rgn, base, size); -} - -long __init -lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, unsigned long size) -{ - unsigned long i; - - for (i=0; i < rgn->cnt; i++) { - unsigned long rgnbase = rgn->region[i].base; - unsigned long rgnsize = rgn->region[i].size; - if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { - break; - } - } - - return (i < rgn->cnt) ? 
i : -1; -} - -unsigned long __init -lmb_alloc(unsigned long size, unsigned long align) -{ - return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); -} - -unsigned long __init -lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) -{ - long i, j; - unsigned long base = 0; - - for (i=lmb.memory.cnt-1; i >= 0; i--) { - unsigned long lmbbase = lmb.memory.region[i].base; - unsigned long lmbsize = lmb.memory.region[i].size; - - if ( max_addr == LMB_ALLOC_ANYWHERE ) - base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); - else if ( lmbbase < max_addr ) - base = _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, align); - else - continue; - - while ( (lmbbase <= base) && - ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { - base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align); - } - - if ( (base != 0) && (lmbbase <= base) ) - break; - } - - if ( i < 0 ) - return 0; - - lmb_add_region(&lmb.reserved, base, size); - - return base; -} - -/* You must call lmb_analyze() before this. */ -unsigned long __init -lmb_phys_mem_size(void) -{ - return lmb.memory.size; -} - -unsigned long __init -lmb_end_of_DRAM(void) -{ - int idx = lmb.memory.cnt - 1; - - return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); -} - -/* - * Truncate the lmb list to memory_limit if it's set - * You must call lmb_analyze() after this. - */ -void __init lmb_enforce_memory_limit(void) -{ - extern unsigned long memory_limit; - unsigned long i, limit; - - if (! 
memory_limit) - return; - - limit = memory_limit; - for (i = 0; i < lmb.memory.cnt; i++) { - if (limit > lmb.memory.region[i].size) { - limit -= lmb.memory.region[i].size; - continue; - } - - lmb.memory.region[i].size = limit; - lmb.memory.cnt = i + 1; - break; - } -} diff --git a/include/asm-powerpc/lmb.h b/include/asm-powerpc/lmb.h new file mode 100644 --- /dev/null +++ b/include/asm-powerpc/lmb.h @@ -0,0 +1,78 @@ +#ifndef _ASM_POWERPC_LMB_H +#define _ASM_POWERPC_LMB_H + +/* + * Low-level Memory Block management. + * + * Copyright (C) 2001 Peter Bergner, IBM Corp. + */ + +#include +#include + +#define MAX_LMB_REGIONS 128 + +#define LMB_ALLOC_ANYWHERE 0 + +struct lmb_block { + unsigned long base; + unsigned long size; +}; + +struct lmb_region { + unsigned long cnt; + unsigned long size; + struct lmb_block region[MAX_LMB_REGIONS+1]; +}; + +struct lmb { + unsigned long debug; + unsigned long rmo_size; + struct lmb_region memory; + struct lmb_region reserved; +}; + +extern struct lmb lmb; + +extern void __init lmb_init(void); +extern void __init lmb_analyze(void); +extern long __init lmb_add(unsigned long, unsigned long); +extern long __init lmb_reserve(unsigned long, unsigned long); +extern unsigned long __init lmb_alloc(unsigned long, unsigned long); +extern unsigned long __init lmb_alloc_base(unsigned long, unsigned long, + unsigned long); +extern unsigned long __init lmb_phys_mem_size(void); +extern unsigned long __init lmb_end_of_DRAM(void); +extern unsigned long __init lmb_abs_to_phys(unsigned long); +extern void __init lmb_enforce_memory_limit(void); + +extern void lmb_dump_all(void); + +extern unsigned long io_hole_start; + +static inline unsigned long +lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].size; +} + +static inline unsigned long +lmb_size_pages(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; +} + +static inline unsigned long 
+lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].base >> PAGE_SHIFT; +} + +static inline unsigned long +lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_start_pfn(type, region_nr) + + lmb_size_pages(type, region_nr); +} + +#endif /* _ASM_POWERPC_LMB_H */ diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h --- a/include/asm-ppc/page.h +++ b/include/asm-ppc/page.h @@ -77,8 +77,12 @@ typedef unsigned long pgprot_t; #endif +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + /* align addr on a size boundary - adjust address up if needed -- Cort */ -#define _ALIGN(addr,size) (((addr)+(size)-1)&(~((size)-1))) +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) From benh at kernel.crashing.org Wed Oct 5 08:08:05 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 05 Oct 2005 08:08:05 +1000 Subject: [PATCH] PCI dev node without an OF node In-Reply-To: <17218.55652.57402.465678@kitch0.watson.ibm.com> References: <17218.55652.57402.465678@kitch0.watson.ibm.com> Message-ID: <1128463686.6417.25.camel@gaston> On Tue, 2005-10-04 at 15:35 -0400, Jimi Xenidis wrote: > Handing the pSeries_iommu*LP logic the device tree from a Maple-D results > in PCI dev nodes that do not have a corresponding OF node. > > I cannot be certain if this is a bug with the devtree in PIBS, or if > this case is normally possible, I believe it is the latter. Yes, it's accepted to have only OF nodes for bridges. 
> The check for dn==NULL used to happen in iommu_dev_setup_pSeries() but > that is no longer called as of: > http://kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blobdiff;h=d17f0108a03200c0437146f199acaab21ca6f678;hp=f0fd7fbd6531cd01fb8984d2c81e82a25825b484;hb=1635317facea3094ddf34082cd86797efb1d9f7e;f=arch/ppc64/kernel/pSeries_iommu.c > > so the following patch catches it. I'm not sure your patch is 100% correct, but then, we also shouldn't have called pSeries_iommu.c code on Maple neither in the first place unless you have put some kind of PAPR-like hypervisor on it (but still, the Maple has no TCEs so it shouldn't call this code). One must make sure that the dma_map* routines always get the closest parent device node though if the device has NULL. We used to populate all PCI devices with the PHB node by default but that may have been broken. Ben. > Signed-off-by: Jimi Xenidis > > > diff -r fbe71a6b8d00 arch/ppc64/kernel/pSeries_iommu.c > --- a/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 19:14:08 2005 > +++ b/arch/ppc64/kernel/pSeries_iommu.c Tue Oct 4 15:15:32 2005 > @@ -513,6 +513,11 @@ > * already allocated. 
> */ > dn = pci_device_to_OF_node(dev); > + if (dn == NULL) { > + DBG("%s, dev %p (%s) has no iommu table\n", > + dev, pci_name(dev)); > + return; > + } > > for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; > pdn = pdn->parent) { > From jimix at watson.ibm.com Wed Oct 5 08:15:36 2005 From: jimix at watson.ibm.com (Jimi Xenidis) Date: Tue, 4 Oct 2005 18:15:36 -0400 Subject: [PATCH] PCI dev node without an OF node In-Reply-To: <1128463686.6417.25.camel@gaston> References: <17218.55652.57402.465678@kitch0.watson.ibm.com> <1128463686.6417.25.camel@gaston> Message-ID: <17218.65288.929638.35946@kitch0.watson.ibm.com> >>>>> "BH" == Benjamin Herrenschmidt writes: BH> On Tue, 2005-10-04 at 15:35 -0400, Jimi Xenidis wrote: >> Handing the pSeries_iommu*LP logic the device tree from a Maple-D results >> in PCI dev nodes that do not have a corresponding OF node. BH> I'm not sure your patch is 100% correct, but then, we also shouldn't BH> have called pSeries_iommu.c code on Maple neither in the first place BH> unless you have put some kind of PAPR-like hypervisor on it (but still, BH> the Maple has no TCEs so it shouldn't call this code). In our world of Xen/Hypervisor on Maple this is entirely possible for the Dom0/Alpha-LPAR, where the HTAB and DART are controlled via PAPR H_CALLS. :-) -- "I got an idea, an idea so smart my head would explode if I even began to know what I was talking about." 
-- Peter Griffin (Family Guy) From benh at kernel.crashing.org Wed Oct 5 08:43:13 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 05 Oct 2005 08:43:13 +1000 Subject: [PATCH] PCI dev node without an OF node In-Reply-To: <17218.65288.929638.35946@kitch0.watson.ibm.com> References: <17218.55652.57402.465678@kitch0.watson.ibm.com> <1128463686.6417.25.camel@gaston> <17218.65288.929638.35946@kitch0.watson.ibm.com> Message-ID: <1128465794.6417.38.camel@gaston> On Tue, 2005-10-04 at 18:15 -0400, Jimi Xenidis wrote: > >>>>> "BH" == Benjamin Herrenschmidt writes: > > BH> On Tue, 2005-10-04 at 15:35 -0400, Jimi Xenidis wrote: > >> Handing the pSeries_iommu*LP logic the device tree from a Maple-D results > >> in PCI dev nodes that do not have a corresponding OF node. > > BH> I'm not sure your patch is 100% correct, but then, we also shouldn't > BH> have called pSeries_iommu.c code on Maple neither in the first place > BH> unless you have put some kind of PAPR-like hypervisor on it (but still, > BH> the Maple has no TCEs so it shouldn't call this code). > > In our world of Xen/Hypervisor on Maple this is entirely possible for > the Dom0/Alpha-LPAR, where the HTAB and DART are controlled via PAPR > H_CALLS. :-) Ah, indeed... Oh well, just make sure that when a device calls pci_map_blah() it works whether the device has a node or not. Ben. From linas at austin.ibm.com Wed Oct 5 09:59:00 2005 From: linas at austin.ibm.com (linas) Date: Tue, 4 Oct 2005 18:59:00 -0500 Subject: [PATCH 1/2] ppc64: Crash in DLPAR code on PCI hotplug add In-Reply-To: <20051004203019.GV29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> <20051004203019.GV29826@austin.ibm.com> Message-ID: <20051004235900.GW29826@austin.ibm.com> Paul, A new-improved variant of the previous patch in this thread. Please apply. 
08-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that __init __alloc_bootmem() was called long after boot had finished, resulting in a crash because this routine is undefined after boot time. The patch below fixes this crash, and adds some docs to clarify the code. p.s. congrats to all for getting slashdotted on this yesterday! Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-04 15:37:49.761245845 -0500 @@ -44,7 +44,7 @@ u32 *regs; struct pci_dn *pdn; - if (phb->is_dynamic) + if (mem_init_done) pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); else pdn = alloc_bootmem(sizeof(*pdn)); @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,9 +209,14 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. 
+/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. + * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { From linas at austin.ibm.com Wed Oct 5 10:01:16 2005 From: linas at austin.ibm.com (linas) Date: Tue, 4 Oct 2005 19:01:16 -0500 Subject: [PATCH 2/2] ppc64: Crash in DLPAR code on remove operation In-Reply-To: <20051004203019.GV29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> <20051004203019.GV29826@austin.ibm.com> Message-ID: <20051005000116.GX29826@austin.ibm.com> This patch fixes two bugs related to dlpar slot removal and add. -- Both crashes are due to the fact that some children of pci nodes are not pci nodes themselves, and thus do not have pci_dn structures. For example: /pci at 800000020000002/pci at 2,3/usb at 1/hub at 1 /pci at 800000020000002/pci at 2,3/usb at 1,1/hub at 1 Strangely, though, sometimes the following appears, and I don't quite understand why. /interrupt-controller at 3fe0000a400 A typical stack trace: Vector: 300 (Data Access) at [c0000000555637d0] pc: c000000000202a50: .dlpar_add_slot+0x108/0x410 c000000000202e78 .add_slot_store+0x7c/0xac c000000000202da0 .dlpar_attr_store+0x48/0x64 c0000000000f8ee4 .sysfs_write_file+0x100/0x1a0 A similar stack trace is involved for the slot remove. This code survived testing, of adding and removing different slots, 23 times each, so far, as of this writing. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pSeries_iommu.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pSeries_iommu.c 2005-10-04 16:47:09.175705100 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pSeries_iommu.c 2005-10-04 17:12:54.123928903 -0500 @@ -478,10 +478,13 @@ { int err = NOTIFY_OK; struct device_node *np = node; - struct pci_dn *pci = np->data; + struct pci_dn *pci; switch (action) { case PSERIES_RECONFIG_REMOVE: + pci = PCI_DN(np); + if (!pci) + return NOTIFY_OK; if (pci->iommu_table && get_property(np, "ibm,dma-window", NULL)) iommu_free_table(np); Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-04 15:37:49.761245845 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-04 17:58:47.344628793 -0500 @@ -195,7 +195,10 @@ switch (action) { case PSERIES_RECONFIG_ADD: - pci = np->parent->data; + pci = PCI_DN(np->parent); + if (!pci) + return NOTIFY_OK; + update_dn_pci_info(np, pci->phb); break; default: From linas at austin.ibm.com Wed Oct 5 10:04:26 2005 From: linas at austin.ibm.com (linas) Date: Tue, 4 Oct 2005 19:04:26 -0500 Subject: [PATCH] rpaphp: PCI Hotplug crash on PHB DLPAR add In-Reply-To: <20051004203019.GV29826@austin.ibm.com> References: <20051003185739.GR29826@austin.ibm.com> <20051004203019.GV29826@austin.ibm.com> Message-ID: <20051005000426.GY29826@austin.ibm.com> This patch fixes a bug related to dlpar PHB add, after a PHB removal. -- The crash was due to the PHB not having a pci_dn structure yet, when the phb is being added. This code survived testing, of adding and removing the PHB and all slots underneath it, 17 times so far, as of this writing. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpadlpar_core.c 2005-10-04 16:40:12.539168432 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c 2005-10-04 17:55:43.165471615 -0500 @@ -303,7 +303,7 @@ { struct pci_controller *phb; - if (PCI_DN(dn)->phb) { + if (PCI_DN(dn) && PCI_DN(dn)->phb) { /* PHB already exists */ return -EINVAL; } From kumar.gala at freescale.com Wed Oct 5 15:17:35 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Wed, 5 Oct 2005 00:17:35 -0500 Subject: PATCH powerpc Move LMB from ppc64 to powerpc In-Reply-To: <1128463190.22452.29.camel@cashmere.sps.mot.com> References: <1128463190.22452.29.camel@cashmere.sps.mot.com> Message-ID: <01889CD2-412A-495A-9F2B-25605B543F00@freescale.com> Jon, Looks good. As you're removing the use of mem_pieces from arch/powerpc, we should see if the LMB data structures can always have 64-bit addresses. The reason for this is to handle the > 32-bit address case on ppc32. Anyways, something to keep in the back of your mind while looking at this code. - kumar On Oct 4, 2005, at 4:59 PM, Loeliger Jon-LOELIGER wrote: > Move the LMB code from ppc64 to powerpc. > Only compile ppc32's tlb.c code on "standard" mmu machines. > > Signed-off-by: Jon Loeliger > --- > > arch/powerpc/mm/Makefile | 8 + > arch/powerpc/mm/lmb.c | 303 > ++++++++++++++++++++++++++++++++++++++++++++ > arch/ppc64/kernel/Makefile | 1 > arch/ppc64/kernel/lmb.c | 299 > ------------------------------------------- > include/asm-powerpc/lmb.h | 78 +++++++++++ > include/asm-ppc/page.h | 6 + > 6 files changed, 391 insertions(+), 304 deletions(-) > > > diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile > --- a/arch/powerpc/mm/Makefile > +++ b/arch/powerpc/mm/Makefile > @@ -2,11 +2,11 @@ > # Makefile for the linux ppc-specific parts of the memory manager. 
> # > > -obj-y := fault.o mem.o > -obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ > - mem_pieces.o tlb.o > +obj-y := fault.o lmb.o mem.o > + > +obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o > mem_pieces.o > obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o > -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o > +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o tlb.o > obj-$(CONFIG_40x) += 4xx_mmu.o > obj-$(CONFIG_44x) += 44x_mmu.o > obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o > diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c > new file mode 100644 > --- /dev/null > +++ b/arch/powerpc/mm/lmb.c > @@ -0,0 +1,303 @@ > +/* > + * Procedures for interfacing to Open Firmware. > + * > + * Peter Bergner, IBM Corp. June 2001. > + * Copyright (C) 2001 Peter Bergner. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. 
> + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +struct lmb lmb; > + > +#undef DEBUG > + > +void lmb_dump_all(void) > +{ > +#ifdef DEBUG > + unsigned long i; > + > + udbg_printf("lmb_dump_all:\n"); > + udbg_printf(" memory.cnt = 0x%lx\n", > + lmb.memory.cnt); > + udbg_printf(" memory.size = 0x%lx\n", > + lmb.memory.size); > + for (i=0; i < lmb.memory.cnt ;i++) { > + udbg_printf(" memory.region[0x%x].base = > 0x%lx\n", > + i, lmb.memory.region[i].base); > + udbg_printf(" .size = 0x%lx\n", > + lmb.memory.region[i].size); > + } > + > + udbg_printf("\n reserved.cnt = 0x%lx\n", > + lmb.reserved.cnt); > + udbg_printf(" reserved.size = 0x%lx\n", > + lmb.reserved.size); > + for (i=0; i < lmb.reserved.cnt ;i++) { > + udbg_printf(" reserved.region[0x%x].base = > 0x%lx\n", > + i, lmb.reserved.region[i].base); > + udbg_printf(" .size = 0x%lx\n", > + lmb.reserved.region[i].size); > + } > +#endif /* DEBUG */ > +} > + > +static unsigned long __init > +lmb_addrs_overlap(unsigned long base1, unsigned long size1, > + unsigned long base2, unsigned long size2) > +{ > + return ((base1 < (base2+size2)) && (base2 < (base1+size1))); > +} > + > +static long __init > +lmb_addrs_adjacent(unsigned long base1, unsigned long size1, > + unsigned long base2, unsigned long size2) > +{ > + if (base2 == base1 + size1) > + return 1; > + else if (base1 == base2 + size2) > + return -1; > + > + return 0; > +} > + > +static long __init > +lmb_regions_adjacent(struct lmb_region *rgn, > + unsigned long r1, unsigned long r2) > +{ > + unsigned long base1 = rgn->region[r1].base; > + unsigned long size1 = rgn->region[r1].size; > + unsigned long base2 = rgn->region[r2].base; > + unsigned long size2 = rgn->region[r2].size; > + > + return lmb_addrs_adjacent(base1, size1, base2, size2); > +} > + > +/* Assumption: base addr of region 1 < base addr of region 2 */ > +static void __init > +lmb_coalesce_regions(struct lmb_region *rgn, > + unsigned long 
r1, unsigned long r2) > +{ > + unsigned long i; > + > + rgn->region[r1].size += rgn->region[r2].size; > + for (i=r2; i < rgn->cnt-1; i++) { > + rgn->region[i].base = rgn->region[i+1].base; > + rgn->region[i].size = rgn->region[i+1].size; > + } > + rgn->cnt--; > +} > + > +/* This routine called with relocation disabled. */ > +void __init > +lmb_init(void) > +{ > + /* Create a dummy zero size LMB which will get coalesced away > later. > + * This simplifies the lmb_add() code below... > + */ > + lmb.memory.region[0].base = 0; > + lmb.memory.region[0].size = 0; > + lmb.memory.cnt = 1; > + > + /* Ditto. */ > + lmb.reserved.region[0].base = 0; > + lmb.reserved.region[0].size = 0; > + lmb.reserved.cnt = 1; > +} > + > +/* This routine called with relocation disabled. */ > +void __init > +lmb_analyze(void) > +{ > + int i; > + > + lmb.memory.size = 0; > + > + for (i = 0; i < lmb.memory.cnt; i++) > + lmb.memory.size += lmb.memory.region[i].size; > +} > + > +/* This routine called with relocation disabled. */ > +static long __init > +lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned > long size) > +{ > + unsigned long i, coalesced = 0; > + long adjacent; > + > + /* First try and coalesce this LMB with another. */ > + for (i=0; i < rgn->cnt; i++) { > + unsigned long rgnbase = rgn->region[i].base; > + unsigned long rgnsize = rgn->region[i].size; > + > + adjacent = > lmb_addrs_adjacent(base,size,rgnbase,rgnsize); > + if ( adjacent > 0 ) { > + rgn->region[i].base -= size; > + rgn->region[i].size += size; > + coalesced++; > + break; > + } > + else if ( adjacent < 0 ) { > + rgn->region[i].size += size; > + coalesced++; > + break; > + } > + } > + > + if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { > + lmb_coalesce_regions(rgn, i, i+1); > + coalesced++; > + } > + > + if ( coalesced ) { > + return coalesced; > + } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { > + return -1; > + } > + > + /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ > + for (i=rgn->cnt-1; i >= 0; i--) { > + if (base < rgn->region[i].base) { > + rgn->region[i+1].base = rgn->region[i].base; > + rgn->region[i+1].size = rgn->region[i].size; > + } else { > + rgn->region[i+1].base = base; > + rgn->region[i+1].size = size; > + break; > + } > + } > + rgn->cnt++; > + > + return 0; > +} > + > +/* This routine called with relocation disabled. */ > +long __init > +lmb_add(unsigned long base, unsigned long size) > +{ > + struct lmb_region *_rgn = &(lmb.memory); > + > + /* On pSeries LPAR systems, the first LMB is our RMO region. */ > + if ( base == 0 ) > + lmb.rmo_size = size; > + > + return lmb_add_region(_rgn, base, size); > + > +} > + > +long __init > +lmb_reserve(unsigned long base, unsigned long size) > +{ > + struct lmb_region *_rgn = &(lmb.reserved); > + > + return lmb_add_region(_rgn, base, size); > +} > + > +long __init > +lmb_overlaps_region(struct lmb_region *rgn, > + unsigned long base, unsigned long size) > +{ > + unsigned long i; > + > + for (i=0; i < rgn->cnt; i++) { > + unsigned long rgnbase = rgn->region[i].base; > + unsigned long rgnsize = rgn->region[i].size; > + if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { > + break; > + } > + } > + > + return (i < rgn->cnt) ? 
i : -1; > +} > + > +unsigned long __init > +lmb_alloc(unsigned long size, unsigned long align) > +{ > + return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); > +} > + > +unsigned long __init > +lmb_alloc_base(unsigned long size, unsigned long align, unsigned long > max_addr) > +{ > + long i, j; > + unsigned long base = 0; > + > + for (i=lmb.memory.cnt-1; i >= 0; i--) { > + unsigned long lmbbase = lmb.memory.region[i].base; > + unsigned long lmbsize = lmb.memory.region[i].size; > + > + if ( max_addr == LMB_ALLOC_ANYWHERE ) > + base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); > + else if ( lmbbase < max_addr ) > + base = > _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, > + align); > + else > + continue; > + > + while ( (lmbbase <= base) && > + ((j = > lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { > + base = > _ALIGN_DOWN(lmb.reserved.region[j].base-size, > + align); > + } > + > + if ( (base != 0) && (lmbbase <= base) ) > + break; > + } > + > + if ( i < 0 ) > + return 0; > + > + lmb_add_region(&lmb.reserved, base, size); > + > + return base; > +} > + > +/* You must call lmb_analyze() before this. */ > +unsigned long __init > +lmb_phys_mem_size(void) > +{ > + return lmb.memory.size; > +} > + > +unsigned long __init > +lmb_end_of_DRAM(void) > +{ > + int idx = lmb.memory.cnt - 1; > + > + return (lmb.memory.region[idx].base + > lmb.memory.region[idx].size); > +} > + > +/* > + * Truncate the lmb list to memory_limit if it's set > + * You must call lmb_analyze() after this. > + */ > +void __init lmb_enforce_memory_limit(void) > +{ > + extern unsigned long memory_limit; > + unsigned long i, limit; > + > + if (! 
memory_limit) > + return; > + > + limit = memory_limit; > + for (i = 0; i < lmb.memory.cnt; i++) { > + if (limit > lmb.memory.region[i].size) { > + limit -= lmb.memory.region[i].size; > + continue; > + } > + > + lmb.memory.region[i].size = limit; > + lmb.memory.cnt = i + 1; > + break; > + } > +} > diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile > --- a/arch/ppc64/kernel/Makefile > +++ b/arch/ppc64/kernel/Makefile > @@ -76,3 +76,4 @@ endif > > # These are here while we do the architecture merge > vecemu-y += ../../powerpc/kernel/vecemu.o > +lmb-y += ../../powerpc/mm/lmb.o > diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c > deleted file mode 100644 > --- a/arch/ppc64/kernel/lmb.c > +++ /dev/null > @@ -1,299 +0,0 @@ > -/* > - * Procedures for interfacing to Open Firmware. > - * > - * Peter Bergner, IBM Corp. June 2001. > - * Copyright (C) 2001 Peter Bergner. > - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of the GNU General Public License > - * as published by the Free Software Foundation; either version > - * 2 of the License, or (at your option) any later version. 
> - */ > - > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > - > -struct lmb lmb; > - > -#undef DEBUG > - > -void lmb_dump_all(void) > -{ > -#ifdef DEBUG > - unsigned long i; > - > - udbg_printf("lmb_dump_all:\n"); > - udbg_printf(" memory.cnt = 0x%lx\n", > - lmb.memory.cnt); > - udbg_printf(" memory.size = 0x%lx\n", > - lmb.memory.size); > - for (i=0; i < lmb.memory.cnt ;i++) { > - udbg_printf(" memory.region[0x%x].base = > 0x%lx\n", > - i, lmb.memory.region[i].base); > - udbg_printf(" .size = 0x%lx\n", > - lmb.memory.region[i].size); > - } > - > - udbg_printf("\n reserved.cnt = 0x%lx\n", > - lmb.reserved.cnt); > - udbg_printf(" reserved.size = 0x%lx\n", > - lmb.reserved.size); > - for (i=0; i < lmb.reserved.cnt ;i++) { > - udbg_printf(" reserved.region[0x%x].base = > 0x%lx\n", > - i, lmb.reserved.region[i].base); > - udbg_printf(" .size = 0x%lx\n", > - lmb.reserved.region[i].size); > - } > -#endif /* DEBUG */ > -} > - > -static unsigned long __init > -lmb_addrs_overlap(unsigned long base1, unsigned long size1, > - unsigned long base2, unsigned long size2) > -{ > - return ((base1 < (base2+size2)) && (base2 < (base1+size1))); > -} > - > -static long __init > -lmb_addrs_adjacent(unsigned long base1, unsigned long size1, > - unsigned long base2, unsigned long size2) > -{ > - if (base2 == base1 + size1) > - return 1; > - else if (base1 == base2 + size2) > - return -1; > - > - return 0; > -} > - > -static long __init > -lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, > unsigned > long r2) > -{ > - unsigned long base1 = rgn->region[r1].base; > - unsigned long size1 = rgn->region[r1].size; > - unsigned long base2 = rgn->region[r2].base; > - unsigned long size2 = rgn->region[r2].size; > - > - return lmb_addrs_adjacent(base1, size1, base2, size2); > -} > - > -/* Assumption: base addr of region 1 < base addr of region 2 */ > -static void __init > -lmb_coalesce_regions(struct lmb_region *rgn, unsigned 
long r1, > unsigned > long r2) > -{ > - unsigned long i; > - > - rgn->region[r1].size += rgn->region[r2].size; > - for (i=r2; i < rgn->cnt-1; i++) { > - rgn->region[i].base = rgn->region[i+1].base; > - rgn->region[i].size = rgn->region[i+1].size; > - } > - rgn->cnt--; > -} > - > -/* This routine called with relocation disabled. */ > -void __init > -lmb_init(void) > -{ > - /* Create a dummy zero size LMB which will get coalesced away > later. > - * This simplifies the lmb_add() code below... > - */ > - lmb.memory.region[0].base = 0; > - lmb.memory.region[0].size = 0; > - lmb.memory.cnt = 1; > - > - /* Ditto. */ > - lmb.reserved.region[0].base = 0; > - lmb.reserved.region[0].size = 0; > - lmb.reserved.cnt = 1; > -} > - > -/* This routine called with relocation disabled. */ > -void __init > -lmb_analyze(void) > -{ > - int i; > - > - lmb.memory.size = 0; > - > - for (i = 0; i < lmb.memory.cnt; i++) > - lmb.memory.size += lmb.memory.region[i].size; > -} > - > -/* This routine called with relocation disabled. */ > -static long __init > -lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned > long size) > -{ > - unsigned long i, coalesced = 0; > - long adjacent; > - > - /* First try and coalesce this LMB with another. */ > - for (i=0; i < rgn->cnt; i++) { > - unsigned long rgnbase = rgn->region[i].base; > - unsigned long rgnsize = rgn->region[i].size; > - > - adjacent = > lmb_addrs_adjacent(base,size,rgnbase,rgnsize); > - if ( adjacent > 0 ) { > - rgn->region[i].base -= size; > - rgn->region[i].size += size; > - coalesced++; > - break; > - } > - else if ( adjacent < 0 ) { > - rgn->region[i].size += size; > - coalesced++; > - break; > - } > - } > - > - if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { > - lmb_coalesce_regions(rgn, i, i+1); > - coalesced++; > - } > - > - if ( coalesced ) { > - return coalesced; > - } else if ( rgn->cnt >= MAX_LMB_REGIONS ) { > - return -1; > - } > - > - /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ > - for (i=rgn->cnt-1; i >= 0; i--) { > - if (base < rgn->region[i].base) { > - rgn->region[i+1].base = rgn->region[i].base; > - rgn->region[i+1].size = rgn->region[i].size; > - } else { > - rgn->region[i+1].base = base; > - rgn->region[i+1].size = size; > - break; > - } > - } > - rgn->cnt++; > - > - return 0; > -} > - > -/* This routine called with relocation disabled. */ > -long __init > -lmb_add(unsigned long base, unsigned long size) > -{ > - struct lmb_region *_rgn = &(lmb.memory); > - > - /* On pSeries LPAR systems, the first LMB is our RMO region. */ > - if ( base == 0 ) > - lmb.rmo_size = size; > - > - return lmb_add_region(_rgn, base, size); > - > -} > - > -long __init > -lmb_reserve(unsigned long base, unsigned long size) > -{ > - struct lmb_region *_rgn = &(lmb.reserved); > - > - return lmb_add_region(_rgn, base, size); > -} > - > -long __init > -lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, > unsigned long size) > -{ > - unsigned long i; > - > - for (i=0; i < rgn->cnt; i++) { > - unsigned long rgnbase = rgn->region[i].base; > - unsigned long rgnsize = rgn->region[i].size; > - if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { > - break; > - } > - } > - > - return (i < rgn->cnt) ? 
i : -1; > -} > - > -unsigned long __init > -lmb_alloc(unsigned long size, unsigned long align) > -{ > - return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); > -} > - > -unsigned long __init > -lmb_alloc_base(unsigned long size, unsigned long align, unsigned long > max_addr) > -{ > - long i, j; > - unsigned long base = 0; > - > - for (i=lmb.memory.cnt-1; i >= 0; i--) { > - unsigned long lmbbase = lmb.memory.region[i].base; > - unsigned long lmbsize = lmb.memory.region[i].size; > - > - if ( max_addr == LMB_ALLOC_ANYWHERE ) > - base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); > - else if ( lmbbase < max_addr ) > - base = > _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, align); > - else > - continue; > - > - while ( (lmbbase <= base) && > - ((j = > lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { > - base = > _ALIGN_DOWN(lmb.reserved.region[j].base-size, align); > - } > - > - if ( (base != 0) && (lmbbase <= base) ) > - break; > - } > - > - if ( i < 0 ) > - return 0; > - > - lmb_add_region(&lmb.reserved, base, size); > - > - return base; > -} > - > -/* You must call lmb_analyze() before this. */ > -unsigned long __init > -lmb_phys_mem_size(void) > -{ > - return lmb.memory.size; > -} > - > -unsigned long __init > -lmb_end_of_DRAM(void) > -{ > - int idx = lmb.memory.cnt - 1; > - > - return (lmb.memory.region[idx].base + > lmb.memory.region[idx].size); > -} > - > -/* > - * Truncate the lmb list to memory_limit if it's set > - * You must call lmb_analyze() after this. > - */ > -void __init lmb_enforce_memory_limit(void) > -{ > - extern unsigned long memory_limit; > - unsigned long i, limit; > - > - if (! 
memory_limit) > - return; > - > - limit = memory_limit; > - for (i = 0; i < lmb.memory.cnt; i++) { > - if (limit > lmb.memory.region[i].size) { > - limit -= lmb.memory.region[i].size; > - continue; > - } > - > - lmb.memory.region[i].size = limit; > - lmb.memory.cnt = i + 1; > - break; > - } > -} > diff --git a/include/asm-powerpc/lmb.h b/include/asm-powerpc/lmb.h > new file mode 100644 > --- /dev/null > +++ b/include/asm-powerpc/lmb.h > @@ -0,0 +1,78 @@ > +#ifndef _ASM_POWERPC_LMB_H > +#define _ASM_POWERPC_LMB_H > + > +/* > + * Low-level Memory Block management. > + * > + * Copyright (C) 2001 Peter Bergner, IBM Corp. > + */ > + > +#include > +#include > + > +#define MAX_LMB_REGIONS 128 > + > +#define LMB_ALLOC_ANYWHERE 0 > + > +struct lmb_block { > + unsigned long base; > + unsigned long size; > +}; > + > +struct lmb_region { > + unsigned long cnt; > + unsigned long size; > + struct lmb_block region[MAX_LMB_REGIONS+1]; > +}; > + > +struct lmb { > + unsigned long debug; > + unsigned long rmo_size; > + struct lmb_region memory; > + struct lmb_region reserved; > +}; > + > +extern struct lmb lmb; > + > +extern void __init lmb_init(void); > +extern void __init lmb_analyze(void); > +extern long __init lmb_add(unsigned long, unsigned long); > +extern long __init lmb_reserve(unsigned long, unsigned long); > +extern unsigned long __init lmb_alloc(unsigned long, unsigned long); > +extern unsigned long __init lmb_alloc_base(unsigned long, unsigned > long, > + unsigned long); > +extern unsigned long __init lmb_phys_mem_size(void); > +extern unsigned long __init lmb_end_of_DRAM(void); > +extern unsigned long __init lmb_abs_to_phys(unsigned long); > +extern void __init lmb_enforce_memory_limit(void); > + > +extern void lmb_dump_all(void); > + > +extern unsigned long io_hole_start; > + > +static inline unsigned long > +lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) > +{ > + return type->region[region_nr].size; > +} > + > +static inline unsigned long > 
+lmb_size_pages(struct lmb_region *type, unsigned long region_nr) > +{ > + return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; > +} > + > +static inline unsigned long > +lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) > +{ > + return type->region[region_nr].base >> PAGE_SHIFT; > +} > + > +static inline unsigned long > +lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) > +{ > + return lmb_start_pfn(type, region_nr) + > + lmb_size_pages(type, region_nr); > +} > + > +#endif /* _ASM_POWERPC_LMB_H */ > diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h > --- a/include/asm-ppc/page.h > +++ b/include/asm-ppc/page.h > @@ -77,8 +77,12 @@ typedef unsigned long pgprot_t; > #endif > > > +/* align addr on a size boundary - adjust address up/down if > needed */ > +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) > +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) > + > /* align addr on a size boundary - adjust address up if needed -- > Cort > */ > -#define _ALIGN(addr,size) (((addr)+(size)-1)&(~((size)-1))) > +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) > > /* to align the pointer to the (next) page boundary */ > #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) > > > _______________________________________________ > Linuxppc64-dev mailing list > Linuxppc64-dev at ozlabs.org > https://ozlabs.org/mailman/listinfo/linuxppc64-dev > From paulus at samba.org Wed Oct 5 21:11:43 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 5 Oct 2005 21:11:43 +1000 Subject: [PATCH 1/7] ppc64: EEH typos, include files, macros, whitespace In-Reply-To: <20050930005141.GA6173@austin.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930005141.GA6173@austin.ibm.com> Message-ID: <17219.46319.501091.93202@cargo.ozlabs.ibm.com> Linas writes: > 01-eeh-minor-cleanup.patch Some trivial comments on a trivial patch... 
:) > - printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", > - pci_name(dev)); > + printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); This makes the line go over 80 columns, which seems unnecessary. > - * @token i/o token, should be address in the form 0xE.... > + * @token i/o token, should be address in the form 0xA.... I think the virtual addresses we get from ioremap these days start with 0xD00008... Regards, Paul. From paulus at samba.org Wed Oct 5 21:23:11 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 5 Oct 2005 21:23:11 +1000 Subject: [PATCH 6/7] ppc64: EEH Avoid racing reports of errors In-Reply-To: <20050930010038.GF6173@austin.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930010038.GF6173@austin.ibm.com> Message-ID: <17219.47007.44643.148022@cargo.ozlabs.ibm.com> Linas writes: > 06-eeh-report-race.patch > +/** Mark all devices that are peers of this device as failed. > + * Mark the device driver too, so that it can see the failure > + * immediately; this is critical, since some drivers poll > + * status registers in interrupts ... If a driver is polling, > + * and the slot is frozen, then the driver can deadlock in > + * an interrupt context, which is bad. > + */ > + > +static inline void __eeh_mark_slot (struct device_node *dn) > +{ > + while (dn) { > + PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED; > + > + if (dn->child) > + __eeh_mark_slot (dn->child); > + dn = dn->sibling; > + } > +} So this does the device node that we pass in, plus all the nodes that come after it in its parent's list of children. On that basis I expected you to pass in the first child of the EADS bridge, but I see: > + pe_dn = find_device_pe (dn); > + __eeh_mark_slot (pe_dn); My understanding is that pe_dn will end up pointing to the device node for the EADS bridge. Shouldn't you pass in pe_dn->child here, or alternatively rearrange __eeh_mark_slot to do the node you give it plus its children (recursively)? 
Two other comments about __eeh_mark_slot: (1) despite the comment, the function doesn't do anything to any pci_dev or pci_driver (not that it should be touching any pci_driver), and (2) a recursive function can't really be inline (unless gcc is smart enough to turn arbitrary recursive functions into iterative functions, which I doubt :). Regards, Paul. From paulus at samba.org Wed Oct 5 21:14:58 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 5 Oct 2005 21:14:58 +1000 Subject: [PATCH 3/7] ppc64: EEH Add event/internal state statistics In-Reply-To: <20050930005451.GC6173@austin.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930005451.GC6173@austin.ibm.com> Message-ID: <17219.46514.903283.21680@cargo.ozlabs.ibm.com> Linas writes: > 03-eeh-statistics.patch > + if (!dn) { > + __get_cpu_var(no_dn)++; We have to make sure we are not preemptible when we use __get_cpu_var, since it uses smp_processor_id(). It's not clear to me that we have ensured that in every case where we use __get_cpu_var. Are you sure that we hold a spinlock, or are at interrupt level, or have explicitly disabled preemption at every point where we use __get_cpu_var? Regards, Paul. From arnd at arndb.de Wed Oct 5 23:07:41 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Wed, 5 Oct 2005 15:07:41 +0200 Subject: spufs: User space thread library In-Reply-To: <4341D861.2050306@am.sony.com> References: <200509160840.31071.arnd@arndb.de> <200509300014.21756.arnd@arndb.de> <4341D861.2050306@am.sony.com> Message-ID: <200510051507.42252.arnd@arndb.de> On Dinsdag 04 Oktober 2005 03:18, Geoff Levand wrote: > > OK, I set up an autoconf based build system. I moved some things > around to make coding the makefiles easier. I also found a conflict > with the system's spe.h, so renamed that file. We have found the same problem independently and already changed to 'libspe.h'. The contents of the current 'mfc.h' will be merged into that as well. 
> I put the results here: > > http://tree.celinuxforum.org/downloads/libspe-0.9-autoconf-05.10.03.18.01.19.tar.bz2 > > Its just a first cut. ?I think more work is needed in setting up > the spu compiler. ?Also, more work is needed to make the scripts > in the tools directory use the host's cross toolchain. Actually, I thought of using only autoconf, but not automake and libtool. We don't really have a complicated setup, so I'd prefer to avoid the extra overhead from those tools. Do you see a strong reason to use them? Arnd <>< From geoffrey.levand at am.sony.com Thu Oct 6 01:03:44 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Wed, 05 Oct 2005 08:03:44 -0700 Subject: spufs: User space thread library In-Reply-To: <200510051507.42252.arnd@arndb.de> References: <200510051507.42252.arnd@arndb.de> Message-ID: <4343EB50.1080202@am.sony.com> Arnd Bergmann wrote: > On Dinsdag 04 Oktober 2005 03:18, Geoff Levand wrote: > >>OK, I set up an autoconf based build system. I moved some things >>around to make coding the makefiles easier. I also found a conflict >>with the system's spe.h, so renamed that file. > > > We have found the same problem independently and already changed to > 'libspe.h'. The contents of the current 'mfc.h' will be merged into > that as well. > > >>I put the results here: >> >> > > http://tree.celinuxforum.org/downloads/libspe-0.9-autoconf-05.10.03.18.0 > 1.19.tar.bz2 > >>Its just a first cut. I think more work is needed in setting up >>the spu compiler. Also, more work is needed to make the scripts >>in the tools directory use the host's cross toolchain. > > > Actually, I thought of using only autoconf, but not automake and > libtool. > We don't really have a complicated setup, so I'd prefer to avoid the > extra overhead from those tools. > > Do you see a strong reason to use them? > It makes maintenance of both the build system and the distribution packaging easier by providing higher level abstractions and standard behavior. 
There are weaknesses with automake and libtool, but those are well known. At any rate, its your package, just use what you find useful. What 'extra overhead' are you concerned about? -Geoff From vatsa at in.ibm.com Thu Oct 6 03:52:44 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Wed, 5 Oct 2005 23:22:44 +0530 Subject: [PATCH] Let any CPU update xtime/jiffies Message-ID: <20051005175244.GA3844@in.ibm.com> Currently, only boot CPU updates xtime/jiffies _and_ invokes ppc_adjtimex during decrementer interrupt. This makes it difficult for the boot cpu to skip ticks when idle. Following patch changes that behavior so that any CPU can now update xtime/jiffies. Notes: - This was tested against 2.6.14-rc1 on a 4way Power4 (p630) box alongwith the NO_IDLE_HZ patch. - This patch does not serialize ppc_adjtimex. Do we need a spinlock to serialize it? Signed-off-by : Srivatsa Vaddagiri --- linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c | 24 ++++++++++++------------ 1 files changed, 12 insertions(+), 12 deletions(-) diff -puN arch/ppc64/kernel/time.c~boot_cpu_fix arch/ppc64/kernel/time.c --- linux-2.6.14-rc1/arch/ppc64/kernel/time.c~boot_cpu_fix 2005-10-05 15:12:37.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c 2005-10-05 16:37:16.000000000 +0530 @@ -342,21 +342,21 @@ int timer_interrupt(struct pt_regs * reg */ if (!cpu_is_offline(cpu)) update_process_times(user_mode(regs)); - /* - * No need to check whether cpu is offline here; boot_cpuid - * should have been fixed up by now. 
- */ - if (cpu == boot_cpuid) { - write_seqlock(&xtime_lock); - tb_last_stamp = lpaca->next_jiffy_update_tb; - timer_recalc_offset(lpaca->next_jiffy_update_tb); + + write_seqlock(&xtime_lock); + cur_tb = get_tb(); + if (cur_tb - tb_last_stamp > tb_ticks_per_jiffy) { + tb_last_stamp += tb_ticks_per_jiffy; + timer_recalc_offset(cur_tb); do_timer(regs); - timer_sync_xtime(lpaca->next_jiffy_update_tb); + timer_sync_xtime(cur_tb); timer_check_rtc(); - write_sequnlock(&xtime_lock); - if ( adjusting_time && (time_adjust == 0) ) - ppc_adjtimex(); } + write_sequnlock(&xtime_lock); + /* Fixme: This needs to be serialized as well */ + if ( adjusting_time && (time_adjust == 0) ) + ppc_adjtimex(); + lpaca->next_jiffy_update_tb += tb_ticks_per_jiffy; } _ -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From vatsa at in.ibm.com Thu Oct 6 04:01:42 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Wed, 5 Oct 2005 23:31:42 +0530 Subject: [PATCH] NO_IDLE_HZ implementation for ppc64 - v2 Message-ID: <20051005180142.GB3844@in.ibm.com> Ben, Here's the revised version, taking into account some of the comments you had. Changes since last time: - native_idle also converted over - Fixed a bug in calculation of next_dec in stop_hz_timer - Removed call to start_hz_timer from head.S - Added a call to start_hz_timer in performance_monitor_exception This has been tested against 2.6.14-rc1 on a 4way Power4 box (p630) with some additional patch (the same test patch I had sent earlier which showed decrementer statistics in /proc). I will rebase this patch against latest -mm if you think this is in the right direction. 
Signed-off-by: Srivatsa Vaddagiri --- linux-2.6.14-rc1-root/arch/ppc64/Kconfig | 6 linux-2.6.14-rc1-root/arch/ppc64/kernel/idle.c | 6 linux-2.6.14-rc1-root/arch/ppc64/kernel/irq.c | 3 linux-2.6.14-rc1-root/arch/ppc64/kernel/pSeries_setup.c | 10 - linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c | 112 ++++++++++++++-- linux-2.6.14-rc1-root/arch/ppc64/kernel/traps.c | 2 linux-2.6.14-rc1-root/include/asm-ppc64/time.h | 8 + linux-2.6.14-rc1-root/kernel/sysctl.c | 20 +- 8 files changed, 141 insertions(+), 26 deletions(-) diff -puN arch/ppc64/kernel/time.c~ppc64 arch/ppc64/kernel/time.c --- linux-2.6.14-rc1/arch/ppc64/kernel/time.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/time.c 2005-10-05 16:34:51.000000000 +0530 @@ -315,23 +315,13 @@ static void iSeries_tb_recal(void) unsigned long tb_last_stamp __cacheline_aligned_in_smp; -/* - * timer_interrupt - gets called when the decrementer overflows, - * with interrupts disabled. - */ -int timer_interrupt(struct pt_regs * regs) +static void account_ticks(struct pt_regs *regs) { int next_dec; unsigned long cur_tb; struct paca_struct *lpaca = get_paca(); unsigned long cpu = smp_processor_id(); - irq_enter(); - - profile_tick(CPU_PROFILING, regs); - - lpaca->lppaca.int_dword.fields.decr_int = 0; - while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) { /* * We cannot disable the decrementer, so in the period @@ -364,6 +354,43 @@ int timer_interrupt(struct pt_regs * reg if (next_dec > lpaca->default_decr) next_dec = lpaca->default_decr; set_dec(next_dec); +} + +#ifdef CONFIG_NO_IDLE_HZ +/* Returns 1 if this CPU was set in the mask */ +static inline int clear_hzless_mask(void) +{ + unsigned long cpu = smp_processor_id(); + int rc = 0; + + if (unlikely(cpu_isset(cpu, nohz_cpu_mask))) { + cpu_clear(cpu, nohz_cpu_mask); + rc = 1; + } + + return rc; +} +#else +static inline int clear_hzless_mask(void) { return 0;} +#endif + +/* + * timer_interrupt - gets called when the decrementer 
overflows, + * with interrupts disabled. + */ +int timer_interrupt(struct pt_regs * regs) +{ + struct paca_struct *lpaca = get_paca(); + + irq_enter(); + + clear_hzless_mask(); + + profile_tick(CPU_PROFILING, regs); + + lpaca->lppaca.int_dword.fields.decr_int = 0; + + account_ticks(regs); #ifdef CONFIG_PPC_ISERIES if (hvlpevent_is_pending()) @@ -381,6 +408,69 @@ int timer_interrupt(struct pt_regs * reg return 1; } +#ifdef CONFIG_NO_IDLE_HZ + +#define MAX_DEC_COUNT (UINT_MAX) /* Decrementer is 32-bit */ +#define MIN_SKIP 2 +#define MAX_SKIP (MAX_DEC_COUNT/tb_ticks_per_jiffy) + +int sysctl_hz_timer = 1; + +/* Avoid the HZ timer (decrementer) interrupt on this CPU for "some" time. + * Has to be called with interrupts disabled. + * + * The HZ timer frequency is restored upon the occurence of an interrupt or + * exception on this CPU. Caller has to ensure that the CPU doesnt exit + * idle mode via other means. + */ +void stop_hz_timer(void) +{ + unsigned long cpu = smp_processor_id(), seq, delta; + int next_dec; + + if (sysctl_hz_timer != 0) + return; + + cpu_set(cpu, nohz_cpu_mask); + mb(); + if (rcu_pending(cpu) || local_softirq_pending()) { + cpu_clear(cpu, nohz_cpu_mask); + return; + } + + do { + seq = read_seqbegin(&xtime_lock); + + delta = next_timer_interrupt() - jiffies; + + if (delta < MIN_SKIP) { + cpu_clear(cpu, nohz_cpu_mask); + return; + } + + if (delta > MAX_SKIP) + delta = MAX_SKIP; + + next_dec = tb_last_stamp + delta * tb_ticks_per_jiffy; + + } while (read_seqretry(&xtime_lock, seq)); + + next_dec -= get_tb(); + set_dec(next_dec); + + return; +} + +/* Take into account skipped ticks and restore the HZ timer frequency */ +void start_hz_timer(struct pt_regs *regs) +{ + if (clear_hzless_mask()) + account_ticks(regs); +} + +#endif /* CONFIG_NO_IDLE_HZ */ + + /* * Scheduler clock - returns current time in nanosec units. 
* diff -puN arch/ppc64/kernel/irq.c~ppc64 arch/ppc64/kernel/irq.c --- linux-2.6.14-rc1/arch/ppc64/kernel/irq.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/irq.c 2005-10-05 16:33:06.000000000 +0530 @@ -55,6 +55,7 @@ #include #include #include +#include #ifdef CONFIG_SMP extern void iSeries_smp_message_recv( struct pt_regs * ); @@ -313,6 +314,8 @@ void do_IRQ(struct pt_regs *regs) irq_enter(); + start_hz_timer(regs); + #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 2KB free? */ { diff -puN include/asm-ppc64/time.h~ppc64 include/asm-ppc64/time.h --- linux-2.6.14-rc1/include/asm-ppc64/time.h~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/include/asm-ppc64/time.h 2005-10-05 16:33:06.000000000 +0530 @@ -102,6 +102,14 @@ static inline unsigned long tb_ticks_sin return get_tb() - tstamp; } +#ifdef CONFIG_NO_IDLE_HZ +extern void stop_hz_timer(void); +extern void start_hz_timer(struct pt_regs *); +#else +static inline void stop_hz_timer(void) { } +static inline void start_hz_timer(struct pt_regs *regs) { } +#endif + #define mulhwu(x,y) \ ({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;}) #define mulhdu(x,y) \ diff -puN arch/ppc64/Kconfig~ppc64 arch/ppc64/Kconfig --- linux-2.6.14-rc1/arch/ppc64/Kconfig~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/Kconfig 2005-10-05 16:33:06.000000000 +0530 @@ -146,6 +146,12 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. +config NO_IDLE_HZ + depends on EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_MAPLE) + bool "No HZ timer ticks in idle" + help + Switches the HZ timer interrupts off when a CPU is idle. 
+ config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on PPC_MULTIPLATFORM && EXPERIMENTAL diff -puN kernel/sysctl.c~ppc64 kernel/sysctl.c --- linux-2.6.14-rc1/kernel/sysctl.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/kernel/sysctl.c 2005-10-05 16:33:06.000000000 +0530 @@ -544,6 +544,16 @@ static ctl_table kern_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, +#ifdef CONFIG_NO_IDLE_HZ + { + .ctl_name = KERN_HZ_TIMER, + .procname = "hz_timer", + .data = &sysctl_hz_timer, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_ARCH_S390 #ifdef CONFIG_MATHEMU { @@ -555,16 +565,6 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_NO_IDLE_HZ - { - .ctl_name = KERN_HZ_TIMER, - .procname = "hz_timer", - .data = &sysctl_hz_timer, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif { .ctl_name = KERN_S390_USER_DEBUG_LOGGING, .procname = "userprocess_debug", diff -puN arch/ppc64/kernel/pSeries_setup.c~ppc64 arch/ppc64/kernel/pSeries_setup.c --- linux-2.6.14-rc1/arch/ppc64/kernel/pSeries_setup.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/pSeries_setup.c 2005-10-05 16:33:06.000000000 +0530 @@ -475,9 +475,10 @@ static inline void dedicated_idle_sleep( * a prod occurs. Returning from the cede enables external * interrupts. */ - if (!need_resched()) + if (!need_resched()) { + stop_hz_timer(); cede_processor(); - else + } else local_irq_enable(); } else { /* @@ -570,9 +571,10 @@ static int pseries_shared_idle(void) * Check need_resched() again with interrupts disabled * to avoid a race. 
*/ - if (!need_resched()) + if (!need_resched()) { + stop_hz_timer(); cede_processor(); - else + } else local_irq_enable(); HMT_medium(); diff -puN arch/ppc64/kernel/traps.c~ppc64 arch/ppc64/kernel/traps.c --- linux-2.6.14-rc1/arch/ppc64/kernel/traps.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/traps.c 2005-10-05 16:33:06.000000000 +0530 @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUGGER int (*__debugger)(struct pt_regs *regs); @@ -470,6 +471,7 @@ extern perf_irq_t perf_irq; void performance_monitor_exception(struct pt_regs *regs) { + start_hz_timer(regs); perf_irq(regs); } diff -puN arch/ppc64/kernel/idle.c~ppc64 arch/ppc64/kernel/idle.c --- linux-2.6.14-rc1/arch/ppc64/kernel/idle.c~ppc64 2005-10-05 16:33:06.000000000 +0530 +++ linux-2.6.14-rc1-root/arch/ppc64/kernel/idle.c 2005-10-05 16:33:06.000000000 +0530 @@ -73,8 +73,12 @@ int native_idle(void) while (1) { ppc64_runlatch_off(); - if (!need_resched()) + local_irq_disable(); + if (!need_resched()) { + stop_hz_timer(); + local_irq_enable(); power4_idle(); + } if (need_resched()) { ppc64_runlatch_on(); _ -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From paubert at iram.es Thu Oct 6 04:20:31 2005 From: paubert at iram.es (Gabriel Paubert) Date: Wed, 5 Oct 2005 20:20:31 +0200 Subject: [PATCH] powerpc: improved byte swapping functions In-Reply-To: <20050927211534.GA32173@iram.es> References: <20050927211534.GA32173@iram.es> Message-ID: <20051005182031.GA15359@iram.es> From: Gabriel Paubert The previous versions of ___arch__swab16 and ___arch__swab32 were not optimal. In most cases the code can be made shorter and faster with this patch. Signed-off-by: Gabriel Paubert --- Additional notes: 1) for ___arch__swab16, the trick is to let the compiler generate a single rlwinm instruction for the final right shift and cast. 
2) For ___arch_swab32, the rotated value passed as a parameter already has 2 bytes at the right place, so only 2 rlwimi instructions are necessary to complete the byte swap. 3) edit if you don't like the formatting of the result. 4) I've been reading the thread about how to format patches and I hope that I got it right. But I believe that the diffstat output is overkill for such a small patch. Regards, Gabriel diff --git a/include/asm-powerpc/byteorder.h b/include/asm-powerpc/byteorder.h --- a/include/asm-powerpc/byteorder.h +++ b/include/asm-powerpc/byteorder.h @@ -42,23 +42,22 @@ static __inline__ void st_le32(volatile static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 value) { - __u16 result; + __u32 tmp; - __asm__("rlwimi %0,%1,8,16,23" - : "=r" (result) - : "r" (value), "0" (value >> 8)); - return result; + __asm__("rlwimi %0,%0,16,8,15" + : "=r" (tmp) : "0" (value)); + return (__u16)(tmp>>8); } static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 value) { __u32 result; - __asm__("rlwimi %0,%1,24,16,23\n\t" - "rlwimi %0,%1,8,8,15\n\t" - "rlwimi %0,%1,24,0,7" + __asm__( +" rlwimi %0,%1,24,16,23\n" +" rlwimi %0,%1,24,0,7\n" : "=r" (result) - : "r" (value), "0" (value >> 24)); + : "r" (value), "0" ((value >> 24)|(value<<8))); return result; } From geoffrey.levand at am.sony.com Thu Oct 6 07:06:15 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Wed, 05 Oct 2005 14:06:15 -0700 Subject: kgdb for ppc64 now available In-Reply-To: <20050930063234.GA4763@krispykreme> References: <433C22EF.8050007@am.sony.com> <20050930063234.GA4763@krispykreme> Message-ID: <43444047.2030008@am.sony.com> Anton Blanchard wrote: > Hi, > >>For those interested, kgdb now supports ppc64 SMP. On powermac only >>an Ethernet connection is supported currently, but generic 8250 UART >>PCI cards will be supported soon. > > > Nice! 
A few comments: > > + { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ > + { 0x0400, 0x0a /* SIGBUS */ }, /* instruction access */ > > 0x380 data segment miss is not in the list of traps. > OK, I put it in as such: { 0x0380, 0x0b /* SIGSEGV */ }, /* data SLB access */ I guess this is better than the default SIGHUP, unless you think there's a better one to map to. > + return kgdb_handle_exception(0, computeSignal(regs->trap), 0, regs); > > We should use TRAP(regs) instead of regs->trap since the bottom bits may > not be zero for an exception that only saves a partial register set. Thanks. > @@ -2,7 +2,7 @@ > # Makefile for the linux ppc64 kernel. > # > > -EXTRA_CFLAGS += -mno-minimal-toc > +#EXTRA_CFLAGS += -mno-minimal-toc > extra-y := head.o vmlinux.lds > > obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ > > Is there a problem with compiling arch/ppc64/kernel -mno-minimal-toc? There doesn't seem to be any reason for this. I think it just slipped in when someone was hunting for a bug... > I notice x86-64 is using the new die hooks, Im about to convert xmon > over to it and once that is done it will make sense to move kgdb and kdb > across. Please let me know when you've done this so I can see what's needed. > In regs_to_gdb_regs it would be nice to send out the dar, dsisr and > perhaps softe (used on iseries and will be useful once we go to soft > interrupt disable on pseries). Even though we already get the trap > number delived to gdb via the signal number, it would be useful to dump > it in the regs since the bottom bits are important (to work out if > only a partial register set has been saved). That leaves orig_gpr3 and > result as the only pt_regs not dumped. gdb as of version 6.3 doesn't support these regs. There would need to be changes there to support these. If there is a real demand, I don't think it would be much effort to do. 
> > + /* vr registers not used by kernel, leave zero */ > + ptr += 64; > > Now that we use altivec in the kernel a bit, it might make sense to dump > these registers. Not sure yet. > > Index: linux-2.6.13/arch/ppc64/mm/fault.c > @@ -306,6 +307,13 @@ void bad_page_fault(struct pt_regs *regs > regs->nip = entry->fixup; > return; > } > +#ifdef CONFIG_KGDB > + if (atomic_read(&debugger_active) && kgdb_may_fault) { > + /* Restore our previous state. */ > + kgdb_fault_longjmp(kgdb_fault_jmp_regs); > + /* Not reached. */ > + } > +#endif > > We might need a new notify_die hook to cover this case. > > We could use hardware data breakpoint support in kgdb. > > Eventually it would be nice to be able to select debuggers at > boot/runtime. I have no current plans for these last items. Thanks for the comments, all very helpful. -Geoff From arnd at arndb.de Thu Oct 6 08:23:23 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Thu, 6 Oct 2005 00:23:23 +0200 Subject: spufs: User space thread library In-Reply-To: <4343EB50.1080202@am.sony.com> References: <200510051507.42252.arnd@arndb.de> <4343EB50.1080202@am.sony.com> Message-ID: <200510060023.23780.arnd@arndb.de> On Middeweken 05 Oktober 2005 17:03, Geoff Levand wrote: > > It makes maintenance of both the build system and the distribution > packaging easier by providing higher level abstractions and > standard behavior. ?There are weaknesses with automake and > libtool, but those are well known. ?At any rate, its your > package, just use what you find useful. > > What 'extra overhead' are you concerned about? > The overhead I see is in debugging the stuff when it doesn't work. Autoconf can be hard to debug already, but I think that's worth it because most users know how to run ./configure rather than having to look at the Makefiles. I definitely like your configure.ac file and the reordering of the files. 
For the other autotools, I think they make work harder instead of easier for small projects, so I'd rather do simple Makefile.in files in the places that need configuration. BTW: Are you ok with licensing your configure.ac under LGPL instead of GPL or did you copy parts from some other GPL package? IMHO, the files in the package all need have the same license for simplicity. Arnd <>< From geoffrey.levand at am.sony.com Thu Oct 6 08:48:22 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Wed, 05 Oct 2005 15:48:22 -0700 Subject: spufs: User space thread library In-Reply-To: <200510060023.23780.arnd@arndb.de> References: <200510051507.42252.arnd@arndb.de> <4343EB50.1080202@am.sony.com> <200510060023.23780.arnd@arndb.de> Message-ID: <43445836.2040205@am.sony.com> Arnd Bergmann wrote: > On Middeweken 05 Oktober 2005 17:03, Geoff Levand wrote: > >>It makes maintenance of both the build system and the distribution >>packaging easier by providing higher level abstractions and >>standard behavior. There are weaknesses with automake and >>libtool, but those are well known. At any rate, its your >>package, just use what you find useful. >> >>What 'extra overhead' are you concerned about? >> > > The overhead I see is in debugging the stuff when it doesn't work. > Autoconf can be hard to debug already, but I think that's worth it > because most users know how to run ./configure rather than having > to look at the Makefiles. I definitely like your configure.ac > file and the reordering of the files. > > For the other autotools, I think they make work harder instead of > easier for small projects, so I'd rather do simple Makefile.in > files in the places that need configuration. Sounds reasonable. > BTW: Are you ok with licensing your configure.ac under LGPL > instead of GPL or did you copy parts from some other GPL package? > IMHO, the files in the package all need have the same license > for simplicity. 
> LGPL is OK, I codded it from scratch, so feel free to change it to this license announcement. You can also then remove the file COPYING. ## configure.ac -- Process this file with autoconf to produce configure # # Copyright 2005 Sony Corp. # # This file is free software; you can redistribute it and/or # modify it under the terms of the GNU Library General Public # License as published by the Free Software Foundation; # version 2 of the License. # # This file is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Library General Public License for more details. # # You should have received a copy of the GNU Library General Public # License along with this file; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -Geoff From geoffrey.levand at am.sony.com Thu Oct 6 09:25:33 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Wed, 05 Oct 2005 16:25:33 -0700 Subject: install BOOTIMAGE Message-ID: <434460ED.9070605@am.sony.com> With the way the current arch/ppc64/boot/Makefile (2.6.13) calls install.sh, it seems 'make install' will always install vmlinux. It seems the proper behavior is for install.sh to install BOOTIMAGE, which for all but iSeries is a kind of zImage. Does this change below cause problems for anyone? 
-Geoff # Arguments: # $1 - kernel version # $2 - kernel image file # $3 - kernel map file # $4 - default install path (blank if root directory) # $5 - kernel boot file, the zImage Index: linux-2.6.13/arch/ppc64/boot/install.sh =================================================================== --- linux-2.6.13.orig/arch/ppc64/boot/install.sh 2005-08-28 16:41:01.000000000 -0700 +++ linux-2.6.13/arch/ppc64/boot/install.sh 2005-10-05 15:26:52.000000000 -0700 @@ -38,5 +38,5 @@ mv $4/System.map $4/System.old fi -cat $2 > $4/$image_name +cat $5 > $4/$image_name cp $3 $4/System.map From olh at suse.de Thu Oct 6 16:38:23 2005 From: olh at suse.de (Olaf Hering) Date: Thu, 6 Oct 2005 08:38:23 +0200 Subject: install BOOTIMAGE In-Reply-To: <434460ED.9070605@am.sony.com> References: <434460ED.9070605@am.sony.com> Message-ID: <20051006063823.GA13103@suse.de> On Wed, Oct 05, Geoff Levand wrote: > With the way the current arch/ppc64/boot/Makefile (2.6.13) calls > install.sh, it seems 'make install' will always install vmlinux. > > It seems the proper behavior is for install.sh to install > BOOTIMAGE, which for all but iSeries is a kind of zImage. Does > this change below cause problems for anyone? I don't think booting the zimage from the local hard disk should be preferred over the plain vmlinux. -- short story of a lazy sysadmin: alias appserv=wotan From vatsa at in.ibm.com Thu Oct 6 20:30:59 2005 From: vatsa at in.ibm.com (Srivatsa Vaddagiri) Date: Thu, 6 Oct 2005 16:00:59 +0530 Subject: [PATCH] NO_IDLE_HZ implementation for PPC64 In-Reply-To: <1128378206.8267.102.camel@gaston> References: <20051002174630.GA6786@in.ibm.com> <1128326892.8267.89.camel@gaston> <20051003161851.GA4244@in.ibm.com> <1128378206.8267.102.camel@gaston> Message-ID: <20051006103058.GA12620@in.ibm.com> On Tue, Oct 04, 2005 at 08:23:26AM +1000, Benjamin Herrenschmidt wrote: > Why not just call the decrementer interrupt ? It should replay > already... 
The only concern about that is wrt update_process_times - it may account all skipped ticks towards irq context rather than to idle context. That is possible if an external irq (do_IRQ) wakes the CPU up (leading to irq_enter being called twice ..) This could lead to, say, wrong idle statistics in vmstat? -- Thanks and Regards, Srivatsa Vaddagiri, Linux Technology Center, IBM Software Labs, Bangalore, INDIA - 560017 From olh at suse.de Thu Oct 6 21:47:07 2005 From: olh at suse.de (Olaf Hering) Date: Thu, 6 Oct 2005 13:47:07 +0200 Subject: R_PPC64_TOC16 not handled in apply_relocate_add Message-ID: <20051006114707.GA23913@suse.de> The current toolchain (in opensuse) creates unhandled relocations for rtas_flash and scanlog: rtas_flash: Unknown ADD relocation: 47 I think it is type R_PPC64_TOC16. gcc version 4.0.2 20050901 (prerelease) (SUSE Linux) GNU ld version 2.16.91.0.2 20050720 (SuSE Linux) Any ideas how to handle them? -- short story of a lazy sysadmin: alias appserv=wotan From anton at samba.org Fri Oct 7 00:16:14 2005 From: anton at samba.org (Anton Blanchard) Date: Fri, 7 Oct 2005 00:16:14 +1000 Subject: [PATCH] ppc64: Fix PCI hotplug Message-ID: <20051006141614.GC5210@krispykreme> pSeries_irq_bus_setup is marked __devinit but references s7a_workaround which is marked __initdata. Depending on who got the memory for s7a_workaround (and if the value was now positive), it was possible for PCI hotplugged devices to have 3 subtracted from their interrupt number. 
This would happen randomly and caused me much confusion :) Signed-off-by: Anton Blanchard --- Index: gr_work/arch/ppc64/kernel/pSeries_pci.c =================================================================== --- gr_work.orig/arch/ppc64/kernel/pSeries_pci.c 2005-10-06 07:09:17.344243988 -0500 +++ gr_work/arch/ppc64/kernel/pSeries_pci.c 2005-10-06 07:09:40.018114696 -0500 @@ -32,7 +32,7 @@ #include "pci.h" -static int __initdata s7a_workaround = -1; +static int __devinitdata s7a_workaround = -1; #if 0 void pcibios_name_device(struct pci_dev *dev) @@ -60,7 +60,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif -static void __init check_s7a(void) +static void __devinit check_s7a(void) { struct device_node *root; char *model; From aw-confirm at eBay.com Fri Oct 7 06:57:37 2005 From: aw-confirm at eBay.com (aw-confirm at eBay.com) Date: Thu, 6 Oct 2005 16:57:37 -0400 (EDT) Subject: TKO Notice: ***Urgent Safeharbor Department Notice*** Message-ID: <20051006205737.2D63E86FEDD@athens.9degrees.com> An HTML attachment was scrubbed... URL: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051006/246b9c75/attachment.htm From linas at austin.ibm.com Fri Oct 7 09:20:32 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:20:32 -0500 Subject: [PATCH 0/22] ppc64: Full sequence of PCI Error recovery patches Message-ID: <20051006232032.GA29826@austin.ibm.com> [PATCH 0/22] ppc64: Full sequence of PCI Error recovery patches The following sequence of patches implements the full set of PCI error recovery functions for ppc64. There are a large number of patches because I've attempted to keep the scope of each patch reasonably small, and thus easy to review. (The system should remain usable and functional after applying each patch). A detailed explanation of what this is and how it works is in patch 6/22; if you don't already know what this is about, that would be the place to start reading.
These patches result in systems that have survived multi-hour runs with thousands of PCI errors injected. Although this is good, I still can't warrant that this is bug-free, as there are still hardware combos that haven't been tested. But for now, it seems to work. Signed-off-by: Linas Vepstas From linas at austin.ibm.com Fri Oct 7 09:23:16 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:23:16 -0500 Subject: [PATCH 1/22] ppc64: Dynamic LPAR bugfix In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006232316.GB29826@austin.ibm.com> 01-hotplug-bugfix.patch In the current 2.6.14-rc2-git6 kernel, performing a Dynamic LPAR Add of a hotplug slot will crash the system, with the following (abbreviated) stack trace: cpu 0x3: Vector: 700 (Program Check) at [c000000053dff7f0] pc: c0000000004f5974: .__alloc_bootmem+0x0/0xb0 lr: c0000000000258a0: .update_dn_pci_info+0x108/0x118 c0000000000257c8 .update_dn_pci_info+0x30/0x118 (unreliable) c0000000000258fc .pci_dn_reconfig_notifier+0x4c/0x64 c000000000060754 .notifier_call_chain+0x68/0x9c The root cause was that __init __alloc_bootmem() was called long after boot had finished, resulting in a crash because this routine is undefined after boot time. The patch below fixes this crash, and adds some docs to clarify the code. p.s. congrats to all for getting slashdotted on this yesterday!
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-03 13:45:58.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-04 15:37:49.761245845 -0500 @@ -44,7 +44,7 @@ u32 *regs; struct pci_dn *pdn; - if (phb->is_dynamic) + if (mem_init_done) pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); else pdn = alloc_bootmem(sizeof(*pdn)); @@ -121,6 +121,14 @@ return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,9 +209,14 @@ .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. + * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. 
*/ void __init pci_devs_phb_init(void) { From linas at austin.ibm.com Fri Oct 7 09:25:04 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:25:04 -0500 Subject: [PATCH 2/22] ppc64: Enable detection bugfix In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006232504.GC29826@austin.ibm.com> 02-EEH-enable-bugfix.patch Bugfix: With the current linux-2.6.14-rc2-git6, EEH errors are ignored because their detection requires an unused, uninitialized flag to be set. This patch removes the unused flag. Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-04 15:32:17.844809875 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-04 15:54:21.769066567 -0500 @@ -631,11 +631,12 @@ pdn = PCI_DN(dn); /* Access to IO BARs might get this far and still not want checking.
*/ - if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) || + if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || pdn->eeh_mode & EEH_MODE_NOCHECK) { __get_cpu_var(ignored_check)++; #ifdef DEBUG - printk ("EEH:ignored check for %s %s\n", pci_name (dev), dn->full_name); + printk ("EEH:ignored check (%x) for %s %s\n", + pdn->eeh_mode, pci_name (dev), dn->full_name); #endif return 0; } Index: linux-2.6.14-rc2-git6/include/asm-ppc64/pci-bridge.h =================================================================== --- linux-2.6.14-rc2-git6.orig/include/asm-ppc64/pci-bridge.h 2005-10-04 15:32:17.845809735 -0500 +++ linux-2.6.14-rc2-git6/include/asm-ppc64/pci-bridge.h 2005-10-04 15:54:21.769066567 -0500 @@ -61,7 +61,6 @@ int devfn; /* for pci devices */ int eeh_mode; /* See eeh.h for possible EEH_MODEs */ int eeh_config_addr; - int eeh_capable; /* from firmware */ int eeh_check_count; /* # times driver ignored error */ int eeh_freeze_count; /* # times this device froze up. */ int eeh_is_bridge; /* device is pci-to-pci bridge */ From linas at austin.ibm.com Fri Oct 7 09:26:28 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:26:28 -0500 Subject: [PATCH 3/22] ppc64: EEH Recovery dispatcher thread In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006232628.GD29826@austin.ibm.com> 03-eeh-event-dispatcher.patch ppc64: EEH Recovery dispatcher thread This patch adds a mechanism to create recovery threads when an EEH event is received. Since an EEH freeze state may be detected within an interrupt context, we need to get out of the interrupt context before starting recovery. This dispatcher does this in two steps: first, it uses a workqueue to get out, and then launches a kernel thread, so that the recovery routine can sleep for extended periods without upsetting the keventd. A kernel thread is created with each EEH event, rather than having one long-running daemon started at boot time.
This is because it is anticipated that EEH events will be very rare (very very rare, ideally) and so its pointless to cluter the process tables with a daemon that will almost never run. Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/Makefile =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/Makefile 2005-10-04 15:32:13.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/Makefile 2005-10-06 17:50:25.365604176 -0500 @@ -37,7 +37,7 @@ bpa_iic.o spider-pic.o obj-$(CONFIG_KEXEC) += machine_kexec.o -obj-$(CONFIG_EEH) += eeh.o +obj-$(CONFIG_EEH) += eeh.o eeh_event.o obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_SMP) += smp.o Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-04 15:54:21.000000000 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:50:31.847694674 -0500 @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -33,6 +32,7 @@ #include #include #include "pci.h" +#include "eeh_event.h" #undef DEBUG @@ -70,14 +70,6 @@ * and sent out for processing. */ -/* EEH event workqueue setup. */ -static DEFINE_SPINLOCK(eeh_eventlist_lock); -LIST_HEAD(eeh_eventlist); -static void eeh_event_handler(void *); -DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL); - -static struct notifier_block *eeh_notifier_chain; - /* If a device driver keeps reading an MMIO register in an interrupt * handler after a slot isolation event has occurred, we assume it * is broken and panic. This sets the threshold for how many read @@ -421,24 +413,6 @@ } /** - * eeh_register_notifier - Register to find out about EEH events. 
- * @nb: notifier block to callback on events - */ -int eeh_register_notifier(struct notifier_block *nb) -{ - return notifier_chain_register(&eeh_notifier_chain, nb); -} - -/** - * eeh_unregister_notifier - Unregister to an EEH event notifier. - * @nb: notifier block to callback on events - */ -int eeh_unregister_notifier(struct notifier_block *nb) -{ - return notifier_chain_unregister(&eeh_notifier_chain, nb); -} - -/** * read_slot_reset_state - Read the reset state of a device node's slot * @dn: device node to read * @rets: array to return results in @@ -461,73 +435,6 @@ } /** - * eeh_panic - call panic() for an eeh event that cannot be handled. - * The philosophy of this routine is that it is better to panic and - * halt the OS than it is to risk possible data corruption by - * oblivious device drivers that don't know better. - * - * @dev pci device that had an eeh event - * @reset_state current reset state of the device slot - */ -static void eeh_panic(struct pci_dev *dev, int reset_state) -{ - /* - * XXX We should create a separate sysctl for this. - * - * Since the panic_on_oops sysctl is used to halt the system - * in light of potential corruption, we can use it here. - */ - if (panic_on_oops) { - struct device_node *dn = pci_device_to_OF_node(dev); - eeh_slot_error_detail (PCI_DN(dn), 2 /* Permanent Error */); - panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, - pci_name(dev)); - } - else { - __get_cpu_var(ignored_failures)++; - printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", - reset_state, pci_name(dev)); - } -} - -/** - * eeh_event_handler - dispatch EEH events. The detection of a frozen - * slot can occur inside an interrupt, where it can be hard to do - * anything about it. The goal of this routine is to pull these - * detection events out of the context of the interrupt handler, and - * re-dispatch them for processing at a later time in a normal context. 
- * - * @dummy - unused - */ -static void eeh_event_handler(void *dummy) -{ - unsigned long flags; - struct eeh_event *event; - - while (1) { - spin_lock_irqsave(&eeh_eventlist_lock, flags); - event = NULL; - if (!list_empty(&eeh_eventlist)) { - event = list_entry(eeh_eventlist.next, struct eeh_event, list); - list_del(&event->list); - } - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - if (event == NULL) - break; - - printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device " - "%s\n", event->reset_state, - pci_name(event->dev)); - - notifier_call_chain (&eeh_notifier_chain, - EEH_NOTIFY_FREEZE, event); - - pci_dev_put(event->dev); - kfree(event); - } -} - -/** * eeh_token_to_phys - convert EEH address token to phys address * @token i/o token, should be address in the form 0xA.... */ @@ -613,8 +520,6 @@ int ret; int rets[3]; unsigned long flags; - int reset_state; - struct eeh_event *event; struct pci_dn *pdn; struct device_node *pe_dn; int rc = 0; @@ -722,33 +627,12 @@ __eeh_mark_slot (pe_dn); spin_unlock_irqrestore(&confirm_error_lock, flags); - reset_state = rets[0]; - - eeh_slot_error_detail (pdn, 1 /* Temporary Error */); - - printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n", - rets[0], dn->name, dn->full_name); - event = kmalloc(sizeof(*event), GFP_ATOMIC); - if (event == NULL) { - eeh_panic(dev, reset_state); - return 1; - } - - event->dev = dev; - event->dn = dn; - event->reset_state = reset_state; - - /* We may or may not be called in an interrupt context */ - spin_lock_irqsave(&eeh_eventlist_lock, flags); - list_add(&event->list, &eeh_eventlist); - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - + eeh_send_failure_event (dn, dev, rets[0], rets[2]); + /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure * out what happened. So print that out. 
*/ if (rets[0] != 5) dump_stack(); - schedule_work(&eeh_event_wq); - return 1; dn_unlock: @@ -793,6 +677,14 @@ EXPORT_SYMBOL(eeh_check_failure); +/* ------------------------------------------------------------- */ +/* The code below deals with enabling EEH for devices during the + * early boot sequence. EEH must be enabled before any PCI probing + * can be done. + */ + +#define EEH_ENABLE 1 + struct eeh_early_enable_info { unsigned int buid_hi; unsigned int buid_lo; @@ -850,8 +742,9 @@ /* First register entry is addr (00BBSS00) */ /* Try to enable eeh */ ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - regs[0], info->buid_hi, info->buid_lo, - EEH_ENABLE); + regs[0], info->buid_hi, info->buid_lo, + EEH_ENABLE); + if (ret == 0) { eeh_subsystem_enabled = 1; pdn->eeh_mode |= EEH_MODE_SUPPORTED; Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_event.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_event.h 2005-10-06 17:50:24.089783186 -0500 @@ -0,0 +1,52 @@ +/* + * eeh_event.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2005 Linas Vepstas + */ + +#ifndef ASM_PPC64_EEH_EVENT_H +#define ASM_PPC64_EEH_EVENT_H + +/** EEH event -- structure holding pci controller data that describes + * a change in the isolation status of a PCI slot. A pointer + * to this struct is passed as the data pointer in a notify callback. + */ +struct eeh_event { + struct list_head list; + struct device_node *dn; /* struct device node */ + struct pci_dev *dev; /* affected device */ + int state; + int time_unavail; /* milliseconds until device might be available */ +}; + +/** + * eeh_send_failure_event - generate a PCI error event + * @dev pci device + * + * This routine builds a PCI error event which will be delivered + * to all listeners on the peh_notifier_chain. + * + * This routine can be called within an interrupt context; + * the actual event will be delivered in a normal context + * (from a workqueue). + */ +int eeh_send_failure_event (struct device_node *dn, + struct pci_dev *dev, + int reset_state, + int time_unavail); + +#endif /* ASM_PPC64_EEH_EVENT_H */ Index: linux-2.6.14-rc2-git6/include/asm-ppc64/eeh.h =================================================================== --- linux-2.6.14-rc2-git6.orig/include/asm-ppc64/eeh.h 2005-10-04 15:32:13.000000000 -0500 +++ linux-2.6.14-rc2-git6/include/asm-ppc64/eeh.h 2005-10-06 17:51:48.669915765 -0500 @@ -1,4 +1,4 @@ -/* +/* * eeh.h * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation. * @@ -6,12 +6,12 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
- * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA @@ -27,8 +27,6 @@ struct pci_dev; struct device_node; -struct device_node; -struct notifier_block; #ifdef CONFIG_EEH @@ -37,6 +35,10 @@ #define EEH_MODE_NOCHECK (1<<1) #define EEH_MODE_ISOLATED (1<<2) +/* Max number of EEH freezes allowed before we consider the device + * to be permanently disabled. */ +#define EEH_MAX_ALLOWED_FREEZES 5 + void __init eeh_init(void); unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val); @@ -59,36 +61,14 @@ * eeh_remove_device - undo EEH setup for the indicated pci device * @dev: pci device to be removed * - * This routine should be when a device is removed from a running - * system (e.g. by hotplug or dlpar). + * This routine should be called when a device is removed from + * a running system (e.g. by hotplug or dlpar). It unregisters + * the PCI device from the EEH subsystem. I/O errors affecting + * this device will no longer be detected after this call; thus, + * i/o errors affecting this slot may leave this device unusable. */ void eeh_remove_device(struct pci_dev *); -#define EEH_DISABLE 0 -#define EEH_ENABLE 1 -#define EEH_RELEASE_LOADSTORE 2 -#define EEH_RELEASE_DMA 3 - -/** - * Notifier event flags. - */ -#define EEH_NOTIFY_FREEZE 1 - -/** EEH event -- structure holding pci slot data that describes - * a change in the isolation status of a PCI slot. A pointer - * to this struct is passed as the data pointer in a notify callback. 
- */ -struct eeh_event { - struct list_head list; - struct pci_dev *dev; - struct device_node *dn; - int reset_state; -}; - -/** Register to find out about EEH events. */ -int eeh_register_notifier(struct notifier_block *nb); -int eeh_unregister_notifier(struct notifier_block *nb); - /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. * @@ -129,7 +109,7 @@ #define EEH_IO_ERROR_VALUE(size) (-1UL) #endif /* CONFIG_EEH */ -/* +/* * MMIO read/write operations with EEH support. */ static inline u8 eeh_readb(const volatile void __iomem *addr) Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_event.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_event.c 2005-10-06 17:50:24.089783186 -0500 @@ -0,0 +1,155 @@ +/* + * eeh_event.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2005 Linas Vepstas + */ + +#include +#include +#include "eeh_event.h" + +/** Overview: + * EEH error states may be detected within exception handlers; + * however, the recovery processing needs to occur asynchronously + * in a normal kernel context and not an interrupt context. 
+ * This pair of routines creates an event and queues it onto a + * work-queue, where a worker thread can drive recovery. + */ + +/* EEH event workqueue setup. */ +static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED; +LIST_HEAD(eeh_eventlist); +static void eeh_thread_launcher(void *); +DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL); + +/** + * eeh_panic - call panic() for an eeh event that cannot be handled. + * The philosophy of this routine is that it is better to panic and + * halt the OS than it is to risk possible data corruption by + * oblivious device drivers that don't know better. + * + * @dev pci device that had an eeh event + * @reset_state current reset state of the device slot + */ +static void eeh_panic(struct pci_dev *dev, int reset_state) +{ + /* + * Since the panic_on_oops sysctl is used to halt the system + * in light of potential corruption, we can use it here. + */ + if (panic_on_oops) { + panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, + pci_name(dev)); + } + else { + printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", + reset_state, pci_name(dev)); + } +} + +/** + * eeh_event_handler - dispatch EEH events. The detection of a frozen + * slot can occur inside an interrupt, where it can be hard to do + * anything about it. The goal of this routine is to pull these + * detection events out of the context of the interrupt handler, and + * re-dispatch them for processing at a later time in a normal context. 
+ * + * @dummy - unused + */ +static int eeh_event_handler(void * dummy) +{ + unsigned long flags; + struct eeh_event *event; + + daemonize ("eehd"); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock_irqsave(&eeh_eventlist_lock, flags); + event = NULL; + if (!list_empty(&eeh_eventlist)) { + event = list_entry(eeh_eventlist.next, struct eeh_event, list); + list_del(&event->list); + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + if (event == NULL) + break; + + printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", + pci_name(event->dev)); + + eeh_panic (event->dev, event->state); + + kfree(event); + } + + return 0; +} + +/** + * eeh_thread_launcher + * + * @dummy - unused + */ +static void eeh_thread_launcher(void *dummy) +{ + if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) + printk(KERN_ERR "Failed to start EEH daemon\n"); +} + +/** + * eeh_send_failure_event - generate a PCI error event + * @dev pci device + * + * This routine can be called within an interrupt context; + * the actual event will be delivered in a normal context + * (from a workqueue). 
+ */ +int eeh_send_failure_event (struct device_node *dn, + struct pci_dev *dev, + int state, + int time_unavail) +{ + unsigned long flags; + struct eeh_event *event; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (event == NULL) { + printk (KERN_ERR "EEH: out of memory, event not handled\n"); + return 1; + } + + if (dev) + pci_dev_get(dev); + + event->dn = dn; + event->dev = dev; + event->state = state; + event->time_unavail = time_unavail; + + /* We may or may not be called in an interrupt context */ + spin_lock_irqsave(&eeh_eventlist_lock, flags); + list_add(&event->list, &eeh_eventlist); + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + + schedule_work(&eeh_event_wq); + + return 0; +} + +/********************** END OF FILE ******************************/ From linas at austin.ibm.com Fri Oct 7 09:28:31 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:28:31 -0500 Subject: [PATCH 4/22] ppc64: EEH Recovery support routines In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006232831.GE29826@austin.ibm.com> 04-eeh-recovery-support-routines.patch EEH Recovery support routines This patch adds routines required to help drive the recovery of EEH-frozen slots. The main function is to drive the PCI #RST signal line high for a quarter of a second, and then allow for a second & a half of settle time.
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.h 2005-10-06 17:50:31.847694674 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h 2005-10-06 17:51:58.844488173 -0500 @@ -51,4 +51,18 @@ extern unsigned long pci_assign_all_buses; extern int pci_read_irq_line(struct pci_dev *pci_dev); +/* ---- EEH internal-use-only related routines ---- */ +#ifdef CONFIG_EEH +/** + * rtas_set_slot_reset -- unfreeze a frozen slot + * + * Clear the EEH-frozen condition on a slot. This routine + * does this by asserting the PCI #RST line for 1/8th of + * a second; this routine will sleep while the adapter is + * being reset. + */ +void rtas_set_slot_reset (struct pci_dn *); + +#endif + #endif /* __PPC_KERNEL_PCI_H__ */ Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:50:31.847694674 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:52:27.908410223 -0500 @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -678,6 +679,104 @@ EXPORT_SYMBOL(eeh_check_failure); /* ------------------------------------------------------------- */ +/* The code below deals with error recovery */ + +/** Return negative value if a permanent error, else return + * a number of milliseconds to wait until the PCI slot is + * ready to be used. 
+ */ +static int +eeh_slot_availability(struct pci_dn *pdn) +{ + int rc; + int rets[3]; + + rc = read_slot_reset_state(pdn, rets); + + if (rc) return rc; + + if (rets[1] == 0) return -1; /* EEH is not supported */ + if (rets[0] == 0) return 0; /* Oll Korrect */ + if (rets[0] == 5) { + if (rets[2] == 0) return -1; /* permanently unavailable */ + return rets[2]; /* number of millisecs to wait */ + } + return -1; +} + +/** rtas_pci_slot_reset raises/lowers the pci #RST line + * state: 1/0 to raise/lower the #RST + * + * Clear the EEH-frozen condition on a slot. This routine + * asserts the PCI #RST line if the 'state' argument is '1', + * and drops the #RST line if 'state is '0'. This routine is + * safe to call in an interrupt context. + * + */ + +static void +rtas_pci_slot_reset(struct pci_dn *pdn, int state) +{ + int rc; + + BUG_ON (pdn==NULL); + + if (!pdn->phb) { + printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n", + pdn->node->full_name); + return; + } + + rc = rtas_call(ibm_set_slot_reset,4,1, NULL, + pdn->eeh_config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), + state); + if (rc) { + printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n", + rc, state, pdn->node->full_name); + return; + } + + if (state == 0) + eeh_clear_slot (pdn->node->parent->child); +} + +/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second + * dn -- device node to be reset. + */ + +void +rtas_set_slot_reset(struct pci_dn *pdn) +{ + int i, rc; + + rtas_pci_slot_reset (pdn, 1); + + /* The PCI bus requires that the reset be held high for at least + * a 100 milliseconds. We wait a bit longer 'just in case'. */ + +#define PCI_BUS_RST_HOLD_TIME_MSEC 250 + msleep (PCI_BUS_RST_HOLD_TIME_MSEC); + rtas_pci_slot_reset (pdn, 0); + + /* After a PCI slot has been reset, the PCI Express spec requires + * a 1.5 second idle time for the bus to stabilize, before starting + * up traffic. 
*/ +#define PCI_BUS_SETTLE_TIME_MSEC 1800 + msleep (PCI_BUS_SETTLE_TIME_MSEC); + + /* Now double check with the firmware to make sure the device is + * ready to be used; if not, wait for recovery. */ + for (i=0; i<10; i++) { + rc = eeh_slot_availability (pdn); + if (rc <= 0) break; + + msleep (rc+100); + } +} + +/* ------------------------------------------------------------- */ /* The code below deals with enabling EEH for devices during the * early boot sequence. EEH must be enabled before any PCI probing * can be done. From linas at austin.ibm.com Fri Oct 7 09:29:59 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:29:59 -0500 Subject: [PATCH 5/22] ppc64: Device BAR save and restore In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006232959.GF29826@austin.ibm.com> 05-eeh-device-bar-save.patch After a PCI device has been reset, the device BAR's and other config space info must be restored to the same state as they were in when the firmware first handed us this device. This will allow the PCI device driver, when restarted, to correctly recognize and set up the device. This patch saves the device config space as early as reasonable after the firmware has handed over the device. The state restore function is intended for use by the EEH recovery routines.
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:52:27.908410223 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:52:37.399078590 -0500 @@ -78,6 +78,9 @@ */ #define EEH_MAX_FAILS 100000 +/* Misc forward declaraions */ +static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn); + /* RTAS tokens */ static int ibm_set_eeh_option; static int ibm_set_slot_reset; @@ -367,6 +370,7 @@ */ void __init pci_addr_cache_build(void) { + struct device_node *dn; struct pci_dev *dev = NULL; if (!eeh_subsystem_enabled) @@ -380,6 +384,10 @@ continue; } pci_addr_cache_insert_device(dev); + + /* Save the BAR's; firmware doesn't restore these after EEH reset */ + dn = pci_device_to_OF_node(dev); + eeh_save_bars(dev, PCI_DN(dn)); } #ifdef DEBUG @@ -776,6 +784,108 @@ } } +/* ------------------------------------------------------- */ +/** Save and restore of PCI BARs + * + * Although firmware will set up BARs during boot, it doesn't + * set up device BAR's after a device reset, although it will, + * if requested, set up bridge configuration. Thus, we need to + * configure the PCI devices ourselves. + */ + +/** + * __restore_bars - Restore the Base Address Registers + * Loads the PCI configuration space base address registers, + * the expansion ROM base address, the latency timer, and etc. + * from the saved values in the device node. 
+ */ +static inline void __restore_bars (struct pci_dn *pdn) +{ + int i; + + if (NULL==pdn->phb) return; + for (i=4; i<10; i++) { + rtas_write_config(pdn, i*4, 4, pdn->config_space[i]); + } + + /* 12 == Expansion ROM Address */ + rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]); + +#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) +#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)]) + + rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1, + SAVED_BYTE(PCI_CACHE_LINE_SIZE)); + + rtas_write_config (pdn, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); + + /* max latency, min grant, interrupt pin and line */ + rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]); +} + +/** + * eeh_restore_bars - restore the PCI config space info + * + * This routine performs a recursive walk to the children + * of this device as well. + */ +void eeh_restore_bars(struct pci_dn *pdn) +{ + struct device_node *dn; + if (!pdn) + return; + + if (! pdn->eeh_is_bridge) + __restore_bars (pdn); + + dn = pdn->node->child; + while (dn) { + eeh_restore_bars (PCI_DN(dn)); + dn = dn->sibling; + } +} + +/** + * eeh_save_bars - save device bars + * + * Save the values of the device bars. Unlike the restore + * routine, this routine is *not* recursive. This is because + * PCI devices are added individuallly; but, for the restore, + * an entire slot is reset at a time. 
+ */ +static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn) +{ + int i; + + if (!pdev || !pdn ) + return; + + for (i = 0; i < 16; i++) + pci_read_config_dword(pdev, i * 4, &pdn->config_space[i]); + + if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + pdn->eeh_is_bridge = 1; +} + +void +rtas_configure_bridge(struct pci_dn *pdn) +{ + int token = rtas_token ("ibm,configure-bridge"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return; + rc = rtas_call(token,3,1, NULL, + pdn->eeh_config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid)); + if (rc) { + printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", + rc, pdn->node->full_name); + } +} + /* ------------------------------------------------------------- */ /* The code below deals with enabling EEH for devices during the * early boot sequence. EEH must be enabled before any PCI probing @@ -978,6 +1088,7 @@ void eeh_add_device_late(struct pci_dev *dev) { struct device_node *dn; + struct pci_dn *pdn; if (!dev || !eeh_subsystem_enabled) return; @@ -988,9 +1099,11 @@ pci_dev_get (dev); dn = pci_device_to_OF_node(dev); - PCI_DN(dn)->pcidev = dev; + pdn = PCI_DN(dn); + pdn->pcidev = dev; pci_addr_cache_insert_device (dev); + eeh_save_bars(dev, pdn); } EXPORT_SYMBOL_GPL(eeh_add_device_late); Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.h 2005-10-06 17:51:58.844488173 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h 2005-10-06 17:52:37.399078590 -0500 @@ -63,6 +63,29 @@ */ void rtas_set_slot_reset (struct pci_dn *); +/** + * eeh_restore_bars - Restore device configuration info. + * + * A reset of a PCI device will clear out its config space. + * This routines will restore the config space for this + * device, and is children, to values previously obtained + * from the firmware. 
+ */ +void eeh_restore_bars(struct pci_dn *); + +/** + * rtas_configure_bridge -- firmware initialization of pci bridge + * + * Ask the firmware to configure all PCI bridges devices + * located behind the indicated node. Required after a + * pci device reset. Does essentially the same hing as + * eeh_restore_bars, but for brdges, and lets firmware + * do the work. + */ +void rtas_configure_bridge(struct pci_dn *); + +int rtas_write_config(struct pci_dn *, int where, int size, u32 val); + #endif #endif /* __PPC_KERNEL_PCI_H__ */ From linas at austin.ibm.com Fri Oct 7 09:31:06 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:31:06 -0500 Subject: [PATCH 6/22] ppc64: PCI Error Recovery: documentation patch In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006233106.GG29826@austin.ibm.com> PCI Error Recovery: documentation patch Various PCI bus errors can be signaled by newer PCI controllers. Recovering from those errors requires an infrastructure to notify affected device drivers of the error, and a way of walking through a reset sequence. This patch adds documentation describing the current error recovery proposal. Signed-off-by: Linas Vepstas Documentation/pci-error-recovery.txt | 246 +++++++++++++++++++++++++++++++++++ MAINTAINERS | 7 2 files changed, 253 insertions(+) Index: linux-2.6.14-rc2-git6/Documentation/pci-error-recovery.txt =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14-rc2-git6/Documentation/pci-error-recovery.txt 2005-10-06 17:52:47.274692945 -0500 @@ -0,0 +1,246 @@ + + PCI Error Recovery + ------------------ + May 31, 2005 + + Current document maintainer: + Linas Vepstas + + +Some PCI bus controllers are able to detect certain "hard" PCI errors +on the bus, such as parity errors on the data and address busses, as +well as SERR and PERR errors. 
These chipsets are then able to disable +I/O to/from the affected device, so that, for example, a bad DMA +address doesn't end up corrupting system memory. These same chipsets +are also able to reset the affected PCI device, and return it to +working condition. This document describes a generic API for +performing error recovery. + +The core idea is that after a PCI error has been detected, there must +be a way for the kernel to coordinate with all affected device drivers +so that the pci card can be made operational again, possibly after +performing a full electrical #RST of the PCI card. The API below +provides a generic API for device drivers to be notified of PCI +errors, and to be notified of, and respond to, a reset sequence. + +Preliminary sketch of API, cut-n-pasted-n-modified email from +Ben Herrenschmidt, circa 5 april 2005 + +The error recovery API support is exposed to the driver in the form of +a structure of function pointers pointed to by a new field in struct +pci_driver. The absence of this pointer in pci_driver denotes a +"non-aware" driver, behaviour on these is platform dependent. +Platforms like ppc64 can try to simulate pci hotplug remove/add. + +The definition of "pci_error_token" is not covered here. It is based on +Seto's work on the synchronous error detection. We still need to define +functions for extracting infos out of an opaque error token. This is +separate from this API. + +This structure has the form: + +struct pci_error_handlers +{ + int (*error_detected)(struct pci_dev *dev, pci_error_token error); + int (*mmio_enabled)(struct pci_dev *dev); + int (*resume)(struct pci_dev *dev); + int (*link_reset)(struct pci_dev *dev); + int (*slot_reset)(struct pci_dev *dev); +}; + +A driver doesn't have to implement all of these callbacks. The +only mandatory one is error_detected(). If a callback is not +implemented, the corresponding feature is considered unsupported.
+For example, if mmio_enabled() and resume() aren't there, then the +driver is assumed as not doing any direct recovery and requires +a reset. If link_reset() is not implemented, the card is assumed as +not caring about link resets, in which case, if recover is supported, +the core can try recover (but not slot_reset() unless it really did +reset the slot). If slot_reset() is not supported, link_reset() can +be called instead on a slot reset. + +At first, the call will always be : + + 1) error_detected() + + Error detected. This is sent once after an error has been detected. At +this point, the device might not be accessible anymore depending on the +platform (the slot will be isolated on ppc64). The driver may already +have "noticed" the error because of a failing IO, but this is the proper +"synchronisation point", that is, it gives a chance to the driver to +cleanup, waiting for pending stuff (timers, whatever, etc...) to +complete; it can take semaphores, schedule, etc... everything but touch +the device. Within this function and after it returns, the driver +shouldn't do any new IOs. Called in task context. This is sort of a +"quiesce" point. See note about interrupts at the end of this doc. + + Result codes: + - PCIERR_RESULT_CAN_RECOVER: + Driver returns this if it thinks it might be able to recover + the HW by just banging IOs or if it wants to be given + a chance to extract some diagnostic information (see + below). + - PCIERR_RESULT_NEED_RESET: + Driver returns this if it thinks it can't recover unless the + slot is reset. + - PCIERR_RESULT_DISCONNECT: + Return this if driver thinks it won't recover at all, + (this will detach the driver ? or just leave it + dangling ? to be decided) + +So at this point, we have called error_detected() for all drivers +on the segment that had the error. On ppc64, the slot is isolated. What +happens now typically depends on the result from the drivers.
If all +drivers on the segment/slot return PCIERR_RESULT_CAN_RECOVER, we would +re-enable IOs on the slot (or do nothing special if the platform doesn't +isolate slots) and call 2). If not and we can reset slots, we go to 4), +if neither, we have a dead slot. If it's a hotplug slot, we might +"simulate" reset by triggering HW unplug/replug though. + +>>> Current ppc64 implementation assumes that a device driver will +>>> *not* schedule or semaphore in this routine; the current ppc64 +>>> implementation uses one kernel thread to notify all devices; +>>> thus, if one device sleeps/schedules, all devices are affected. +>>> Doing better requires complex multi-threaded logic in the error +>>> recovery implementation (e.g. waiting for all notification threads +>>> to "join" before proceeding with recovery.) This seems excessively +>>> complex and not worth implementing. + +>>> The current ppc64 implementation doesn't much care if the device +>>> attempts i/o at this point, or not. I/O's will fail, returning +>>> a value of 0xff on read, and writes will be dropped. If the device +>>> driver attempts more than 10K I/O's to a frozen adapter, it will +>>> assume that the device driver has gone into an infinite loop, and +>>> it will panic the kernel. + + 2) mmio_enabled() + + This is the "early recovery" call. IOs are allowed again, but DMA is +not (hrm... to be discussed, I prefer not), with some restrictions. This +is NOT a callback for the driver to start operations again, only to +peek/poke at the device, extract diagnostic information, if any, and +eventually do things like trigger a device local reset or some such, +but not restart operations. This is sent if all drivers on a segment +agree that they can try to recover and no automatic link reset was +performed by the HW. If the platform can't just re-enable IOs without +a slot reset or a link reset, it doesn't call this callback and goes +directly to 3) or 4).
All IOs should be done _synchronously_ from +within this callback, errors triggered by them will be returned via +the normal pci_check_whatever() api, no new error_detected() callback +will be issued due to an error happening here. However, such an error +might cause IOs to be re-blocked for the whole segment, and thus +invalidate the recovery that other devices on the same segment might +have done, forcing the whole segment into one of the next states, +that is link reset or slot reset. + + Result codes: + - PCIERR_RESULT_RECOVERED + Driver returns this if it thinks the device is fully + functional and thinks it is ready to start + normal driver operations again. There is no + guarantee that the driver will actually be + allowed to proceed, as another driver on the + same segment might have failed and thus triggered a + slot reset on platforms that support it. + + - PCIERR_RESULT_NEED_RESET + Driver returns this if it thinks the device is not + recoverable in its current state and it needs a slot + reset to proceed. + + - PCIERR_RESULT_DISCONNECT + Same as above. Total failure, no recovery even after + reset driver dead. (To be defined more precisely) + +>>> The current ppc64 implementation does not implement this callback. + + 3) link_reset() + + This is called after the link has been reset. This is typically +a PCI Express specific state at this point and is done whenever a +non-fatal error has been detected that can be "solved" by resetting +the link. This call informs the driver of the reset and the driver +should check if the device appears to be in working condition. +This function acts a bit like 2) mmio_enabled(), in that the driver +is not supposed to restart normal driver I/O operations right away. +Instead, it should just "probe" the device to check its recoverability +status. If all is right, then the core will call resume() once all +drivers have ack'd link_reset().
+ + Result codes: + (identical to mmio_enabled) + +>>> The current ppc64 implementation does not implement this callback. + + 4) slot_reset() + + This is called after the slot has been soft or hard reset by the +platform. A soft reset consists of asserting the adapter #RST line +and then restoring the PCI BARs and PCI configuration header. If the +platform supports PCI hotplug, then it might instead perform a hard +reset by toggling power on the slot off/on. This call gives drivers +the chance to re-initialize the hardware (re-download firmware, etc.), +but drivers shouldn't restart normal I/O processing operations at +this point. (See note about interrupts; interrupts aren't guaranteed +to be delivered until the resume() callback has been called). If all +device drivers report success on this callback, the platform will call +resume() to complete the error handling and let the driver restart +normal I/O processing. + +A driver can still return a critical failure for this function if +it can't get the device operational after reset. If the platform +previously tried a soft reset, it might now try a hard reset (power +cycle) and then call slot_reset() again. If the device still can't +be recovered, there is nothing more that can be done; the platform +will typically report a "permanent failure" in such a case. The +device will be considered "dead" in this case. + + Result codes: + - PCIERR_RESULT_DISCONNECT + Same as above. + +>>> The current ppc64 implementation does not try a power-cycle reset +>>> if the driver returned PCIERR_RESULT_DISCONNECT. However, it should. + + 5) resume() + + This is called if all drivers on the segment have returned +PCIERR_RESULT_RECOVERED from one of the 3 previous callbacks. +That basically tells the driver to restart activity, that everything +is back and running. No result code is taken into account here. If +a new error happens, it will restart a new error handling process. + +That's it. I think this covers all the possibilities.
The way those +callbacks are called is platform policy. A platform with no slot reset +capability for example may want to just "ignore" drivers that can't +recover (disconnect them) and try to let other cards on the same segment +recover. Keep in mind that in most real life cases, though, there will +be only one driver per segment. + +Now, there is a note about interrupts. If you get an interrupt and your +device is dead or has been isolated, there is a problem :) + +After much thinking, I decided to leave that to the platform. That is, +the recovery API only specifies that: + + - There is no guarantee that interrupt delivery can proceed from any +device on the segment starting from the error detection and until the +restart callback is sent, at which point interrupts are expected to be +fully operational. + + - There is no guarantee that interrupt delivery is stopped, that is, a +driver that gets an interrupt after detecting an error, or that detects +an error within the interrupt handler such that it prevents proper +ack'ing of the interrupt (and thus removal of the source) should just +return IRQ_NOTHANDLED. It's up to the platform to deal with that +condition, typically by masking the irq source during the duration of +the error handling. It is expected that the platform "knows" which +interrupts are routed to error-management capable slots and can deal +with temporarily disabling that irq number during error processing (this +isn't terribly complex). That means some IRQ latency for other devices +sharing the interrupt, but there is simply no other way.
High end +platforms aren't supposed to share interrupts between many devices +anyway :) + + +Revised: 31 May 2005 Linas Vepstas Index: linux-2.6.14-rc2-git6/MAINTAINERS =================================================================== --- linux-2.6.14-rc2-git6.orig/MAINTAINERS 2005-10-06 17:50:30.073943549 -0500 +++ linux-2.6.14-rc2-git6/MAINTAINERS 2005-10-06 17:52:47.296689858 -0500 @@ -1859,6 +1859,13 @@ L: linux-abi-devel at lists.sourceforge.net S: Maintained +PCI ERROR RECOVERY +P: Linas Vepstas +M: linas at austin.ibm.com +L: linux-kernel at vger.kernel.org +L: linux-pci at atrey.karlin.mff.cuni.cz +S: Supported + PCI SOUND DRIVERS (ES1370, ES1371 and SONICVIBES) P: Thomas Sailer M: sailer at ife.ee.ethz.ch From linas at austin.ibm.com Fri Oct 7 09:32:09 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:32:09 -0500 Subject: [PATCH 7/22] PCI Error Recovery: header file patch In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006233208.GH29826@austin.ibm.com> PCI Error Recovery: header file patch Various PCI bus errors can be signaled by newer PCI controllers. Recovering from those errors requires an infrastructure to notify affected device drivers of the error, and a way of walking through a reset sequence. This patch adds a set of callbacks to be used by error recovery routines to notify device drivers of the various stages of recovery. 
Signed-off-by: Linas Vepstas -- include/linux/pci.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 49 insertions(+) Index: linux-2.6.14-rc2-git6/include/linux/pci.h =================================================================== --- linux-2.6.14-rc2-git6.orig/include/linux/pci.h 2005-10-06 17:50:29.442032212 -0500 +++ linux-2.6.14-rc2-git6/include/linux/pci.h 2005-10-06 17:52:50.634221570 -0500 @@ -78,6 +78,16 @@ #define PCI_UNKNOWN ((pci_power_t __force) 5) #define PCI_POWER_ERROR ((pci_power_t __force) -1) +/** The pci_channel state describes connectivity between the CPU and + * the pci device. If some PCI bus between here and the pci device + * has crashed or locked up, this info is reflected here. + */ +enum pci_channel_state { + pci_channel_io_normal = 0, /* I/O channel is in normal state */ + pci_channel_io_frozen = 1, /* I/O to channel is blocked */ + pci_channel_io_perm_failure, /* PCI card is dead */ +}; + /* * The pci_dev structure is used to describe PCI devices. */ @@ -110,6 +120,7 @@ this is D0-D3, D0 being fully functional, and D3 being off. */ + enum pci_channel_state error_state; /* current connectivity state */ struct device dev; /* Generic device interface */ /* device is compatible with these IDs */ @@ -231,6 +242,43 @@ unsigned int use_driver_data:1; /* pci_driver->driver_data is used */ }; +/* ---------------------------------------------------------------- */ +/** PCI error recovery infrastructure. If a PCI device driver provides + * a set fof callbacks in struct pci_error_handlers, then that device driver + * will be notified of PCI bus errors, and will be driven to recovery + * when an error occurs. + */ + +enum pcierr_result { + PCIERR_RESULT_NONE=0, /* no result/none/not supported in device driver */ + PCIERR_RESULT_CAN_RECOVER=1, /* Device driver can recover without slot reset */ + PCIERR_RESULT_NEED_RESET, /* Device driver wants slot to be reset. 
*/ + PCIERR_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */ + PCIERR_RESULT_RECOVERED, /* Device driver is fully recovered and operational */ +}; + +/* PCI bus error event callbacks */ +struct pci_error_handlers +{ + /* PCI bus error detected on this device */ + int (*error_detected)(struct pci_dev *dev, + enum pci_channel_state error); + + /* MMIO has been re-enabled, but not DMA */ + int (*mmio_enabled)(struct pci_dev *dev); + + /* PCI Express link has been reset */ + int (*link_reset)(struct pci_dev *dev); + + /* PCI slot has been reset */ + int (*slot_reset)(struct pci_dev *dev); + + /* Device driver may resume normal operations */ + void (*resume)(struct pci_dev *dev); +}; + +/* ---------------------------------------------------------------- */ + struct module; struct pci_driver { struct list_head node; @@ -244,6 +292,7 @@ int (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable); /* Enable wake event */ void (*shutdown) (struct pci_dev *dev); + struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; }; From linas at austin.ibm.com Fri Oct 7 09:33:20 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:33:20 -0500 Subject: [PATCH 8/22] ppc64: Slot Marking Bugfix In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006233320.GI29826@austin.ibm.com> 08-eeh-slot-marking-bug.patch A device that experiences a PCI outage may be just one deivce out of many that was affected. In order to avoid repeated reports of a failure, the entire tree of affected devices should be marked as failed. This patch marks up the entire tree. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:52:37.399078590 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:53:02.164603746 -0500 @@ -480,32 +480,47 @@ * an interrupt context, which is bad. */ -static inline void __eeh_mark_slot (struct device_node *dn) +static inline void __eeh_mark_slot (struct device_node *dn, int mode_flag) { while (dn) { - PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED; + if (PCI_DN(dn)) { + PCI_DN(dn)->eeh_mode |= mode_flag; - if (dn->child) - __eeh_mark_slot (dn->child); + if (dn->child) + __eeh_mark_slot (dn->child, mode_flag); + } dn = dn->sibling; } } -static inline void __eeh_clear_slot (struct device_node *dn) +void eeh_mark_slot (struct device_node *dn, int mode_flag) +{ + dn = find_device_pe (dn); + PCI_DN(dn)->eeh_mode |= mode_flag; + __eeh_mark_slot (dn->child, mode_flag); +} + +static inline void __eeh_clear_slot (struct device_node *dn, int mode_flag) { while (dn) { - PCI_DN(dn)->eeh_mode &= ~EEH_MODE_ISOLATED; - if (dn->child) - __eeh_clear_slot (dn->child); + if (PCI_DN(dn)) { + PCI_DN(dn)->eeh_mode &= ~mode_flag; + PCI_DN(dn)->eeh_check_count = 0; + if (dn->child) + __eeh_clear_slot (dn->child, mode_flag); + } dn = dn->sibling; } } -static inline void eeh_clear_slot (struct device_node *dn) +void eeh_clear_slot (struct device_node *dn, int mode_flag) { unsigned long flags; spin_lock_irqsave(&confirm_error_lock, flags); - __eeh_clear_slot (dn); + dn = find_device_pe (dn); + PCI_DN(dn)->eeh_mode &= ~mode_flag; + PCI_DN(dn)->eeh_check_count = 0; + __eeh_clear_slot (dn->child, mode_flag); spin_unlock_irqrestore(&confirm_error_lock, flags); } @@ -530,7 +545,6 @@ int rets[3]; unsigned long flags; struct pci_dn *pdn; - struct device_node *pe_dn; int rc = 0; __get_cpu_var(total_mmio_ffs)++; @@ -632,8 +646,7 @@ /* Avoid repeated reports of this 
failure, including problems * with other functions on this device, and functions under * bridges. */ - pe_dn = find_device_pe (dn); - __eeh_mark_slot (pe_dn); + eeh_mark_slot (dn, EEH_MODE_ISOLATED); spin_unlock_irqrestore(&confirm_error_lock, flags); eeh_send_failure_event (dn, dev, rets[0], rets[2]); @@ -745,9 +758,6 @@ rc, state, pdn->node->full_name); return; } - - if (state == 0) - eeh_clear_slot (pdn->node->parent->child); } /** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second @@ -766,6 +776,12 @@ #define PCI_BUS_RST_HOLD_TIME_MSEC 250 msleep (PCI_BUS_RST_HOLD_TIME_MSEC); + + /* We might get hit with another EEH freeze as soon as the + * pci slot reset line is dropped. Make sure we don't miss + * these, and clear the flag now. */ + eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED); + rtas_pci_slot_reset (pdn, 0); /* After a PCI slot has been reset, the PCI Express spec requires Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.h 2005-10-06 17:52:37.399078590 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h 2005-10-06 17:53:02.165603605 -0500 @@ -86,6 +86,13 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); +/** + * mark and clear slots: find "partition endpoint" PE and set or + * clear the flags for each subnode of the PE. 
+ */ +void eeh_mark_slot (struct device_node *dn, int mode_flag); +void eeh_clear_slot (struct device_node *dn, int mode_flag); + #endif #endif /* __PPC_KERNEL_PCI_H__ */ From linas at austin.ibm.com Fri Oct 7 09:35:02 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:35:02 -0500 Subject: [PATCH 9/22] ppc64: DLPAR slot add and remove bugfixes In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006233502.GJ29826@austin.ibm.com> 09-crash-on-pci-slot-remove.patch This patch fixes two bugs related to dlpar slot removal and add. -- Both crashes are due to the fact that some children of pci nodes are not pci nodes themselves, and thus do not have pci_dn structures. For example: /pci at 800000020000002/pci at 2,3/usb at 1/hub at 1 /pci at 800000020000002/pci at 2,3/usb at 1,1/hub at 1 Strangely, though, sometimes the following appears, and I don't quite understand why. /interrupt-controller at 3fe0000a400 A typical stack trace: Vector: 300 (Data Access) at [c0000000555637d0] pc: c000000000202a50: .dlpar_add_slot+0x108/0x410 c000000000202e78 .add_slot_store+0x7c/0xac c000000000202da0 .dlpar_attr_store+0x48/0x64 c0000000000f8ee4 .sysfs_write_file+0x100/0x1a0 A similar stack trace is involved for the slot remove. This code survived testing, of adding and removing different slots, 23 times each, so far, as of this writing.
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pSeries_iommu.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pSeries_iommu.c 2005-10-06 17:50:28.197206873 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pSeries_iommu.c 2005-10-06 17:53:46.650361968 -0500 @@ -478,10 +478,13 @@ { int err = NOTIFY_OK; struct device_node *np = node; - struct pci_dn *pci = np->data; + struct pci_dn *pci; switch (action) { case PSERIES_RECONFIG_REMOVE: + pci = PCI_DN(np); + if (!pci) + return NOTIFY_OK; if (pci->iommu_table && get_property(np, "ibm,dma-window", NULL)) iommu_free_table(np); Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci_dn.c 2005-10-06 17:50:28.198206733 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dn.c 2005-10-06 17:53:46.660360565 -0500 @@ -195,7 +195,10 @@ switch (action) { case PSERIES_RECONFIG_ADD: - pci = np->parent->data; + pci = PCI_DN(np->parent); + if (!pci) + return NOTIFY_OK; + update_dn_pci_info(np, pci->phb); break; default: From linas at austin.ibm.com Fri Oct 7 09:36:22 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:36:22 -0500 Subject: [PATCH 10/22] ppc64: Crash on DLPAR PHB add In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006233622.GK29826@austin.ibm.com> 10-rpaphp-crashing.patch This patch fixes a bug related to dlpar PHB add, after a PHB removal. -- The crash was due to the PHB not having a pci_dn structure yet, when the phb is being added. This code survived testing, of adding and removeig the PHB and all slots underneath it, 17 times so far, as of this writing. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpadlpar_core.c 2005-10-06 17:50:27.631286278 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c 2005-10-06 17:53:50.226860151 -0500 @@ -303,7 +303,7 @@ { struct pci_controller *phb; - if (PCI_DN(dn)->phb) { + if (PCI_DN(dn) && PCI_DN(dn)->phb) { /* PHB already exists */ return -EINVAL; } From linas at austin.ibm.com Fri Oct 7 09:39:23 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:39:23 -0500 Subject: [PATCH 11/22] ppc64: RPA PHP and EEH common code In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006233923.GL29826@austin.ibm.com> 11-rpaphp-eeh-cleanup.patch This patch moves some code from the rpaphp directory, to the ppc64 directory, where it should have been all along (Among other things, I need it in the ppc64 directory for the PCI error recovery.) Please note that this patch affects TWO maintainers: Paul, after applying the ppc64 part, please ask that GregKH apply the PCI part. It is safe to have the ppc64 part go in first. It would be bad to have the PCI part go in first.
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:53:02.164603746 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:53:52.475544639 -0500 @@ -1094,6 +1094,15 @@ } EXPORT_SYMBOL_GPL(eeh_add_device_early); +void eeh_add_device_tree_early(struct device_node *dn) +{ + struct device_node *sib; + for (sib = dn->child; sib; sib = sib->sibling) + eeh_add_device_tree_early(sib); + eeh_add_device_early(dn); +} +EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); + /** * eeh_add_device_late - perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH @@ -1148,6 +1157,23 @@ } EXPORT_SYMBOL_GPL(eeh_remove_device); +void eeh_remove_bus_device(struct pci_dev *dev) +{ + eeh_remove_device(dev); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + struct pci_bus *bus = dev->subordinate; + struct list_head *ln; + if (!bus) + return; + for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) { + struct pci_dev *pdev = pci_dev_b(ln); + if (pdev) + eeh_remove_bus_device(pdev); + } + } +} +EXPORT_SYMBOL_GPL(eeh_remove_bus_device); + static int proc_eeh_show(struct seq_file *m, void *v) { unsigned int cpu; Index: linux-2.6.14-rc2-git6/include/asm-ppc64/eeh.h =================================================================== --- linux-2.6.14-rc2-git6.orig/include/asm-ppc64/eeh.h 2005-10-06 17:51:48.669915765 -0500 +++ linux-2.6.14-rc2-git6/include/asm-ppc64/eeh.h 2005-10-06 17:53:52.476544499 -0500 @@ -55,6 +55,7 @@ * to finish the eeh setup for this device. */ void eeh_add_device_early(struct device_node *); +void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_late(struct pci_dev *); /** @@ -70,6 +71,15 @@ void eeh_remove_device(struct pci_dev *); /** + * eeh_remove_device_recursive - undo EEH for device & children. 
+ * @dev: pci device to be removed + * + * As above, this removes the device; it also removes child + * pci devices as well. + */ +void eeh_remove_bus_device(struct pci_dev *); + +/** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. * * If this macro yields TRUE, the caller relays to eeh_check_failure() Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_pci.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpaphp_pci.c 2005-10-06 17:50:27.039369330 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_pci.c 2005-10-06 17:53:52.477544359 -0500 @@ -251,17 +251,6 @@ return dev; } -static void enable_eeh(struct device_node *dn) -{ - struct device_node *sib; - - for (sib = dn->child; sib; sib = sib->sibling) - enable_eeh(sib); - eeh_add_device_early(dn); - return; - -} - static void print_slot_pci_funcs(struct pci_bus *bus) { struct device_node *dn; @@ -287,7 +276,7 @@ if (!dn) goto exit; - enable_eeh(dn); + eeh_add_device_tree_early(dn); dev = rpaphp_pci_config_slot(bus); if (!dev) { err("%s: can't find any devices.\n", __FUNCTION__); @@ -301,30 +290,12 @@ } EXPORT_SYMBOL_GPL(rpaphp_config_pci_adapter); -static void rpaphp_eeh_remove_bus_device(struct pci_dev *dev) -{ - eeh_remove_device(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *bus = dev->subordinate; - struct list_head *ln; - if (!bus) - return; - for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) { - struct pci_dev *pdev = pci_dev_b(ln); - if (pdev) - rpaphp_eeh_remove_bus_device(pdev); - } - - } - return; -} - int rpaphp_unconfig_pci_adapter(struct pci_bus *bus) { struct pci_dev *dev, *tmp; list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { - rpaphp_eeh_remove_bus_device(dev); + eeh_remove_bus_device(dev); pci_remove_bus_device(dev); } return 0; From linas at austin.ibm.com Fri Oct 7 09:40:44 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:40:44 -0500 
Subject: [PATCH 12/22] ppc64: RPA PHP cleanup In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006234044.GM29826@austin.ibm.com> 12-rpaphp-cleanup.patch This patch cleans up some rpa dlpar code. Basically, the rpaphp_config_pci_adapter() was a wrapper routine, which made two calls, and wrapped a bunch of verbose no-op code around it. This was consolidated with the routine it called. Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_pci.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpaphp_pci.c 2005-10-06 17:53:52.477544359 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_pci.c 2005-10-06 17:53:55.542114371 -0500 @@ -219,18 +219,21 @@ rpaphp_pci_config_slot() will configure all devices under the given slot->dn and return the the first pci_dev. *****************************************************************************/ -static struct pci_dev * -rpaphp_pci_config_slot(struct pci_bus *bus) +int +rpaphp_config_pci_adapter(struct pci_bus *bus) { struct device_node *dn = pci_bus_to_OF_node(bus); struct pci_dev *dev = NULL; + int rc = -ENODEV; int slotno; int num; dbg("Enter %s: dn=%s bus=%s\n", __FUNCTION__, dn->full_name, bus->name); if (!dn || !dn->child) - return NULL; + goto exit; + eeh_add_device_tree_early(dn); + slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); /* pci_scan_slot should find all children */ @@ -241,15 +244,23 @@ } if (list_empty(&bus->devices)) { err("%s: No new device found\n", __FUNCTION__); - return NULL; + goto exit; } list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) rpaphp_pci_config_bridge(dev); } - return dev; + dbg("%s: pci_devs of slot[%s]\n", __FUNCTION__, dn->full_name); + list_for_each_entry (dev, &bus->devices, bus_list) + dbg("\t%s\n", pci_name(dev)); + + rc = 0; +exit: + dbg("Exit %s: rc=%d\n", 
__FUNCTION__, rc); + return rc; } +EXPORT_SYMBOL_GPL(rpaphp_config_pci_adapter); static void print_slot_pci_funcs(struct pci_bus *bus) { @@ -266,30 +277,6 @@ return; } -int rpaphp_config_pci_adapter(struct pci_bus *bus) -{ - struct device_node *dn = pci_bus_to_OF_node(bus); - struct pci_dev *dev; - int rc = -ENODEV; - - dbg("Entry %s: slot[%s]\n", __FUNCTION__, dn->full_name); - if (!dn) - goto exit; - - eeh_add_device_tree_early(dn); - dev = rpaphp_pci_config_slot(bus); - if (!dev) { - err("%s: can't find any devices.\n", __FUNCTION__); - goto exit; - } - print_slot_pci_funcs(bus); - rc = 0; -exit: - dbg("Exit %s: rc=%d\n", __FUNCTION__, rc); - return rc; -} -EXPORT_SYMBOL_GPL(rpaphp_config_pci_adapter); - int rpaphp_unconfig_pci_adapter(struct pci_bus *bus) { struct pci_dev *dev, *tmp; From linas at austin.ibm.com Fri Oct 7 09:44:43 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:44:43 -0500 Subject: [PATCH 13/22] ppc64: RPAPHP duplicated code removal In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006234443.GN29826@austin.ibm.com> 13-rpaphp-eliminate-dupe-code.patch The RPAPHP code contains two routines that appear to be gratuitous copies of very similar pci code. In particular, rpaphp_claim_resource ~~ pci_claim_resource rpadlpar_claim_one_bus == pcibios_claim_one_bus This patch removes the rpaphp versions of the code. This patch survived an overnight run of thousands of add/remove of the slots and phb. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_pci.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpaphp_pci.c 2005-10-06 17:53:55.542114371 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_pci.c 2005-10-06 17:53:57.832792967 -0500 @@ -60,28 +60,6 @@ } EXPORT_SYMBOL_GPL(rpaphp_find_pci_bus); -int rpaphp_claim_resource(struct pci_dev *dev, int resource) -{ - struct resource *res = &dev->resource[resource]; - struct resource *root = pci_find_parent_resource(dev, res); - char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; - int err = -EINVAL; - - if (root != NULL) { - err = request_resource(root, res); - } - - if (err) { - err("PCI: %s region %d of %s %s [%lx:%lx]\n", - root ? "Address space collision on" : - "No parent found for", - resource, dtype, pci_name(dev), res->start, res->end); - } - return err; -} - -EXPORT_SYMBOL_GPL(rpaphp_claim_resource); - static int rpaphp_get_sensor_state(struct slot *slot, int *state) { int rc; @@ -176,7 +154,7 @@ if (r->parent || !r->start || !r->flags) continue; - rpaphp_claim_resource(dev, i); + pci_claim_resource(dev, i); } } } Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpadlpar_core.c 2005-10-06 17:53:50.226860151 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c 2005-10-06 17:53:57.834792686 -0500 @@ -109,28 +109,6 @@ return NULL; } -static void rpadlpar_claim_one_bus(struct pci_bus *b) -{ - struct list_head *ld; - struct pci_bus *child_bus; - - for (ld = b->devices.next; ld != &b->devices; ld = ld->next) { - struct pci_dev *dev = pci_dev_b(ld); - int i; - - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - struct resource *r = &dev->resource[i]; - - if (r->parent || !r->start || !r->flags) - continue; - rpaphp_claim_resource(dev, i); - } - 
} - - list_for_each_entry(child_bus, &b->children, node) - rpadlpar_claim_one_bus(child_bus); -} - static int pci_add_secondary_bus(struct device_node *dn, struct pci_dev *bridge_dev) { @@ -155,7 +133,7 @@ pcibios_fixup_bus(child); /* Claim new bus resources */ - rpadlpar_claim_one_bus(bridge_dev->bus); + pcibios_claim_one_bus(bridge_dev->bus); if (hose->last_busno < child->number) hose->last_busno = child->number; Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.c 2005-10-06 17:50:25.899529261 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.c 2005-10-06 17:53:57.836792405 -0500 @@ -198,7 +198,7 @@ spin_unlock(&hose_spinlock); } -static void __init pcibios_claim_one_bus(struct pci_bus *b) +void __devinit pcibios_claim_one_bus(struct pci_bus *b) { struct pci_dev *dev; struct pci_bus *child_bus; Index: linux-2.6.14-rc2-git6/include/asm-ppc64/pci.h =================================================================== --- linux-2.6.14-rc2-git6.orig/include/asm-ppc64/pci.h 2005-10-06 17:50:25.899529261 -0500 +++ linux-2.6.14-rc2-git6/include/asm-ppc64/pci.h 2005-10-06 17:53:57.836792405 -0500 @@ -160,6 +160,8 @@ extern void pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus); +extern void pcibios_claim_one_bus(struct pci_bus *b); + extern struct pci_controller *init_phb_dynamic(struct device_node *dn); extern int pci_read_irq_line(struct pci_dev *dev); From linas at austin.ibm.com Fri Oct 7 09:46:24 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:46:24 -0500 Subject: [PATCH 14/22] ppc64: RPA PHP to EEH code movement In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006234624.GO29826@austin.ibm.com> 14-rpaphp-migrate.patch This patch moves some pci device add & remove code from the PCI hotplug directory to the arch/ppc64/kernel 
directory, and cleans it up a tad. The primary reason for this is that the code performs some fairly generic operations that are shared with the PCI error recovery code (living in the arch/ppc64/kernel directory). Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dlpar.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci_dlpar.c 2005-10-06 17:54:00.306445890 -0500 @@ -0,0 +1,174 @@ +/* + * PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code + * for RPA-compliant PPC64 platform. + * Copyright (C) 2003 Linda Xie + * Copyright (C) 2005 International Business Machines + * + * Updates, 2005, John Rose + * Updates, 2005, Linas Vepstas + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include + +static struct pci_bus * +find_bus_among_children(struct pci_bus *bus, + struct device_node *dn) +{ + struct pci_bus *child = NULL; + struct list_head *tmp; + struct device_node *busdn; + + busdn = pci_bus_to_OF_node(bus); + if (busdn == dn) + return bus; + + list_for_each(tmp, &bus->children) { + child = find_bus_among_children(pci_bus_b(tmp), dn); + if (child) + break; + }; + return child; +} + +struct pci_bus * +pcibios_find_pci_bus(struct device_node *dn) +{ + struct pci_dn *pdn = dn->data; + + if (!pdn || !pdn->phb || !pdn->phb->bus) + return NULL; + + return find_bus_among_children(pdn->phb->bus, dn); +} + +/** + * pcibios_remove_pci_devices - remove all devices under this bus + * + * Remove all of the PCI devices under this bus both from the + * linux pci device tree, and from the ppc64 EEH address cache. + */ +void +pcibios_remove_pci_devices(struct pci_bus *bus) +{ + struct pci_dev *dev, *tmp; + + list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + eeh_remove_bus_device(dev); + pci_remove_bus_device(dev); + } +} + +/* Must be called before pci_bus_add_devices */ +static void +pcibios_fixup_new_pci_devices(struct pci_bus *bus, int fix_bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + /* + * Skip already-present devices (which are on the + * global device list.) 
+ */ + if (list_empty(&dev->global_list)) { + int i; + + /* Need to setup IOMMU tables */ + ppc_md.iommu_dev_setup(dev); + + if(fix_bus) + pcibios_fixup_device_resources(dev, bus); + pci_read_irq_line(dev); + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (r->parent || !r->start || !r->flags) + continue; + pci_claim_resource(dev, i); + } + } + } +} + +static int +pcibios_pci_config_bridge(struct pci_dev *dev) +{ + u8 sec_busno; + struct pci_bus *child_bus; + struct pci_dev *child_dev; + + /* Get busno of downstream bus */ + pci_read_config_byte(dev, PCI_SECONDARY_BUS, &sec_busno); + + /* Add to children of PCI bridge dev->bus */ + child_bus = pci_add_new_bus(dev->bus, dev, sec_busno); + if (!child_bus) { + printk (KERN_ERR "%s: could not add second bus\n", __FUNCTION__); + return -EIO; + } + sprintf(child_bus->name, "PCI Bus #%02x", child_bus->number); + + pci_scan_child_bus(child_bus); + + list_for_each_entry(child_dev, &child_bus->devices, bus_list) { + eeh_add_device_late(child_dev); + } + + /* Fixup new pci devices without touching bus struct */ + pcibios_fixup_new_pci_devices(child_bus, 0); + + /* Make the discovered devices available */ + pci_bus_add_devices(child_bus); + return 0; +} + +/** + * pcibios_add_pci_devices - adds new pci devices to bus + * + * This routine will find and fixup new pci devices under + * the indicated bus. This routine presumes that there + * might already be some devices under this bridge, so + * it carefully tries to add only new devices. (And that + * is how this routine differs from other, similar pcibios + * routines.) 
+ */ +void +pcibios_add_pci_devices(struct pci_bus * bus) +{ + int slotno, num; + struct pci_dev *dev; + struct device_node *dn = pci_bus_to_OF_node(bus); + + eeh_add_device_tree_early(dn); + + /* pci_scan_slot should find all children */ + slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); + num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + if (num) { + pcibios_fixup_new_pci_devices(bus, 1); + pci_bus_add_devices(bus); + } + + list_for_each_entry(dev, &bus->devices, bus_list) { + eeh_add_device_late (dev); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + pcibios_pci_config_bridge(dev); + } +} Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/Makefile =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/Makefile 2005-10-06 17:50:25.365604176 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/Makefile 2005-10-06 17:54:00.307445749 -0500 @@ -37,7 +37,7 @@ bpa_iic.o spider-pic.o obj-$(CONFIG_KEXEC) += machine_kexec.o -obj-$(CONFIG_EEH) += eeh.o eeh_event.o +obj-$(CONFIG_EEH) += eeh.o eeh_event.o pci_dlpar.o obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_SMP) += smp.o Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_pci.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpaphp_pci.c 2005-10-06 17:53:57.832792967 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_pci.c 2005-10-06 17:54:00.308445609 -0500 @@ -30,36 +30,6 @@ #include "rpaphp.h" -static struct pci_bus *find_bus_among_children(struct pci_bus *bus, - struct device_node *dn) -{ - struct pci_bus *child = NULL; - struct list_head *tmp; - struct device_node *busdn; - - busdn = pci_bus_to_OF_node(bus); - if (busdn == dn) - return bus; - - list_for_each(tmp, &bus->children) { - child = find_bus_among_children(pci_bus_b(tmp), dn); - if (child) - break; - } - return child; -} - -struct pci_bus *rpaphp_find_pci_bus(struct device_node 
*dn) -{ - struct pci_dn *pdn = dn->data; - - if (!pdn || !pdn->phb || !pdn->phb->bus) - return NULL; - - return find_bus_among_children(pdn->phb->bus, dn); -} -EXPORT_SYMBOL_GPL(rpaphp_find_pci_bus); - static int rpaphp_get_sensor_state(struct slot *slot, int *state) { int rc; @@ -118,7 +88,7 @@ /* config/unconfig adapter */ *value = slot->state; } else { - bus = rpaphp_find_pci_bus(slot->dn); + bus = pcibios_find_pci_bus(slot->dn); if (bus && !list_empty(&bus->devices)) *value = CONFIGURED; else @@ -129,117 +99,6 @@ return rc; } -/* Must be called before pci_bus_add_devices */ -static void -rpaphp_fixup_new_pci_devices(struct pci_bus *bus, int fix_bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - /* - * Skip already-present devices (which are on the - * global device list.) - */ - if (list_empty(&dev->global_list)) { - int i; - - /* Need to setup IOMMU tables */ - ppc_md.iommu_dev_setup(dev); - - if(fix_bus) - pcibios_fixup_device_resources(dev, bus); - pci_read_irq_line(dev); - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - struct resource *r = &dev->resource[i]; - - if (r->parent || !r->start || !r->flags) - continue; - pci_claim_resource(dev, i); - } - } - } -} - -static int rpaphp_pci_config_bridge(struct pci_dev *dev) -{ - u8 sec_busno; - struct pci_bus *child_bus; - struct pci_dev *child_dev; - - dbg("Enter %s: BRIDGE dev=%s\n", __FUNCTION__, pci_name(dev)); - - /* get busno of downstream bus */ - pci_read_config_byte(dev, PCI_SECONDARY_BUS, &sec_busno); - - /* add to children of PCI bridge dev->bus */ - child_bus = pci_add_new_bus(dev->bus, dev, sec_busno); - if (!child_bus) { - err("%s: could not add second bus\n", __FUNCTION__); - return -EIO; - } - sprintf(child_bus->name, "PCI Bus #%02x", child_bus->number); - /* do pci_scan_child_bus */ - pci_scan_child_bus(child_bus); - - list_for_each_entry(child_dev, &child_bus->devices, bus_list) { - eeh_add_device_late(child_dev); - } - - /* fixup new pci devices without 
touching bus struct */ - rpaphp_fixup_new_pci_devices(child_bus, 0); - - /* Make the discovered devices available */ - pci_bus_add_devices(child_bus); - return 0; -} - -/***************************************************************************** - rpaphp_pci_config_slot() will configure all devices under the - given slot->dn and return the the first pci_dev. - *****************************************************************************/ -int -rpaphp_config_pci_adapter(struct pci_bus *bus) -{ - struct device_node *dn = pci_bus_to_OF_node(bus); - struct pci_dev *dev = NULL; - int rc = -ENODEV; - int slotno; - int num; - - dbg("Enter %s: dn=%s bus=%s\n", __FUNCTION__, dn->full_name, bus->name); - if (!dn || !dn->child) - goto exit; - - eeh_add_device_tree_early(dn); - - slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - - /* pci_scan_slot should find all children */ - num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); - if (num) { - rpaphp_fixup_new_pci_devices(bus, 1); - pci_bus_add_devices(bus); - } - if (list_empty(&bus->devices)) { - err("%s: No new device found\n", __FUNCTION__); - goto exit; - } - list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - rpaphp_pci_config_bridge(dev); - } - - dbg("%s: pci_devs of slot[%s]\n", __FUNCTION__, dn->full_name); - list_for_each_entry (dev, &bus->devices, bus_list) - dbg("\t%s\n", pci_name(dev)); - - rc = 0; -exit: - dbg("Exit %s: rc=%d\n", __FUNCTION__, rc); - return rc; -} -EXPORT_SYMBOL_GPL(rpaphp_config_pci_adapter); - static void print_slot_pci_funcs(struct pci_bus *bus) { struct device_node *dn; @@ -255,17 +114,6 @@ return; } -int rpaphp_unconfig_pci_adapter(struct pci_bus *bus) -{ - struct pci_dev *dev, *tmp; - - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { - eeh_remove_bus_device(dev); - pci_remove_bus_device(dev); - } - return 0; -} - static int setup_pci_hotplug_slot_info(struct slot *slot) { dbg("%s Initilize the PCI slot's hotplug->info structure 
...\n", @@ -301,7 +149,7 @@ struct pci_bus *bus; BUG_ON(!dn); - bus = rpaphp_find_pci_bus(dn); + bus = pcibios_find_pci_bus(dn); if (!bus) { err("%s: no pci_bus for dn %s\n", __FUNCTION__, dn->full_name); goto exit_rc; @@ -326,10 +174,7 @@ if (slot->hotplug_slot->info->adapter_status == NOT_CONFIGURED) { dbg("%s CONFIGURING pci adapter in slot[%s]\n", __FUNCTION__, slot->name); - if (rpaphp_config_pci_adapter(slot->bus)) { - err("%s: CONFIG pci adapter failed\n", __FUNCTION__); - goto exit_rc; - } + pcibios_add_pci_devices(slot->bus); } else if (slot->hotplug_slot->info->adapter_status != CONFIGURED) { err("%s: slot[%s]'s adapter_status is NOT_VALID.\n", @@ -375,16 +220,10 @@ /* if slot is not empty, enable the adapter */ if (state == PRESENT) { dbg("%s : slot[%s] is occupied.\n", __FUNCTION__, slot->name); - retval = rpaphp_config_pci_adapter(slot->bus); - if (!retval) { - slot->state = CONFIGURED; - dbg("%s: PCI devices in slot[%s] has been configured\n", + pcibios_add_pci_devices(slot->bus); + slot->state = CONFIGURED; + dbg("%s: PCI devices in slot[%s] has been configured\n", __FUNCTION__, slot->name); - } else { - slot->state = NOT_CONFIGURED; - dbg("%s: no pci_dev struct for adapter in slot[%s]\n", - __FUNCTION__, slot->name); - } } else if (state == EMPTY) { dbg("%s : slot[%s] is empty\n", __FUNCTION__, slot->name); slot->state = EMPTY; Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpadlpar_core.c 2005-10-06 17:53:57.834792686 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpadlpar_core.c 2005-10-06 17:54:00.309445469 -0500 @@ -194,9 +194,8 @@ static int dlpar_add_pci_slot(char *drc_name, struct device_node *dn) { struct pci_dev *dev; - int rc; - if (rpaphp_find_pci_bus(dn)) + if (pcibios_find_pci_bus(dn)) return -EINVAL; /* Add pci bus */ @@ -208,12 +207,7 @@ } if (dn->child) { - rc = 
rpaphp_config_pci_adapter(dev->subordinate); - if (rc < 0) { - printk(KERN_ERR "%s: unable to enable slot %s\n", - __FUNCTION__, drc_name); - return -EIO; - } + pcibios_add_pci_devices(dev->subordinate); } /* Add hotplug slot */ @@ -252,7 +246,7 @@ struct pci_dn *pdn; int rc = 0; - if (!rpaphp_find_pci_bus(dn)) + if (!pcibios_find_pci_bus(dn)) return -EINVAL; slot = find_slot(dn); @@ -397,7 +391,7 @@ struct pci_bus *bus; struct slot *slot; - bus = rpaphp_find_pci_bus(dn); + bus = pcibios_find_pci_bus(dn); if (!bus) return -EINVAL; Index: linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_core.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/pci/hotplug/rpaphp_core.c 2005-10-06 17:50:25.366604035 -0500 +++ linux-2.6.14-rc2-git6/drivers/pci/hotplug/rpaphp_core.c 2005-10-06 17:54:00.310445328 -0500 @@ -426,7 +426,8 @@ dbg("DISABLING SLOT %s\n", slot->name); down(&rpaphp_sem); - retval = rpaphp_unconfig_pci_adapter(slot->bus); + pcibios_remove_pci_devices(slot->bus); + retval = 0; up(&rpaphp_sem); slot->state = NOT_CONFIGURED; info("%s: devices in slot[%s] unconfigured.\n", __FUNCTION__, Index: linux-2.6.14-rc2-git6/include/asm-ppc64/pci-bridge.h =================================================================== --- linux-2.6.14-rc2-git6.orig/include/asm-ppc64/pci-bridge.h 2005-10-06 17:50:25.366604035 -0500 +++ linux-2.6.14-rc2-git6/include/asm-ppc64/pci-bridge.h 2005-10-06 17:54:00.310445328 -0500 @@ -103,9 +103,18 @@ return bus->sysdata; /* Must be root bus (PHB) */ } +/** Find the bus corresponding to the indicated device node */ +struct pci_bus * pcibios_find_pci_bus(struct device_node *dn); + extern void pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev); +/** Remove all of the PCI devices under this bus */ +void pcibios_remove_pci_devices(struct pci_bus *bus); + +/** Discover new pci devices under this bus, and add them */ +void pcibios_add_pci_devices(struct pci_bus * 
bus); + extern int pcibios_remove_root_bus(struct pci_controller *phb); extern void phbs_remap_io(void); From linas at austin.ibm.com Fri Oct 7 09:47:42 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:47:42 -0500 Subject: [PATCH 15/22] ppc64: PCI Error Recovery: PPC64 core recovery routines In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006234742.GP29826@austin.ibm.com> PCI Error Recovery: PPC64 core recovery routines Various PCI bus errors can be signaled by newer PCI controllers. The core error recovery routines are architecture dependent. This patch adds a recovery infrastructure for the PPC64 pSeries systems. Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:53:52.475544639 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:54:14.494455177 -0500 @@ -486,6 +486,11 @@ if (PCI_DN(dn)) { PCI_DN(dn)->eeh_mode |= mode_flag; + /* Mark the pci device driver too */ + struct pci_dev *dev = PCI_DN(dn)->pcidev; + if (dev && dev->driver) + dev->error_state = pci_channel_io_frozen; + if (dn->child) __eeh_mark_slot (dn->child, mode_flag); } @@ -545,6 +550,7 @@ int rets[3]; unsigned long flags; struct pci_dn *pdn; + enum pci_channel_state state; int rc = 0; __get_cpu_var(total_mmio_ffs)++; @@ -649,8 +655,13 @@ eeh_mark_slot (dn, EEH_MODE_ISOLATED); spin_unlock_irqrestore(&confirm_error_lock, flags); - eeh_send_failure_event (dn, dev, rets[0], rets[2]); - + state = pci_channel_io_normal; + if ((rets[0] == 2) || (rets[0] == 4)) + state = pci_channel_io_frozen; + if (rets[0] == 5) + state = pci_channel_io_perm_failure; + eeh_send_failure_event (dn, dev, state, rets[2]); + /* Most EEH events are due to device driver bugs. 
Having * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ @@ -954,8 +965,10 @@ * But there are a few cases like display devices that make sense. */ enable = 1; /* i.e. we will do checking */ +#if 0 if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY) enable = 0; +#endif if (!enable) pdn->eeh_mode |= EEH_MODE_NOCHECK; Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_driver.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_driver.c 2005-10-06 17:54:14.495455037 -0500 @@ -0,0 +1,376 @@ +/* + * PCI Error Recovery Driver for RPA-compliant PPC64 platform. + * Copyright (C) 2004, 2005 Linas Vepstas + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Send feedback to + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eeh_event.h" +#include "pci.h" + +static inline const char * pcid_name (struct pci_dev *pdev) +{ + if (pdev->dev.driver) + return pdev->dev.driver->name; + return ""; +} + +/** + * Return the "partitionable endpoint" (pe) under which this device lies + */ +static struct device_node * find_device_pe(struct device_node *dn) +{ + while ((dn->parent) && PCI_DN(dn->parent) && + (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { + dn = dn->parent; + } + return dn; +} + + +#ifdef DEBUG +static void print_device_node_tree (struct pci_dn *pdn, int dent) +{ + int i; + if (!pdn) return; + for (i=0;inode->name, pdn->eeh_mode, pdn->eeh_config_addr, + pdn->eeh_pe_config_addr, pdn->node->full_name); + dent += 3; + struct device_node *pc = pdn->node->child; + while (pc) { + print_device_node_tree(PCI_DN(pc), dent); + pc = pc->sibling; + } +} +#endif + +/** + * irq_in_use - return true if this irq is being used + */ +static int irq_in_use(unsigned int irq) +{ + int rc = 0; + unsigned long flags; + struct irq_desc *desc = irq_desc + irq; + + spin_lock_irqsave(&desc->lock, flags); + if (desc->action) + rc = 1; + spin_unlock_irqrestore(&desc->lock, flags); + return rc; +} + +/* ------------------------------------------------------- */ +/** eeh_report_error - report an EEH error to each device, + * collect up and merge the device responses. 
+ */ + +static void eeh_report_error(struct pci_dev *dev, void *userdata) +{ + enum pcierr_result rc, *res = userdata; + struct pci_driver *driver = dev->driver; + + dev->error_state = pci_channel_io_frozen; + + if (!driver) + return; + + if (irq_in_use (dev->irq)) { + struct device_node *dn = pci_device_to_OF_node(dev); + PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; + disable_irq_nosync(dev->irq); + } + if (!driver->err_handler) + return; + if (!driver->err_handler->error_detected) + return; + + rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); + if (*res == PCIERR_RESULT_NONE) *res = rc; + if (*res == PCIERR_RESULT_NEED_RESET) return; + if (*res == PCIERR_RESULT_DISCONNECT && + rc == PCIERR_RESULT_NEED_RESET) *res = rc; +} + +/** eeh_report_reset -- tell this device that the pci slot + * has been reset. + */ + +static void eeh_report_reset(struct pci_dev *dev, void *userdata) +{ + struct pci_driver *driver = dev->driver; + struct device_node *dn = pci_device_to_OF_node(dev); + + if (!driver) + return; + + if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { + PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; + enable_irq(dev->irq); + } + if (!driver->err_handler) + return; + if (!driver->err_handler->slot_reset) + return; + + driver->err_handler->slot_reset(dev); +} + +static void eeh_report_resume(struct pci_dev *dev, void *userdata) +{ + struct pci_driver *driver = dev->driver; + + dev->error_state = pci_channel_io_normal; + + if (!driver) + return; + if (!driver->err_handler) + return; + if (!driver->err_handler->resume) + return; + + driver->err_handler->resume(dev); +} + +static void eeh_report_failure(struct pci_dev *dev, void *userdata) +{ + struct pci_driver *driver = dev->driver; + + dev->error_state = pci_channel_io_perm_failure; + + if (!driver) + return; + + if (irq_in_use (dev->irq)) { + struct device_node *dn = pci_device_to_OF_node(dev); + PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; + disable_irq_nosync(dev->irq); + } + if 
(!driver->err_handler) + return; + if (!driver->err_handler->error_detected) + return; + driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); +} + +/* ------------------------------------------------------- */ +/** + * handle_eeh_events -- reset a PCI device after hard lockup. + * + * pSeries systems will isolate a PCI slot if the PCI-Host + * bridge detects address or data parity errors, DMA's + * occuring to wild addresses (which usually happen due to + * bugs in device drivers or in PCI adapter firmware). + * Slot isolations also occur if #SERR, #PERR or other misc + * PCI-related errors are detected. + * + * Recovery process consists of unplugging the device driver + * (which generated hotplug events to userspace), then issuing + * a PCI #RST to the device, then reconfiguring the PCI config + * space for all bridges & devices under this slot, and then + * finally restarting the device drivers (which cause a second + * set of hotplug events to go out to userspace). + */ + +/** + * eeh_reset_device() -- perform actual reset of a pci slot + * Args: bus: pointer to the pci bus structure corresponding + * to the isolated slot. A non-null value will + * cause all devices under the bus to be removed + * and then re-added. + * pe_dn: pointer to a "Partionable Endpoint" device node. + * This is the top-level structure on which pci + * bus resets can be performed. + */ + +static void eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) +{ + if (bus) + pcibios_remove_pci_devices(bus); + + /* Reset the pci controller. (Asserts RST#; resets config space). + * Reconfigure bridges and devices */ + rtas_set_slot_reset(pe_dn); + + /* Walk over all functions on this device */ + rtas_configure_bridge(pe_dn); + eeh_restore_bars(pe_dn); + + /* Give the system 5 seconds to finish running the user-space + * hotplug shutdown scripts, e.g. ifdown for ethernet. 
Yes, + * this is a hack, but if we don't do this, and try to bring + * the device up before the scripts have taken it down, + * potentially weird things happen. + */ + if (bus) { + ssleep (5); + pcibios_add_pci_devices(bus); + } +} + +/* The longest amount of time to wait for a pci device + * to come back on line, in seconds. + */ +#define MAX_WAIT_FOR_RECOVERY 15 + +void handle_eeh_events (struct eeh_event *event) +{ + struct device_node *frozen_dn; + struct pci_dn *frozen_pdn; + struct pci_bus *frozen_bus; + struct pci_dev *dev = event->dev; + int perm_failure = 0; + + /* We might not have a pci device, if it was a config space read + * that failed. Find the pci device now. */ + if (!dev) { + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + if (pci_device_to_OF_node(dev) == event->dn) + break; + } + } + + frozen_dn = find_device_pe(event->dn); + frozen_bus = pcibios_find_pci_bus(frozen_dn); + + if (!frozen_dn) { + printk(KERN_ERR "EEH: Cannot find PCI controller for %s\n", + pci_name(dev)); + return; + } + + /* There are two different styles for coming up with the PE. + * In the old style, it was the highest EEH-capable device + * which was always an EADS pci bridge. In the new style, + * there might not be any EADS bridges, and even when there are, + * the firmware marks them as "EEH incapable". So another + * two-step is needed to find the pci bus.. */ + if (!frozen_bus) + frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); + + if (!frozen_bus) { + printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n", + frozen_dn->full_name); + return; + } + + if (!dev) + dev = frozen_bus->self; + +#if 0 + /* We may get "permanent failure" messages on empty slots. + * These are false alarms. Empty slots have no child dn. 
*/ + if ((event->state == pci_channel_io_perm_failure) && (frozen_device == NULL)) + return; +#endif + + frozen_pdn = PCI_DN(frozen_dn); + frozen_pdn->eeh_freeze_count++; + + if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) + perm_failure = 1; + + /* If the reset state is a '5' and the time to reset is 0 (infinity) + * or is more then 15 seconds, then mark this as a permanent failure. + */ + if ((event->state == pci_channel_io_perm_failure) && + ((event->time_unavail <= 0) || + (event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000))) + { + perm_failure = 1; + } + + /* Log the error with the rtas logger. */ + if (perm_failure) { + /* + * About 90% of all real-life EEH failures in the field + * are due to poorly seated PCI cards. Only 10% or so are + * due to actual, failed cards. + */ + printk(KERN_ERR + "EEH: PCI device %s - %s has failed %d times \n" + "and has been permanently disabled. Please try reseating\n" + "this device or replacing it.\n", + pci_name (dev), pcid_name(dev), frozen_pdn->eeh_freeze_count); + + eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */); + + /* Notify all devices that they're about to go down. */ + pci_walk_bus(frozen_bus, eeh_report_failure, 0); + + /* Shut down the device drivers for good. */ + pcibios_remove_pci_devices(frozen_bus); + return; + } + + eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); + printk(KERN_WARNING + "EEH: This PCI device has failed %d times since last reboot: %s - %s\n", + frozen_pdn->eeh_freeze_count, + pci_name (dev), pcid_name(dev)); + + /* Walk the various device drivers attached to this slot through + * a reset sequence, giving each an opportunity to do what it needs + * to accomplish the reset. Each child gets a report of the + * status ... if any child can't handle the reset, then the entire + * slot is dlpar removed and added. 
+ */ + enum pcierr_result result = PCIERR_RESULT_NONE; + pci_walk_bus(frozen_bus, eeh_report_error, &result); + + /* If all device drivers were EEH-unaware, then shut + * down all of the device drivers, and hope they + * go down willingly, without panicing the system. + */ + if (result == PCIERR_RESULT_NONE) { + eeh_reset_device(frozen_pdn, frozen_bus); + } + + /* If any device called out for a reset, then reset the slot */ + if (result == PCIERR_RESULT_NEED_RESET) { + eeh_reset_device(frozen_pdn, NULL); + pci_walk_bus(frozen_bus, eeh_report_reset, 0); + } + + /* If all devices reported they can proceed, the re-enable PIO */ + if (result == PCIERR_RESULT_CAN_RECOVER) { + /* XXX Not supported; we brute-force reset the device */ + eeh_reset_device(frozen_pdn, NULL); + pci_walk_bus(frozen_bus, eeh_report_reset, 0); + } + + /* Tell all device drivers that they can resume operations */ + pci_walk_bus(frozen_bus, eeh_report_resume, 0); +} + +/* ---------- end of file ---------- */ Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_event.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh_event.c 2005-10-06 17:50:24.089783186 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_event.c 2005-10-06 17:56:14.461622651 -0500 @@ -21,6 +21,7 @@ #include #include #include "eeh_event.h" +#include "pci.h" /** Overview: * EEH error states may be detected within exception handlers; @@ -36,30 +37,7 @@ static void eeh_thread_launcher(void *); DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL); -/** - * eeh_panic - call panic() for an eeh event that cannot be handled. - * The philosophy of this routine is that it is better to panic and - * halt the OS than it is to risk possible data corruption by - * oblivious device drivers that don't know better. 
- * - * @dev pci device that had an eeh event - * @reset_state current reset state of the device slot - */ -static void eeh_panic(struct pci_dev *dev, int reset_state) -{ - /* - * Since the panic_on_oops sysctl is used to halt the system - * in light of potential corruption, we can use it here. - */ - if (panic_on_oops) { - panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, - pci_name(dev)); - } - else { - printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", - reset_state, pci_name(dev)); - } -} +int handle_eeh_events (struct eeh_event *event); /** * eeh_event_handler - dispatch EEH events. The detection of a frozen @@ -82,10 +60,16 @@ spin_lock_irqsave(&eeh_eventlist_lock, flags); event = NULL; + + /* Unqueue the event, get ready to process. */ if (!list_empty(&eeh_eventlist)) { event = list_entry(eeh_eventlist.next, struct eeh_event, list); list_del(&event->list); } + + if (event) + eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); if (event == NULL) break; @@ -93,8 +77,11 @@ printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", pci_name(event->dev)); - eeh_panic (event->dev, event->state); + handle_eeh_events(event); + + eeh_clear_slot(event->dn, EEH_MODE_RECOVERING); + pci_dev_put(event->dev); kfree(event); } @@ -122,7 +109,7 @@ */ int eeh_send_failure_event (struct device_node *dn, struct pci_dev *dev, - int state, + enum pci_channel_state state, int time_unavail) { unsigned long flags; Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_event.h =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh_event.h 2005-10-06 17:50:24.089783186 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_event.h 2005-10-06 17:54:14.496454897 -0500 @@ -29,7 +29,7 @@ struct list_head list; struct device_node *dn; /* struct device node */ struct pci_dev *dev; /* affected device */ - int state; + enum pci_channel_state state; /* PCI 
bus state for the affected device */ int time_unavail; /* milliseconds until device might be available */ }; @@ -46,7 +46,7 @@ */ int eeh_send_failure_event (struct device_node *dn, struct pci_dev *dev, - int reset_state, + enum pci_channel_state state, int time_unavail); #endif /* ASM_PPC64_EEH_EVENT_H */ Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/Makefile =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/Makefile 2005-10-06 17:54:00.307445749 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/Makefile 2005-10-06 17:54:14.496454897 -0500 @@ -37,7 +37,7 @@ bpa_iic.o spider-pic.o obj-$(CONFIG_KEXEC) += machine_kexec.o -obj-$(CONFIG_EEH) += eeh.o eeh_event.o pci_dlpar.o +obj-$(CONFIG_EEH) += eeh.o eeh_driver.o eeh_event.o pci_dlpar.o obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_SMP) += smp.o Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.h 2005-10-06 17:53:02.165603605 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h 2005-10-06 17:54:14.497454757 -0500 @@ -54,6 +54,15 @@ /* ---- EEH internal-use-only related routines ---- */ #ifdef CONFIG_EEH /** + * eeh_slot_error_detail -- record and EEH error condition to the log + * @severity: 1 if temporary, 2 if permanent failure. + * + * Obtains the the EEH error details from the RTAS subsystem, + * and then logs these details with the RTAS error log system. + */ +void eeh_slot_error_detail (struct pci_dn *pdn, int severity); + +/** * rtas_set_slot_reset -- unfreeze a frozen slot * * Clear the EEH-frozen condition on a slot. 
This routine Index: linux-2.6.14-rc2-git6/include/asm-ppc64/eeh.h =================================================================== --- linux-2.6.14-rc2-git6.orig/include/asm-ppc64/eeh.h 2005-10-06 17:53:52.476544499 -0500 +++ linux-2.6.14-rc2-git6/include/asm-ppc64/eeh.h 2005-10-06 17:55:48.203306937 -0500 @@ -31,9 +31,11 @@ #ifdef CONFIG_EEH /* Values for eeh_mode bits in device_node */ -#define EEH_MODE_SUPPORTED (1<<0) -#define EEH_MODE_NOCHECK (1<<1) -#define EEH_MODE_ISOLATED (1<<2) +#define EEH_MODE_SUPPORTED (1<<0) +#define EEH_MODE_NOCHECK (1<<1) +#define EEH_MODE_ISOLATED (1<<2) +#define EEH_MODE_RECOVERING (1<<3) +#define EEH_MODE_IRQ_DISABLED (1<<4) /* Max number of EEH freezes allowed before we consider the device * to be permanently disabled. */ From linas at austin.ibm.com Fri Oct 7 09:53:18 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:53:18 -0500 Subject: [PATCH 16/22] PCI Address cache lookup code In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006235318.GQ29826@austin.ibm.com> 16-pci-address-cache.patch Architecture-independent PCI address caching code. Performs caching and lookup of pci devices based on the I/O addresses that they use. That is, given an I/O address, this can be used to find the pci device that uses that address. Although it currently lives in the ppc64 directory, it could potentially be common code. This code used to live in the overly large EEH file. This patch splits it out to its own file. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/Makefile =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/Makefile 2005-10-06 17:54:14.496454897 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/Makefile 2005-10-06 17:56:42.934627625 -0500 @@ -37,7 +37,7 @@ bpa_iic.o spider-pic.o obj-$(CONFIG_KEXEC) += machine_kexec.o -obj-$(CONFIG_EEH) += eeh.o eeh_driver.o eeh_event.o pci_dlpar.o +obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o pci_dlpar.o obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_SMP) += smp.o Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:54:14.494455177 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:56:42.936627345 -0500 @@ -78,9 +78,6 @@ */ #define EEH_MAX_FAILS 100000 -/* Misc forward declaraions */ -static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn); - /* RTAS tokens */ static int ibm_set_eeh_option; static int ibm_set_slot_reset; @@ -108,296 +105,8 @@ static DEFINE_PER_CPU(unsigned long, ignored_failures); static DEFINE_PER_CPU(unsigned long, slot_resets); -/** - * The pci address cache subsystem. This subsystem places - * PCI device address resources into a red-black tree, sorted - * according to the address range, so that given only an i/o - * address, the corresponding PCI device can be **quickly** - * found. It is safe to perform an address lookup in an interrupt - * context; this ability is an important feature. - * - * Currently, the only customer of this code is the EEH subsystem; - * thus, this code has been somewhat tailored to suit EEH better. - * In particular, the cache does *not* hold the addresses of devices - * for which EEH is not enabled. 
- * - * (Implementation Note: The RB tree seems to be better/faster - * than any hash algo I could think of for this problem, even - * with the penalty of slow pointer chases for d-cache misses). - */ -struct pci_io_addr_range -{ - struct rb_node rb_node; - unsigned long addr_lo; - unsigned long addr_hi; - struct pci_dev *pcidev; - unsigned int flags; -}; - -static struct pci_io_addr_cache -{ - struct rb_root rb_root; - spinlock_t piar_lock; -} pci_io_addr_cache_root; - -static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) -{ - struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; - - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (addr < piar->addr_lo) { - n = n->rb_left; - } else { - if (addr > piar->addr_hi) { - n = n->rb_right; - } else { - pci_dev_get(piar->pcidev); - return piar->pcidev; - } - } - } - - return NULL; -} - -/** - * pci_get_device_by_addr - Get device, given only address - * @addr: mmio (PIO) phys address or i/o port number - * - * Given an mmio phys address, or a port number, find a pci device - * that implements this address. Be sure to pci_dev_put the device - * when finished. I/O port numbers are assumed to be offset - * from zero (that is, they do *not* have pci_io_addr added in). - * It is safe to call this function within an interrupt. - */ -static struct pci_dev *pci_get_device_by_addr(unsigned long addr) -{ - struct pci_dev *dev; - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - dev = __pci_get_device_by_addr(addr); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); - return dev; -} - -#ifdef DEBUG -/* - * Handy-dandy debug print routine, does nothing more - * than print out the contents of our addr cache. 
- */ -static void pci_addr_cache_print(struct pci_io_addr_cache *cache) -{ - struct rb_node *n; - int cnt = 0; - - n = rb_first(&cache->rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", - (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); - cnt++; - n = rb_next(n); - } -} -#endif - -/* Insert address range into the rb tree. */ -static struct pci_io_addr_range * -pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, - unsigned long ahi, unsigned int flags) -{ - struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct pci_io_addr_range *piar; - - /* Walk tree, find a place to insert into tree */ - while (*p) { - parent = *p; - piar = rb_entry(parent, struct pci_io_addr_range, rb_node); - if (ahi < piar->addr_lo) { - p = &parent->rb_left; - } else if (alo > piar->addr_hi) { - p = &parent->rb_right; - } else { - if (dev != piar->pcidev || - alo != piar->addr_lo || ahi != piar->addr_hi) { - printk(KERN_WARNING "PIAR: overlapping address range\n"); - } - return piar; - } - } - piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); - if (!piar) - return NULL; - - piar->addr_lo = alo; - piar->addr_hi = ahi; - piar->pcidev = dev; - piar->flags = flags; - -#ifdef DEBUG - printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", - alo, ahi, pci_name (dev)); -#endif - - rb_link_node(&piar->rb_node, parent, p); - rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); - - return piar; -} - -static void __pci_addr_cache_insert_device(struct pci_dev *dev) -{ - struct device_node *dn; - struct pci_dn *pdn; - int i; - int inserted = 0; - - dn = pci_device_to_OF_node(dev); - if (!dn) { - printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); - return; - } - - /* Skip any devices for 
which EEH is not enabled. */ - pdn = PCI_DN(dn); - if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || - pdn->eeh_mode & EEH_MODE_NOCHECK) { -#ifdef DEBUG - printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n", - pci_name(dev), pdn->node->full_name); -#endif - return; - } - - /* The cache holds a reference to the device... */ - pci_dev_get(dev); - - /* Walk resources on this device, poke them into the tree */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - unsigned long start = pci_resource_start(dev,i); - unsigned long end = pci_resource_end(dev,i); - unsigned int flags = pci_resource_flags(dev,i); - - /* We are interested only bus addresses, not dma or other stuff */ - if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if (start == 0 || ~start == 0 || end == 0 || ~end == 0) - continue; - pci_addr_cache_insert(dev, start, end, flags); - inserted = 1; - } - - /* If there was nothing to add, the cache has no reference... */ - if (!inserted) - pci_dev_put(dev); -} - -/** - * pci_addr_cache_insert_device - Add a device to the address cache - * @dev: PCI device whose I/O addresses we are interested in. - * - * In order to support the fast lookup of devices based on addresses, - * we maintain a cache of devices that can be quickly searched. - * This routine adds a device to that cache. 
- */ -static void pci_addr_cache_insert_device(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_insert_device(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) -{ - struct rb_node *n; - int removed = 0; - -restart: - n = rb_first(&pci_io_addr_cache_root.rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (piar->pcidev == dev) { - rb_erase(n, &pci_io_addr_cache_root.rb_root); - removed = 1; - kfree(piar); - goto restart; - } - n = rb_next(n); - } - - /* The cache no longer holds its reference to this device... */ - if (removed) - pci_dev_put(dev); -} - -/** - * pci_addr_cache_remove_device - remove pci device from addr cache - * @dev: device to remove - * - * Remove a device from the addr-cache tree. - * This is potentially expensive, since it will walk - * the tree multiple times (once per resource). - * But so what; device removal doesn't need to be that fast. - */ -static void pci_addr_cache_remove_device(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_remove_device(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -/** - * pci_addr_cache_build - Build a cache of I/O addresses - * - * Build a cache of pci i/o addresses. This cache will be used to - * find the pci device that corresponds to a given address. - * This routine scans all pci busses to build the cache. - * Must be run late in boot process, after the pci controllers - * have been scaned for devices (after all device resources are known). 
- */ -void __init pci_addr_cache_build(void) -{ - struct device_node *dn; - struct pci_dev *dev = NULL; - - if (!eeh_subsystem_enabled) - return; - - spin_lock_init(&pci_io_addr_cache_root.piar_lock); - - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - /* Ignore PCI bridges ( XXX why ??) */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) { - continue; - } - pci_addr_cache_insert_device(dev); - - /* Save the BAR's; firmware doesn't restore these after EEH reset */ - dn = pci_device_to_OF_node(dev); - eeh_save_bars(dev, PCI_DN(dn)); - } - -#ifdef DEBUG - /* Verify tree built up above, echo back the list of addrs. */ - pci_addr_cache_print(&pci_io_addr_cache_root); -#endif -} - /* --------------------------------------------------------------- */ -/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */ +/* Below lies the EEH event infrastructure */ void eeh_slot_error_detail (struct pci_dn *pdn, int severity) { @@ -881,7 +590,7 @@ * PCI devices are added individuallly; but, for the restore, * an entire slot is reset at a time. */ -static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn) +void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn) { int i; Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_cache.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh_cache.c 2005-10-06 17:56:42.937627204 -0500 @@ -0,0 +1,316 @@ +/* + * eeh_cache.c + * PCI address cache; allows the lookup of PCI devices based on I/O address + * + * Copyright (C) 2004 Linas Vepstas IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include "pci.h" + +#undef DEBUG + +/** + * The pci address cache subsystem. This subsystem places + * PCI device address resources into a red-black tree, sorted + * according to the address range, so that given only an i/o + * address, the corresponding PCI device can be **quickly** + * found. It is safe to perform an address lookup in an interrupt + * context; this ability is an important feature. + * + * Currently, the only customer of this code is the EEH subsystem; + * thus, this code has been somewhat tailored to suit EEH better. + * In particular, the cache does *not* hold the addresses of devices + * for which EEH is not enabled. + * + * (Implementation Note: The RB tree seems to be better/faster + * than any hash algo I could think of for this problem, even + * with the penalty of slow pointer chases for d-cache misses). 
+ */ +struct pci_io_addr_range +{ + struct rb_node rb_node; + unsigned long addr_lo; + unsigned long addr_hi; + struct pci_dev *pcidev; + unsigned int flags; +}; + +static struct pci_io_addr_cache +{ + struct rb_root rb_root; + spinlock_t piar_lock; +} pci_io_addr_cache_root; + +static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) +{ + struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; + + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (addr < piar->addr_lo) { + n = n->rb_left; + } else { + if (addr > piar->addr_hi) { + n = n->rb_right; + } else { + pci_dev_get(piar->pcidev); + return piar->pcidev; + } + } + } + + return NULL; +} + +/** + * pci_get_device_by_addr - Get device, given only address + * @addr: mmio (PIO) phys address or i/o port number + * + * Given an mmio phys address, or a port number, find a pci device + * that implements this address. Be sure to pci_dev_put the device + * when finished. I/O port numbers are assumed to be offset + * from zero (that is, they do *not* have pci_io_addr added in). + * It is safe to call this function within an interrupt. + */ +struct pci_dev *pci_get_device_by_addr(unsigned long addr) +{ + struct pci_dev *dev; + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + dev = __pci_get_device_by_addr(addr); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); + return dev; +} + +#ifdef DEBUG +/* + * Handy-dandy debug print routine, does nothing more + * than print out the contents of our addr cache. + */ +static void pci_addr_cache_print(struct pci_io_addr_cache *cache) +{ + struct rb_node *n; + int cnt = 0; + + n = rb_first(&cache->rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", + (piar->flags & IORESOURCE_IO) ? 
"i/o" : "mem", cnt, + piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); + cnt++; + n = rb_next(n); + } +} +#endif + +/* Insert address range into the rb tree. */ +static struct pci_io_addr_range * +pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, + unsigned long ahi, unsigned int flags) +{ + struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; + struct rb_node *parent = NULL; + struct pci_io_addr_range *piar; + + /* Walk tree, find a place to insert into tree */ + while (*p) { + parent = *p; + piar = rb_entry(parent, struct pci_io_addr_range, rb_node); + if (ahi < piar->addr_lo) { + p = &parent->rb_left; + } else if (alo > piar->addr_hi) { + p = &parent->rb_right; + } else { + if (dev != piar->pcidev || + alo != piar->addr_lo || ahi != piar->addr_hi) { + printk(KERN_WARNING "PIAR: overlapping address range\n"); + } + return piar; + } + } + piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); + if (!piar) + return NULL; + + piar->addr_lo = alo; + piar->addr_hi = ahi; + piar->pcidev = dev; + piar->flags = flags; + +#ifdef DEBUG + printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", + alo, ahi, pci_name (dev)); +#endif + + rb_link_node(&piar->rb_node, parent, p); + rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); + + return piar; +} + +static void __pci_addr_cache_insert_device(struct pci_dev *dev) +{ + struct device_node *dn; + struct pci_dn *pdn; + int i; + int inserted = 0; + + dn = pci_device_to_OF_node(dev); + if (!dn) { + printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); + return; + } + + /* Skip any devices for which EEH is not enabled. */ + pdn = PCI_DN(dn); + if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || + pdn->eeh_mode & EEH_MODE_NOCHECK) { +#ifdef DEBUG + printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n", + pci_name(dev), pdn->node->full_name); +#endif + return; + } + + /* The cache holds a reference to the device... 
*/ + pci_dev_get(dev); + + /* Walk resources on this device, poke them into the tree */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + unsigned long start = pci_resource_start(dev,i); + unsigned long end = pci_resource_end(dev,i); + unsigned int flags = pci_resource_flags(dev,i); + + /* We are interested only bus addresses, not dma or other stuff */ + if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if (start == 0 || ~start == 0 || end == 0 || ~end == 0) + continue; + pci_addr_cache_insert(dev, start, end, flags); + inserted = 1; + } + + /* If there was nothing to add, the cache has no reference... */ + if (!inserted) + pci_dev_put(dev); +} + +/** + * pci_addr_cache_insert_device - Add a device to the address cache + * @dev: PCI device whose I/O addresses we are interested in. + * + * In order to support the fast lookup of devices based on addresses, + * we maintain a cache of devices that can be quickly searched. + * This routine adds a device to that cache. + */ +void pci_addr_cache_insert_device(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __pci_addr_cache_insert_device(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) +{ + struct rb_node *n; + int removed = 0; + +restart: + n = rb_first(&pci_io_addr_cache_root.rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (piar->pcidev == dev) { + rb_erase(n, &pci_io_addr_cache_root.rb_root); + removed = 1; + kfree(piar); + goto restart; + } + n = rb_next(n); + } + + /* The cache no longer holds its reference to this device... */ + if (removed) + pci_dev_put(dev); +} + +/** + * pci_addr_cache_remove_device - remove pci device from addr cache + * @dev: device to remove + * + * Remove a device from the addr-cache tree. 
+ * This is potentially expensive, since it will walk + * the tree multiple times (once per resource). + * But so what; device removal doesn't need to be that fast. + */ +void pci_addr_cache_remove_device(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __pci_addr_cache_remove_device(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +/** + * pci_addr_cache_build - Build a cache of I/O addresses + * + * Build a cache of pci i/o addresses. This cache will be used to + * find the pci device that corresponds to a given address. + * This routine scans all pci busses to build the cache. + * Must be run late in boot process, after the pci controllers + * have been scaned for devices (after all device resources are known). + */ +void __init pci_addr_cache_build(void) +{ + struct device_node *dn; + struct pci_dev *dev = NULL; + + spin_lock_init(&pci_io_addr_cache_root.piar_lock); + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + /* Ignore PCI bridges */ + if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) + continue; + + pci_addr_cache_insert_device(dev); + + /* Save the BAR's; firmware doesn't restore these after EEH reset */ + dn = pci_device_to_OF_node(dev); + eeh_save_bars(dev, PCI_DN(dn)); + } + +#ifdef DEBUG + /* Verify tree built up above, echo back the list of addrs. 
*/ + pci_addr_cache_print(&pci_io_addr_cache_root); +#endif +} + Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.h 2005-10-06 17:54:14.497454757 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h 2005-10-06 17:56:42.938627064 -0500 @@ -53,6 +53,14 @@ /* ---- EEH internal-use-only related routines ---- */ #ifdef CONFIG_EEH + +void pci_addr_cache_insert_device(struct pci_dev *dev); +void pci_addr_cache_remove_device(struct pci_dev *dev); +void pci_addr_cache_build(void); +struct pci_dev *pci_get_device_by_addr(unsigned long addr); + +void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn); + /** * eeh_slot_error_detail -- record and EEH error condition to the log * @severity: 1 if temporary, 2 if permanent failure. From linas at austin.ibm.com Fri Oct 7 09:54:36 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:54:36 -0500 Subject: [PATCH 17/22] ppc64: New Partition Endpoin support In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006235436.GR29826@austin.ibm.com> 17-eeh-partition-endpoint.patch New versions of firmware introduce a new method by which the "partition endpoint" (the point at which the pci bus is cut) is identified. This code adds support for this (mandatory) new feature. 
Signed-off-by: Linas Vepstas Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:56:42.936627345 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:56:46.221166493 -0500 @@ -84,6 +84,7 @@ static int ibm_read_slot_reset_state; static int ibm_read_slot_reset_state2; static int ibm_slot_error_detail; +static int ibm_get_config_addr_info; static int eeh_subsystem_enabled; @@ -458,6 +459,7 @@ static void rtas_pci_slot_reset(struct pci_dn *pdn, int state) { + int config_addr; int rc; BUG_ON (pdn==NULL); @@ -468,8 +470,13 @@ return; } + /* Use PE configuration address, if present */ + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + rc = rtas_call(ibm_set_slot_reset,4,1, NULL, - pdn->eeh_config_addr, + config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid), state); @@ -696,8 +703,22 @@ eeh_subsystem_enabled = 1; pdn->eeh_mode |= EEH_MODE_SUPPORTED; pdn->eeh_config_addr = regs[0]; + + /* If the newer, better, ibm,get-config-addr-info is supported, + * then use that instead. 
*/ + pdn->eeh_pe_config_addr = 0; + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { + unsigned int rets[2]; + ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, + pdn->eeh_config_addr, + info->buid_hi, info->buid_lo, + 0); + if (ret == 0) + pdn->eeh_pe_config_addr = rets[0]; + } #ifdef DEBUG - printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name); + printk(KERN_DEBUG "EEH: %s: eeh enabled, config=%x pe_config=%x\n", + dn->full_name, pdn->eeh_config_addr, pdn->eeh_pe_config_addr); #endif } else { @@ -749,6 +770,7 @@ ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) return; Index: linux-2.6.14-rc2-git6/include/asm-ppc64/pci-bridge.h =================================================================== --- linux-2.6.14-rc2-git6.orig/include/asm-ppc64/pci-bridge.h 2005-10-06 17:54:00.310445328 -0500 +++ linux-2.6.14-rc2-git6/include/asm-ppc64/pci-bridge.h 2005-10-06 17:56:46.222166353 -0500 @@ -61,6 +61,7 @@ int devfn; /* for pci devices */ int eeh_mode; /* See eeh.h for possible EEH_MODEs */ int eeh_config_addr; + int eeh_pe_config_addr; /* new-style partition endpoint address */ int eeh_check_count; /* # times driver ignored error */ int eeh_freeze_count; /* # times this device froze up. 
*/ int eeh_is_bridge; /* device is pci-to-pci bridge */ From linas at austin.ibm.com Fri Oct 7 09:55:42 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:55:42 -0500 Subject: [PATCH 18/22] PCI Error Recovery: IPR SCSI device driver In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006235542.GS29826@austin.ibm.com> PCI Error Recovery: IPR SCSI device driver Various PCI bus errors can be signaled by newer PCI controllers. This patch adds the PCI error recovery callbacks to the IPR SCSI device driver. The patch has been tested, and appears to work well. Signed-off-by: Linas Vepstas Signed-off-by: Brian King -- arch/ppc64/configs/pSeries_defconfig | 1 drivers/scsi/Kconfig | 8 +++ drivers/scsi/ipr.c | 93 +++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+) Index: linux-2.6.14-rc2-git6/drivers/scsi/Kconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/scsi/Kconfig 2005-10-06 17:50:21.443154534 -0500 +++ linux-2.6.14-rc2-git6/drivers/scsi/Kconfig 2005-10-06 17:56:53.965079951 -0500 @@ -1087,6 +1087,14 @@ If you enable this support, the iprdump daemon can be used to capture adapter failure analysis information. +config SCSI_IPR_EEH_RECOVERY + bool "Enable PCI bus error recovery" + depends on SCSI_IPR && PPC_PSERIES + help + If you say Y here, the driver will be able to recover from + PCI bus errors on many PowerPC platforms. IBM pSeries users + should answer Y. 
+ config SCSI_ZALON tristate "Zalon SCSI support" depends on GSC && SCSI Index: linux-2.6.14-rc2-git6/drivers/scsi/ipr.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/scsi/ipr.c 2005-10-06 17:50:21.444154394 -0500 +++ linux-2.6.14-rc2-git6/drivers/scsi/ipr.c 2005-10-06 17:56:53.972078969 -0500 @@ -5326,6 +5326,94 @@ shutdown_type); } +#ifdef CONFIG_SCSI_IPR_EEH_RECOVERY + +/** If the PCI slot is frozen, hold off all i/o + * activity; then, as soon as the slot is available again, + * initiate an adapter reset. + */ +static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd) +{ + /* Disallow new interrupts, avoid loop */ + ipr_cmd->ioa_cfg->allow_interrupts = 0; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + ipr_cmd->done = ipr_reset_ioa_job; + return IPR_RC_JOB_RETURN; +} + +/** ipr_eeh_frozen -- called when slot has experience PCI bus error. + * This routine is called to tell us that the PCI bus is down. + * Can't do anything here, except put the device driver into a + * holding pattern, waiting for the PCI bus to come back. + */ +static void ipr_eeh_frozen (struct pci_dev *pdev) +{ + unsigned long flags = 0; + struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + _ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_freeze, IPR_SHUTDOWN_NONE); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); +} + +/** ipr_eeh_slot_reset - called when pci slot has been reset. + * + * This routine is called by the pci error recovery recovery + * code after the PCI slot has been reset, just before we + * should resume normal operations. 
+ */ +static int ipr_eeh_slot_reset(struct pci_dev *pdev) +{ + unsigned long flags = 0; + struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + + // pci_enable_device(pdev); + // pci_set_master(pdev); + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + _ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_restore_cfg_space, + IPR_SHUTDOWN_NONE); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); + + return PCIERR_RESULT_RECOVERED; +} + +/** This routine is called when the PCI bus has permanently + * failed. This routine should purge all pending I/O and + * shut down the device driver (close and unload). + */ +static void ipr_eeh_perm_failure(struct pci_dev *pdev) +{ + unsigned long flags = 0; + struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + if (ioa_cfg->sdt_state == WAIT_FOR_DUMP) + ioa_cfg->sdt_state = ABORT_DUMP; + ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES; + ioa_cfg->in_ioa_bringdown = 1; + ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); +} + +static int ipr_eeh_error_detected(struct pci_dev *pdev, + enum pci_channel_state state) +{ + switch (state) { + case pci_channel_io_frozen: + ipr_eeh_frozen (pdev); + return PCIERR_RESULT_NEED_RESET; + + case pci_channel_io_perm_failure: + ipr_eeh_perm_failure (pdev); + return PCIERR_RESULT_DISCONNECT; + break; + default: + break; + } + return PCIERR_RESULT_NEED_RESET; +} +#endif + /** * ipr_probe_ioa_part2 - Initializes IOAs found in ipr_probe_ioa(..) 
* @ioa_cfg: ioa cfg struct @@ -6063,12 +6151,23 @@ }; MODULE_DEVICE_TABLE(pci, ipr_pci_table); + +#ifdef CONFIG_SCSI_IPR_EEH_RECOVERY +static struct pci_error_handlers ipr_err_handler = { + .error_detected = ipr_eeh_error_detected, + .slot_reset = ipr_eeh_slot_reset, +}; +#endif /* CONFIG_SCSI_IPR_EEH_RECOVERY */ + static struct pci_driver ipr_driver = { .name = IPR_NAME, .id_table = ipr_pci_table, .probe = ipr_probe, .remove = ipr_remove, .shutdown = ipr_shutdown, +#ifdef CONFIG_SCSI_IPR_EEH_RECOVERY + .err_handler = &ipr_err_handler, +#endif /* CONFIG_SCSI_IPR_EEH_RECOVERY */ }; /** Index: linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/configs/pSeries_defconfig 2005-10-06 17:50:21.444154394 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig 2005-10-06 17:56:53.974078688 -0500 @@ -476,6 +476,7 @@ CONFIG_SCSI_IPR=y CONFIG_SCSI_IPR_TRACE=y CONFIG_SCSI_IPR_DUMP=y +CONFIG_SCSI_IPR_EEH_RECOVERY=y # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set CONFIG_SCSI_QLA2XXX=y From linas at austin.ibm.com Fri Oct 7 09:56:37 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:56:37 -0500 Subject: [PATCH 19/22] PCI Error Recovery: Symbios SCSI device driver In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006235637.GT29826@austin.ibm.com> PCI Error Recovery: Symbios SCSI device driver Various PCI bus errors can be signaled by newer PCI controllers. This patch adds the PCI error recovery callbacks to the Symbios SCSI device driver. The patch has been tested, and appears to work well. 
Signed-off-by: Linas Vepstas -- arch/ppc64/configs/pSeries_defconfig | 1 drivers/scsi/Kconfig | 8 ++ drivers/scsi/sym53c8xx_2/sym_glue.c | 124 +++++++++++++++++++++++++++++++++++ drivers/scsi/sym53c8xx_2/sym_glue.h | 4 + drivers/scsi/sym53c8xx_2/sym_hipd.c | 16 ++++ 5 files changed, 153 insertions(+) Index: linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/configs/pSeries_defconfig 2005-10-06 10:36:42.939820924 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig 2005-10-06 10:36:46.735288291 -0500 @@ -473,6 +473,7 @@ CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY=y CONFIG_SCSI_IPR=y CONFIG_SCSI_IPR_TRACE=y CONFIG_SCSI_IPR_DUMP=y Index: linux-2.6.14-rc2-git6/drivers/scsi/Kconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/scsi/Kconfig 2005-10-06 10:36:42.913824572 -0500 +++ linux-2.6.14-rc2-git6/drivers/scsi/Kconfig 2005-10-06 10:36:46.738287870 -0500 @@ -1062,6 +1062,14 @@ the card. This is significantly slower then using memory mapped IO. Most people should answer N. +config SCSI_SYM53C8XX_EEH_RECOVERY + bool "Enable PCI bus error recovery" + depends on SCSI_SYM53C8XX_2 && PPC_PSERIES + help + If you say Y here, the driver will be able to recover from + PCI bus errors on many PowerPC platforms. IBM pSeries users + should answer Y. 
+ config SCSI_IPR tristate "IBM Power Linux RAID adapter support" depends on PCI && SCSI Index: linux-2.6.14-rc2-git6/drivers/scsi/sym53c8xx_2/sym_glue.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2005-10-06 10:32:48.850671732 -0500 +++ linux-2.6.14-rc2-git6/drivers/scsi/sym53c8xx_2/sym_glue.c 2005-10-06 10:36:46.741287449 -0500 @@ -685,6 +685,10 @@ struct sym_hcb *np = (struct sym_hcb *)dev_id; if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); +#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY + if (np->s.io_state != pci_channel_io_normal) + return IRQ_HANDLED; +#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */ spin_lock_irqsave(np->s.host->host_lock, flags); sym_interrupt(np); @@ -759,6 +763,27 @@ */ static void sym_eh_timeout(u_long p) { __sym_eh_done((struct scsi_cmnd *)p, 1); } +#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY +static void sym_eeh_timeout(u_long p) +{ + struct sym_eh_wait *ep = (struct sym_eh_wait *) p; + if (!ep) + return; + complete(&ep->done); +} + +static void sym_eeh_done(struct sym_eh_wait *ep) +{ + if (!ep) + return; + ep->timed_out = 0; + if (!del_timer(&ep->timer)) + return; + + complete(&ep->done); +} +#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */ + /* * Generic method for our eh processing. * The 'op' argument tells what we have to do. @@ -799,6 +824,37 @@ /* Try to proceed the operation we have been asked for */ sts = -1; +#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY + + /* We may be in an error condition because the PCI bus + * went down. In this case, we need to wait until the + * PCI bus is reset, the card is reset, and only then + * proceed with the scsi error recovery. We'll wait + * for 15 seconds for this to happen. 
+ */ +#define WAIT_FOR_PCI_RECOVERY 15 + if (np->s.io_state != pci_channel_io_normal) { + struct sym_eh_wait eeh, *eep = &eeh; + np->s.io_reset_wait = eep; + init_completion(&eep->done); + init_timer(&eep->timer); + eep->to_do = SYM_EH_DO_WAIT; + eep->timer.expires = jiffies + (WAIT_FOR_PCI_RECOVERY*HZ); + eep->timer.function = sym_eeh_timeout; + eep->timer.data = (u_long)eep; + eep->timed_out = 1; /* Be pessimistic for once :) */ + add_timer(&eep->timer); + spin_unlock_irq(np->s.host->host_lock); + wait_for_completion(&eep->done); + spin_lock_irq(np->s.host->host_lock); + if (eep->timed_out) { + printk (KERN_ERR "%s: Timed out waiting for PCI reset\n", + sym_name(np)); + } + np->s.io_reset_wait = NULL; + } +#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */ + switch(op) { case SYM_EH_ABORT: sts = sym_abort_scsiio(np, cmd, 1); @@ -1584,6 +1640,10 @@ np->maxoffs = dev->chip.offset_max; np->maxburst = dev->chip.burst_max; np->myaddr = dev->host_id; +#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY + np->s.io_state = pci_channel_io_normal; + np->s.io_reset_wait = NULL; +#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */ /* * Edit its name. @@ -1916,6 +1976,59 @@ return 1; } +#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY +/** sym2_io_error_detected() is called when PCI error is detected */ +static int sym2_io_error_detected (struct pci_dev *pdev, enum pci_channel_state state) +{ + struct sym_hcb *np = pci_get_drvdata(pdev); + + np->s.io_state = state; + // XXX If slot is permanently frozen, then what? + // Should we scsi_remove_host() maybe ?? + + /* Request a slot reset. */ + return PCIERR_RESULT_NEED_RESET; +} + +/** sym2_io_slot_reset is called when the pci bus has been reset. + * Restart the card from scratch.
*/ +static int sym2_io_slot_reset (struct pci_dev *pdev) +{ + struct sym_hcb *np = pci_get_drvdata(pdev); + + printk (KERN_INFO "%s: recovering from a PCI slot reset\n", + sym_name(np)); + + if (pci_enable_device(pdev)) + printk (KERN_ERR "%s: device setup failed most egregiously\n", + sym_name(np)); + + pci_set_master(pdev); + enable_irq (pdev->irq); + + /* Perform host reset only on one instance of the card */ + if (0 == PCI_FUNC (pdev->devfn)) + sym_reset_scsi_bus(np, 0); + + return PCIERR_RESULT_RECOVERED; +} + +/** sym2_io_resume is called when the error recovery driver + * tells us that it's OK to resume normal operation. + */ +static void sym2_io_resume (struct pci_dev *pdev) +{ + struct sym_hcb *np = pci_get_drvdata(pdev); + + /* Perform device startup only once for this card. */ + if (0 == PCI_FUNC (pdev->devfn)) + sym_start_up (np, 1); + + np->s.io_state = pci_channel_io_normal; + sym_eeh_done (np->s.io_reset_wait); +} +#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */ + /* * Driver host template.
*/ @@ -2169,11 +2282,22 @@ MODULE_DEVICE_TABLE(pci, sym2_id_table); +#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY +static struct pci_error_handlers sym2_err_handler = { + .error_detected = sym2_io_error_detected, + .slot_reset = sym2_io_slot_reset, + .resume = sym2_io_resume, +}; +#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */ + static struct pci_driver sym2_driver = { .name = NAME53C8XX, .id_table = sym2_id_table, .probe = sym2_probe, .remove = __devexit_p(sym2_remove), +#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY + .err_handler = &sym2_err_handler, +#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */ }; static int __init sym2_init(void) Index: linux-2.6.14-rc2-git6/drivers/scsi/sym53c8xx_2/sym_glue.h =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/scsi/sym53c8xx_2/sym_glue.h 2005-10-06 10:32:48.851671592 -0500 +++ linux-2.6.14-rc2-git6/drivers/scsi/sym53c8xx_2/sym_glue.h 2005-10-06 10:36:46.742287309 -0500 @@ -181,6 +181,10 @@ char chip_name[8]; struct pci_dev *device; + /* pci bus i/o state; waiter for clearing of i/o state */ + enum pci_channel_state io_state; + struct sym_eh_wait *io_reset_wait; + struct Scsi_Host *host; void __iomem * ioaddr; /* MMIO kernel io address */ Index: linux-2.6.14-rc2-git6/drivers/scsi/sym53c8xx_2/sym_hipd.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c 2005-10-06 10:32:48.851671592 -0500 +++ linux-2.6.14-rc2-git6/drivers/scsi/sym53c8xx_2/sym_hipd.c 2005-10-06 10:36:46.749286327 -0500 @@ -2806,6 +2806,7 @@ u_char istat, istatc; u_char dstat; u_short sist; + u_int icnt; /* * interrupt on the fly ? 
@@ -2847,6 +2848,7 @@ sist = 0; dstat = 0; istatc = istat; + icnt = 0; do { if (istatc & SIP) sist |= INW(np, nc_sist); @@ -2854,6 +2856,20 @@ dstat |= INB(np, nc_dstat); istatc = INB(np, nc_istat); istat |= istatc; +#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY + /* Prevent deadlock waiting on a condition that may never clear. */ + /* XXX this is a temporary kludge; the correct way to detect + * a PCI bus error would be to use the io_check interfaces + * proposed by Hidetoshi Seto + * Problem with polling like that is the state flag might not + * be set. + */ + icnt ++; + if (100 < icnt) { + if (np->s.device->error_state != pci_channel_io_normal) + return; + } +#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */ } while (istatc & (SIP|DIP)); if (DEBUG_FLAGS & DEBUG_TINY) From linas at austin.ibm.com Fri Oct 7 09:57:29 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:57:29 -0500 Subject: [PATCH 20/22] PCI Error Recovery: e100 network device driver In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006235729.GU29826@austin.ibm.com> PCI Error Recovery: e100 network device driver Various PCI bus errors can be signaled by newer PCI controllers. This patch adds the PCI error recovery callbacks to the intel ethernet e100 device driver. The patch has been tested, and appears to work well.
Signed-off-by: Linas Vepstas -- arch/ppc64/configs/pSeries_defconfig | 1 drivers/net/Kconfig | 8 +++ drivers/net/e100.c | 73 +++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) Index: linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/configs/pSeries_defconfig 2005-09-27 16:15:29.957254295 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig 2005-09-27 16:23:17.992430104 -0500 @@ -574,6 +574,7 @@ # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y +CONFIG_E100_EEH_RECOVERY=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set Index: linux-2.6.14-rc2-git6/drivers/net/Kconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/net/Kconfig 2005-09-27 14:35:57.000000000 -0500 +++ linux-2.6.14-rc2-git6/drivers/net/Kconfig 2005-09-27 16:23:17.993429963 -0500 @@ -1394,6 +1394,14 @@ . The module will be called e100. +config E100_EEH_RECOVERY + bool "Enable PCI bus error recovery" + depends on E100 && PPC_PSERIES + help + If you say Y here, the driver will be able to recover from + PCI bus errors on many PowerPC platforms. IBM pSeries users + should answer Y. 
+ config LNE390 tristate "Mylex EISA LNE390A/B support (EXPERIMENTAL)" depends on NET_PCI && EISA && EXPERIMENTAL Index: linux-2.6.14-rc2-git6/drivers/net/e100.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/net/e100.c 2005-09-27 14:35:57.825425161 -0500 +++ linux-2.6.14-rc2-git6/drivers/net/e100.c 2005-09-27 16:23:48.110194710 -0500 @@ -2650,6 +2650,76 @@ #endif } +#ifdef CONFIG_E100_EEH_RECOVERY + +/** e100_io_error_detected() is called when PCI error is detected */ +static int e100_io_error_detected(struct pci_dev *pdev, enum pci_channel_state state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + + /* Same as calling e100_down(netdev_priv(netdev)), but generic */ + netdev->stop(netdev); + + /* Is a detach needed ?? */ + // netif_device_detach(netdev); + + /* Request a slot reset. */ + return PCIERR_RESULT_NEED_RESET; +} + +/** e100_io_slot_reset is called after the pci bus has been reset. + * Restart the card from scratch. */ +static int e100_io_slot_reset(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct nic *nic = netdev_priv(netdev); + + if(pci_enable_device(pdev)) { + printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n"); + return PCIERR_RESULT_DISCONNECT; + } + pci_set_master(pdev); + + /* Only one device per card can do a reset */ + if (0 != PCI_FUNC (pdev->devfn)) + return PCIERR_RESULT_RECOVERED; + + e100_hw_reset(nic); + e100_phy_init(nic); + + if(e100_hw_init(nic)) { + DPRINTK(HW, ERR, "e100_hw_init failed\n"); + return PCIERR_RESULT_DISCONNECT; + } + + return PCIERR_RESULT_RECOVERED; +} + +/** e100_io_resume is called when the error recovery driver + * tells us that its OK to resume normal operation. 
+ */ +static void e100_io_resume(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct nic *nic = netdev_priv(netdev); + + /* ack any pending wake events, disable PME */ + pci_enable_wake(pdev, 0, 0); + + netif_device_attach(netdev); + if(netif_running(netdev)) + e100_open (netdev); + + mod_timer(&nic->watchdog, jiffies); +} + +static struct pci_error_handlers e100_err_handler = { + .error_detected = e100_io_error_detected, + .slot_reset = e100_io_slot_reset, + .resume = e100_io_resume, +}; + +#endif /* CONFIG_E100_EEH_RECOVERY */ static struct pci_driver e100_driver = { .name = DRV_NAME, @@ -2661,6 +2731,9 @@ .resume = e100_resume, #endif .shutdown = e100_shutdown, +#ifdef CONFIG_E100_EEH_RECOVERY + .err_handler = &e100_err_handler, +#endif /* CONFIG_E100_EEH_RECOVERY */ }; static int __init e100_init_module(void) From linas at austin.ibm.com Fri Oct 7 09:58:18 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:58:18 -0500 Subject: [PATCH 21/22] PCI Error Recovery: e1000 network device driver In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006235818.GV29826@austin.ibm.com> PCI Error Recovery: e1000 network device driver Various PCI bus errors can be signaled by newer PCI controllers. This patch adds the PCI error recovery callbacks to the intel gigabit ethernet e1000 device driver. The patch has been tested, and appears to work well. 
Signed-off-by: Linas Vepstas -- arch/ppc64/configs/pSeries_defconfig | 1 drivers/net/Kconfig | 8 ++ drivers/net/e1000/e1000_main.c | 103 ++++++++++++++++++++++++++++++++++- 3 files changed, 111 insertions(+), 1 deletion(-) Index: linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/configs/pSeries_defconfig 2005-10-06 17:47:05.582635736 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig 2005-10-06 17:47:12.737631817 -0500 @@ -593,6 +593,7 @@ # CONFIG_DL2K is not set CONFIG_E1000=y # CONFIG_E1000_NAPI is not set +CONFIG_E1000_EEH_RECOVERY=y # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set Index: linux-2.6.14-rc2-git6/drivers/net/Kconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/net/Kconfig 2005-10-06 17:47:05.582635736 -0500 +++ linux-2.6.14-rc2-git6/drivers/net/Kconfig 2005-10-06 17:47:12.742631116 -0500 @@ -1856,6 +1856,14 @@ If in doubt, say N. +config E1000_EEH_RECOVERY + bool "Enable PCI bus error recovery" + depends on E1000 && PPC_PSERIES + help + If you say Y here, the driver will be able to recover from + PCI bus errors on many PowerPC platforms. IBM pSeries users + should answer Y. 
+ config MYRI_SBUS tristate "MyriCOM Gigabit Ethernet support" depends on SBUS Index: linux-2.6.14-rc2-git6/drivers/net/e1000/e1000_main.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/net/e1000/e1000_main.c 2005-10-06 17:47:05.582635736 -0500 +++ linux-2.6.14-rc2-git6/drivers/net/e1000/e1000_main.c 2005-10-06 17:47:36.880244362 -0500 @@ -172,6 +172,18 @@ static void e1000_netpoll (struct net_device *netdev); #endif +#ifdef CONFIG_E1000_EEH_RECOVERY +static int e1000_io_error_detected(struct pci_dev *pdev, enum pci_channel_state state); +static int e1000_io_slot_reset(struct pci_dev *pdev); +static void e1000_io_resume(struct pci_dev *pdev); + +static struct pci_error_handlers e1000_err_handler = { + .error_detected = e1000_io_error_detected, + .slot_reset = e1000_io_slot_reset, + .resume = e1000_io_resume, +}; +#endif /* CONFIG_E1000_EEH_RECOVERY */ + /* Exported from other modules */ extern void e1000_check_options(struct e1000_adapter *adapter); @@ -184,8 +196,11 @@ /* Power Managment Hooks */ #ifdef CONFIG_PM .suspend = e1000_suspend, - .resume = e1000_resume + .resume = e1000_resume, #endif +#ifdef CONFIG_E1000_EEH_RECOVERY + .err_handler = &e1000_err_handler, +#endif /* CONFIG_E1000_EEH_RECOVERY */ }; MODULE_AUTHOR("Intel Corporation, "); @@ -2446,6 +2461,12 @@ #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF +#ifdef CONFIG_E1000_EEH_RECOVERY + /* Prevent stats update while adapter is being reset */ + if (adapter->link_speed == 0) + return; +#endif /* CONFIG_E1000_EEH_RECOVERY */ + spin_lock_irqsave(&adapter->stats_lock, flags); /* these counters are modified from e1000_adjust_tbi_stats, @@ -3791,4 +3812,90 @@ } #endif +#ifdef CONFIG_E1000_EEH_RECOVERY + +/** e1000_io_error_detected() is called when PCI error is detected */ +static int e1000_io_error_detected(struct pci_dev *pdev, enum pci_channel_state state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct e1000_adapter *adapter = 
netdev->priv; + + if (netif_running(netdev)) + e1000_down(adapter); + + /* Request a slot reset. */ + return PCIERR_RESULT_NEED_RESET; +} + +/** e1000_io_slot_reset is called after the pci bus has been reset. + * Restart the card from scratch. + * Implementation resembles the first-half of the + * e1000_resume routine. + */ +static int e1000_io_slot_reset(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct e1000_adapter *adapter = netdev->priv; + + if (pci_enable_device(pdev)) { + printk(KERN_ERR "e1000: Cannot re-enable PCI device after reset.\n"); + return PCIERR_RESULT_DISCONNECT; + } + pci_set_master(pdev); + + pci_enable_wake(pdev, 3, 0); + pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */ + + /* Perform card reset only on one instance of the card */ + if (0 != PCI_FUNC (pdev->devfn)) + return PCIERR_RESULT_RECOVERED; + + e1000_reset(adapter); + E1000_WRITE_REG(&adapter->hw, WUS, ~0); + + return PCIERR_RESULT_RECOVERED; +} + +/** e1000_io_resume is called when the error recovery driver + * tells us that it's OK to resume normal operation. + * Implementation resembles the second-half of the + * e1000_resume routine.
+ */ +static void e1000_io_resume(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct e1000_adapter *adapter = netdev->priv; + uint32_t manc, swsm; + + if (netif_running(netdev)) { + if (e1000_up(adapter)) { + printk("e1000: can't bring device back up after reset\n"); + return; + } + } + + netif_device_attach(netdev); + + if (adapter->hw.mac_type >= e1000_82540 && + adapter->hw.media_type == e1000_media_type_copper) { + manc = E1000_READ_REG(&adapter->hw, MANC); + manc &= ~(E1000_MANC_ARP_EN); + E1000_WRITE_REG(&adapter->hw, MANC, manc); + } + + switch(adapter->hw.mac_type) { + case e1000_82573: + swsm = E1000_READ_REG(&adapter->hw, SWSM); + E1000_WRITE_REG(&adapter->hw, SWSM, + swsm | E1000_SWSM_DRV_LOAD); + break; + default: + break; + } + + mod_timer(&adapter->watchdog_timer, jiffies); +} + +#endif /* CONFIG_E1000_EEH_RECOVERY */ + /* e1000_main.c */ From linas at austin.ibm.com Fri Oct 7 09:59:19 2005 From: linas at austin.ibm.com (linas) Date: Thu, 6 Oct 2005 18:59:19 -0500 Subject: [PATCH 22/22] PCI Error Recovery: ixgb network device driver In-Reply-To: <20051006232032.GA29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> Message-ID: <20051006235919.GW29826@austin.ibm.com> PCI Error Recovery: ixgb network device driver Various PCI bus errors can be signaled by newer PCI controllers. This patch adds the PCI error recovery callbacks to the intel ten-gigabit ethernet ixgb device driver. The patch has been tested, and appears to work well. 
Signed-off-by: Linas Vepstas -- arch/ppc64/configs/pSeries_defconfig | 1 drivers/net/Kconfig | 8 +++ drivers/net/ixgb/ixgb_main.c | 78 +++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) Index: linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/arch/ppc64/configs/pSeries_defconfig 2005-10-05 16:55:25.109651477 -0500 +++ linux-2.6.14-rc2-git6/arch/ppc64/configs/pSeries_defconfig 2005-10-05 16:55:26.410469062 -0500 @@ -610,6 +610,7 @@ # CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set +CONFIG_IXGB_EEH_RECOVERY=y CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set # CONFIG_2BUFF_MODE is not set Index: linux-2.6.14-rc2-git6/drivers/net/Kconfig =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/net/Kconfig 2005-10-05 16:55:25.114650776 -0500 +++ linux-2.6.14-rc2-git6/drivers/net/Kconfig 2005-10-05 16:55:26.414468501 -0500 @@ -2195,6 +2195,14 @@ If in doubt, say N. +config IXGB_EEH_RECOVERY + bool "Enable PCI bus error recovery" + depends on IXGB && PPC_PSERIES + help + If you say Y here, the driver will be able to recover from + PCI bus errors on many PowerPC platforms. IBM pSeries users + should answer Y. 
+ config S2IO tristate "S2IO 10Gbe XFrame NIC" depends on PCI Index: linux-2.6.14-rc2-git6/drivers/net/ixgb/ixgb_main.c =================================================================== --- linux-2.6.14-rc2-git6.orig/drivers/net/ixgb/ixgb_main.c 2005-10-05 16:54:33.590875982 -0500 +++ linux-2.6.14-rc2-git6/drivers/net/ixgb/ixgb_main.c 2005-10-05 17:00:08.092967727 -0500 @@ -132,6 +132,18 @@ static void ixgb_netpoll(struct net_device *dev); #endif +#ifdef CONFIG_IXGB_EEH_RECOVERY +static int ixgb_io_error_detected (struct pci_dev *pdev, enum pci_channel_state state); +static int ixgb_io_slot_reset (struct pci_dev *pdev); +static void ixgb_io_resume (struct pci_dev *pdev); + +static struct pci_error_handlers ixgb_err_handler = { + .error_detected = ixgb_io_error_detected, + .slot_reset = ixgb_io_slot_reset, + .resume = ixgb_io_resume, +}; +#endif /* CONFIG_IXGB_EEH_RECOVERY */ + /* Exported from other modules */ extern void ixgb_check_options(struct ixgb_adapter *adapter); @@ -141,6 +153,10 @@ .id_table = ixgb_pci_tbl, .probe = ixgb_probe, .remove = __devexit_p(ixgb_remove), +#ifdef CONFIG_IXGB_EEH_RECOVERY + .err_handler = &ixgb_err_handler, +#endif /* CONFIG_IXGB_EEH_RECOVERY */ + }; MODULE_AUTHOR("Intel Corporation, "); @@ -1653,8 +1669,16 @@ unsigned int i; #endif +#ifdef XXX_CONFIG_IXGB_EEH_RECOVERY + if(unlikely(icr==EEH_IO_ERROR_VALUE(4))) { + if (eeh_slot_is_isolated (adapter->pdev)) + // disable_irq_nosync (adapter->pdev->irq); + return IRQ_NONE; /* Not our interrupt */ + } +#else if(unlikely(!icr)) return IRQ_NONE; /* Not our interrupt */ +#endif /* CONFIG_IXGB_EEH_RECOVERY */ if(unlikely(icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC))) { mod_timer(&adapter->watchdog_timer, jiffies); @@ -2124,4 +2148,71 @@ } #endif +#ifdef CONFIG_IXGB_EEH_RECOVERY + +/** ixgb_io_error_detected() is called when PCI error is detected */ +static int ixgb_io_error_detected (struct pci_dev *pdev, enum pci_channel_state state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + 
struct ixgb_adapter *adapter = netdev->priv; + + if(netif_running(netdev)) + ixgb_down(adapter, TRUE); + + /* Request a slot reset. */ + return PCIERR_RESULT_NEED_RESET; +} + +/** ixgb_io_slot_reset is called after the pci bus has been reset. + * Restart the card from scratch. + * Implementation resembles the first-half of the + * ixgb_resume routine. + */ +static int ixgb_io_slot_reset (struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct ixgb_adapter *adapter = netdev->priv; + + if(pci_enable_device(pdev)) { + printk(KERN_ERR "ixgb: Cannot re-enable PCI device after reset.\n"); + return PCIERR_RESULT_DISCONNECT; + } + pci_set_master(pdev); + + /* Perform card reset only on one instance of the card */ + if (0 != PCI_FUNC (pdev->devfn)) + return PCIERR_RESULT_RECOVERED; + + ixgb_reset(adapter); + + return PCIERR_RESULT_RECOVERED; +} + +/** ixgb_io_resume is called when the error recovery driver + * tells us that its OK to resume normal operation. + * Implementation resembles the second-half of the + * ixgb_resume routine. + */ +static void ixgb_io_resume (struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct ixgb_adapter *adapter = netdev->priv; + + if(netif_running(netdev)) { + if(ixgb_up(adapter)) { + printk ("ixgb: can't bring device back up after reset\n"); + return; + } + } + + netif_device_attach(netdev); + mod_timer(&adapter->watchdog_timer, jiffies); + + /* Reading all-ff's from the adapter will completely hose + * the counts and statistics. 
So just clear them out */ + memset(&adapter->stats, 0, sizeof(struct ixgb_hw_stats)); + ixgb_update_stats(adapter); +} +#endif /* CONFIG_IXGB_EEH_RECOVERY */ + /* ixgb_main.c */ From michael at ellerman.id.au Fri Oct 7 12:03:30 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Fri, 7 Oct 2005 12:03:30 +1000 Subject: [RFC] Old paca being written by firmware after kexec Message-ID: <200510071203.37301.michael@ellerman.id.au> Hi, I'm seeing a bug of sorts when I kexec some kernels. It's exhibiting as page structs being corrupted early during boot: freeing bootmem node 0 Bad page state at __free_pages_ok (in process 'swapper', page c0000000005a7408) flags:0x0000000000020004 mapping:0000000000000000 mapcount:0 count:0 Backtrace: Call Trace: [c00000000257bc00] [c000000002089c54] .bad_page+0x90/0xec (unreliable) [c00000000257bc80] [c00000000208a26c] .__free_pages_ok+0x170/0x174 [c00000000257bd50] [c0000000025385e8] .free_all_bootmem_core+0x3e8/0x404 [c00000000257be30] [c000000002534188] .mem_init+0xe0/0x1d8 [c00000000257bed0] [c00000000252082c] .start_kernel+0x1f8/0x328 [c00000000257bf90] [c000000002008684] .hmt_init+0x0/0x7c Trying to fix it up, but a reboot is needed What's happening is that firmware is writing into the lppaca of the old kernel, which is now being used by the second kernel for page structs. It seems to be writing into the word starting at paca[x].lppaca.reserved2, which I guess it's allowed to do seeing as it's reserved. For kdump this isn't an issue, as the second kernel doesn't reuse the first kernel's memory. But for regular kexec it can be a problem. I think we're getting away with it most of the time because a) if you kexec the same kernel then the paca will land in the same spot, b) it only seems to write into those locations for a short while early during boot (presumably until we've set up pointers to the new paca?). The only solution I can see is to always allocate the paca in the same place. 
So that a kexec from one kernel to another always results in the paca landing in the same spot. Any other ideas? cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051007/ea234682/attachment.pgp From anton at samba.org Fri Oct 7 12:11:29 2005 From: anton at samba.org (Anton Blanchard) Date: Fri, 7 Oct 2005 12:11:29 +1000 Subject: [RFC] Old paca being written by firmware after kexec In-Reply-To: <200510071203.37301.michael@ellerman.id.au> References: <200510071203.37301.michael@ellerman.id.au> Message-ID: <20051007021129.GD5210@krispykreme> Hi Michael, > It seems to be writing into the word starting at paca[x].lppaca.reserved2, > which I guess it's allowed to do seeing as it's reserved. Ouch! Nice work hunting this down. I think there is an unregister vpa hcall, for a clean kexec boot perhaps we could have a shutdown method that calls this. Anton From david at gibson.dropbear.id.au Fri Oct 7 13:39:39 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Fri, 7 Oct 2005 13:39:39 +1000 Subject: [RFC] Old paca being written by firmware after kexec In-Reply-To: <200510071203.37301.michael@ellerman.id.au> References: <200510071203.37301.michael@ellerman.id.au> Message-ID: <20051007033939.GD7088@localhost.localdomain> On Fri, Oct 07, 2005 at 12:03:30PM +1000, Michael Ellerman wrote: > Hi, > > I'm seeing a bug of sorts when I kexec some kernels. 
It's exhibiting as page > structs being corrupted early during boot: > > freeing bootmem node 0 > Bad page state at __free_pages_ok (in process 'swapper', page > c0000000005a7408) > flags:0x0000000000020004 mapping:0000000000000000 mapcount:0 count:0 > Backtrace: > Call Trace: > [c00000000257bc00] [c000000002089c54] .bad_page+0x90/0xec (unreliable) > [c00000000257bc80] [c00000000208a26c] .__free_pages_ok+0x170/0x174 > [c00000000257bd50] [c0000000025385e8] .free_all_bootmem_core+0x3e8/0x404 > [c00000000257be30] [c000000002534188] .mem_init+0xe0/0x1d8 > [c00000000257bed0] [c00000000252082c] .start_kernel+0x1f8/0x328 > [c00000000257bf90] [c000000002008684] .hmt_init+0x0/0x7c > Trying to fix it up, but a reboot is needed > > What's happening is that firmware is writing into the lppaca of the old > kernel, which is now being used by the second kernel for page structs. > > It seems to be writing into the word starting at paca[x].lppaca.reserved2, > which I guess it's allowed to do seeing as it's reserved. > > For kdump this isn't an issue, as the second kernel doesn't reuse the first > kernel's memory. > > But for regular kexec it can be a problem. I think we're getting away with it > most of the time because a) if you kexec the same kernel then the paca will > land in the same spot, b) it only seems to write into those locations for a > short while early during boot (presumably until we've set up pointers to the > new paca?). > > The only solution I can see is to always allocate the paca in the > same place. So that a kexec from one kernel to another always > results in the paca landing in the same spot. Heh.. maybe a reason to write iBoot. Hrm. I think we should probably split the paca and lppaca in this case, that way only the lppaca needs to be fixed, at least. How does the hypervisor find the paca address again? -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! 
http://www.ozlabs.org/people/dgibson -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: Digital signature Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051007/8ed9edb5/attachment.pgp From anton at samba.org Fri Oct 7 13:59:22 2005 From: anton at samba.org (Anton Blanchard) Date: Fri, 7 Oct 2005 13:59:22 +1000 Subject: [RFC] Old paca being written by firmware after kexec In-Reply-To: <20051007021129.GD5210@krispykreme> References: <200510071203.37301.michael@ellerman.id.au> <20051007021129.GD5210@krispykreme> Message-ID: <20051007035922.GE5210@krispykreme> > Ouch! Nice work hunting this down. > > I think there is an unregister vpa hcall, for a clean kexec boot perhaps > we could have a shutdown method that calls this. I had a look at the register vpa hcall and it can take a flag to deregister the vpa: flags = 101 - deregister virtual processor area Anton From michael at ellerman.id.au Fri Oct 7 14:11:07 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Fri, 7 Oct 2005 14:11:07 +1000 Subject: [RFC] Old paca being written by firmware after kexec In-Reply-To: <20051007035922.GE5210@krispykreme> References: <200510071203.37301.michael@ellerman.id.au> <20051007021129.GD5210@krispykreme> <20051007035922.GE5210@krispykreme> Message-ID: <200510071411.11909.michael@ellerman.id.au> On Fri, 7 Oct 2005 13:59, Anton Blanchard wrote: > > Ouch! Nice work hunting this down. > > > > I think there is an unregister vpa hcall, for a clean kexec boot perhaps > > we could have a shutdown method that calls this. > > I had a look at the register vpa hcall and it can take a flag > to deregister the vpa: > > flags = 101 - deregister virtual processor area Yep, I'm just looking at that. I'll try and scrape up a patch to do that this arvo. Thanks for the pointer. 
cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051007/bc97ea3d/attachment.pgp From michael at ellerman.id.au Fri Oct 7 16:56:35 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Fri, 7 Oct 2005 16:56:35 +1000 Subject: [RFC] Old paca being written by firmware after kexec In-Reply-To: <200510071203.37301.michael@ellerman.id.au> References: <200510071203.37301.michael@ellerman.id.au> Message-ID: <200510071656.36822.michael@ellerman.id.au> Hi again, Here's a first cut at a patch. It needs some spit and polish, but I thought I'd throw it out over the weekend in case anyone has any comments. This seems to fix my bug, although on one reboot I got a cpu stuck - which may or may not be related. 
cheers arch/ppc64/kernel/machine_kexec.c | 19 +++++++++++++++++++ arch/ppc64/kernel/pSeries_lpar.c | 10 +++------- include/asm-ppc64/plpar_wrappers.h | 24 ++++++++++++++++++++---- include/asm-ppc64/smp.h | 8 +------- 4 files changed, 43 insertions(+), 18 deletions(-) Index: kexec/arch/ppc64/kernel/pSeries_lpar.c =================================================================== --- kexec.orig/arch/ppc64/kernel/pSeries_lpar.c +++ kexec/arch/ppc64/kernel/pSeries_lpar.c @@ -260,22 +260,18 @@ out: void vpa_init(int cpu) { int hwcpu = get_hard_smp_processor_id(cpu); - unsigned long vpa = (unsigned long)&(paca[cpu].lppaca); + unsigned long vpa = __pa(&paca[cpu].lppaca); long ret; - unsigned long flags; - - /* Register the Virtual Processor Area (VPA) */ - flags = 1UL << (63 - 18); if (cpu_has_feature(CPU_FTR_ALTIVEC)) paca[cpu].lppaca.vmxregs_in_use = 1; - ret = register_vpa(flags, hwcpu, __pa(vpa)); + ret = register_vpa(hwcpu, vpa); if (ret) printk(KERN_ERR "WARNING: vpa_init: VPA registration for " "cpu %d (hw %d) of area %lx returns %ld\n", - cpu, hwcpu, __pa(vpa), ret); + cpu, hwcpu, vpa, ret); } long pSeries_lpar_hpte_insert(unsigned long hpte_group, Index: kexec/include/asm-ppc64/plpar_wrappers.h =================================================================== --- kexec.orig/include/asm-ppc64/plpar_wrappers.h +++ kexec/include/asm-ppc64/plpar_wrappers.h @@ -22,13 +22,29 @@ static inline long cede_processor(void) return(0); } -static inline long register_vpa(unsigned long flags, unsigned long proc, - unsigned long vpa) +static inline long unregister_vpa(unsigned long cpu, unsigned long vpa) { - return plpar_hcall_norets(H_REGISTER_VPA, flags, proc, vpa); + unsigned long flags; + + /* The flags are in bits 16-18 (counting from most significant bit) */ + flags = 5UL << (63 - 18); + + printk("VPA unregister (%lx) cpu %d at %lx\n", flags, cpu, vpa); + + return plpar_hcall_norets(H_REGISTER_VPA, flags, cpu, vpa); } -void vpa_init(int cpu); +static inline long 
register_vpa(unsigned long cpu, unsigned long vpa) +{ + unsigned long flags; + + /* The flags are in bits 16-18 (counting from most significant bit) */ + flags = 1UL << (63 - 18); + + printk("VPA register (%lx) cpu %d at %lx\n", flags, cpu, vpa); + + return plpar_hcall_norets(H_REGISTER_VPA, flags, cpu, vpa); +} static inline long plpar_pte_remove(unsigned long flags, unsigned long ptex, Index: kexec/include/asm-ppc64/smp.h =================================================================== --- kexec.orig/include/asm-ppc64/smp.h +++ kexec/include/asm-ppc64/smp.h @@ -86,13 +86,7 @@ extern void smp_generic_take_timebase(vo extern struct smp_ops_t *smp_ops; -#ifdef CONFIG_PPC_PSERIES -void vpa_init(int cpu); -#else -static inline void vpa_init(int cpu) -{ -} -#endif /* CONFIG_PPC_PSERIES */ +extern void vpa_init(int cpu); #endif /* __ASSEMBLY__ */ Index: kexec/arch/ppc64/kernel/machine_kexec.c =================================================================== --- kexec.orig/arch/ppc64/kernel/machine_kexec.c +++ kexec/arch/ppc64/kernel/machine_kexec.c @@ -24,6 +24,7 @@ #include #include /* _end */ #include +#include #define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ @@ -191,10 +192,26 @@ void kexec_smp_down(void *arg) /* NOTREACHED */ } +static void vpa_shutdown(int cpu) +{ + unsigned long vpa = __pa(&paca[cpu].lppaca); + long ret; + + ret = unregister_vpa(get_hard_smp_processor_id(cpu), vpa); + + if (ret) + printk(KERN_ERR "WARNING: vpa_shutdown: VPA deregistration " + "failed with code %d\n", ret); +} + static void kexec_prepare_cpus(void) { int my_cpu, i, notified=-1; + /* FIXME: I'd rather do this after the loop but we clobber hw_cpu_id */ + for_each_cpu(i) + vpa_shutdown(i); + smp_call_function(kexec_smp_down, NULL, 0, /* wait */0); my_cpu = get_cpu(); @@ -203,6 +220,8 @@ static void kexec_prepare_cpus(void) if (i == my_cpu) continue; + /* FIXME: Why do we abuse hw_cpu_id and not cpu_start? 
*/ + while (paca[i].hw_cpu_id != -1) { barrier(); if (!cpu_possible(i)) { From support at creditunion1.org Fri Oct 7 22:16:06 2005 From: support at creditunion1.org (support at creditunion1.org) Date: Fri, 7 Oct 2005 14:16:06 +0200 Subject: Credit Union One Informs You! Message-ID: <200510071216.j97CG6vt028737@nvmopenhuizendag.nl> An HTML attachment was scrubbed... URL: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051007/40a3fe98/attachment.htm From linas at austin.ibm.com Sat Oct 8 00:59:52 2005 From: linas at austin.ibm.com (linas) Date: Fri, 7 Oct 2005 09:59:52 -0500 Subject: [PATCH 3/7] ppc64: EEH Add event/internal state statistics In-Reply-To: <17219.46514.903283.21680@cargo.ozlabs.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930005451.GC6173@austin.ibm.com> <17219.46514.903283.21680@cargo.ozlabs.ibm.com> Message-ID: <20051007145952.GX29826@austin.ibm.com> On Wed, Oct 05, 2005 at 09:14:58PM +1000, Paul Mackerras was heard to remark: > Linas writes: > > > 03-eeh-statistics.patch > > > + if (!dn) { > > + __get_cpu_var(no_dn)++; > > We have to make sure we are not preemptible when we use > __get_cpu_var, since it uses smp_processor_id(). It's not clear to me > that we have ensured that in every case where we use __get_cpu_var. > Are you sure that we hold a spinlock, or are at interrupt level, or > have explicitly disabled preemption at every point where we use > __get_cpu_var? These used to be plain-old global variables, but someone submitted a patch to turn them into the __get_cpu_var() form. I don't know why; there's no real performance reason, since these are almost never incremented, except a bit during boot. What if we just change them back to global vars? I've also day-dreamed about moving these stats to somewhere in the /sys directory. Any suggestions there?
--linas From miltonm at bga.com Sat Oct 8 01:05:12 2005 From: miltonm at bga.com (Milton Miller) Date: Fri, 7 Oct 2005 10:05:12 -0500 Subject: [RFC] Old paca being written by firmware after kexec In-Reply-To: <200510071656.36822.michael@ellerman.id.au> References: <200510071203.37301.michael@ellerman.id.au> <200510071656.36822.michael@ellerman.id.au> Message-ID: <416e59effa18d7d7eb6500416e638ef8@bga.com> On Oct 7, 2005, at 1:56 AM, Michael Ellerman wrote: > Hi again, > > Here's a first cut at a patch. It needs some spit and polish, but I > thought > I'd throw it out over the weekend in case anyone has any comments. > > This seems to fix my bug, although on one reboot I got a cpu stuck - > which > may or may not be related. > > cheers > ... > --- kexec.orig/arch/ppc64/kernel/machine_kexec.c > +++ kexec/arch/ppc64/kernel/machine_kexec.c > @@ -24,6 +24,7 @@ > #include > #include /* _end */ > #include > +#include > > #define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ > > @@ -191,10 +192,26 @@ void kexec_smp_down(void *arg) > /* NOTREACHED */ > } > > +static void vpa_shutdown(int cpu) > +{ > + unsigned long vpa = __pa(&paca[cpu].lppaca); > + long ret; > + > + ret = unregister_vpa(get_hard_smp_processor_id(cpu), vpa); > + > + if (ret) > + printk(KERN_ERR "WARNING: vpa_shutdown: VPA deregistration " > + "failed with code %d\n", ret); > +} > + > static void kexec_prepare_cpus(void) > { > int my_cpu, i, notified=-1; > > + /* FIXME: I'd rather do this after the loop but we clobber hw_cpu_id > */ i'd rather each cpu do it in kexec-cpu-down we can skip on panic however, that means we should register on the cpu to make sure we don't miss one going down. > + for_each_cpu(i) > + vpa_shutdown(i); > + > smp_call_function(kexec_smp_down, NULL, 0, /* wait */0); > my_cpu = get_cpu(); > > @@ -203,6 +220,8 @@ static void kexec_prepare_cpus(void) > if (i == my_cpu) > continue; > > + /* FIXME: Why do we abuse hw_cpu_id and not cpu_start? 
*/ > + We abuse hw_cpu_id because this makes all cases the same. the cpu is saying this is no longer me > while (paca[i].hw_cpu_id != -1) { > barrier(); > if (!cpu_possible(i)) { > > Another thing to investigate: if we start SMT threads but tell the kernel no-smt, i saw a case where we put hw_cpuids in the paca but never started them. milton From linas at austin.ibm.com Sat Oct 8 01:23:05 2005 From: linas at austin.ibm.com (linas) Date: Fri, 7 Oct 2005 10:23:05 -0500 Subject: [PATCH 6/7] ppc64: EEH Avoid racing reports of errors In-Reply-To: <17219.47007.44643.148022@cargo.ozlabs.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930010038.GF6173@austin.ibm.com> <17219.47007.44643.148022@cargo.ozlabs.ibm.com> Message-ID: <20051007152305.GY29826@austin.ibm.com> On Wed, Oct 05, 2005 at 09:23:11PM +1000, Paul Mackerras was heard to remark: > Linas writes: > > > 06-eeh-report-race.patch > > Shouldn't you pass in pe_dn->child here, or > alternatively rearrange __eeh_mark_slot to do the node you give it > plus its children (recursively)? Yes; that's right; this gets fixed in a later patch in the series. I guess this one snuck by while I was trying to sync up all the different patches I was carrying :-/ > Two other comments about __eeh_mark_slot: (1) despite the comment, the > function doesn't do anything to any pci_dev or pci_driver The comment is also a "back port" of a function that shows up in a later patch, and so indeed is inappropriate for this patch. Again, my excuse is that I got sloppy while juggling all of these patchlets. Sorry. > (not that it > should be touching any pci_driver), One problem I was seeing was that after getting an EEH error, some device drivers would start spinning in their interrupt handlers. I tried to break out of this spin-loop by adding a call to a function that asked "am I the victim of an EEH event"?
Unfortunately, the first implementation of this call was not interrupt safe (pci_device_to_OF_node calls traverse_pci_devices). While scratching my head over how best to fix this, I decided that the best thing to do would be to mark up the pci driver with a flag; that way, the driver can look up the EEH state without any further ado. One might be able to get rid of this state in pci_driver, although it seemed generically useful to have. For example, later on, I futzed with a version that disabled the irq line for that adapter "as soon as possible", and that seems to also work, at least on an SMP machine. On a non-SMP machine, there is still the danger that the device driver is spinning with interrupts disabled, waiting on a status register to change, that will never change. (And because of the deadlock, the code to disable a given irq line never runs). It all depends on how the device driver got written. > and (2) a recursive function can't > really be inline Well, no, but at least the first level call can be inlined; I assumed that gcc would do at least that, but didn't check. --linas From linas at austin.ibm.com Sat Oct 8 05:46:44 2005 From: linas at austin.ibm.com (linas) Date: Fri, 7 Oct 2005 14:46:44 -0500 Subject: [PATCH 1/7] ppc64: EEH typos, include files, macros, whitespace In-Reply-To: <17219.46319.501091.93202@cargo.ozlabs.ibm.com> References: <20050930004800.GL29826@austin.ibm.com> <20050930005141.GA6173@austin.ibm.com> <17219.46319.501091.93202@cargo.ozlabs.ibm.com> Message-ID: <20051007194644.GA29826@austin.ibm.com> On Wed, Oct 05, 2005 at 09:11:43PM +1000, Paul Mackerras was heard to remark: > > This makes the line go over 80 columns, which seems unnecessary. Hm, I code with tabstops set to 3, on a 132-column terminal. It looked goofy there. > > - * @token i/o token, should be address in the form 0xE.... > > + * @token i/o token, should be address in the form 0xA....
> > I think the virtual addresses we get from ioremap these days start > with 0xD00008... Ah, didn't realize this changed. I was simultaneously debugging a 2.4 kernel (for other reasons) when I noticed this. --linas From olh at suse.de Sat Oct 8 20:47:25 2005 From: olh at suse.de (Olaf Hering) Date: Sat, 8 Oct 2005 12:47:25 +0200 Subject: pcnet32 does not use the PROM address on powerpc Message-ID: <20051008104725.GA10248@suse.de> Does anyone remember why ppc is handled specially in pcnet32_probe1()? It was added to Linus' tree around 2.6.14, I don't find relevant patches in our bugzilla. It was likely added during early ppc64 bringup. Google doesn't know the patch submitter, perhaps jgarzik has some old records from around 2000/2001? I have a 44p 270, which gets all 0xFF as MAC address if I power it off and on again. Further reboots do not fix it. But it does get the correct one if I boot into SMS and do a netboot, further reboots will always get the correct MAC address. I think these 0xFF happen also on other systems, I have seen them on B50 as well. We should just remove the #ifdef. .... if (memcmp(promaddr, dev->dev_addr, 6) || !is_valid_ether_addr(dev->dev_addr)) { #ifndef __powerpc__ if (is_valid_ether_addr(promaddr)) { #else if (!is_valid_ether_addr(dev->dev_addr) && is_valid_ether_addr(promaddr)) { #endif if (pcnet32_debug & NETIF_MSG_PROBE) { printk(" warning: CSR address invalid,\n"); printk(KERN_INFO " using instead PROM address of"); } memcpy(dev->dev_addr, promaddr, 6); } } .... -- short story of a lazy sysadmin: alias appserv=wotan From olh at suse.de Sat Oct 8 22:03:16 2005 From: olh at suse.de (Olaf Hering) Date: Sat, 8 Oct 2005 14:03:16 +0200 Subject: pcnet32 does not use the PROM address on powerpc In-Reply-To: <20051008104725.GA10248@suse.de> References: <20051008104725.GA10248@suse.de> Message-ID: <20051008120316.GA12122@suse.de> On Sat, Oct 08, Olaf Hering wrote: > I have a 44p 270, which gets all 0xFF as MAC address if I power it off > and on again.
Further reboots do not fix it. > But it does get the correct one if I boot into SMS and do a netboot, > further reboots will always get the correct MAC address The CSR will get the correct value after the first ifup: (none):/# modprobe -v pcnet32 insmod /lib/modules/2.6.13.3-20051007_rtas-ppc64/kernel/drivers/net/mii.ko insmod /lib/modules/2.6.13.3-20051007_rtas-ppc64/kernel/drivers/net/pcnet32.ko (none):/# ip link set eth0 up (none):/# rmmod pcnet32 (none):/# modprobe -v pcnet32 insmod /lib/modules/2.6.13.3-20051007_rtas-ppc64/kernel/drivers/net/mii.ko insmod /lib/modules/2.6.13.3-20051007_rtas-ppc64/kernel/drivers/net/pcnet32.ko (none):/# dmesg | tail -n 15 pcnet32.c:v1.30j 29.04.2005 tsbogend at alpha.franken.de PCI: Enabling device: (0000:00:10.0), cmd 143 pcnet32: PCnet/FAST 79C971 at 0xfff400, prom 0004ace4a6d7 csr ffffffffffff, warning: CSR address invalid, using instead PROM address of 00 04 ac e4 a6 d7 tx_start_pt(0x0c00):~220 bytes, BCR18(68e1):BurstWrEn BurstRdEn DWordIO NoUFlow SRAMSIZE=0x7f00, SRAM_BND=0x4000, assigned IRQ 18. eth0: registered as PCnet/FAST 79C971 pcnet32: 1 cards_found. eth0: link up, 100Mbps, half-duplex, lpa 0x40A1 pcnet32.c:v1.30j 29.04.2005 tsbogend at alpha.franken.de pcnet32: PCnet/FAST 79C971 at 0xfff400, prom 0004ace4a6d7 csr 0004ace4a6d7, 00 04 ac e4 a6 d7 tx_start_pt(0x0c00):~220 bytes, BCR18(68e1):BurstWrEn BurstRdEn DWordIO NoUFlow SRAMSIZE=0x7f00, SRAM_BND=0x4000, assigned IRQ 18. eth0: registered as PCnet/FAST 79C971 pcnet32: 1 cards_found. 
-- short story of a lazy sysadmin: alias appserv=wotan From olh at suse.de Mon Oct 10 04:19:34 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:34 +0000 Subject: [PATCH 2/13] ppc64 boot: remove include from include/linux/zutil.h In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181934.2.pxElh29123.29065.olh@nectarine.suse.de> zutil.h does not need errno.h Signed-off-by: Olaf Hering include/linux/zutil.h | 1 - 1 files changed, 1 deletion(-) Index: linux-2.6.14-rc3/include/linux/zutil.h =================================================================== --- linux-2.6.14-rc3.orig/include/linux/zutil.h +++ linux-2.6.14-rc3/include/linux/zutil.h @@ -15,7 +15,6 @@ #include #include -#include #include typedef unsigned char uch; From olh at suse.de Mon Oct 10 04:19:41 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:41 +0000 Subject: [PATCH 9/13] ppc64 boot: use memset to clear bss In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181941.9.uFqIl29292.29065.olh@nectarine.suse.de> Use memset to clear bss, instead of own version. Signed-off-by: Olaf Hering arch/ppc64/boot/crt0.S | 19 ------------------- arch/ppc64/boot/main.c | 3 +++ 2 files changed, 3 insertions(+), 19 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/crt0.S =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/crt0.S +++ linux-2.6.14-rc3/arch/ppc64/boot/crt0.S @@ -25,24 +25,5 @@ _start: sync isync - ## Clear out the BSS as per ANSI C requirements - - lis r7,_end at ha - addi r7,r7,_end at l # r7 = &_end - lis r8,__bss_start at ha # - addi r8,r8,__bss_start at l # r8 = &_bss_start - - ## Determine how large an area, in number of words, to clear - - subf r7,r8,r7 # r7 = &_end - &_bss_start + 1 - addi r7,r7,3 # r7 += 3 - srwi. r7,r7,2 # r7 = size in words. 
- beq 3f # If the size is zero, don't bother - addi r8,r8,-4 # r8 -= 4 - mtctr r7 # SPRN_CTR = number of words to clear - li r0,0 # r0 = 0 -2: stwu r0,4(r8) # Clear out a word - bdnz 2b # Keep clearing until done -3: b start Index: linux-2.6.14-rc3/arch/ppc64/boot/main.c =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/main.c +++ linux-2.6.14-rc3/arch/ppc64/boot/main.c @@ -26,6 +26,7 @@ extern void flush_cache(void *, unsigned #define ONE_MB 0x100000 extern char _start[]; +extern char __bss_start[]; extern char _end[]; extern char _vmlinux_start[]; extern char _vmlinux_end[]; @@ -138,6 +139,8 @@ void start(unsigned long a1, unsigned lo Elf64_Ehdr *elf64; Elf64_Phdr *elf64ph; + memset(__bss_start, 0, _end - __bss_start); + prom = (int (*)(void *)) promptr; chosen_handle = finddevice("/chosen"); if (chosen_handle == (void *) -1) From olh at suse.de Mon Oct 10 04:19:39 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:39 +0000 Subject: [PATCH 7/13] ppc64 boot: bootfiles depend on linker script In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181939.7.yJERr29241.29065.olh@nectarine.suse.de> bootfiles must be relinked of linker script changes Signed-off-by: Olaf Hering arch/ppc64/boot/Makefile | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/Makefile =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/Makefile +++ linux-2.6.14-rc3/arch/ppc64/boot/Makefile @@ -127,11 +127,11 @@ $(call obj-sec, $(required) $(initrd)): $(call cmd,addsection) $(obj)/zImage.vmode: obj-boot += $(call obj-sec, $(required)) -$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) +$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) $(srctree)/$(src)/zImage.lds $(call cmd,bootld,$(obj-boot)) $(obj)/zImage.initrd.vmode: obj-boot += $(call obj-sec, 
$(required) $(initrd)) -$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) +$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) $(srctree)/$(src)/zImage.lds $(call cmd,bootld,$(obj-boot)) $(obj)/zImage: $(obj)/zImage.vmode $(obj)/addnote From olh at suse.de Mon Oct 10 04:19:35 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:35 +0000 Subject: [PATCH 3/13] ppc64 boot: missing include for size_t In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181935.3.fNZxJ29147.29065.olh@nectarine.suse.de> string.h needs definition of size_t, but not the one from linux/include Signed-off-by: Olaf Hering arch/ppc64/boot/string.h | 1 + 1 files changed, 1 insertion(+) Index: linux-2.6.14-rc3/arch/ppc64/boot/string.h =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/string.h +++ linux-2.6.14-rc3/arch/ppc64/boot/string.h @@ -1,5 +1,6 @@ #ifndef _PPC_BOOT_STRING_H_ #define _PPC_BOOT_STRING_H_ +#include extern char *strcpy(char *dest, const char *src); extern char *strncpy(char *dest, const char *src, size_t n); From olh at suse.de Mon Oct 10 04:19:45 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:45 +0000 Subject: [PATCH 13/13] ppc64 boot: proof that reloc works In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181945.13.JsFBk29388.29065.olh@nectarine.suse.de> To proof that the relocation works, move the crt0.o away from the beginning. Move linker options from command line into linker script. rename entry point because '_start' is referenced in printf output. 
Signed-off-by: Olaf Hering arch/ppc64/boot/Makefile | 4 ++-- arch/ppc64/boot/crt0.S | 4 ++-- arch/ppc64/boot/zImage.lds | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/Makefile =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/Makefile +++ linux-2.6.14-rc3/arch/ppc64/boot/Makefile @@ -24,7 +24,7 @@ HOSTCC := gcc BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) -fPIC BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc -BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds +BOOTLFLAGS := -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c @@ -34,7 +34,7 @@ zliblinuxheader := zlib.h zconf.h zutil. $(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) #$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h) -src-boot := crt0.S string.S prom.c main.c div64.S +src-boot := string.S prom.c main.c div64.S crt0.S src-boot += $(zlib) src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) Index: linux-2.6.14-rc3/arch/ppc64/boot/crt0.S =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/crt0.S +++ linux-2.6.14-rc3/arch/ppc64/boot/crt0.S @@ -12,8 +12,8 @@ #include "ppc_asm.h" .text - .globl _start -_start: + .globl _zimage_start +_zimage_start: bl reloc_offset reloc_offset: Index: linux-2.6.14-rc3/arch/ppc64/boot/zImage.lds =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/zImage.lds +++ linux-2.6.14-rc3/arch/ppc64/boot/zImage.lds @@ -1,6 +1,9 @@ OUTPUT_ARCH(powerpc:common) +ENTRY(_zimage_start) SECTIONS { + . 
= (4*1024*1024); + _start = .; .text : { *(.text) From olh at suse.de Mon Oct 10 04:19:44 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:44 +0000 Subject: [PATCH 12/13] ppc64 boot: make the zImage relocateable In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181944.12.zilpf29364.29065.olh@nectarine.suse.de> Make the zImage relocatable, so yaboot can just load and run any ELF binary without worrying about its load address. Signed-off-by: Olaf Hering arch/ppc64/boot/Makefile | 2 +- arch/ppc64/boot/crt0.S | 29 +++++++++++++++++++++++++++++ arch/ppc64/boot/zImage.lds | 4 +++- 3 files changed, 33 insertions(+), 2 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/Makefile =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/Makefile +++ linux-2.6.14-rc3/arch/ppc64/boot/Makefile @@ -22,7 +22,7 @@ HOSTCC := gcc -BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) +BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) -fPIC BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data Index: linux-2.6.14-rc3/arch/ppc64/boot/crt0.S =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/crt0.S +++ linux-2.6.14-rc3/arch/ppc64/boot/crt0.S @@ -14,9 +14,38 @@ .text .globl _start _start: + bl reloc_offset + +reloc_offset: + mflr r0 + lis r9,reloc_offset@ha + addi r9,r9,reloc_offset@l + subf. r0,r9,r0 + beq clear_caches + +reloc_got2: + lis r9,__got2_start@ha + addi r9,r9,__got2_start@l + lis r8,__got2_end@ha + addi r8,r8,__got2_end@l + subf. r8,r9,r8 + beq clear_caches + srwi.
r8,r8,2 + mtctr r8 + add r9,r0,r9 +reloc_got2_loop: + lwz r8,0(r9) + add r8,r8,r0 + stw r8,0(r9) + addi r9,r9,4 + bdnz reloc_got2_loop + +clear_caches: lis r9,_start@h + add r9,r0,r9 lis r8,_etext@ha addi r8,r8,_etext@l + add r8,r0,r8 1: dcbf r0,r9 icbi r0,r9 addi r9,r9,0x20 Index: linux-2.6.14-rc3/arch/ppc64/boot/zImage.lds =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/zImage.lds +++ linux-2.6.14-rc3/arch/ppc64/boot/zImage.lds @@ -13,7 +13,9 @@ SECTIONS *(.rodata*) *(.data*) *(.sdata*) - *(.got*) + __got2_start = .; + *(.got2) + __got2_end = .; } . = ALIGN(4096); From olh at suse.de Mon Oct 10 04:19:38 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:38 +0000 Subject: [PATCH 6/13] ppc64 boot: move gunzip function before use In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181938.6.QgKuQ29217.29065.olh@nectarine.suse.de> Move the gunzip function up. Signed-off-by: Olaf Hering arch/ppc64/boot/main.c | 115 ++++++++++++++++++++++++------------------------- 1 files changed, 57 insertions(+), 58 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/main.c =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/main.c +++ linux-2.6.14-rc3/arch/ppc64/boot/main.c @@ -17,7 +17,6 @@ #include "prom.h" #include "zlib.h" -static void gunzip(void *, int, unsigned char *, int *); extern void flush_cache(void *, unsigned long); @@ -56,6 +55,63 @@ typedef void (*kernel_entry_t)( unsigned static unsigned long claim_base; +#define HEAD_CRC 2 +#define EXTRA_FIELD 4 +#define ORIG_NAME 8 +#define COMMENT 0x10 +#define RESERVED 0xe0 + +static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +{ + z_stream s; + int r, i, flags; + + /* skip header */ + i = 10; + flags = src[3]; + if (src[2] != Z_DEFLATED || (flags & RESERVED) != 0) { + printf("bad gzipped data\n\r"); + exit(); + } + if
((flags & EXTRA_FIELD) != 0) + i = 12 + src[10] + (src[11] << 8); + if ((flags & ORIG_NAME) != 0) + while (src[i++] != 0) + ; + if ((flags & COMMENT) != 0) + while (src[i++] != 0) + ; + if ((flags & HEAD_CRC) != 0) + i += 2; + if (i >= *lenp) { + printf("gunzip: ran out of data in header\n\r"); + exit(); + } + + if (zlib_inflate_workspacesize() > sizeof(scratch)) { + printf("gunzip needs more mem\n"); + exit(); + } + memset(&s, 0, sizeof(s)); + s.workspace = scratch; + r = zlib_inflateInit2(&s, -MAX_WBITS); + if (r != Z_OK) { + printf("inflateInit2 returned %d\n\r", r); + exit(); + } + s.next_in = src + i; + s.avail_in = *lenp - i; + s.next_out = dst; + s.avail_out = dstlen; + r = zlib_inflate(&s, Z_FULL_FLUSH); + if (r != Z_OK && r != Z_STREAM_END) { + printf("inflate returned %d msg: %s\n\r", r, s.msg); + exit(); + } + *lenp = s.next_out - (unsigned char *) dst; + zlib_inflateEnd(&s); +} + static unsigned long try_claim(unsigned long size) { unsigned long addr = 0; @@ -213,60 +269,3 @@ void start(unsigned long a1, unsigned lo exit(); } -#define HEAD_CRC 2 -#define EXTRA_FIELD 4 -#define ORIG_NAME 8 -#define COMMENT 0x10 -#define RESERVED 0xe0 - -static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) -{ - z_stream s; - int r, i, flags; - - /* skip header */ - i = 10; - flags = src[3]; - if (src[2] != Z_DEFLATED || (flags & RESERVED) != 0) { - printf("bad gzipped data\n\r"); - exit(); - } - if ((flags & EXTRA_FIELD) != 0) - i = 12 + src[10] + (src[11] << 8); - if ((flags & ORIG_NAME) != 0) - while (src[i++] != 0) - ; - if ((flags & COMMENT) != 0) - while (src[i++] != 0) - ; - if ((flags & HEAD_CRC) != 0) - i += 2; - if (i >= *lenp) { - printf("gunzip: ran out of data in header\n\r"); - exit(); - } - - if (zlib_inflate_workspacesize() > sizeof(scratch)) { - printf("gunzip needs more mem\n"); - exit(); - } - memset(&s, 0, sizeof(s)); - s.workspace = scratch; - r = zlib_inflateInit2(&s, -MAX_WBITS); - if (r != Z_OK) { - printf("inflateInit2 returned 
%d\n\r", r); - exit(); - } - s.next_in = src + i; - s.avail_in = *lenp - i; - s.next_out = dst; - s.avail_out = dstlen; - r = zlib_inflate(&s, Z_FULL_FLUSH); - if (r != Z_OK && r != Z_STREAM_END) { - printf("inflate returned %d msg: %s\n\r", r, s.msg); - exit(); - } - *lenp = s.next_out - (unsigned char *) dst; - zlib_inflateEnd(&s); -} - From olh at suse.de Mon Oct 10 04:19:37 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:37 +0000 Subject: [PATCH 5/13] ppc64 boot: remove need for imagesize.c In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181937.5.OCUbo29193.29065.olh@nectarine.suse.de> Compute the vmlinux size at runtime. Use Z_FULL_FLUSH instead of Z_FINISH, to extract only the ELF header and ELF program header. ->p_memsz is the required memory range for the executable, including bss. ->p_filesz is the size of .text, .data and other runtime sections. These values must be used for the claim call. All additional memory needed by the kernel is claimed in prom_init, so remove the extra MB. Pass the full memsize as target area to gunzip, otherwise not everything will be uncompressed. flush_cache has to flush all runtime sections; do not reduce the memrange by the ->p_offset value because it's just that: an offset. Remove the Makefile code to produce an imagesize.c; it's not needed anymore. Remove all FORCE flags, to not rebuild the zImage if vmlinux was not changed.
Signed-off-by: Olaf Hering arch/ppc64/boot/Makefile | 32 ++++++------------ arch/ppc64/boot/main.c | 80 ++++++++++++++++++++++------------------------- 2 files changed, 50 insertions(+), 62 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/main.c =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/main.c +++ linux-2.6.14-rc3/arch/ppc64/boot/main.c @@ -32,8 +32,6 @@ extern char _vmlinux_start[]; extern char _vmlinux_end[]; extern char _initrd_start[]; extern char _initrd_end[]; -extern unsigned long vmlinux_filesize; -extern unsigned long vmlinux_memsize; struct addr_range { unsigned long addr; @@ -45,6 +43,7 @@ static struct addr_range vmlinuz = {0, 0 static struct addr_range initrd = {0, 0, 0}; static char scratch[46912]; /* scratch space for gunzip, from zlib_inflate_workspacesize() */ +static char elfheader[256]; typedef void (*kernel_entry_t)( unsigned long, @@ -78,6 +77,7 @@ static unsigned long try_claim(unsigned void start(unsigned long a1, unsigned long a2, void *promptr) { unsigned long i; + int len; kernel_entry_t kernel_entry; Elf64_Ehdr *elf64; Elf64_Phdr *elf64ph; @@ -113,25 +113,45 @@ void start(unsigned long a1, unsigned lo claim_base = PROG_START; #endif - /* - * Now we try to claim some memory for the kernel itself - * our "vmlinux_memsize" is the memory footprint in RAM, _HOWEVER_, what - * our Makefile stuffs in is an image containing all sort of junk including - * an ELF header. We need to do some calculations here to find the right - * size... In practice we add 1Mb, that is enough, but we should really - * consider fixing the Makefile to put a _raw_ kernel in there ! 
- */ - vmlinux_memsize += ONE_MB; - printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux_memsize); - vmlinux.addr = try_claim(vmlinux_memsize); + vmlinuz.addr = (unsigned long)_vmlinux_start; + vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start); + + /* gunzip the ELF header of the kernel */ + if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { + len = vmlinuz.size; + gunzip(elfheader, sizeof(elfheader), + (unsigned char *)vmlinuz.addr, &len); + } else + memcpy(elfheader, (const void *)vmlinuz.addr, sizeof(elfheader)); + + elf64 = (Elf64_Ehdr *)elfheader; + if ( elf64->e_ident[EI_MAG0] != ELFMAG0 || + elf64->e_ident[EI_MAG1] != ELFMAG1 || + elf64->e_ident[EI_MAG2] != ELFMAG2 || + elf64->e_ident[EI_MAG3] != ELFMAG3 || + elf64->e_ident[EI_CLASS] != ELFCLASS64 || + elf64->e_ident[EI_DATA] != ELFDATA2MSB || + elf64->e_type != ET_EXEC || + elf64->e_machine != EM_PPC64 ) + { + printf("Error: not a valid PPC64 ELF file!\n\r"); + exit(); + } + + elf64ph = (Elf64_Phdr *)((unsigned long)elf64 + + (unsigned long)elf64->e_phoff); + for(i=0; i < (unsigned int)elf64->e_phnum ;i++,elf64ph++) { + if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) + break; + } + vmlinux.size = (unsigned long)elf64ph->p_filesz; + vmlinux.memsize = (unsigned long)elf64ph->p_memsz; + printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux.memsize); + vmlinux.addr = try_claim(vmlinux.memsize); if (vmlinux.addr == 0) { printf("Can't allocate memory for kernel image !\n\r"); exit(); } - vmlinuz.addr = (unsigned long)_vmlinux_start; - vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start); - vmlinux.size = PAGE_ALIGN(vmlinux_filesize); - vmlinux.memsize = vmlinux_memsize; /* * Now we try to claim memory for the initrd (and copy it there) @@ -155,11 +175,10 @@ void start(unsigned long a1, unsigned lo /* Eventually gunzip the kernel */ if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { - int len; printf("gunzipping (0x%lx <- 0x%lx:0x%0lx)...", vmlinux.addr, vmlinuz.addr, 
vmlinuz.addr+vmlinuz.size); len = vmlinuz.size; - gunzip((void *)vmlinux.addr, vmlinux.size, + gunzip((void *)vmlinux.addr, vmlinux.memsize, (unsigned char *)vmlinuz.addr, &len); printf("done 0x%lx bytes\n\r", len); } else { @@ -167,32 +186,11 @@ void start(unsigned long a1, unsigned lo } /* Skip over the ELF header */ - elf64 = (Elf64_Ehdr *)vmlinux.addr; - if ( elf64->e_ident[EI_MAG0] != ELFMAG0 || - elf64->e_ident[EI_MAG1] != ELFMAG1 || - elf64->e_ident[EI_MAG2] != ELFMAG2 || - elf64->e_ident[EI_MAG3] != ELFMAG3 || - elf64->e_ident[EI_CLASS] != ELFCLASS64 || - elf64->e_ident[EI_DATA] != ELFDATA2MSB || - elf64->e_type != ET_EXEC || - elf64->e_machine != EM_PPC64 ) - { - printf("Error: not a valid PPC64 ELF file!\n\r"); - exit(); - } - - elf64ph = (Elf64_Phdr *)((unsigned long)elf64 + - (unsigned long)elf64->e_phoff); - for(i=0; i < (unsigned int)elf64->e_phnum ;i++,elf64ph++) { - if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) - break; - } #ifdef DEBUG printf("... skipping 0x%lx bytes of ELF header\n\r", (unsigned long)elf64ph->p_offset); #endif vmlinux.addr += (unsigned long)elf64ph->p_offset; - vmlinux.size -= (unsigned long)elf64ph->p_offset; flush_cache((void *)vmlinux.addr, vmlinux.size); @@ -263,7 +261,7 @@ static void gunzip(void *dst, int dstlen s.avail_in = *lenp - i; s.next_out = dst; s.avail_out = dstlen; - r = zlib_inflate(&s, Z_FINISH); + r = zlib_inflate(&s, Z_FULL_FLUSH); if (r != Z_OK && r != Z_STREAM_END) { printf("inflate returned %d msg: %s\n\r", r, s.msg); exit(); Index: linux-2.6.14-rc3/arch/ppc64/boot/Makefile =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/Makefile +++ linux-2.6.14-rc3/arch/ppc64/boot/Makefile @@ -34,7 +34,7 @@ zliblinuxheader := zlib.h zconf.h zutil. 
$(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) #$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h) -src-boot := crt0.S string.S prom.c main.c imagesize.c div64.S +src-boot := crt0.S string.S prom.c main.c div64.S src-boot += $(zlib) src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) @@ -87,7 +87,7 @@ src-sec = $(foreach section, $(1), $(pat gz-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.gz, $(section))) hostprogs-y := addnote addRamDisk -targets += zImage.vmode zImage.initrd.vmode zImage zImage.initrd imagesize.c \ +targets += zImage.vmode zImage.initrd.vmode zImage zImage.initrd \ $(patsubst $(obj)/%,%, $(call obj-sec, $(required) $(initrd))) \ $(patsubst $(obj)/%,%, $(call src-sec, $(required) $(initrd))) \ $(patsubst $(obj)/%,%, $(call gz-sec, $(required) $(initrd))) \ @@ -100,9 +100,9 @@ quiet_cmd_ramdisk = RAMDISK $@ quiet_cmd_stripvm = STRIP $@ cmd_stripvm = $(STRIP) -s $< -o $@ -vmlinux.strip: vmlinux FORCE +vmlinux.strip: vmlinux $(call if_changed,stripvm) -$(obj)/vmlinux.initrd: vmlinux.strip $(obj)/addRamDisk $(obj)/ramdisk.image.gz FORCE +$(obj)/vmlinux.initrd: vmlinux.strip $(obj)/addRamDisk $(obj)/ramdisk.image.gz $(call if_changed,ramdisk) quiet_cmd_addsection = ADDSEC $@ @@ -110,48 +110,38 @@ quiet_cmd_addsection = ADDSEC $@ --add-section=.kernel:$(strip $(patsubst $(obj)/kernel-%.o,%, $@))=$(patsubst %.o,%.gz, $@) \ --set-section-flags=.kernel:$(strip $(patsubst $(obj)/kernel-%.o,%, $@))=$(OBJCOPYFLAGS) -quiet_cmd_imagesize = GENSIZE $@ - cmd_imagesize = ls -l vmlinux.strip | \ - awk '{printf "/* generated -- do not edit! 
*/\n" "unsigned long vmlinux_filesize = %d;\n", $$5}' \ - > $(obj)/imagesize.c && \ - $(CROSS_COMPILE)nm -n vmlinux | tail -n 1 | \ - awk '{printf "unsigned long vmlinux_memsize = 0x%s;\n", substr($$1,8)}' >> $(obj)/imagesize.c - quiet_cmd_addnote = ADDNOTE $@ cmd_addnote = $(obj)/addnote $@ -$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % FORCE +$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % $(call if_changed,gzip) $(obj)/kernel-initrd.gz: $(obj)/ramdisk.image.gz cp -f $(obj)/ramdisk.image.gz $@ -$(call src-sec, $(required) $(initrd)): $(obj)/kernel-%.c: $(obj)/kernel-%.gz FORCE +$(call src-sec, $(required) $(initrd)): $(obj)/kernel-%.c: $(obj)/kernel-%.gz @touch $@ -$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c FORCE +$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c $(call if_changed_dep,bootcc) $(call cmd,addsection) $(obj)/zImage.vmode: obj-boot += $(call obj-sec, $(required)) -$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) FORCE +$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) $(call cmd,bootld,$(obj-boot)) $(obj)/zImage.initrd.vmode: obj-boot += $(call obj-sec, $(required) $(initrd)) -$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) FORCE +$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) $(call cmd,bootld,$(obj-boot)) -$(obj)/zImage: $(obj)/zImage.vmode $(obj)/addnote FORCE +$(obj)/zImage: $(obj)/zImage.vmode $(obj)/addnote @cp -f $< $@ $(call if_changed,addnote) -$(obj)/zImage.initrd: $(obj)/zImage.initrd.vmode $(obj)/addnote FORCE +$(obj)/zImage.initrd: $(obj)/zImage.initrd.vmode $(obj)/addnote @cp -f $< $@ $(call if_changed,addnote) -$(obj)/imagesize.c: vmlinux.strip - $(call cmd,imagesize) - install: $(CONFIGURE) $(BOOTIMAGE) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" "$(BOOTIMAGE)" From olh at suse.de Mon Oct 10 04:19:36 2005 From: olh at suse.de (Olaf 
Hering) Date: Sun, 9 Oct 2005 18:19:36 +0000 Subject: [PATCH 4/13] ppc64 boot: remove zlib In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181936.4.YPbQW29171.29065.olh@nectarine.suse.de> Switch ppc64 to the in-kernel zlib; it has fewer bugs than the current one. The code in arch/ppc64/boot is compiled as 32bit, so it cannot use the includes from include/asm. Copy all zlib related header files and convert them with sed. Reduce the scratch size to 47k, and check for possible changes at runtime. Signed-off-by: Olaf Hering arch/ppc64/boot/zlib.c | 2195 ----------------------------------------------- arch/ppc64/boot/zlib.h | 432 --------- arch/ppc64/boot/Makefile | 33 arch/ppc64/boot/main.c | 82 - 4 files changed, 44 insertions(+), 2698 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/Makefile =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/Makefile +++ linux-2.6.14-rc3/arch/ppc64/boot/Makefile @@ -27,10 +27,41 @@ BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAG BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data -src-boot := crt0.S string.S prom.c main.c zlib.c imagesize.c div64.S +zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c +zlibheader := infblock.h infcodes.h inffast.h inftrees.h infutil.h +zliblinuxheader := zlib.h zconf.h zutil.h + +$(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) +#$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h) + +src-boot := crt0.S string.S prom.c main.c imagesize.c div64.S +src-boot += $(zlib) src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) + +quiet_cmd_copy_zlib = COPY $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ + +quiet_cmd_copy_zlibheader = COPY $@ +
cmd_copy_zlibheader = sed "s@]\+\).*@\"\1\"@" $< > $@ +# stddef.h for NULL +quiet_cmd_copy_zliblinuxheader = COPY $@ + cmd_copy_zliblinuxheader = sed "s@@\"string.h\"@;s@@@;s@]\+\).*@\"\1\"@" $< > $@ + +$(addprefix $(obj)/,$(zlib)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlib) + +$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlibheader) + +$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: $(srctree)/include/linux/% + $(call cmd,copy_zliblinuxheader) + +clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) + + quiet_cmd_bootcc = BOOTCC $@ cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< Index: linux-2.6.14-rc3/arch/ppc64/boot/main.c =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/main.c +++ linux-2.6.14-rc3/arch/ppc64/boot/main.c @@ -26,12 +26,6 @@ extern void flush_cache(void *, unsigned #define RAM_END (512<<20) // Fixme: use OF */ #define ONE_MB 0x100000 -static char *avail_ram; -static char *begin_avail, *end_avail; -static char *avail_high; -static unsigned int heap_use; -static unsigned int heap_max; - extern char _start[]; extern char _end[]; extern char _vmlinux_start[]; @@ -50,7 +44,8 @@ static struct addr_range vmlinux = {0, 0 static struct addr_range vmlinuz = {0, 0, 0}; static struct addr_range initrd = {0, 0, 0}; -static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ +static char scratch[46912]; /* scratch space for gunzip, from zlib_inflate_workspacesize() */ + typedef void (*kernel_entry_t)( unsigned long, unsigned long, @@ -161,17 +156,12 @@ void start(unsigned long a1, unsigned lo /* Eventually gunzip the kernel */ if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { int len; - avail_ram = scratch; - begin_avail = avail_high = avail_ram; - end_avail = scratch + sizeof(scratch); printf("gunzipping (0x%lx <- 0x%lx:0x%0lx)...", vmlinux.addr, vmlinuz.addr, vmlinuz.addr+vmlinuz.size); len = 
vmlinuz.size; gunzip((void *)vmlinux.addr, vmlinux.size, (unsigned char *)vmlinuz.addr, &len); printf("done 0x%lx bytes\n\r", len); - printf("0x%x bytes of heap consumed, max in use 0x%x\n\r", - (unsigned)(avail_high - begin_avail), heap_max); } else { memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,vmlinuz.size); } @@ -225,64 +215,12 @@ void start(unsigned long a1, unsigned lo exit(); } -struct memchunk { - unsigned int size; - unsigned int pad; - struct memchunk *next; -}; - -static struct memchunk *freechunks; - -void *zalloc(void *x, unsigned items, unsigned size) -{ - void *p; - struct memchunk **mpp, *mp; - - size *= items; - size = _ALIGN(size, sizeof(struct memchunk)); - heap_use += size; - if (heap_use > heap_max) - heap_max = heap_use; - for (mpp = &freechunks; (mp = *mpp) != 0; mpp = &mp->next) { - if (mp->size == size) { - *mpp = mp->next; - return mp; - } - } - p = avail_ram; - avail_ram += size; - if (avail_ram > avail_high) - avail_high = avail_ram; - if (avail_ram > end_avail) { - printf("oops... 
out of memory\n\r"); - pause(); - } - return p; -} - -void zfree(void *x, void *addr, unsigned nb) -{ - struct memchunk *mp = addr; - - nb = _ALIGN(nb, sizeof(struct memchunk)); - heap_use -= nb; - if (avail_ram == addr + nb) { - avail_ram = addr; - return; - } - mp->size = nb; - mp->next = freechunks; - freechunks = mp; -} - #define HEAD_CRC 2 #define EXTRA_FIELD 4 #define ORIG_NAME 8 #define COMMENT 0x10 #define RESERVED 0xe0 -#define DEFLATED 8 - static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) { z_stream s; @@ -291,7 +229,7 @@ static void gunzip(void *dst, int dstlen /* skip header */ i = 10; flags = src[3]; - if (src[2] != DEFLATED || (flags & RESERVED) != 0) { + if (src[2] != Z_DEFLATED || (flags & RESERVED) != 0) { printf("bad gzipped data\n\r"); exit(); } @@ -310,9 +248,13 @@ static void gunzip(void *dst, int dstlen exit(); } - s.zalloc = zalloc; - s.zfree = zfree; - r = inflateInit2(&s, -MAX_WBITS); + if (zlib_inflate_workspacesize() > sizeof(scratch)) { + printf("gunzip needs more mem\n"); + exit(); + } + memset(&s, 0, sizeof(s)); + s.workspace = scratch; + r = zlib_inflateInit2(&s, -MAX_WBITS); if (r != Z_OK) { printf("inflateInit2 returned %d\n\r", r); exit(); @@ -321,12 +263,12 @@ static void gunzip(void *dst, int dstlen s.avail_in = *lenp - i; s.next_out = dst; s.avail_out = dstlen; - r = inflate(&s, Z_FINISH); + r = zlib_inflate(&s, Z_FINISH); if (r != Z_OK && r != Z_STREAM_END) { printf("inflate returned %d msg: %s\n\r", r, s.msg); exit(); } *lenp = s.next_out - (unsigned char *) dst; - inflateEnd(&s); + zlib_inflateEnd(&s); } Index: linux-2.6.14-rc3/arch/ppc64/boot/zlib.c =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/zlib.c +++ /dev/null @@ -1,2195 +0,0 @@ -/* - * This file is derived from various .h and .c files from the zlib-0.95 - * distribution by Jean-loup Gailly and Mark Adler, with some additions - * by Paul Mackerras to aid in implementing Deflate 
compression and - * decompression for PPP packets. See zlib.h for conditions of - * distribution and use. - * - * Changes that have been made include: - * - changed functions not used outside this file to "local" - * - added minCompression parameter to deflateInit2 - * - added Z_PACKET_FLUSH (see zlib.h for details) - * - added inflateIncomp - * - Copyright (C) 1995 Jean-loup Gailly and Mark Adler - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - Jean-loup Gailly Mark Adler - gzip at prep.ai.mit.edu madler at alumni.caltech.edu - - * - * - */ - -/*+++++*/ -/* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -/* From: zutil.h,v 1.9 1995/05/03 17:27:12 jloup Exp */ - -#define _Z_UTIL_H - -#include "zlib.h" - -#ifndef local -# define local static -#endif -/* compile with -Dlocal if your debugger can't find static symbols */ - -#define FAR - -typedef unsigned char uch; -typedef uch FAR uchf; -typedef unsigned short ush; -typedef ush FAR ushf; -typedef unsigned long ulg; - -extern char *z_errmsg[]; /* indexed by 1-zlib_error */ - -#define ERR_RETURN(strm,err) return (strm->msg=z_errmsg[1-err], err) -/* To be used only when the state is known to be valid */ - -#ifndef NULL -#define NULL ((void *) 0) -#endif - - /* common constants */ - -#define DEFLATED 8 - -#ifndef DEF_WBITS -# define DEF_WBITS MAX_WBITS -#endif -/* default windowBits for decompression. MAX_WBITS is for compression only */ - -#if MAX_MEM_LEVEL >= 8 -# define DEF_MEM_LEVEL 8 -#else -# define DEF_MEM_LEVEL MAX_MEM_LEVEL -#endif -/* default memLevel */ - -#define STORED_BLOCK 0 -#define STATIC_TREES 1 -#define DYN_TREES 2 -/* The three kinds of block type */ - -#define MIN_MATCH 3 -#define MAX_MATCH 258 -/* The minimum and maximum match lengths */ - - /* functions */ - -extern void *memcpy(void *, const void *, unsigned long); -#define zmemcpy memcpy - -/* Diagnostic functions */ -#ifdef DEBUG_ZLIB -# include "stdio.h" -# ifndef verbose -# define verbose 0 -# endif -# define Assert(cond,msg) {if(!(cond)) z_error(msg);} -# define Trace(x) fprintf x -# define Tracev(x) {if (verbose) fprintf x ;} -# define Tracevv(x) {if (verbose>1) fprintf x ;} -# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} -# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} -#else -# define Assert(cond,msg) -# define Trace(x) -# define Tracev(x) -# define Tracevv(x) -# define Tracec(c,x) -# define Tracecv(c,x) -#endif - - -typedef uLong (*check_func) OF((uLong check, Bytef *buf, uInt len)); - -/* voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); */ -/* void zcfree OF((voidpf opaque, voidpf ptr)); */ - 
-#define ZALLOC(strm, items, size) \ - (*((strm)->zalloc))((strm)->opaque, (items), (size)) -#define ZFREE(strm, addr, size) \ - (*((strm)->zfree))((strm)->opaque, (voidpf)(addr), (size)) -#define TRY_FREE(s, p, n) {if (p) ZFREE(s, p, n);} - -/* deflate.h -- internal compression state - * Copyright (C) 1995 Jean-loup Gailly - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/*+++++*/ -/* infblock.h -- header to use infblock.c - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -struct inflate_blocks_state; -typedef struct inflate_blocks_state FAR inflate_blocks_statef; - -local inflate_blocks_statef * inflate_blocks_new OF(( - z_stream *z, - check_func c, /* check function */ - uInt w)); /* window size */ - -local int inflate_blocks OF(( - inflate_blocks_statef *, - z_stream *, - int)); /* initial return code */ - -local void inflate_blocks_reset OF(( - inflate_blocks_statef *, - z_stream *, - uLongf *)); /* check value on output */ - -local int inflate_blocks_free OF(( - inflate_blocks_statef *, - z_stream *, - uLongf *)); /* check value on output */ - -local int inflate_addhistory OF(( - inflate_blocks_statef *, - z_stream *)); - -local int inflate_packet_flush OF(( - inflate_blocks_statef *)); - -/*+++++*/ -/* inftrees.h -- header to use inftrees.c - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. 
It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* Huffman code lookup table entry--this entry is four bytes for machines - that have 16-bit pointers (e.g. PC's in the small or medium model). */ - -typedef struct inflate_huft_s FAR inflate_huft; - -struct inflate_huft_s { - union { - struct { - Byte Exop; /* number of extra bits or operation */ - Byte Bits; /* number of bits in this code or subcode */ - } what; - uInt Nalloc; /* number of these allocated here */ - Bytef *pad; /* pad structure to a power of 2 (4 bytes for */ - } word; /* 16-bit, 8 bytes for 32-bit machines) */ - union { - uInt Base; /* literal, length base, or distance base */ - inflate_huft *Next; /* pointer to next level of table */ - } more; -}; - -#ifdef DEBUG_ZLIB - local uInt inflate_hufts; -#endif - -local int inflate_trees_bits OF(( - uIntf *, /* 19 code lengths */ - uIntf *, /* bits tree desired/actual depth */ - inflate_huft * FAR *, /* bits tree result */ - z_stream *)); /* for zalloc, zfree functions */ - -local int inflate_trees_dynamic OF(( - uInt, /* number of literal/length codes */ - uInt, /* number of distance codes */ - uIntf *, /* that many (total) code lengths */ - uIntf *, /* literal desired/actual bit depth */ - uIntf *, /* distance desired/actual bit depth */ - inflate_huft * FAR *, /* literal/length tree result */ - inflate_huft * FAR *, /* distance tree result */ - z_stream *)); /* for zalloc, zfree functions */ - -local int inflate_trees_fixed OF(( - uIntf *, /* literal desired/actual bit depth */ - uIntf *, /* distance desired/actual bit depth */ - inflate_huft * FAR *, /* literal/length tree result */ - inflate_huft * FAR *)); /* distance tree result */ - -local int inflate_trees_free OF(( - inflate_huft *, /* tables to free */ - z_stream *)); /* for zfree function */ - - -/*+++++*/ -/* infcodes.h -- header to use infcodes.c - * Copyright (C) 1995 Mark Adler - * For conditions of 
distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -struct inflate_codes_state; -typedef struct inflate_codes_state FAR inflate_codes_statef; - -local inflate_codes_statef *inflate_codes_new OF(( - uInt, uInt, - inflate_huft *, inflate_huft *, - z_stream *)); - -local int inflate_codes OF(( - inflate_blocks_statef *, - z_stream *, - int)); - -local void inflate_codes_free OF(( - inflate_codes_statef *, - z_stream *)); - - -/*+++++*/ -/* inflate.c -- zlib interface to inflate modules - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* inflate private state */ -struct internal_state { - - /* mode */ - enum { - METHOD, /* waiting for method byte */ - FLAG, /* waiting for flag byte */ - BLOCKS, /* decompressing blocks */ - CHECK4, /* four check bytes to go */ - CHECK3, /* three check bytes to go */ - CHECK2, /* two check bytes to go */ - CHECK1, /* one check byte to go */ - DONE, /* finished check, done */ - BAD} /* got an error--stay here */ - mode; /* current inflate mode */ - - /* mode dependent information */ - union { - uInt method; /* if FLAGS, method byte */ - struct { - uLong was; /* computed check value */ - uLong need; /* stream check value */ - } check; /* if CHECK, check values to compare */ - uInt marker; /* if BAD, inflateSync's marker bytes count */ - } sub; /* submode */ - - /* mode independent information */ - int nowrap; /* flag for no wrapper */ - uInt wbits; /* log2(window size) (8..15, defaults to 15) */ - inflate_blocks_statef - *blocks; /* current inflate_blocks state */ - -}; - - -int inflateReset( - z_stream *z -) -{ - uLong c; - - if (z == Z_NULL || z->state == Z_NULL) - return Z_STREAM_ERROR; - z->total_in = z->total_out = 0; - z->msg = Z_NULL; - z->state->mode = 
z->state->nowrap ? BLOCKS : METHOD; - inflate_blocks_reset(z->state->blocks, z, &c); - Trace((stderr, "inflate: reset\n")); - return Z_OK; -} - - -int inflateEnd( - z_stream *z -) -{ - uLong c; - - if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL) - return Z_STREAM_ERROR; - if (z->state->blocks != Z_NULL) - inflate_blocks_free(z->state->blocks, z, &c); - ZFREE(z, z->state, sizeof(struct internal_state)); - z->state = Z_NULL; - Trace((stderr, "inflate: end\n")); - return Z_OK; -} - - -int inflateInit2( - z_stream *z, - int w -) -{ - /* initialize state */ - if (z == Z_NULL) - return Z_STREAM_ERROR; -/* if (z->zalloc == Z_NULL) z->zalloc = zcalloc; */ -/* if (z->zfree == Z_NULL) z->zfree = zcfree; */ - if ((z->state = (struct internal_state FAR *) - ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL) - return Z_MEM_ERROR; - z->state->blocks = Z_NULL; - - /* handle undocumented nowrap option (no zlib header or check) */ - z->state->nowrap = 0; - if (w < 0) - { - w = - w; - z->state->nowrap = 1; - } - - /* set window size */ - if (w < 8 || w > 15) - { - inflateEnd(z); - return Z_STREAM_ERROR; - } - z->state->wbits = (uInt)w; - - /* create inflate_blocks state */ - if ((z->state->blocks = - inflate_blocks_new(z, z->state->nowrap ? 
Z_NULL : adler32, 1 << w)) - == Z_NULL) - { - inflateEnd(z); - return Z_MEM_ERROR; - } - Trace((stderr, "inflate: allocated\n")); - - /* reset state */ - inflateReset(z); - return Z_OK; -} - - -int inflateInit( - z_stream *z -) -{ - return inflateInit2(z, DEF_WBITS); -} - - -#define NEEDBYTE {if(z->avail_in==0)goto empty;r=Z_OK;} -#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++) - -int inflate( - z_stream *z, - int f -) -{ - int r; - uInt b; - - if (z == Z_NULL || z->next_in == Z_NULL) - return Z_STREAM_ERROR; - r = Z_BUF_ERROR; - while (1) switch (z->state->mode) - { - case METHOD: - NEEDBYTE - if (((z->state->sub.method = NEXTBYTE) & 0xf) != DEFLATED) - { - z->state->mode = BAD; - z->msg = "unknown compression method"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - if ((z->state->sub.method >> 4) + 8 > z->state->wbits) - { - z->state->mode = BAD; - z->msg = "invalid window size"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - z->state->mode = FLAG; - case FLAG: - NEEDBYTE - if ((b = NEXTBYTE) & 0x20) - { - z->state->mode = BAD; - z->msg = "invalid reserved bit"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - if (((z->state->sub.method << 8) + b) % 31) - { - z->state->mode = BAD; - z->msg = "incorrect header check"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - Trace((stderr, "inflate: zlib header ok\n")); - z->state->mode = BLOCKS; - case BLOCKS: - r = inflate_blocks(z->state->blocks, z, r); - if (f == Z_PACKET_FLUSH && z->avail_in == 0 && z->avail_out != 0) - r = inflate_packet_flush(z->state->blocks); - if (r == Z_DATA_ERROR) - { - z->state->mode = BAD; - z->state->sub.marker = 0; /* can try inflateSync */ - break; - } - if (r != Z_STREAM_END) - return r; - r = Z_OK; - inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was); - if (z->state->nowrap) - { - z->state->mode = DONE; - break; - } - z->state->mode = CHECK4; - case CHECK4: - NEEDBYTE 
- z->state->sub.check.need = (uLong)NEXTBYTE << 24; - z->state->mode = CHECK3; - case CHECK3: - NEEDBYTE - z->state->sub.check.need += (uLong)NEXTBYTE << 16; - z->state->mode = CHECK2; - case CHECK2: - NEEDBYTE - z->state->sub.check.need += (uLong)NEXTBYTE << 8; - z->state->mode = CHECK1; - case CHECK1: - NEEDBYTE - z->state->sub.check.need += (uLong)NEXTBYTE; - - if (z->state->sub.check.was != z->state->sub.check.need) - { - z->state->mode = BAD; - z->msg = "incorrect data check"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - Trace((stderr, "inflate: zlib check ok\n")); - z->state->mode = DONE; - case DONE: - return Z_STREAM_END; - case BAD: - return Z_DATA_ERROR; - default: - return Z_STREAM_ERROR; - } - - empty: - if (f != Z_PACKET_FLUSH) - return r; - z->state->mode = BAD; - z->state->sub.marker = 0; /* can try inflateSync */ - return Z_DATA_ERROR; -} - -/* - * This subroutine adds the data at next_in/avail_in to the output history - * without performing any output. The output buffer must be "caught up"; - * i.e. no pending output (hence s->read equals s->write), and the state must - * be BLOCKS (i.e. we should be willing to see the start of a series of - * BLOCKS). On exit, the output will also be caught up, and the checksum - * will have been updated if need be. 
- */ - -int inflateIncomp( - z_stream *z -) -{ - if (z->state->mode != BLOCKS) - return Z_DATA_ERROR; - return inflate_addhistory(z->state->blocks, z); -} - - -int inflateSync( - z_stream *z -) -{ - uInt n; /* number of bytes to look at */ - Bytef *p; /* pointer to bytes */ - uInt m; /* number of marker bytes found in a row */ - uLong r, w; /* temporaries to save total_in and total_out */ - - /* set up */ - if (z == Z_NULL || z->state == Z_NULL) - return Z_STREAM_ERROR; - if (z->state->mode != BAD) - { - z->state->mode = BAD; - z->state->sub.marker = 0; - } - if ((n = z->avail_in) == 0) - return Z_BUF_ERROR; - p = z->next_in; - m = z->state->sub.marker; - - /* search */ - while (n && m < 4) - { - if (*p == (Byte)(m < 2 ? 0 : 0xff)) - m++; - else if (*p) - m = 0; - else - m = 4 - m; - p++, n--; - } - - /* restore */ - z->total_in += p - z->next_in; - z->next_in = p; - z->avail_in = n; - z->state->sub.marker = m; - - /* return no joy or set up to restart on a new block */ - if (m != 4) - return Z_DATA_ERROR; - r = z->total_in; w = z->total_out; - inflateReset(z); - z->total_in = r; z->total_out = w; - z->state->mode = BLOCKS; - return Z_OK; -} - -#undef NEEDBYTE -#undef NEXTBYTE - -/*+++++*/ -/* infutil.h -- types and macros common to blocks and codes - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -/* inflate blocks semi-private state */ -struct inflate_blocks_state { - - /* mode */ - enum { - TYPE, /* get type bits (3, including end bit) */ - LENS, /* get lengths for stored */ - STORED, /* processing stored block */ - TABLE, /* get table lengths */ - BTREE, /* get bit lengths tree for a dynamic block */ - DTREE, /* get length, distance trees for a dynamic block */ - CODES, /* processing fixed or dynamic block */ - DRY, /* output remaining window bytes */ - DONEB, /* finished last block, done */ - BADB} /* got a data error--stuck here */ - mode; /* current inflate_block mode */ - - /* mode dependent information */ - union { - uInt left; /* if STORED, bytes left to copy */ - struct { - uInt table; /* table lengths (14 bits) */ - uInt index; /* index into blens (or border) */ - uIntf *blens; /* bit lengths of codes */ - uInt bb; /* bit length tree depth */ - inflate_huft *tb; /* bit length decoding tree */ - int nblens; /* # elements allocated at blens */ - } trees; /* if DTREE, decoding info for trees */ - struct { - inflate_huft *tl, *td; /* trees to free */ - inflate_codes_statef - *codes; - } decode; /* if CODES, current state */ - } sub; /* submode */ - uInt last; /* true if this block is the last block */ - - /* mode independent information */ - uInt bitk; /* bits in bit buffer */ - uLong bitb; /* bit buffer */ - Bytef *window; /* sliding window */ - Bytef *end; /* one byte after sliding window */ - Bytef *read; /* window read pointer */ - Bytef *write; /* window write pointer */ - check_func checkfn; /* check function */ - uLong check; /* check on output */ - -}; - - -/* defines for inflate input/output */ -/* update pointers and return */ -#define UPDBITS {s->bitb=b;s->bitk=k;} -#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} -#define UPDOUT {s->write=q;} -#define UPDATE {UPDBITS UPDIN UPDOUT} -#define LEAVE {UPDATE return inflate_flush(s,z,r);} -/* get bytes and bits */ -#define LOADIN 
{p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} -#define NEEDBYTE {if(n)r=Z_OK;else LEAVE} -#define NEXTBYTE (n--,*p++) -#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}} -#define DUMPBITS(j) {b>>=(j);k-=(j);} -/* output bytes */ -#define WAVAIL (q<s->read?s->read-q-1:s->end-q) -#define LOADOUT {q=s->write;m=WAVAIL;} -#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=WAVAIL;}} -#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} -#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;} -#define OUTBYTE(a) {*q++=(Byte)(a);m--;} -/* load local pointers */ -#define LOAD {LOADIN LOADOUT} - -/* And'ing with mask[n] masks the lower n bits */ -local uInt inflate_mask[] = { - 0x0000, - 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, - 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff -}; - -/* copy as much as possible from the sliding window to the output area */ -local int inflate_flush OF(( - inflate_blocks_statef *, - z_stream *, - int)); - -/*+++++*/ -/* inffast.h -- header to use inffast.c - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -local int inflate_fast OF(( - uInt, - uInt, - inflate_huft *, - inflate_huft *, - inflate_blocks_statef *, - z_stream *)); - - -/*+++++*/ -/* infblock.c -- interpret and process block types to last block - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* Table for deflate from PKZIP's appnote.txt. */ -local uInt border[] = { /* Order of the bit length code lengths */ - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -/* - Notes beyond the 1.93a appnote.txt: - - 1. Distance pointers never point before the beginning of the output - stream.
- 2. Distance pointers can point back across blocks, up to 32k away. - 3. There is an implied maximum of 7 bits for the bit length table and - 15 bits for the actual data. - 4. If only one code exists, then it is encoded using one bit. (Zero - would be more efficient, but perhaps a little confusing.) If two - codes exist, they are coded using one bit each (0 and 1). - 5. There is no way of sending zero distance codes--a dummy must be - sent if there are none. (History: a pre 2.0 version of PKZIP would - store blocks with no distance codes, but this was discovered to be - too harsh a criterion.) Valid only for 1.93a. 2.04c does allow - zero distance codes, which is sent as one code of zero bits in - length. - 6. There are up to 286 literal/length codes. Code 256 represents the - end-of-block. Note however that the static length tree defines - 288 codes just to fill out the Huffman codes. Codes 286 and 287 - cannot be used though, since there is no length base or extra bits - defined for them. Similarily, there are up to 30 distance codes. - However, static trees define 32 codes (all 5 bits) to fill out the - Huffman codes, but the last two had better not show up in the data. - 7. Unzip can check dynamic Huffman blocks for complete code sets. - The exception is that a single code would not be complete (see #4). - 8. The five bits following the block type is really the number of - literal codes sent minus 257. - 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits - (1+6+6). Therefore, to output three times the length, you output - three codes (1+1+1), whereas to output four times the same length, - you only need two codes (1+3). Hmm. - 10. In the tree reconstruction algorithm, Code = Code + Increment - only if BitLength(i) is not zero. (Pretty obvious.) - 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) - 12. Note: length code 284 can represent 227-258, but length code 285 - really is 258. 
The last length deserves its own, short code - since it gets used a lot in very redundant files. The length - 258 is special since 258 - 3 (the min match length) is 255. - 13. The literal/length and distance code bit lengths are read as a - single stream of lengths. It is possible (and advantageous) for - a repeat code (16, 17, or 18) to go across the boundary between - the two sets of lengths. - */ - - -local void inflate_blocks_reset( - inflate_blocks_statef *s, - z_stream *z, - uLongf *c -) -{ - if (s->checkfn != Z_NULL) - *c = s->check; - if (s->mode == BTREE || s->mode == DTREE) - ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt)); - if (s->mode == CODES) - { - inflate_codes_free(s->sub.decode.codes, z); - inflate_trees_free(s->sub.decode.td, z); - inflate_trees_free(s->sub.decode.tl, z); - } - s->mode = TYPE; - s->bitk = 0; - s->bitb = 0; - s->read = s->write = s->window; - if (s->checkfn != Z_NULL) - s->check = (*s->checkfn)(0L, Z_NULL, 0); - Trace((stderr, "inflate: blocks reset\n")); -} - - -local inflate_blocks_statef *inflate_blocks_new( - z_stream *z, - check_func c, - uInt w -) -{ - inflate_blocks_statef *s; - - if ((s = (inflate_blocks_statef *)ZALLOC - (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) - return s; - if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) - { - ZFREE(z, s, sizeof(struct inflate_blocks_state)); - return Z_NULL; - } - s->end = s->window + w; - s->checkfn = c; - s->mode = TYPE; - Trace((stderr, "inflate: blocks allocated\n")); - inflate_blocks_reset(s, z, &s->check); - return s; -} - - -local int inflate_blocks( - inflate_blocks_statef *s, - z_stream *z, - int r -) -{ - uInt t; /* temporary storage */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - - /* copy input/output information to locals (UPDATE macro restores) 
*/ - LOAD - - /* process input based on current state */ - while (1) switch (s->mode) - { - case TYPE: - NEEDBITS(3) - t = (uInt)b & 7; - s->last = t & 1; - switch (t >> 1) - { - case 0: /* stored */ - Trace((stderr, "inflate: stored block%s\n", - s->last ? " (last)" : "")); - DUMPBITS(3) - t = k & 7; /* go to byte boundary */ - DUMPBITS(t) - s->mode = LENS; /* get length of stored block */ - break; - case 1: /* fixed */ - Trace((stderr, "inflate: fixed codes block%s\n", - s->last ? " (last)" : "")); - { - uInt bl, bd; - inflate_huft *tl, *td; - - inflate_trees_fixed(&bl, &bd, &tl, &td); - s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); - if (s->sub.decode.codes == Z_NULL) - { - r = Z_MEM_ERROR; - LEAVE - } - s->sub.decode.tl = Z_NULL; /* don't try to free these */ - s->sub.decode.td = Z_NULL; - } - DUMPBITS(3) - s->mode = CODES; - break; - case 2: /* dynamic */ - Trace((stderr, "inflate: dynamic codes block%s\n", - s->last ? " (last)" : "")); - DUMPBITS(3) - s->mode = TABLE; - break; - case 3: /* illegal */ - DUMPBITS(3) - s->mode = BADB; - z->msg = "invalid block type"; - r = Z_DATA_ERROR; - LEAVE - } - break; - case LENS: - NEEDBITS(32) - if (((~b) >> 16) != (b & 0xffff)) - { - s->mode = BADB; - z->msg = "invalid stored block lengths"; - r = Z_DATA_ERROR; - LEAVE - } - s->sub.left = (uInt)b & 0xffff; - b = k = 0; /* dump bits */ - Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); - s->mode = s->sub.left ? STORED : TYPE; - break; - case STORED: - if (n == 0) - LEAVE - NEEDOUT - t = s->sub.left; - if (t > n) t = n; - if (t > m) t = m; - zmemcpy(q, p, t); - p += t; n -= t; - q += t; m -= t; - if ((s->sub.left -= t) != 0) - break; - Tracev((stderr, "inflate: stored end, %lu total out\n", - z->total_out + (q >= s->read ? q - s->read : - (s->end - s->read) + (q - s->window)))); - s->mode = s->last ? 
DRY : TYPE; - break; - case TABLE: - NEEDBITS(14) - s->sub.trees.table = t = (uInt)b & 0x3fff; -#ifndef PKZIP_BUG_WORKAROUND - if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) - { - s->mode = BADB; - z->msg = "too many length or distance symbols"; - r = Z_DATA_ERROR; - LEAVE - } -#endif - t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); - if (t < 19) - t = 19; - if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) - { - r = Z_MEM_ERROR; - LEAVE - } - s->sub.trees.nblens = t; - DUMPBITS(14) - s->sub.trees.index = 0; - Tracev((stderr, "inflate: table sizes ok\n")); - s->mode = BTREE; - case BTREE: - while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) - { - NEEDBITS(3) - s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; - DUMPBITS(3) - } - while (s->sub.trees.index < 19) - s->sub.trees.blens[border[s->sub.trees.index++]] = 0; - s->sub.trees.bb = 7; - t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, - &s->sub.trees.tb, z); - if (t != Z_OK) - { - r = t; - if (r == Z_DATA_ERROR) - s->mode = BADB; - LEAVE - } - s->sub.trees.index = 0; - Tracev((stderr, "inflate: bits tree ok\n")); - s->mode = DTREE; - case DTREE: - while (t = s->sub.trees.table, - s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) - { - inflate_huft *h; - uInt i, j, c; - - t = s->sub.trees.bb; - NEEDBITS(t) - h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); - t = h->word.what.Bits; - c = h->more.Base; - if (c < 16) - { - DUMPBITS(t) - s->sub.trees.blens[s->sub.trees.index++] = c; - } - else /* c == 16..18 */ - { - i = c == 18 ? 7 : c - 14; - j = c == 18 ? 11 : 3; - NEEDBITS(t + i) - DUMPBITS(t) - j += (uInt)b & inflate_mask[i]; - DUMPBITS(i) - i = s->sub.trees.index; - t = s->sub.trees.table; - if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || - (c == 16 && i < 1)) - { - s->mode = BADB; - z->msg = "invalid bit length repeat"; - r = Z_DATA_ERROR; - LEAVE - } - c = c == 16 ? 
s->sub.trees.blens[i - 1] : 0; - do { - s->sub.trees.blens[i++] = c; - } while (--j); - s->sub.trees.index = i; - } - } - inflate_trees_free(s->sub.trees.tb, z); - s->sub.trees.tb = Z_NULL; - { - uInt bl, bd; - inflate_huft *tl, *td; - inflate_codes_statef *c; - - bl = 9; /* must be <= 9 for lookahead assumptions */ - bd = 6; /* must be <= 9 for lookahead assumptions */ - t = s->sub.trees.table; - t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), - s->sub.trees.blens, &bl, &bd, &tl, &td, z); - if (t != Z_OK) - { - if (t == (uInt)Z_DATA_ERROR) - s->mode = BADB; - r = t; - LEAVE - } - Tracev((stderr, "inflate: trees ok\n")); - if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) - { - inflate_trees_free(td, z); - inflate_trees_free(tl, z); - r = Z_MEM_ERROR; - LEAVE - } - ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt)); - s->sub.decode.codes = c; - s->sub.decode.tl = tl; - s->sub.decode.td = td; - } - s->mode = CODES; - case CODES: - UPDATE - if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) - return inflate_flush(s, z, r); - r = Z_OK; - inflate_codes_free(s->sub.decode.codes, z); - inflate_trees_free(s->sub.decode.td, z); - inflate_trees_free(s->sub.decode.tl, z); - LOAD - Tracev((stderr, "inflate: codes end, %lu total out\n", - z->total_out + (q >= s->read ? 
q - s->read : - (s->end - s->read) + (q - s->window)))); - if (!s->last) - { - s->mode = TYPE; - break; - } - if (k > 7) /* return unused byte, if any */ - { - Assert(k < 16, "inflate_codes grabbed too many bytes") - k -= 8; - n++; - p--; /* can always return one */ - } - s->mode = DRY; - case DRY: - FLUSH - if (s->read != s->write) - LEAVE - s->mode = DONEB; - case DONEB: - r = Z_STREAM_END; - LEAVE - case BADB: - r = Z_DATA_ERROR; - LEAVE - default: - r = Z_STREAM_ERROR; - LEAVE - } -} - - -local int inflate_blocks_free( - inflate_blocks_statef *s, - z_stream *z, - uLongf *c -) -{ - inflate_blocks_reset(s, z, c); - ZFREE(z, s->window, s->end - s->window); - ZFREE(z, s, sizeof(struct inflate_blocks_state)); - Trace((stderr, "inflate: blocks freed\n")); - return Z_OK; -} - -/* - * This subroutine adds the data at next_in/avail_in to the output history - * without performing any output. The output buffer must be "caught up"; - * i.e. no pending output (hence s->read equals s->write), and the state must - * be BLOCKS (i.e. we should be willing to see the start of a series of - * BLOCKS). On exit, the output will also be caught up, and the checksum - * will have been updated if need be. - */ -local int inflate_addhistory( - inflate_blocks_statef *s, - z_stream *z -) -{ - uLong b; /* bit buffer */ /* NOT USED HERE */ - uInt k; /* bits in bit buffer */ /* NOT USED HERE */ - uInt t; /* temporary storage */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - - if (s->read != s->write) - return Z_STREAM_ERROR; - if (s->mode != TYPE) - return Z_DATA_ERROR; - - /* we're ready to rock */ - LOAD - /* while there is input ready, copy to output buffer, moving - * pointers as needed. - */ - while (n) { - t = n; /* how many to do */ - /* is there room until end of buffer? 
*/ - if (t > m) t = m; - /* update check information */ - if (s->checkfn != Z_NULL) - s->check = (*s->checkfn)(s->check, q, t); - zmemcpy(q, p, t); - q += t; - p += t; - n -= t; - z->total_out += t; - s->read = q; /* drag read pointer forward */ -/* WRAP */ /* expand WRAP macro by hand to handle s->read */ - if (q == s->end) { - s->read = q = s->window; - m = WAVAIL; - } - } - UPDATE - return Z_OK; -} - - -/* - * At the end of a Deflate-compressed PPP packet, we expect to have seen - * a `stored' block type value but not the (zero) length bytes. - */ -local int inflate_packet_flush( - inflate_blocks_statef *s -) -{ - if (s->mode != LENS) - return Z_DATA_ERROR; - s->mode = TYPE; - return Z_OK; -} - - -/*+++++*/ -/* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* simplify the use of the inflate_huft type with some defines */ -#define base more.Base -#define next more.Next -#define exop word.what.Exop -#define bits word.what.Bits - - -local int huft_build OF(( - uIntf *, /* code lengths in bits */ - uInt, /* number of codes */ - uInt, /* number of "simple" codes */ - uIntf *, /* list of base values for non-simple codes */ - uIntf *, /* list of extra bits for non-simple codes */ - inflate_huft * FAR*,/* result: starting table */ - uIntf *, /* maximum lookup bits (returns actual) */ - z_stream *)); /* for zalloc function */ - -local voidpf falloc OF(( - voidpf, /* opaque pointer (not used) */ - uInt, /* number of items */ - uInt)); /* size of item */ - -local void ffree OF(( - voidpf q, /* opaque pointer (not used) */ - voidpf p, /* what to free (not used) */ - uInt n)); /* number of bytes (not used) */ - -/* Tables for deflate from PKZIP's appnote.txt. 
*/ -local uInt cplens[] = { /* Copy lengths for literal codes 257..285 */ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - /* actually lengths - 2; also see note #13 above about 258 */ -local uInt cplext[] = { /* Extra bits for literal codes 257..285 */ - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, - 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 192, 192}; /* 192==invalid */ -local uInt cpdist[] = { /* Copy offsets for distance codes 0..29 */ - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, - 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, - 8193, 12289, 16385, 24577}; -local uInt cpdext[] = { /* Extra bits for distance codes */ - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, - 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, - 12, 12, 13, 13}; - -/* - Huffman code decoding is performed using a multi-level table lookup. - The fastest way to decode is to simply build a lookup table whose - size is determined by the longest code. However, the time it takes - to build this table can also be a factor if the data being decoded - is not very long. The most common codes are necessarily the - shortest codes, so those codes dominate the decoding time, and hence - the speed. The idea is you can have a shorter table that decodes the - shorter, more probable codes, and then point to subsidiary tables for - the longer codes. The time it costs to decode the longer codes is - then traded against the time it takes to make longer tables. - - This results of this trade are in the variables lbits and dbits - below. lbits is the number of bits the first level table for literal/ - length codes can decode in one step, and dbits is the same thing for - the distance codes. Subsequent tables are also less than or equal to - those sizes. 
These values may be adjusted either when all of the - codes are shorter than that, in which case the longest code length in - bits is used, or when the shortest code is *longer* than the requested - table size, in which case the length of the shortest code in bits is - used. - - There are two different values for the two tables, since they code a - different number of possibilities each. The literal/length table - codes 286 possible values, or in a flat code, a little over eight - bits. The distance table codes 30 possible values, or a little less - than five bits, flat. The optimum values for speed end up being - about one bit more than those, so lbits is 8+1 and dbits is 5+1. - The optimum values may differ though from machine to machine, and - possibly even between compilers. Your mileage may vary. - */ - - -/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ -#define BMAX 15 /* maximum bit length of any code */ -#define N_MAX 288 /* maximum number of codes in any set */ - -#ifdef DEBUG_ZLIB - uInt inflate_hufts; -#endif - -local int huft_build( - uIntf *b, /* code lengths in bits (all assumed <= BMAX) */ - uInt n, /* number of codes (assumed <= N_MAX) */ - uInt s, /* number of simple-valued codes (0..s-1) */ - uIntf *d, /* list of base values for non-simple codes */ - uIntf *e, /* list of extra bits for non-simple codes */ - inflate_huft * FAR *t, /* result: starting table */ - uIntf *m, /* maximum lookup bits, returns actual */ - z_stream *zs /* for zalloc function */ -) -/* Given a list of code lengths and a maximum table size, make a set of - tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR - if the given code set is incomplete (the tables are still built in this - case), Z_DATA_ERROR if the input is invalid (all zero length codes or an - over-subscribed set of lengths), or Z_MEM_ERROR if not enough memory. 
*/ -{ - - uInt a; /* counter for codes of length k */ - uInt c[BMAX+1]; /* bit length count table */ - uInt f; /* i repeats in table every f entries */ - int g; /* maximum code length */ - int h; /* table level */ - register uInt i; /* counter, current code */ - register uInt j; /* counter */ - register int k; /* number of bits in current code */ - int l; /* bits per table (returned in m) */ - register uIntf *p; /* pointer into c[], b[], or v[] */ - inflate_huft *q; /* points to current table */ - struct inflate_huft_s r; /* table entry for structure assignment */ - inflate_huft *u[BMAX]; /* table stack */ - uInt v[N_MAX]; /* values in order of bit length */ - register int w; /* bits before this table == (l * h) */ - uInt x[BMAX+1]; /* bit offsets, then code stack */ - uIntf *xp; /* pointer into x */ - int y; /* number of dummy codes added */ - uInt z; /* number of entries in current table */ - - - /* Generate counts for each bit length */ - p = c; -#define C0 *p++ = 0; -#define C2 C0 C0 C0 C0 -#define C4 C2 C2 C2 C2 - C4 /* clear c[]--assume BMAX+1 is 16 */ - p = b; i = n; - do { - c[*p++]++; /* assume all entries <= BMAX */ - } while (--i); - if (c[0] == n) /* null input--all zero length codes */ - { - *t = (inflate_huft *)Z_NULL; - *m = 0; - return Z_DATA_ERROR; - } - - - /* Find minimum and maximum length, bound *m by those */ - l = *m; - for (j = 1; j <= BMAX; j++) - if (c[j]) - break; - k = j; /* minimum code length */ - if ((uInt)l < j) - l = j; - for (i = BMAX; i; i--) - if (c[i]) - break; - g = i; /* maximum code length */ - if ((uInt)l > i) - l = i; - *m = l; - - - /* Adjust last length count to fill out codes, if needed */ - for (y = 1 << j; j < i; j++, y <<= 1) - if ((y -= c[j]) < 0) - return Z_DATA_ERROR; - if ((y -= c[i]) < 0) - return Z_DATA_ERROR; - c[i] += y; - - - /* Generate starting offsets into the value table for each length */ - x[1] = j = 0; - p = c + 1; xp = x + 2; - while (--i) { /* note that i == g from above */ - *xp++ = (j += *p++); - } 
- - - /* Make a table of values in order of bit lengths */ - p = b; i = 0; - do { - if ((j = *p++) != 0) - v[x[j]++] = i; - } while (++i < n); - n = x[g]; /* set n to length of v */ - - - /* Generate the Huffman codes and for each, make the table entries */ - x[0] = i = 0; /* first Huffman code is zero */ - p = v; /* grab values in bit order */ - h = -1; /* no tables yet--level -1 */ - w = -l; /* bits decoded == (l * h) */ - u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ - q = (inflate_huft *)Z_NULL; /* ditto */ - z = 0; /* ditto */ - - /* go through the bit lengths (k already is bits in shortest code) */ - for (; k <= g; k++) - { - a = c[k]; - while (a--) - { - /* here i is the Huffman code of length k bits for value *p */ - /* make tables up to required level */ - while (k > w + l) - { - h++; - w += l; /* previous table always l bits */ - - /* compute minimum size table less than or equal to l bits */ - z = (z = g - w) > (uInt)l ? l : z; /* table size upper limit */ - if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ - { /* too few codes for k-w bit table */ - f -= a + 1; /* deduct codes from patterns left */ - xp = c + k; - if (j < z) - while (++j < z) /* try smaller tables up to z bits */ - { - if ((f <<= 1) <= *++xp) - break; /* enough codes to use up j bits */ - f -= *xp; /* else deduct codes from patterns */ - } - } - z = 1 << j; /* table entries for j-bit table */ - - /* allocate and link in new table */ - if ((q = (inflate_huft *)ZALLOC - (zs,z + 1,sizeof(inflate_huft))) == Z_NULL) - { - if (h) - inflate_trees_free(u[0], zs); - return Z_MEM_ERROR; /* not enough memory */ - } - q->word.Nalloc = z + 1; -#ifdef DEBUG_ZLIB - inflate_hufts += z + 1; -#endif - *t = q + 1; /* link to list for huft_free() */ - *(t = &(q->next)) = Z_NULL; - u[h] = ++q; /* table starts after link */ - - /* connect to last table, if there is one */ - if (h) - { - x[h] = i; /* save pattern for backing up */ - r.bits = (Byte)l; /* bits to dump before this 
table */ - r.exop = (Byte)j; /* bits in this table */ - r.next = q; /* pointer to this table */ - j = i >> (w - l); /* (get around Turbo C bug) */ - u[h-1][j] = r; /* connect to last table */ - } - } - - /* set up table entry in r */ - r.bits = (Byte)(k - w); - if (p >= v + n) - r.exop = 128 + 64; /* out of values--invalid code */ - else if (*p < s) - { - r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ - r.base = *p++; /* simple code is just the value */ - } - else - { - r.exop = (Byte)e[*p - s] + 16 + 64; /* non-simple--look up in lists */ - r.base = d[*p++ - s]; - } - - /* fill code-like entries with r */ - f = 1 << (k - w); - for (j = i >> w; j < z; j += f) - q[j] = r; - - /* backwards increment the k-bit code i */ - for (j = 1 << (k - 1); i & j; j >>= 1) - i ^= j; - i ^= j; - - /* backup over finished tables */ - while ((i & ((1 << w) - 1)) != x[h]) - { - h--; /* don't need to update q */ - w -= l; - } - } - } - - - /* Return Z_BUF_ERROR if we were given an incomplete table */ - return y != 0 && g != 1 ? 
Z_BUF_ERROR : Z_OK; -} - - -local int inflate_trees_bits( - uIntf *c, /* 19 code lengths */ - uIntf *bb, /* bits tree desired/actual depth */ - inflate_huft * FAR *tb, /* bits tree result */ - z_stream *z /* for zfree function */ -) -{ - int r; - - r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, tb, bb, z); - if (r == Z_DATA_ERROR) - z->msg = "oversubscribed dynamic bit lengths tree"; - else if (r == Z_BUF_ERROR) - { - inflate_trees_free(*tb, z); - z->msg = "incomplete dynamic bit lengths tree"; - r = Z_DATA_ERROR; - } - return r; -} - - -local int inflate_trees_dynamic( - uInt nl, /* number of literal/length codes */ - uInt nd, /* number of distance codes */ - uIntf *c, /* that many (total) code lengths */ - uIntf *bl, /* literal desired/actual bit depth */ - uIntf *bd, /* distance desired/actual bit depth */ - inflate_huft * FAR *tl, /* literal/length tree result */ - inflate_huft * FAR *td, /* distance tree result */ - z_stream *z /* for zfree function */ -) -{ - int r; - - /* build literal/length tree */ - if ((r = huft_build(c, nl, 257, cplens, cplext, tl, bl, z)) != Z_OK) - { - if (r == Z_DATA_ERROR) - z->msg = "oversubscribed literal/length tree"; - else if (r == Z_BUF_ERROR) - { - inflate_trees_free(*tl, z); - z->msg = "incomplete literal/length tree"; - r = Z_DATA_ERROR; - } - return r; - } - - /* build distance tree */ - if ((r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, z)) != Z_OK) - { - if (r == Z_DATA_ERROR) - z->msg = "oversubscribed literal/length tree"; - else if (r == Z_BUF_ERROR) { -#ifdef PKZIP_BUG_WORKAROUND - r = Z_OK; - } -#else - inflate_trees_free(*td, z); - z->msg = "incomplete literal/length tree"; - r = Z_DATA_ERROR; - } - inflate_trees_free(*tl, z); - return r; -#endif - } - - /* done */ - return Z_OK; -} - - -/* build fixed tables only once--keep them here */ -local int fixed_lock = 0; -local int fixed_built = 0; -#define FIXEDH 530 /* number of hufts used by fixed tables */ -local uInt fixed_left = FIXEDH; -local 
inflate_huft fixed_mem[FIXEDH]; -local uInt fixed_bl; -local uInt fixed_bd; -local inflate_huft *fixed_tl; -local inflate_huft *fixed_td; - - -local voidpf falloc( - voidpf q, /* opaque pointer (not used) */ - uInt n, /* number of items */ - uInt s /* size of item */ -) -{ - Assert(s == sizeof(inflate_huft) && n <= fixed_left, - "inflate_trees falloc overflow"); - if (q) s++; /* to make some compilers happy */ - fixed_left -= n; - return (voidpf)(fixed_mem + fixed_left); -} - - -local void ffree( - voidpf q, - voidpf p, - uInt n -) -{ - Assert(0, "inflate_trees ffree called!"); - if (q) q = p; /* to make some compilers happy */ -} - - -local int inflate_trees_fixed( - uIntf *bl, /* literal desired/actual bit depth */ - uIntf *bd, /* distance desired/actual bit depth */ - inflate_huft * FAR *tl, /* literal/length tree result */ - inflate_huft * FAR *td /* distance tree result */ -) -{ - /* build fixed tables if not built already--lock out other instances */ - while (++fixed_lock > 1) - fixed_lock--; - if (!fixed_built) - { - int k; /* temporary variable */ - unsigned c[288]; /* length list for huft_build */ - z_stream z; /* for falloc function */ - - /* set up fake z_stream for memory routines */ - z.zalloc = falloc; - z.zfree = ffree; - z.opaque = Z_NULL; - - /* literal table */ - for (k = 0; k < 144; k++) - c[k] = 8; - for (; k < 256; k++) - c[k] = 9; - for (; k < 280; k++) - c[k] = 7; - for (; k < 288; k++) - c[k] = 8; - fixed_bl = 7; - huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, &z); - - /* distance table */ - for (k = 0; k < 30; k++) - c[k] = 5; - fixed_bd = 5; - huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, &z); - - /* done */ - fixed_built = 1; - } - fixed_lock--; - *bl = fixed_bl; - *bd = fixed_bd; - *tl = fixed_tl; - *td = fixed_td; - return Z_OK; -} - - -local int inflate_trees_free( - inflate_huft *t, /* table to free */ - z_stream *z /* for zfree function */ -) -/* Free the malloc'ed tables built by huft_build(), which 
makes a linked - list of the tables it made, with the links in a dummy first entry of - each table. */ -{ - register inflate_huft *p, *q; - - /* Go through linked list, freeing from the malloced (t[-1]) address. */ - p = t; - while (p != Z_NULL) - { - q = (--p)->next; - ZFREE(z, p, p->word.Nalloc * sizeof(inflate_huft)); - p = q; - } - return Z_OK; -} - -/*+++++*/ -/* infcodes.c -- process literals and length/distance pairs - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* simplify the use of the inflate_huft type with some defines */ -#define base more.Base -#define next more.Next -#define exop word.what.Exop -#define bits word.what.Bits - -/* inflate codes private state */ -struct inflate_codes_state { - - /* mode */ - enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ - START, /* x: set up for LEN */ - LEN, /* i: get length/literal/eob next */ - LENEXT, /* i: getting length extra (have base) */ - DIST, /* i: get distance next */ - DISTEXT, /* i: getting distance extra */ - COPY, /* o: copying bytes in window, waiting for space */ - LIT, /* o: got literal, waiting for output space */ - WASH, /* o: got eob, possibly still output waiting */ - END, /* x: got eob and all data flushed */ - BADCODE} /* x: got error */ - mode; /* current inflate_codes mode */ - - /* mode dependent information */ - uInt len; - union { - struct { - inflate_huft *tree; /* pointer into tree */ - uInt need; /* bits needed */ - } code; /* if LEN or DIST, where in tree */ - uInt lit; /* if LIT, literal */ - struct { - uInt get; /* bits to get for extra */ - uInt dist; /* distance back to copy from */ - } copy; /* if EXT or COPY, where and how much */ - } sub; /* submode */ - - /* mode independent information */ - Byte lbits; /* ltree bits decoded per branch */ - Byte dbits; /* dtree bits decoder per branch */ - inflate_huft *ltree; /* literal/length/eob tree */ - inflate_huft *dtree; /* distance tree */ - -}; - - 
-local inflate_codes_statef *inflate_codes_new( - uInt bl, - uInt bd, - inflate_huft *tl, - inflate_huft *td, - z_stream *z -) -{ - inflate_codes_statef *c; - - if ((c = (inflate_codes_statef *) - ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) - { - c->mode = START; - c->lbits = (Byte)bl; - c->dbits = (Byte)bd; - c->ltree = tl; - c->dtree = td; - Tracev((stderr, "inflate: codes new\n")); - } - return c; -} - - -local int inflate_codes( - inflate_blocks_statef *s, - z_stream *z, - int r -) -{ - uInt j; /* temporary storage */ - inflate_huft *t; /* temporary pointer */ - uInt e; /* extra bits or operation */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - Bytef *f; /* pointer to copy strings from */ - inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ - - /* copy input/output information to locals (UPDATE macro restores) */ - LOAD - - /* process input and output based on current state */ - while (1) switch (c->mode) - { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ - case START: /* x: set up for LEN */ -#ifndef SLOW - if (m >= 258 && n >= 10) - { - UPDATE - r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); - LOAD - if (r != Z_OK) - { - c->mode = r == Z_STREAM_END ? WASH : BADCODE; - break; - } - } -#endif /* !SLOW */ - c->sub.code.need = c->lbits; - c->sub.code.tree = c->ltree; - c->mode = LEN; - case LEN: /* i: get length/literal/eob next */ - j = c->sub.code.need; - NEEDBITS(j) - t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); - DUMPBITS(t->bits) - e = (uInt)(t->exop); - if (e == 0) /* literal */ - { - c->sub.lit = t->base; - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
- "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", t->base)); - c->mode = LIT; - break; - } - if (e & 16) /* length */ - { - c->sub.copy.get = e & 15; - c->len = t->base; - c->mode = LENEXT; - break; - } - if ((e & 64) == 0) /* next table */ - { - c->sub.code.need = e; - c->sub.code.tree = t->next; - break; - } - if (e & 32) /* end of block */ - { - Tracevv((stderr, "inflate: end of block\n")); - c->mode = WASH; - break; - } - c->mode = BADCODE; /* invalid code */ - z->msg = "invalid literal/length code"; - r = Z_DATA_ERROR; - LEAVE - case LENEXT: /* i: getting length extra (have base) */ - j = c->sub.copy.get; - NEEDBITS(j) - c->len += (uInt)b & inflate_mask[j]; - DUMPBITS(j) - c->sub.code.need = c->dbits; - c->sub.code.tree = c->dtree; - Tracevv((stderr, "inflate: length %u\n", c->len)); - c->mode = DIST; - case DIST: /* i: get distance next */ - j = c->sub.code.need; - NEEDBITS(j) - t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); - DUMPBITS(t->bits) - e = (uInt)(t->exop); - if (e & 16) /* distance */ - { - c->sub.copy.get = e & 15; - c->sub.copy.dist = t->base; - c->mode = DISTEXT; - break; - } - if ((e & 64) == 0) /* next table */ - { - c->sub.code.need = e; - c->sub.code.tree = t->next; - break; - } - c->mode = BADCODE; /* invalid code */ - z->msg = "invalid distance code"; - r = Z_DATA_ERROR; - LEAVE - case DISTEXT: /* i: getting distance extra */ - j = c->sub.copy.get; - NEEDBITS(j) - c->sub.copy.dist += (uInt)b & inflate_mask[j]; - DUMPBITS(j) - Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist)); - c->mode = COPY; - case COPY: /* o: copying bytes in window, waiting for space */ -#ifndef __TURBOC__ /* Turbo C bug for following expression */ - f = (uInt)(q - s->window) < c->sub.copy.dist ? 
- s->end - (c->sub.copy.dist - (q - s->window)) : - q - c->sub.copy.dist; -#else - f = q - c->sub.copy.dist; - if ((uInt)(q - s->window) < c->sub.copy.dist) - f = s->end - (c->sub.copy.dist - (q - s->window)); -#endif - while (c->len) - { - NEEDOUT - OUTBYTE(*f++) - if (f == s->end) - f = s->window; - c->len--; - } - c->mode = START; - break; - case LIT: /* o: got literal, waiting for output space */ - NEEDOUT - OUTBYTE(c->sub.lit) - c->mode = START; - break; - case WASH: /* o: got eob, possibly more output */ - FLUSH - if (s->read != s->write) - LEAVE - c->mode = END; - case END: - r = Z_STREAM_END; - LEAVE - case BADCODE: /* x: got error */ - r = Z_DATA_ERROR; - LEAVE - default: - r = Z_STREAM_ERROR; - LEAVE - } -} - - -local void inflate_codes_free( - inflate_codes_statef *c, - z_stream *z -) -{ - ZFREE(z, c, sizeof(struct inflate_codes_state)); - Tracev((stderr, "inflate: codes free\n")); -} - -/*+++++*/ -/* inflate_util.c -- data and routines common to blocks and codes - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* copy as much as possible from the sliding window to the output area */ -local int inflate_flush( - inflate_blocks_statef *s, - z_stream *z, - int r -) -{ - uInt n; - Bytef *p, *q; - - /* local copies of source and destination pointers */ - p = z->next_out; - q = s->read; - - /* compute number of bytes to copy as far as end of window */ - n = (uInt)((q <= s->write ? 
s->write : s->end) - q); - if (n > z->avail_out) n = z->avail_out; - if (n && r == Z_BUF_ERROR) r = Z_OK; - - /* update counters */ - z->avail_out -= n; - z->total_out += n; - - /* update check information */ - if (s->checkfn != Z_NULL) - s->check = (*s->checkfn)(s->check, q, n); - - /* copy as far as end of window */ - zmemcpy(p, q, n); - p += n; - q += n; - - /* see if more to copy at beginning of window */ - if (q == s->end) - { - /* wrap pointers */ - q = s->window; - if (s->write == s->end) - s->write = s->window; - - /* compute bytes to copy */ - n = (uInt)(s->write - q); - if (n > z->avail_out) n = z->avail_out; - if (n && r == Z_BUF_ERROR) r = Z_OK; - - /* update counters */ - z->avail_out -= n; - z->total_out += n; - - /* update check information */ - if (s->checkfn != Z_NULL) - s->check = (*s->checkfn)(s->check, q, n); - - /* copy */ - zmemcpy(p, q, n); - p += n; - q += n; - } - - /* update pointers */ - z->next_out = p; - s->read = q; - - /* done */ - return r; -} - - -/*+++++*/ -/* inffast.c -- process literals and length/distance pairs fast - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* simplify the use of the inflate_huft type with some defines */ -#define base more.Base -#define next more.Next -#define exop word.what.Exop -#define bits word.what.Bits - -/* macros for bit input with no checking and for returning unused bytes */ -#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<>3);p-=c;k&=7;} - -/* Called with number of bytes left to write in window at least 258 - (the maximum string length) and number of input bytes available - at least ten. The ten bytes are six bytes for the longest length/ - distance pair plus four bytes for overloading the bit buffer. 
*/ - -local int inflate_fast( - uInt bl, - uInt bd, - inflate_huft *tl, - inflate_huft *td, - inflate_blocks_statef *s, - z_stream *z -) -{ - inflate_huft *t; /* temporary pointer */ - uInt e; /* extra bits or operation */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - uInt ml; /* mask for literal/length tree */ - uInt md; /* mask for distance tree */ - uInt c; /* bytes to copy */ - uInt d; /* distance back to copy from */ - Bytef *r; /* copy source pointer */ - - /* load input, output, bit values */ - LOAD - - /* initialize masks */ - ml = inflate_mask[bl]; - md = inflate_mask[bd]; - - /* do until not enough input or output space for fast loop */ - do { /* assume called with m >= 258 && n >= 10 */ - /* get literal/length code */ - GRABBITS(20) /* max bits for literal/length code */ - if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) - { - DUMPBITS(t->bits) - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
- "inflate: * literal '%c'\n" : - "inflate: * literal 0x%02x\n", t->base)); - *q++ = (Byte)t->base; - m--; - continue; - } - do { - DUMPBITS(t->bits) - if (e & 16) - { - /* get extra bits for length */ - e &= 15; - c = t->base + ((uInt)b & inflate_mask[e]); - DUMPBITS(e) - Tracevv((stderr, "inflate: * length %u\n", c)); - - /* decode distance base of block to copy */ - GRABBITS(15); /* max bits for distance code */ - e = (t = td + ((uInt)b & md))->exop; - do { - DUMPBITS(t->bits) - if (e & 16) - { - /* get extra bits to add to distance base */ - e &= 15; - GRABBITS(e) /* get extra bits (up to 13) */ - d = t->base + ((uInt)b & inflate_mask[e]); - DUMPBITS(e) - Tracevv((stderr, "inflate: * distance %u\n", d)); - - /* do the copy */ - m -= c; - if ((uInt)(q - s->window) >= d) /* offset before dest */ - { /* just copy */ - r = q - d; - *q++ = *r++; c--; /* minimum count is three, */ - *q++ = *r++; c--; /* so unroll loop a little */ - } - else /* else offset after destination */ - { - e = d - (q - s->window); /* bytes from offset to end */ - r = s->end - e; /* pointer to offset */ - if (c > e) /* if source crosses, */ - { - c -= e; /* copy to end of window */ - do { - *q++ = *r++; - } while (--e); - r = s->window; /* copy rest from start of window */ - } - } - do { /* copy all or what's left */ - *q++ = *r++; - } while (--c); - break; - } - else if ((e & 64) == 0) - e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop; - else - { - z->msg = "invalid distance code"; - UNGRAB - UPDATE - return Z_DATA_ERROR; - } - } while (1); - break; - } - if ((e & 64) == 0) - { - if ((e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop) == 0) - { - DUMPBITS(t->bits) - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
- "inflate: * literal '%c'\n" : - "inflate: * literal 0x%02x\n", t->base)); - *q++ = (Byte)t->base; - m--; - break; - } - } - else if (e & 32) - { - Tracevv((stderr, "inflate: * end of block\n")); - UNGRAB - UPDATE - return Z_STREAM_END; - } - else - { - z->msg = "invalid literal/length code"; - UNGRAB - UPDATE - return Z_DATA_ERROR; - } - } while (1); - } while (m >= 258 && n >= 10); - - /* not enough input or output--restore pointers and return */ - UNGRAB - UPDATE - return Z_OK; -} - - -/*+++++*/ -/* zutil.c -- target dependent utility functions for the compression library - * Copyright (C) 1995 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* From: zutil.c,v 1.8 1995/05/03 17:27:12 jloup Exp */ - -char *zlib_version = ZLIB_VERSION; - -char *z_errmsg[] = { -"stream end", /* Z_STREAM_END 1 */ -"", /* Z_OK 0 */ -"file error", /* Z_ERRNO (-1) */ -"stream error", /* Z_STREAM_ERROR (-2) */ -"data error", /* Z_DATA_ERROR (-3) */ -"insufficient memory", /* Z_MEM_ERROR (-4) */ -"buffer error", /* Z_BUF_ERROR (-5) */ -""}; - - -/*+++++*/ -/* adler32.c -- compute the Adler-32 checksum of a data stream - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* From: adler32.c,v 1.6 1995/05/03 17:27:08 jloup Exp */ - -#define BASE 65521L /* largest prime smaller than 65536 */ -#define NMAX 5552 -/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ - -#define DO1(buf) {s1 += *buf++; s2 += s1;} -#define DO2(buf) DO1(buf); DO1(buf); -#define DO4(buf) DO2(buf); DO2(buf); -#define DO8(buf) DO4(buf); DO4(buf); -#define DO16(buf) DO8(buf); DO8(buf); - -/* ========================================================================= */ -uLong adler32( - uLong adler, - Bytef *buf, - uInt len -) -{ - unsigned long s1 = adler & 0xffff; - unsigned long s2 = (adler >> 16) & 0xffff; - int k; - - if (buf == Z_NULL) return 1L; - - while (len > 0) { - k = 
len < NMAX ? len : NMAX; - len -= k; - while (k >= 16) { - DO16(buf); - k -= 16; - } - if (k != 0) do { - DO1(buf); - } while (--k); - s1 %= BASE; - s2 %= BASE; - } - return (s2 << 16) | s1; -} Index: linux-2.6.14-rc3/arch/ppc64/boot/zlib.h =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/zlib.h +++ /dev/null @@ -1,432 +0,0 @@ -/* */ - -/* - * This file is derived from zlib.h and zconf.h from the zlib-0.95 - * distribution by Jean-loup Gailly and Mark Adler, with some additions - * by Paul Mackerras to aid in implementing Deflate compression and - * decompression for PPP packets. - */ - -/* - * ==FILEVERSION 960122== - * - * This marker is used by the Linux installation script to determine - * whether an up-to-date version of this file is already installed. - */ - -/* zlib.h -- interface of the 'zlib' general purpose compression library - version 0.95, Aug 16th, 1995. - - Copyright (C) 1995 Jean-loup Gailly and Mark Adler - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. 
- - Jean-loup Gailly Mark Adler - gzip at prep.ai.mit.edu madler at alumni.caltech.edu - */ - -#ifndef _ZLIB_H -#define _ZLIB_H - -/* #include "zconf.h" */ /* included directly here */ - -/* zconf.h -- configuration of the zlib compression library - * Copyright (C) 1995 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* From: zconf.h,v 1.12 1995/05/03 17:27:12 jloup Exp */ - -/* - The library does not install any signal handler. It is recommended to - add at least a handler for SIGSEGV when decompressing; the library checks - the consistency of the input data whenever possible but may go nuts - for some forms of corrupted input. - */ - -/* - * Compile with -DMAXSEG_64K if the alloc function cannot allocate more - * than 64k bytes at a time (needed on systems with 16-bit int). - * Compile with -DUNALIGNED_OK if it is OK to access shorts or ints - * at addresses which are not a multiple of their size. - * Under DOS, -DFAR=far or -DFAR=__far may be needed. - */ - -#ifndef STDC -# if defined(MSDOS) || defined(__STDC__) || defined(__cplusplus) -# define STDC -# endif -#endif - -#ifdef __MWERKS__ /* Metrowerks CodeWarrior declares fileno() in unix.h */ -# include -#endif - -/* Maximum value for memLevel in deflateInit2 */ -#ifndef MAX_MEM_LEVEL -# ifdef MAXSEG_64K -# define MAX_MEM_LEVEL 8 -# else -# define MAX_MEM_LEVEL 9 -# endif -#endif - -#ifndef FAR -# define FAR -#endif - -/* Maximum value for windowBits in deflateInit2 and inflateInit2 */ -#ifndef MAX_WBITS -# define MAX_WBITS 15 /* 32K LZ77 window */ -#endif - -/* The memory requirements for deflate are (in bytes): - 1 << (windowBits+2) + 1 << (memLevel+9) - that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) - plus a few kilobytes for small objects. 
For example, if you want to reduce - the default memory requirements from 256K to 128K, compile with - make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" - Of course this will generally degrade compression (there's no free lunch). - - The memory requirements for inflate are (in bytes) 1 << windowBits - that is, 32K for windowBits=15 (default value) plus a few kilobytes - for small objects. -*/ - - /* Type declarations */ - -#ifndef OF /* function prototypes */ -# ifdef STDC -# define OF(args) args -# else -# define OF(args) () -# endif -#endif - -typedef unsigned char Byte; /* 8 bits */ -typedef unsigned int uInt; /* 16 bits or more */ -typedef unsigned long uLong; /* 32 bits or more */ - -typedef Byte FAR Bytef; -typedef char FAR charf; -typedef int FAR intf; -typedef uInt FAR uIntf; -typedef uLong FAR uLongf; - -#ifdef STDC - typedef void FAR *voidpf; - typedef void *voidp; -#else - typedef Byte FAR *voidpf; - typedef Byte *voidp; -#endif - -/* end of original zconf.h */ - -#define ZLIB_VERSION "0.95P" - -/* - The 'zlib' compression library provides in-memory compression and - decompression functions, including integrity checks of the uncompressed - data. This version of the library supports only one compression method - (deflation) but other algorithms may be added later and will have the same - stream interface. - - For compression the application must provide the output buffer and - may optionally provide the input buffer for optimization. For decompression, - the application must provide the input buffer and may optionally provide - the output buffer for optimization. - - Compression can be done in a single step if the buffers are large - enough (for example if an input file is mmap'ed), or can be done by - repeated calls of the compression function. In the latter case, the - application must provide more input and/or consume the output - (providing more output space) before each call. 
-*/ - -typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); -typedef void (*free_func) OF((voidpf opaque, voidpf address, uInt nbytes)); - -struct internal_state; - -typedef struct z_stream_s { - Bytef *next_in; /* next input byte */ - uInt avail_in; /* number of bytes available at next_in */ - uLong total_in; /* total nb of input bytes read so far */ - - Bytef *next_out; /* next output byte should be put there */ - uInt avail_out; /* remaining free space at next_out */ - uLong total_out; /* total nb of bytes output so far */ - - char *msg; /* last error message, NULL if no error */ - struct internal_state FAR *state; /* not visible by applications */ - - alloc_func zalloc; /* used to allocate the internal state */ - free_func zfree; /* used to free the internal state */ - voidp opaque; /* private data object passed to zalloc and zfree */ - - Byte data_type; /* best guess about the data type: ascii or binary */ - -} z_stream; - -/* - The application must update next_in and avail_in when avail_in has - dropped to zero. It must update next_out and avail_out when avail_out - has dropped to zero. The application must initialize zalloc, zfree and - opaque before calling the init function. All other fields are set by the - compression library and must not be updated by the application. - - The opaque value provided by the application will be passed as the first - parameter for calls of zalloc and zfree. This can be useful for custom - memory management. The compression library attaches no meaning to the - opaque value. - - zalloc must return Z_NULL if there is not enough memory for the object. - On 16-bit systems, the functions zalloc and zfree must be able to allocate - exactly 65536 bytes, but will not be required to allocate more than this - if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, - pointers returned by zalloc for objects of exactly 65536 bytes *must* - have their offset normalized to zero. 
The default allocation function - provided by this library ensures this (see zutil.c). To reduce memory - requirements and avoid any allocation of 64K objects, at the expense of - compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). - - The fields total_in and total_out can be used for statistics or - progress reports. After compression, total_in holds the total size of - the uncompressed data and may be saved for use in the decompressor - (particularly if the decompressor wants to decompress everything in - a single step). -*/ - - /* constants */ - -#define Z_NO_FLUSH 0 -#define Z_PARTIAL_FLUSH 1 -#define Z_FULL_FLUSH 2 -#define Z_SYNC_FLUSH 3 /* experimental: partial_flush + byte align */ -#define Z_FINISH 4 -#define Z_PACKET_FLUSH 5 -/* See deflate() below for the usage of these constants */ - -#define Z_OK 0 -#define Z_STREAM_END 1 -#define Z_ERRNO (-1) -#define Z_STREAM_ERROR (-2) -#define Z_DATA_ERROR (-3) -#define Z_MEM_ERROR (-4) -#define Z_BUF_ERROR (-5) -/* error codes for the compression/decompression functions */ - -#define Z_BEST_SPEED 1 -#define Z_BEST_COMPRESSION 9 -#define Z_DEFAULT_COMPRESSION (-1) -/* compression levels */ - -#define Z_FILTERED 1 -#define Z_HUFFMAN_ONLY 2 -#define Z_DEFAULT_STRATEGY 0 - -#define Z_BINARY 0 -#define Z_ASCII 1 -#define Z_UNKNOWN 2 -/* Used to set the data_type field */ - -#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ - -extern char *zlib_version; -/* The application can compare zlib_version and ZLIB_VERSION for consistency. - If the first character differs, the library code actually used is - not compatible with the zlib.h header file used by the application. - */ - - /* basic functions */ - -extern int inflateInit OF((z_stream *strm)); -/* - Initializes the internal stream state for decompression. The fields - zalloc and zfree must be initialized before by the caller. If zalloc and - zfree are set to Z_NULL, inflateInit updates them to use default allocation - functions. 
- - inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory. msg is set to null if there is no error message. - inflateInit does not perform any decompression: this will be done by - inflate(). -*/ - - -extern int inflate OF((z_stream *strm, int flush)); -/* - Performs one or both of the following actions: - - - Decompress more input starting at next_in and update next_in and avail_in - accordingly. If not all input can be processed (because there is not - enough room in the output buffer), next_in is updated and processing - will resume at this point for the next call of inflate(). - - - Provide more output starting at next_out and update next_out and avail_out - accordingly. inflate() always provides as much output as possible - (until there is no more input data or no more space in the output buffer). - - Before the call of inflate(), the application should ensure that at least - one of the actions is possible, by providing more input and/or consuming - more output, and updating the next_* and avail_* values accordingly. - The application can consume the uncompressed output when it wants, for - example when the output buffer is full (avail_out == 0), or after each - call of inflate(). - - If the parameter flush is set to Z_PARTIAL_FLUSH or Z_PACKET_FLUSH, - inflate flushes as much output as possible to the output buffer. The - flushing behavior of inflate is not specified for values of the flush - parameter other than Z_PARTIAL_FLUSH, Z_PACKET_FLUSH or Z_FINISH, but the - current implementation actually flushes as much output as possible - anyway. For Z_PACKET_FLUSH, inflate checks that once all the input data - has been consumed, it is expecting to see the length field of a stored - block; if not, it returns Z_DATA_ERROR. - - inflate() should normally be called until it returns Z_STREAM_END or an - error. 
However if all decompression is to be performed in a single step - (a single call of inflate), the parameter flush should be set to - Z_FINISH. In this case all pending input is processed and all pending - output is flushed; avail_out must be large enough to hold all the - uncompressed data. (The size of the uncompressed data may have been saved - by the compressor for this purpose.) The next operation on this stream must - be inflateEnd to deallocate the decompression state. The use of Z_FINISH - is never required, but can be used to inform inflate that a faster routine - may be used for the single inflate() call. - - inflate() returns Z_OK if some progress has been made (more input - processed or more output produced), Z_STREAM_END if the end of the - compressed data has been reached and all uncompressed output has been - produced, Z_DATA_ERROR if the input data was corrupted, Z_STREAM_ERROR if - the stream structure was inconsistent (for example if next_in or next_out - was NULL), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if no - progress is possible or if there was not enough room in the output buffer - when Z_FINISH is used. In the Z_DATA_ERROR case, the application may then - call inflateSync to look for a good compression block. */ - - -extern int inflateEnd OF((z_stream *strm)); -/* - All dynamically allocated data structures for this stream are freed. - This function discards any unprocessed input and does not flush any - pending output. - - inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state - was inconsistent. In the error case, msg may be set but then points to a - static string (which must not be deallocated). -*/ - - /* advanced functions */ - -extern int inflateInit2 OF((z_stream *strm, - int windowBits)); -/* - This is another version of inflateInit with more compression options. The - fields next_out, zalloc and zfree must be initialized before by the caller. 
- - The windowBits parameter is the base two logarithm of the maximum window - size (the size of the history buffer). It should be in the range 8..15 for - this version of the library (the value 16 will be allowed soon). The - default value is 15 if inflateInit is used instead. If a compressed stream - with a larger window size is given as input, inflate() will return with - the error code Z_DATA_ERROR instead of trying to allocate a larger window. - - If next_out is not null, the library will use this buffer for the history - buffer; the buffer must either be large enough to hold the entire output - data, or have at least 1< The following series of patches improves the zImage header on ppc64. Similar code has been used in openSuSE for a while and works well on both 32-bit and 64-bit systems. Two patches touch generic code; they are required to simplify the copying of lib/zlib_inflate. Since the bootheader is 32-bit and include/asm points to 64-bit headers, the source files can't be used as is. I have tested the resulting zImage on a 44p 270, with stock yaboot 1.3.13.
arch/ppc64/boot/Makefile | 67 - arch/ppc64/boot/crt0.S | 52 - arch/ppc64/boot/main.c | 264 ++--- arch/ppc64/boot/string.S | 4 arch/ppc64/boot/string.h | 1 arch/ppc64/boot/zImage.lds | 64 - arch/ppc64/boot/zlib.c | 2195 --------------------------------------------- arch/ppc64/boot/zlib.h | 432 -------- include/linux/zutil.h | 1 lib/zlib_inflate/inflate.c | 1 10 files changed, 191 insertions(+), 2890 deletions(-) From olh at suse.de Mon Oct 10 04:19:33 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:33 +0000 Subject: [PATCH 1/13] ppc64 boot: remove include from lib/zlib_inflate/inflate.c In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181933.1.VzVQu29099.29065.olh@nectarine.suse.de> There is no need to include module.h in inflate.c Signed-off-by: Olaf Hering lib/zlib_inflate/inflate.c | 1 - 1 files changed, 1 deletion(-) Index: linux-2.6.14-rc3/lib/zlib_inflate/inflate.c =================================================================== --- linux-2.6.14-rc3.orig/lib/zlib_inflate/inflate.c +++ linux-2.6.14-rc3/lib/zlib_inflate/inflate.c @@ -3,7 +3,6 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include <linux/module.h> #include <linux/zutil.h> #include "infblock.h" #include "infutil.h" From olh at suse.de Mon Oct 10 04:19:40 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:40 +0000 Subject: [PATCH 8/13] ppc64 boot: cleanup linker script In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181940.8.lNpql29268.29065.olh@nectarine.suse.de> Remove userland-related stuff from ld.script; it is not required for zImage. Use wildcards for some sections. Signed-off-by: Olaf Hering arch/ppc64/boot/zImage.lds | 59 +++------------------------------------------ 1 files changed, 5 insertions(+), 54 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/zImage.lds =================================================================== --- 
linux-2.6.14-rc3.orig/arch/ppc64/boot/zImage.lds +++ linux-2.6.14-rc3/arch/ppc64/boot/zImage.lds @@ -1,62 +1,19 @@ OUTPUT_ARCH(powerpc:common) -SEARCH_DIR(/lib); SEARCH_DIR(/usr/lib); SEARCH_DIR(/usr/local/lib); SEARCH_DIR(/usr/local/powerpc-any-elf/lib); -/* Do we need any of these for elf? - __DYNAMIC = 0; */ SECTIONS { - /* Read-only sections, merged into text segment: */ - . = + SIZEOF_HEADERS; - .interp : { *(.interp) } - .hash : { *(.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .rel.text : { *(.rel.text) } - .rela.text : { *(.rela.text) } - .rel.data : { *(.rel.data) } - .rela.data : { *(.rela.data) } - .rel.rodata : { *(.rel.rodata) } - .rela.rodata : { *(.rela.rodata) } - .rel.got : { *(.rel.got) } - .rela.got : { *(.rela.got) } - .rel.ctors : { *(.rel.ctors) } - .rela.ctors : { *(.rela.ctors) } - .rel.dtors : { *(.rel.dtors) } - .rela.dtors : { *(.rela.dtors) } - .rel.bss : { *(.rel.bss) } - .rela.bss : { *(.rela.bss) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } - .plt : { *(.plt) } .text : { *(.text) *(.fixup) - *(.got1) } - . = ALIGN(4096); _etext = .; - PROVIDE (etext = .); - .rodata : - { - *(.rodata) - *(.rodata1) - } - .kstrtab : { *(.kstrtab) } - __vermagic : { *(__vermagic) } - .fini : { *(.fini) } =0 - .ctors : { *(.ctors) } - .dtors : { *(.dtors) } - /* Read-write section, merged into data segment: */ . = ALIGN(4096); .data : { - *(.data) - *(.data1) - *(.sdata) - *(.sdata2) - *(.got.plt) *(.got) - *(.dynamic) - CONSTRUCTORS + *(.rodata*) + *(.data*) + *(.sdata*) + *(.got*) } . = ALIGN(4096); @@ -71,20 +28,14 @@ SECTIONS . = ALIGN(4096); _edata = .; - PROVIDE (edata = .); - - .fixup : { *(.fixup) } . = ALIGN(4096); __bss_start = .; .bss : { - *(.sbss) *(.scommon) - *(.dynbss) + *(.sbss) *(.bss) - *(COMMON) } . = ALIGN(4096); _end = . 
; - PROVIDE (end = .); } From olh at suse.de Mon Oct 10 04:19:42 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:42 +0000 Subject: [PATCH 10/13] ppc64 boot: fix typo in asm comments In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181942.10.CTOmF29316.29065.olh@nectarine.suse.de> update comment in memcpy, r7 contains the byte count Signed-off-by: Olaf Hering arch/ppc64/boot/string.S | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/string.S =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/string.S +++ linux-2.6.14-rc3/arch/ppc64/boot/string.S @@ -104,7 +104,7 @@ memmove: .globl memcpy memcpy: - rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */ addi r6,r3,-4 addi r4,r4,-4 beq 2f /* if less than 8 bytes to do */ @@ -146,7 +146,7 @@ memcpy: .globl backwards_memcpy backwards_memcpy: - rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + rlwinm. 
r7,r5,32-3,3,31 /* r7 = r5 >> 3 */ add r6,r3,r5 add r4,r4,r5 beq 2f From olh at suse.de Mon Oct 10 04:19:43 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 9 Oct 2005 18:19:43 +0000 Subject: [PATCH 11/13] ppc64 boot: remove global initializers In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051009181943.11.RoZSo29340.29065.olh@nectarine.suse.de> no need to initialize global variables Signed-off-by: Olaf Hering arch/ppc64/boot/main.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/main.c =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/main.c +++ linux-2.6.14-rc3/arch/ppc64/boot/main.c @@ -38,9 +38,9 @@ struct addr_range { unsigned long size; unsigned long memsize; }; -static struct addr_range vmlinux = {0, 0, 0}; -static struct addr_range vmlinuz = {0, 0, 0}; -static struct addr_range initrd = {0, 0, 0}; +static struct addr_range vmlinux; +static struct addr_range vmlinuz; +static struct addr_range initrd; static char scratch[46912]; /* scratch space for gunzip, from zlib_inflate_workspacesize() */ static char elfheader[256]; From security at paypal.com Mon Oct 10 10:06:48 2005 From: security at paypal.com (PayPal) Date: Mon, 10 Oct 2005 09:06:48 +0900 (JST) Subject: New Security Measures Message-ID: <20051010000648.A74D94DDD8@server03.itest-unet.ocn.ne.jp> An HTML attachment was scrubbed... 
URL: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051010/66344f0e/attachment.htm From david at gibson.dropbear.id.au Mon Oct 10 11:55:38 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Mon, 10 Oct 2005 11:55:38 +1000 Subject: [PATCH] ppc64: Fix issue with non zero boot cpu In-Reply-To: <20050919085104.GA25588@krispykreme> References: <20050919085104.GA25588@krispykreme> Message-ID: <20051010015538.GE25913@localhost.localdomain> On Mon, Sep 19, 2005 at 06:51:05PM +1000, Anton Blanchard wrote: > > Hi, > > The new version of the flattened device tree passes the boot cpuid in the > header instead of via a linux,boot-cpu property. > > We need to update the in kernel OF parsing code to do this, otherwise > machines with a non zero boot cpuid fail to come up. Ok, so I can get this right in dtc as well, what should the overall semantics for finding the boot CPU be? At present I'm guessing: 1) If the blob version is >=2, then take the CPU with physical ID given in the header. 2) Otherwise, if there is a linux,boot-cpu property on one of the CPU nodes, use that CPU. 3) Otherwise, if there's a node with 'reg' property equal to 0, use that one. 4) Otherwise, use the first listed CPU node. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From michael at ellerman.id.au Mon Oct 10 13:40:29 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 10 Oct 2005 13:40:29 +1000 Subject: [RFC] Old paca being written by firmware after kexec In-Reply-To: <416e59effa18d7d7eb6500416e638ef8@bga.com> References: <200510071203.37301.michael@ellerman.id.au> <200510071656.36822.michael@ellerman.id.au> <416e59effa18d7d7eb6500416e638ef8@bga.com> Message-ID: <200510101340.34124.michael@ellerman.id.au> On Sat, 8 Oct 2005 01:05, Milton Miller wrote: > i'd rather each cpu do it in kexec-cpu-down Fair enough. 
> we can skip on panic Will do. > however, that means we should register on the cpu to make sure we > don't miss one going down. I don't know what you mean? cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051010/2f5e8006/attachment.pgp From michael at ellerman.id.au Mon Oct 10 18:03:54 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 10 Oct 2005 18:03:54 +1000 Subject: [RFC] Old paca being written by firmware after kexec In-Reply-To: <416e59effa18d7d7eb6500416e638ef8@bga.com> References: <200510071203.37301.michael@ellerman.id.au> <200510071656.36822.michael@ellerman.id.au> <416e59effa18d7d7eb6500416e638ef8@bga.com> Message-ID: <200510101803.58863.michael@ellerman.id.au> On Sat, 8 Oct 2005 01:05, Milton Miller wrote: > > static void kexec_prepare_cpus(void) > > { > > int my_cpu, i, notified=-1; > > > > + /* FIXME: I'd rather do this after the loop but we clobber hw_cpu_id > > */ > > i'd rather each cpu do it in kexec-cpu-down I've coded that up, but then the calling cpu never does vpa_shutdown(). Why don't we just do it in a loop? cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051010/e3b58420/attachment.pgp From olh at suse.de Mon Oct 10 21:37:42 2005 From: olh at suse.de (Olaf Hering) Date: Mon, 10 Oct 2005 13:37:42 +0200 Subject: [PATCH 1/1] ppc64 boot: print firmware provided stackpointer In-Reply-To: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> References: <20051009181931.0.IoWCk29070.29065.olh@nectarine.suse.de> Message-ID: <20051010113742.GA24115@suse.de> Yet another one: show firmware provided stackpointer during boot. This helps to find the "taboo" areas on the various boards. claim tends to fail for these memory areas, but some jokers return success anyway. Use %p to print the load address, its a pointer. Signed-off-by: Olaf Hering arch/ppc64/boot/crt0.S | 1 + arch/ppc64/boot/main.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/boot/crt0.S =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/crt0.S +++ linux-2.6.14-rc3/arch/ppc64/boot/crt0.S @@ -54,5 +54,6 @@ clear_caches: sync isync + mr r6,r1 b start Index: linux-2.6.14-rc3/arch/ppc64/boot/main.c =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/boot/main.c +++ linux-2.6.14-rc3/arch/ppc64/boot/main.c @@ -131,7 +131,7 @@ static unsigned long try_claim(unsigned return addr; } -void start(unsigned long a1, unsigned long a2, void *promptr) +void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) { unsigned long i; int len; @@ -151,7 +151,7 @@ void start(unsigned long a1, unsigned lo if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) exit(); - printf("\n\rzImage starting: loaded at 0x%lx\n\r", (unsigned long) _start); + printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r", _start, sp); /* * The first available claim_base must be 
above the end of the -- short story of a lazy sysadmin: alias appserv=wotan From olh at suse.de Tue Oct 11 00:23:44 2005 From: olh at suse.de (Olaf Hering) Date: Mon, 10 Oct 2005 16:23:44 +0200 Subject: [PATCH] pcnet32 does not use the PROM address on powerpc In-Reply-To: <20051008120316.GA12122@suse.de> References: <20051008104725.GA10248@suse.de> <20051008120316.GA12122@suse.de> Message-ID: <20051010142344.GA26737@suse.de> On Sat, Oct 08, Olaf Hering wrote: > On Sat, Oct 08, Olaf Hering wrote: > > > I have a 44p 270, which gets all 0xFF as MAC address if I power if off > > and on again. Further reboots do not fix it. > > But it does get the correct one if I boot into SMS and do a netboot, > > further reboots will always get the correct MAC address The CSR contains garbage after a coldboot on RS/6000. On some systems (like my 44p 270) the MAC address is all FF, on others (like my B50) it is ff:ff:ff:fd:ff:6b. It can eventually be fixed by loading pcnet32, setting the interface into the UP state, running rmmod pcnet32 and loading it again. But this worked only on the 270. Only netbooting after a cold start provides the correct MAC address via prom and CSR. This makes it very unreliable. I don't know why the MAC is stored in two different places. Remove the special case for powerpc, which was added in early 2.4 development. 
Signed-off-by: Olaf Hering drivers/net/pcnet32.c | 5 ----- 1 files changed, 5 deletions(-) Index: linux-2.6.14-rc3/drivers/net/pcnet32.c =================================================================== --- linux-2.6.14-rc3.orig/drivers/net/pcnet32.c +++ linux-2.6.14-rc3/drivers/net/pcnet32.c @@ -1172,12 +1172,7 @@ pcnet32_probe1(unsigned long ioaddr, int if (memcmp(promaddr, dev->dev_addr, 6) || !is_valid_ether_addr(dev->dev_addr)) { -#ifndef __powerpc__ if (is_valid_ether_addr(promaddr)) { -#else - if (!is_valid_ether_addr(dev->dev_addr) - && is_valid_ether_addr(promaddr)) { -#endif if (pcnet32_debug & NETIF_MSG_PROBE) { printk(" warning: CSR address invalid,\n"); printk(KERN_INFO " using instead PROM address of"); -- short story of a lazy sysadmin: alias appserv=wotan From olh at suse.de Tue Oct 11 07:18:56 2005 From: olh at suse.de (Olaf Hering) Date: Mon, 10 Oct 2005 23:18:56 +0200 Subject: [PATCH] ppc64: compile nls_cp437 and nls_iso8859_1 into the kernel in defconfig Message-ID: <20051010211856.GA16162@suse.de> compile nls_cp437 and nls_iso8859_1 into the kernel in defconfig. This is already enabled in pSeries_defconfig. Reason: if one just boots the new shiny zImage and the root filesystem is on a filesystem not readable by yaboot (like jfs, raid or lvm) upgrading the bootloader will fail because the FAT bootpartition can not be mounted. Upgrade the defconfig files to current status. 
Signed-off-by: Olaf Hering arch/ppc64/configs/bpa_defconfig | 92 ++++++++++++++++++----- arch/ppc64/configs/g5_defconfig | 138 ++++++++++++++++++++++------------- arch/ppc64/configs/iSeries_defconfig | 92 ++++++++++++++++++----- arch/ppc64/configs/maple_defconfig | 77 +++++++++++++------ arch/ppc64/configs/pSeries_defconfig | 106 ++++++++++++++++++++------ arch/ppc64/defconfig | 115 ++++++++++++++++++++++------- 6 files changed, 455 insertions(+), 165 deletions(-) Index: linux-2.6.14-rc3/arch/ppc64/configs/bpa_defconfig =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/configs/bpa_defconfig +++ linux-2.6.14-rc3/arch/ppc64/configs/bpa_defconfig @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:12:19 2005 +# Linux kernel version: 2.6.14-rc3-git8 +# Mon Oct 10 22:33:37 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y # CONFIG_POSIX_MQUEUE is not set @@ -36,6 +37,7 @@ CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y # CONFIG_IKCONFIG is not set # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -95,6 +97,7 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set # CONFIG_NUMA is not set CONFIG_SCHED_SMT=y CONFIG_PREEMPT_NONE=y @@ -110,17 +113,18 @@ CONFIG_PPC_RTAS=y CONFIG_RTAS_PROC=y CONFIG_RTAS_FLASH=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_PROC_DEVICETREE=y +# 
CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set # @@ -132,8 +136,6 @@ CONFIG_PCI_NAMES=y # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -163,8 +165,8 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set CONFIG_INET_TUNNEL=y -CONFIG_IP_TCPDIAG=y -CONFIG_IP_TCPDIAG_IPV6=y +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -181,6 +183,9 @@ CONFIG_INET6_TUNNEL=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m # # IP: Netfilter Configuration @@ -188,11 +193,14 @@ CONFIG_NETFILTER=y CONFIG_IP_NF_CONNTRACK=y # CONFIG_IP_NF_CT_ACCT is not set # CONFIG_IP_NF_CONNTRACK_MARK is not set +CONFIG_IP_NF_CONNTRACK_EVENTS=y CONFIG_IP_NF_CT_PROTO_SCTP=y CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_NETBIOS_NS=m CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_PPTP=m CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -216,13 +224,16 @@ CONFIG_IP_NF_MATCH_OWNER=m CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +CONFIG_IP_NF_MATCH_DCCP=m CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -234,12 +245,14 @@ CONFIG_IP_NF_NAT_IRC=m CONFIG_IP_NF_NAT_FTP=m CONFIG_IP_NF_NAT_TFTP=m CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_NAT_PPTP=m CONFIG_IP_NF_MANGLE=m 
CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_TARGET_NOTRACK=m CONFIG_IP_NF_ARPTABLES=m @@ -251,6 +264,12 @@ CONFIG_IP_NF_ARP_MANGLE=m # # CONFIG_IP6_NF_QUEUE is not set # CONFIG_IP6_NF_IPTABLES is not set +# CONFIG_IP6_NF_TARGET_NFQUEUE is not set + +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set # # SCTP Configuration (EXPERIMENTAL) @@ -278,6 +297,7 @@ CONFIG_NET_CLS_ROUTE=y # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -292,6 +312,11 @@ CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +CONFIG_CONNECTOR=m + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -322,7 +347,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=131072 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -395,6 +419,7 @@ CONFIG_IDEDMA_AUTO=y # # SCSI device support # +CONFIG_RAID_ATTRS=m # CONFIG_SCSI is not set # @@ -436,12 +461,28 @@ CONFIG_NETDEVICES=y # CONFIG_ARCNET is not set # +# PHY device support +# +CONFIG_PHYLIB=m +CONFIG_PHYCONTROL=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set # @@ -462,15 +503,18 @@ CONFIG_E1000=m # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set CONFIG_SKGE=m # CONFIG_SK98LIN is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set +# CONFIG_SPIDER_NET is not set # 
CONFIG_MV643XX_ETH is not set # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set @@ -552,6 +596,7 @@ CONFIG_HW_CONSOLE=y CONFIG_SERIAL_NONSTANDARD=y # CONFIG_ROCKETPORT is not set # CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set # CONFIG_MOXA_SMARTIO is not set # CONFIG_ISI is not set # CONFIG_SYNCLINK is not set @@ -642,7 +687,6 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -656,7 +700,6 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -683,12 +726,17 @@ CONFIG_I2C_ALGOBIT=y # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -756,10 +804,6 @@ CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -768,6 +812,7 @@ CONFIG_INOTIFY=y CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -794,13 +839,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -# CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -# CONFIG_TMPFS_SECURITY is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -846,6 +889,7 @@ CONFIG_SUNRPC=m # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # 
Partition Types @@ -923,6 +967,7 @@ CONFIG_NLS_ISO8859_15=m CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=15 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -981,7 +1026,12 @@ CONFIG_CRYPTO_DEFLATE=m # Library routines # # CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=m CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m Index: linux-2.6.14-rc3/arch/ppc64/configs/g5_defconfig =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/configs/g5_defconfig +++ linux-2.6.14-rc3/arch/ppc64/configs/g5_defconfig @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:16:59 2005 +# Linux kernel version: 2.6.14-rc3-git8 +# Mon Oct 10 22:33:38 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,6 +38,7 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -97,6 +99,7 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set CONFIG_PREEMPT_NONE=y @@ -109,19 +112,20 @@ CONFIG_HZ_250=y CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y 
CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_HOTPLUG_CPU=y +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set -# CONFIG_HOTPLUG_CPU is not set # # PCCARD (PCMCIA/CardBus) support @@ -132,8 +136,6 @@ CONFIG_PCI_NAMES=y # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -163,8 +165,8 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=y -CONFIG_IP_TCPDIAG=m -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -175,6 +177,9 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m # # IP: Netfilter Configuration @@ -182,11 +187,15 @@ CONFIG_NETFILTER=y CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CT_ACCT=y CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y +CONFIG_IP_NF_CONNTRACK_NETLINK=m CONFIG_IP_NF_CT_PROTO_SCTP=m CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_NETBIOS_NS=m CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_PPTP=m CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -210,14 +219,18 @@ CONFIG_IP_NF_MATCH_OWNER=m CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +CONFIG_IP_NF_MATCH_DCCP=m CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m 
CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -229,12 +242,14 @@ CONFIG_IP_NF_NAT_IRC=m CONFIG_IP_NF_NAT_FTP=m CONFIG_IP_NF_NAT_TFTP=m CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_NAT_PPTP=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CONNMARK=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -244,6 +259,11 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -270,6 +290,7 @@ CONFIG_NET_CLS_ROUTE=y # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -284,6 +305,11 @@ CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +CONFIG_CONNECTOR=m + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -315,7 +341,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 # CONFIG_CDROM_PKTCDVD_WCACHE is not set @@ -395,6 +420,7 @@ CONFIG_IDEDMA_AUTO=y # # SCSI device support # +CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_SCSI_PROC_FS=y @@ -422,6 +448,7 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set +CONFIG_SCSI_SAS_ATTRS=m # # SCSI low-level drivers @@ -435,10 +462,12 @@ CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_AIC79XX is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set CONFIG_SCSI_SATA=y # CONFIG_SCSI_SATA_AHCI is not set CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_ATA_PIIX is not set +# CONFIG_SCSI_SATA_MV is not set # 
CONFIG_SCSI_SATA_NV is not set # CONFIG_SCSI_SATA_PROMISE is not set # CONFIG_SCSI_SATA_QSTOR is not set @@ -498,6 +527,7 @@ CONFIG_DM_ZERO=m # CONFIG_FUSION is not set # CONFIG_FUSION_SPI is not set # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support @@ -540,7 +570,6 @@ CONFIG_IEEE1394_RAWIO=y # CONFIG_ADB_PMU=y CONFIG_PMAC_SMU=y -# CONFIG_PMAC_BACKLIGHT is not set CONFIG_THERM_PM72=y # @@ -558,12 +587,28 @@ CONFIG_TUN=m # CONFIG_ARCNET is not set # +# PHY device support +# +CONFIG_PHYLIB=m +CONFIG_PHYCONTROL=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set CONFIG_SUNGEM=y +# CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set # @@ -585,6 +630,7 @@ CONFIG_E1000=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set CONFIG_TIGON3=m @@ -594,6 +640,7 @@ CONFIG_TIGON3=m # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set @@ -760,8 +807,8 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set CONFIG_I2C_KEYWEST=y +CONFIG_I2C_PMAC_SMU=y # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -775,7 +822,6 @@ CONFIG_I2C_KEYWEST=y # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -802,12 +848,17 @@ CONFIG_I2C_KEYWEST=y # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers 
+# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -856,6 +907,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set @@ -937,6 +989,7 @@ CONFIG_USB_STORAGE_DPCM=y CONFIG_USB_STORAGE_SDDR09=y CONFIG_USB_STORAGE_SDDR55=y CONFIG_USB_STORAGE_JUMPSHOT=y +# CONFIG_USB_STORAGE_ONETOUCH is not set # # USB Input Devices @@ -956,9 +1009,11 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -983,30 +1038,14 @@ CONFIG_USB_KAWETH=m CONFIG_USB_PEGASUS=m CONFIG_USB_RTL8150=m CONFIG_USB_USBNET=m - -# -# USB Host-to-Host Cables -# -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_GENESYS=y -CONFIG_USB_NET1080=y -CONFIG_USB_PL2301=y -CONFIG_USB_KC2190=y - -# -# Intelligent USB Devices/Gadgets -# -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_ZAURUS=y -CONFIG_USB_CDCETHER=y - -# -# USB Network Adapters -# -CONFIG_USB_AX8817X=y +CONFIG_USB_NET_AX8817X=m +CONFIG_USB_NET_CDCETHER=m +# CONFIG_USB_NET_GL620A is not set +CONFIG_USB_NET_NET1080=m +# CONFIG_USB_NET_PLUSB is not set +# CONFIG_USB_NET_RNDIS_HOST is not set +# CONFIG_USB_NET_CDC_SUBSET is not set +CONFIG_USB_NET_ZAURUS=m CONFIG_USB_MON=y # @@ -1124,16 +1163,12 @@ CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# CONFIG_XFS_FS=m CONFIG_XFS_EXPORT=y -# CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y @@ 
-1141,6 +1176,7 @@ CONFIG_INOTIFY=y CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -1168,14 +1204,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -# CONFIG_DEVPTS_FS_SECURITY is not set CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -1225,6 +1258,7 @@ CONFIG_CIFS=m # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1303,6 +1337,7 @@ CONFIG_OPROFILE=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1360,7 +1395,12 @@ CONFIG_CRYPTO_TEST=m # Library routines # CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m Index: linux-2.6.14-rc3/arch/ppc64/configs/iSeries_defconfig =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/configs/iSeries_defconfig +++ linux-2.6.14-rc3/arch/ppc64/configs/iSeries_defconfig @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:17:02 2005 +# Linux kernel version: 2.6.14-rc3-git8 +# Mon Oct 10 22:33:39 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 # General 
setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -38,6 +39,7 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -88,6 +90,7 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set CONFIG_PREEMPT_NONE=y @@ -101,17 +104,16 @@ CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_LPARCFG=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set # @@ -152,8 +154,8 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=y -CONFIG_IP_TCPDIAG=m -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -164,6 +166,9 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m # # IP: Netfilter Configuration @@ -171,11 +176,15 @@ CONFIG_NETFILTER=y CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CT_ACCT=y CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y +CONFIG_IP_NF_CONNTRACK_NETLINK=m CONFIG_IP_NF_CT_PROTO_SCTP=m CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_NETBIOS_NS=m CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_PPTP=m CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -199,14 +208,18 @@ CONFIG_IP_NF_MATCH_OWNER=m CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m 
+CONFIG_IP_NF_MATCH_DCCP=m CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -218,12 +231,14 @@ CONFIG_IP_NF_NAT_IRC=m CONFIG_IP_NF_NAT_FTP=m CONFIG_IP_NF_NAT_TFTP=m CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_NAT_PPTP=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CONNMARK=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -233,6 +248,11 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -259,6 +279,7 @@ CONFIG_NET_CLS_ROUTE=y # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -273,6 +294,11 @@ CONFIG_FW_LOADER=m # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +CONFIG_CONNECTOR=m + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -303,7 +329,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -323,6 +348,7 @@ CONFIG_IOSCHED_CFQ=y # # SCSI device support # +CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_SCSI_PROC_FS=y @@ -350,6 +376,7 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set +CONFIG_SCSI_SAS_ATTRS=m # # SCSI low-level drivers @@ -363,6 +390,7 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_AIC79XX is not 
set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set # CONFIG_SCSI_SATA is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set @@ -415,6 +443,7 @@ CONFIG_DM_ZERO=m # CONFIG_FUSION is not set # CONFIG_FUSION_SPI is not set # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support @@ -445,12 +474,28 @@ CONFIG_TUN=m # CONFIG_ARCNET is not set # +# PHY device support +# +CONFIG_PHYLIB=m +CONFIG_PHYCONTROL=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set # @@ -489,6 +534,7 @@ CONFIG_E1000=m # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set @@ -498,6 +544,7 @@ CONFIG_E1000=m # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set @@ -632,7 +679,6 @@ CONFIG_MAX_RAW_DEVS=256 # I2C support # # CONFIG_I2C is not set -# CONFIG_I2C_SENSOR is not set # # Dallas's 1-wire bus @@ -643,12 +689,17 @@ CONFIG_MAX_RAW_DEVS=256 # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -722,16 +773,12 @@ CONFIG_JFS_SECURITY=y # CONFIG_JFS_DEBUG is not set # CONFIG_JFS_STATISTICS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# CONFIG_XFS_FS=m CONFIG_XFS_EXPORT=y -# CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is 
not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y @@ -739,6 +786,7 @@ CONFIG_INOTIFY=y CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -766,14 +814,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -CONFIG_DEVPTS_FS_SECURITY=y CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y # CONFIG_HUGETLBFS is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -824,6 +869,7 @@ CONFIG_CIFS_POSIX=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -897,6 +943,7 @@ CONFIG_OPROFILE=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -954,7 +1001,12 @@ CONFIG_CRYPTO_TEST=m # Library routines # CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m Index: linux-2.6.14-rc3/arch/ppc64/configs/maple_defconfig =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/configs/maple_defconfig +++ linux-2.6.14-rc3/arch/ppc64/configs/maple_defconfig @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:17:04 2005 +# Linux kernel version: 2.6.14-rc3-git8 +# Mon Oct 10 22:33:39 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y 
CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,6 +38,7 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -97,6 +99,7 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set CONFIG_PREEMPT_NONE=y @@ -109,17 +112,18 @@ CONFIG_HZ_250=y CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set # @@ -131,8 +135,6 @@ CONFIG_PCI_NAMES=y # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -163,14 +165,19 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -196,6 +203,7 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -210,6 +218,11 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y # CONFIG_DEBUG_DRIVER is not set # +# 
Connector - unified userspace <-> kernelspace linker +# +CONFIG_CONNECTOR=m + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -240,7 +253,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=8192 # CONFIG_BLK_DEV_INITRD is not set -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -313,6 +325,7 @@ CONFIG_IDEDMA_AUTO=y # # SCSI device support # +CONFIG_RAID_ATTRS=m # CONFIG_SCSI is not set # @@ -354,12 +367,28 @@ CONFIG_NETDEVICES=y # CONFIG_ARCNET is not set # +# PHY device support +# +CONFIG_PHYLIB=m +CONFIG_PHYCONTROL=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set # @@ -398,6 +427,7 @@ CONFIG_E1000=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set @@ -408,6 +438,7 @@ CONFIG_E1000=y # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set @@ -553,7 +584,6 @@ CONFIG_I2C_AMD8111=y # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -567,7 +597,6 @@ CONFIG_I2C_AMD8111=y # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -594,12 +623,17 @@ CONFIG_I2C_AMD8111=y # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port 
drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -681,9 +715,11 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -814,10 +850,6 @@ CONFIG_JBD=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -826,6 +858,7 @@ CONFIG_INOTIFY=y CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -849,14 +882,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -# CONFIG_DEVPTS_FS_SECURITY is not set CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -898,6 +928,7 @@ CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -975,6 +1006,7 @@ CONFIG_NLS_UTF8=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set CONFIG_DEBUG_SLAB=y # CONFIG_DEBUG_SPINLOCK is not set @@ -1034,6 +1066,7 @@ CONFIG_CRYPTO_DES=y # Library routines # CONFIG_CRC_CCITT=y +# CONFIG_CRC16 is not set CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y Index: linux-2.6.14-rc3/arch/ppc64/configs/pSeries_defconfig =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/configs/pSeries_defconfig +++ linux-2.6.14-rc3/arch/ppc64/configs/pSeries_defconfig @@ -1,17 +1,17 @@ # # 
Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:17:07 2005 +# Linux kernel version: 2.6.14-rc3-git8 +# Mon Oct 10 22:33:40 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -38,6 +39,7 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CPUSETS=y +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -104,6 +106,7 @@ CONFIG_DISCONTIGMEM_MANUAL=y CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y +# CONFIG_SPARSEMEM_STATIC is not set CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_NODES_SPAN_OTHER_NODES=y CONFIG_NUMA=y @@ -124,19 +127,20 @@ CONFIG_RTAS_FLASH=m CONFIG_SCANLOG=m CONFIG_LPARCFG=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_HOTPLUG_CPU=y +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set -CONFIG_HOTPLUG_CPU=y # # PCCARD (PCMCIA/CardBus) support @@ -152,8 +156,6 @@ CONFIG_HOTPLUG_PCI=m # CONFIG_HOTPLUG_PCI_SHPC is not set CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -183,8 +185,8 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=y -CONFIG_IP_TCPDIAG=m -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m # 
CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -195,6 +197,9 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m # # IP: Netfilter Configuration @@ -202,11 +207,15 @@ CONFIG_NETFILTER=y CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CT_ACCT=y CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y +CONFIG_IP_NF_CONNTRACK_NETLINK=m CONFIG_IP_NF_CT_PROTO_SCTP=m CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_NETBIOS_NS=m CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_PPTP=m CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -230,14 +239,18 @@ CONFIG_IP_NF_MATCH_OWNER=m CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +CONFIG_IP_NF_MATCH_DCCP=m CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -249,12 +262,14 @@ CONFIG_IP_NF_NAT_IRC=m CONFIG_IP_NF_NAT_FTP=m CONFIG_IP_NF_NAT_TFTP=m CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_NAT_PPTP=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CONNMARK=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -264,6 +279,11 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -290,6 +310,7 @@ CONFIG_NET_CLS_ROUTE=y # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # 
CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -304,6 +325,11 @@ CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +CONFIG_CONNECTOR=m + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -342,7 +368,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -416,6 +441,7 @@ CONFIG_IDEDMA_AUTO=y # # SCSI device support # +CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_SCSI_PROC_FS=y @@ -443,6 +469,7 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_ISCSI_ATTRS=m +CONFIG_SCSI_SAS_ATTRS=m # # SCSI low-level drivers @@ -456,6 +483,7 @@ CONFIG_SCSI_ISCSI_ATTRS=m # CONFIG_SCSI_AIC79XX is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set # CONFIG_SCSI_SATA is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set @@ -517,6 +545,7 @@ CONFIG_DM_MULTIPATH_EMC=m # CONFIG_FUSION is not set # CONFIG_FUSION_SPI is not set # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support @@ -547,12 +576,28 @@ CONFIG_TUN=m # CONFIG_ARCNET is not set # +# PHY device support +# +CONFIG_PHYLIB=m +CONFIG_PHYCONTROL=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y CONFIG_VORTEX=y # CONFIG_TYPHOON is not set @@ -581,6 +626,7 @@ CONFIG_E100=y # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # CONFIG_VIA_RHINE is not set +# CONFIG_NET_POCKET is not set # # Ethernet (1000 Mbit) @@ -594,6 +640,7 @@ 
CONFIG_E1000=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set @@ -604,6 +651,7 @@ CONFIG_TIGON3=y # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set CONFIG_S2IO=m @@ -789,7 +837,6 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT is not set # CONFIG_I2C_PARPORT_LIGHT is not set @@ -804,7 +851,6 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -831,12 +877,17 @@ CONFIG_I2C_ALGOBIT=y # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -885,6 +936,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set @@ -982,9 +1034,11 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -1057,7 +1111,8 @@ CONFIG_USB_MON=y # InfiniBand support # CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_VERBS=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m # CONFIG_INFINIBAND_MTHCA_DEBUG is not set CONFIG_INFINIBAND_IPOIB=m @@ -1095,16 +1150,12 @@ CONFIG_JFS_SECURITY=y # 
CONFIG_JFS_DEBUG is not set # CONFIG_JFS_STATISTICS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# CONFIG_XFS_FS=m CONFIG_XFS_EXPORT=y -# CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y @@ -1112,6 +1163,7 @@ CONFIG_INOTIFY=y CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -1139,14 +1191,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -CONFIG_DEVPTS_FS_SECURITY=y CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -1197,6 +1246,7 @@ CONFIG_CIFS_POSIX=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1261,6 +1311,7 @@ CONFIG_OPROFILE=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1320,7 +1371,12 @@ CONFIG_CRYPTO_TEST=m # Library routines # CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m Index: linux-2.6.14-rc3/arch/ppc64/defconfig =================================================================== --- linux-2.6.14-rc3.orig/arch/ppc64/defconfig +++ linux-2.6.14-rc3/arch/ppc64/defconfig @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:16:54 2005 +# Linux kernel version: 2.6.14-rc3-git8 +# Mon Oct 10 23:06:50 2005 # CONFIG_64BIT=y CONFIG_MMU=y 
CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,6 +38,7 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CPUSETS=y +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -106,6 +108,7 @@ CONFIG_DISCONTIGMEM_MANUAL=y CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y +# CONFIG_SPARSEMEM_STATIC is not set CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_NODES_SPAN_OTHER_NODES=y # CONFIG_NUMA is not set @@ -126,19 +129,20 @@ CONFIG_RTAS_FLASH=m CONFIG_SCANLOG=m CONFIG_LPARCFG=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_HOTPLUG_CPU=y +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m # CONFIG_PCI_LEGACY_PROC is not set -# CONFIG_PCI_NAMES is not set # CONFIG_PCI_DEBUG is not set -CONFIG_HOTPLUG_CPU=y # # PCCARD (PCMCIA/CardBus) support @@ -154,8 +158,6 @@ CONFIG_HOTPLUG_PCI=m # CONFIG_HOTPLUG_PCI_SHPC is not set CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -185,8 +187,8 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=y -# CONFIG_IP_TCPDIAG is not set -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -197,6 +199,9 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set 
+CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m # # IP: Netfilter Configuration @@ -204,11 +209,15 @@ CONFIG_NETFILTER=y CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CT_ACCT=y CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y +CONFIG_IP_NF_CONNTRACK_NETLINK=m CONFIG_IP_NF_CT_PROTO_SCTP=m CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_NETBIOS_NS=m CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_PPTP=m CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -232,14 +241,18 @@ CONFIG_IP_NF_MATCH_OWNER=m CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +CONFIG_IP_NF_MATCH_DCCP=m CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -251,12 +264,14 @@ CONFIG_IP_NF_NAT_IRC=m CONFIG_IP_NF_NAT_FTP=m CONFIG_IP_NF_NAT_TFTP=m CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_NAT_PPTP=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CONNMARK=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -266,6 +281,11 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -292,6 +312,7 @@ CONFIG_NET_CLS_ROUTE=y # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -306,6 +327,11 @@ CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> 
kernelspace linker +# +CONFIG_CONNECTOR=m + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -344,7 +370,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -422,6 +447,7 @@ CONFIG_IDEDMA_AUTO=y # # SCSI device support # +CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_SCSI_PROC_FS=y @@ -449,6 +475,7 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_ISCSI_ATTRS=m +CONFIG_SCSI_SAS_ATTRS=m # # SCSI low-level drivers @@ -462,10 +489,12 @@ CONFIG_SCSI_ISCSI_ATTRS=m # CONFIG_SCSI_AIC79XX is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set CONFIG_SCSI_SATA=y # CONFIG_SCSI_SATA_AHCI is not set CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_ATA_PIIX is not set +# CONFIG_SCSI_SATA_MV is not set # CONFIG_SCSI_SATA_NV is not set # CONFIG_SCSI_SATA_PROMISE is not set # CONFIG_SCSI_SATA_QSTOR is not set @@ -535,6 +564,7 @@ CONFIG_DM_MULTIPATH_EMC=m # CONFIG_FUSION is not set # CONFIG_FUSION_SPI is not set # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support @@ -578,7 +608,6 @@ CONFIG_IEEE1394_AMDTP=m # CONFIG_ADB_PMU=y CONFIG_PMAC_SMU=y -# CONFIG_PMAC_BACKLIGHT is not set CONFIG_THERM_PM72=y # @@ -596,12 +625,28 @@ CONFIG_TUN=m # CONFIG_ARCNET is not set # +# PHY device support +# +CONFIG_PHYLIB=m +CONFIG_PHYCONTROL=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set CONFIG_SUNGEM=y +# CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y CONFIG_VORTEX=y # CONFIG_TYPHOON is not set @@ -630,6 +675,7 @@ CONFIG_E100=y # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # 
CONFIG_VIA_RHINE is not set +# CONFIG_NET_POCKET is not set # # Ethernet (1000 Mbit) @@ -643,16 +689,19 @@ CONFIG_E1000=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y # CONFIG_BNX2 is not set +# CONFIG_SPIDER_NET is not set # CONFIG_MV643XX_ETH is not set # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set # CONFIG_S2IO is not set @@ -838,8 +887,8 @@ CONFIG_I2C_AMD8111=y # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set CONFIG_I2C_KEYWEST=y +CONFIG_I2C_PMAC_SMU=y # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT is not set # CONFIG_I2C_PARPORT_LIGHT is not set @@ -854,7 +903,6 @@ CONFIG_I2C_KEYWEST=y # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -881,12 +929,17 @@ CONFIG_I2C_KEYWEST=y # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -939,6 +992,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set @@ -1020,6 +1074,7 @@ CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # # USB Input Devices @@ -1036,9 +1091,11 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # 
CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -1111,7 +1168,8 @@ CONFIG_USB_PEGASUS=y # InfiniBand support # CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_VERBS=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m # CONFIG_INFINIBAND_MTHCA_DEBUG is not set CONFIG_INFINIBAND_IPOIB=m @@ -1149,16 +1207,12 @@ CONFIG_JFS_SECURITY=y # CONFIG_JFS_DEBUG is not set # CONFIG_JFS_STATISTICS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# CONFIG_XFS_FS=m CONFIG_XFS_EXPORT=y -# CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y @@ -1166,6 +1220,7 @@ CONFIG_INOTIFY=y CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=y # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -1192,14 +1247,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -CONFIG_DEVPTS_FS_SECURITY=y CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -1250,6 +1302,7 @@ CONFIG_CIFS_POSIX=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1276,7 +1329,7 @@ CONFIG_MSDOS_PARTITION=y # CONFIG_NLS=y CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=m CONFIG_NLS_CODEPAGE_775=m CONFIG_NLS_CODEPAGE_850=m @@ -1300,7 +1353,7 @@ CONFIG_NLS_ISO8859_8=m CONFIG_NLS_CODEPAGE_1250=m CONFIG_NLS_CODEPAGE_1251=m CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_2=m CONFIG_NLS_ISO8859_3=m CONFIG_NLS_ISO8859_4=m @@ -1328,6 +1381,7 @@ CONFIG_OPROFILE=y 
CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1387,7 +1441,12 @@ CONFIG_CRYPTO_TEST=m # Library routines # CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m -- short story of a lazy sysadmin: alias appserv=wotan From galak at freescale.com Tue Oct 11 05:41:50 2005 From: galak at freescale.com (Kumar Gala) Date: Mon, 10 Oct 2005 14:41:50 -0500 (CDT) Subject: [PATCH] powerpc: Some fixes to allow building for a Book-E processor Message-ID: Some minor fixes that are needed if we are building for a book-e processor. Book-e processors dont have DABR or hashed page tables. Signed-off-by: Kumar K. Gala --- commit dfc32a358c961c3fbfa94942ecb06da2e895ffe7 tree 61513cafc77bf3fb49ccf9ab652ff2a0b492b113 parent 05f62a5c049845eab8dfb3aeda55c18a2d4396e3 author Kumar K. Gala Mon, 10 Oct 2005 13:12:10 -0500 committer Kumar K. 
Gala Mon, 10 Oct 2005 13:12:10 -0500 arch/powerpc/kernel/process.c | 2 ++ arch/powerpc/mm/Makefile | 5 ++--- arch/powerpc/mm/fault.c | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -214,6 +214,7 @@ int dump_spe(struct pt_regs *regs, elf_v } #endif /* CONFIG_SPE */ +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) static void set_dabr_spr(unsigned long val) { mtspr(SPRN_DABR, val); @@ -238,6 +239,7 @@ int set_dabr(unsigned long dabr) } static DEFINE_PER_CPU(unsigned long, current_dabr); +#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -3,10 +3,9 @@ # obj-y := fault.o mem.o lmb.o -obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ - tlb.o +obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o tlb.o obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -80,6 +80,7 @@ static int store_updates_sp(struct pt_re return 0; } +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) static void do_dabr(struct pt_regs *regs, unsigned long error_code) { siginfo_t info; @@ -101,6 +102,7 @@ static void do_dabr(struct pt_regs *regs info.si_addr = (void __user *)regs->nip; force_sig_info(SIGTRAP, &info, current); } +#endif /* * For 600- and 800-family processors, the error_code parameter is DSISR From galak at freescale.com Tue Oct 11 05:51:12 2005 From: galak at freescale.com 
(Kumar Gala) Date: Mon, 10 Oct 2005 14:51:12 -0500 (CDT) Subject: [PATCH] powerpc: zero out BSS for all platforms Message-ID: We need to ensure that the BSS is zeroed out for all platforms. Currently only prom_init.c was clearlying out the BSS which only works for PPC_OF platforms. Signed-off-by: Kumar K. Gala --- commit 56381a9f0765ba3ffa5f21a4cdcb93ac0279eeea tree 9f0f353b0776129626082a46b578d637fb79dad1 parent dfc32a358c961c3fbfa94942ecb06da2e895ffe7 author Kumar K. Gala Mon, 10 Oct 2005 14:48:36 -0500 committer Kumar K. Gala Mon, 10 Oct 2005 14:48:36 -0500 arch/powerpc/kernel/setup.c | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/setup.c b/arch/powerpc/kernel/setup.c --- a/arch/powerpc/kernel/setup.c +++ b/arch/powerpc/kernel/setup.c @@ -293,6 +293,10 @@ unsigned long __init early_init(unsigned reloc_got2(offset); + /* First zero the BSS -- use memset, some arches don't have + * caches on yet */ + memset_io(PTRRELOC(&__bss_start), 0, _end - __bss_start); + /* * Identify the CPU type and fix up code sections * that depend on which cpu we have. From galak at freescale.com Tue Oct 11 05:55:13 2005 From: galak at freescale.com (Kumar Gala) Date: Mon, 10 Oct 2005 14:55:13 -0500 (CDT) Subject: [PATCH] powerpc: Fix building on non-MULTIPLATFORM machines Message-ID: On !CONFIG_PPC_MULTIPLATFORM _machine is defined as 0. This is ok, but we can't assign a value to _machine then. Also, we may not have CONFIG_PCI available, so only build in support for find_parent_pci_resource(), request_OF_resource(), release_OF_resource() if PCI is enabled. This is probably not the long term fix but works out for now. Signed-off-by: Kumar K. Gala --- commit 00c61049131b483e217a7327f2f55dff40db8cbc tree d1915b9b33372f5e89b6b8ef581681f49b24bab4 parent 56381a9f0765ba3ffa5f21a4cdcb93ac0279eeea author Kumar K. Gala Mon, 10 Oct 2005 14:54:32 -0500 committer Kumar K. 
Gala Mon, 10 Oct 2005 14:54:32 -0500 arch/powerpc/kernel/prom.c | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1129,8 +1129,10 @@ static int __init early_init_dt_scan_cho #ifdef CONFIG_PPC64 systemcfg->platform = *prop; #else +#ifdef CONFIG_PPC_MULTIPLATFORM _machine = *prop; #endif +#endif #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ @@ -1987,6 +1989,7 @@ bus_space_to_resource_flags(unsigned int } } +#ifdef CONFIG_PCI static struct resource *find_parent_pci_resource(struct pci_dev* pdev, struct address_range *range) { @@ -2139,3 +2142,4 @@ int release_OF_resource(struct device_no return 0; } EXPORT_SYMBOL(release_OF_resource); +#endif /* CONFIG_PCI */ From sfr at ozlabs.org Tue Oct 11 10:53:37 2005 From: sfr at ozlabs.org (Stephen Rothwell) Date: Tue, 11 Oct 2005 10:53:37 +1000 Subject: Posting delays Message-ID: <20051011105337.6cda121e.sfr@ozlabs.org> Hi all, Just a note to let you all know that I have changed the list manager configuration so that posts from email addresses that are not subscribed to the list will be held until looked at by one of the admins. This may cause some delay in peoples postings, sorry. If people are happy with some (hopefully short) delay, then nothing needs to change - whenever what looks like a reasonable post is held, I am adding the senders address to those whom will not be held again. -- Cheers, Stephen Rothwell sfr at ozlabs.org -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051011/713e621a/attachment.pgp From greg at kroah.com Tue Oct 11 10:10:56 2005 From: greg at kroah.com (Greg KH) Date: Mon, 10 Oct 2005 17:10:56 -0700 Subject: [PATCH 20/22] PCI Error Recovery: e100 network device driver In-Reply-To: <20051006235729.GU29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> <20051006235729.GU29826@austin.ibm.com> Message-ID: <20051011001056.GA16634@kroah.com> On Thu, Oct 06, 2005 at 06:57:29PM -0500, linas wrote: > +config E100_EEH_RECOVERY > + bool "Enable PCI bus error recovery" > + depends on E100 && PPC_PSERIES > + help > + If you say Y here, the driver will be able to recover from > + PCI bus errors on many PowerPC platforms. IBM pSeries users > + should answer Y. Why make a config option for this at all? Who would turn it off? > @@ -2661,6 +2731,9 @@ > .resume = e100_resume, > #endif > .shutdown = e100_shutdown, > +#ifdef CONFIG_E100_EEH_RECOVERY > + .err_handler = &e100_err_handler, > +#endif /* CONFIG_E100_EEH_RECOVERY */ No, don't put #ifdefs in the middle of a structure, remember we made err_handler always present in the .h file for a reason... thanks, greg k-h From benh at kernel.crashing.org Tue Oct 11 17:46:11 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 11 Oct 2005 17:46:11 +1000 Subject: [PATCH] powerpc: zero out BSS for all platforms In-Reply-To: References: Message-ID: <1129016771.17365.198.camel@gaston> On Mon, 2005-10-10 at 14:51 -0500, Kumar Gala wrote: > We need to ensure that the BSS is zeroed out for all platforms. > Currently only prom_init.c was clearlying out the BSS which only works > for PPC_OF platforms. > > Signed-off-by: Kumar K. Gala You need to make absolutely certain that we have not written anything to the bss yet though... Is that the case ? I usually prefer doing the zero'ing in assembly :) Ben. 
From olh at suse.de Tue Oct 11 19:30:40 2005 From: olh at suse.de (Olaf Hering) Date: Tue, 11 Oct 2005 11:30:40 +0200 Subject: [PATCH] powerpc: zero out BSS for all platforms In-Reply-To: <1129016771.17365.198.camel@gaston> References: <1129016771.17365.198.camel@gaston> Message-ID: <20051011093040.GA5362@suse.de> On Tue, Oct 11, Benjamin Herrenschmidt wrote: > On Mon, 2005-10-10 at 14:51 -0500, Kumar Gala wrote: > > We need to ensure that the BSS is zeroed out for all platforms. > > Currently only prom_init.c was clearlying out the BSS which only works > > for PPC_OF platforms. > > > > Signed-off-by: Kumar K. Gala > > You need to make absolutely certain that we have not written anything to > the bss yet though... Is that the case ? I usually prefer doing the > zero'ing in assembly :) early_init is called so early, its almost like asm code. -- short story of a lazy sysadmin: alias appserv=wotan From geert at linux-m68k.org Tue Oct 11 17:47:34 2005 From: geert at linux-m68k.org (Geert Uytterhoeven) Date: Tue, 11 Oct 2005 09:47:34 +0200 (CEST) Subject: [PATCH] powerpc: zero out BSS for all platforms In-Reply-To: References: Message-ID: On Mon, 10 Oct 2005, Kumar Gala wrote: > We need to ensure that the BSS is zeroed out for all platforms. > Currently only prom_init.c was clearlying out the BSS which only works > for PPC_OF platforms. > > Signed-off-by: Kumar K. Gala > > --- > commit 56381a9f0765ba3ffa5f21a4cdcb93ac0279eeea > tree 9f0f353b0776129626082a46b578d637fb79dad1 > parent dfc32a358c961c3fbfa94942ecb06da2e895ffe7 > author Kumar K. Gala Mon, 10 Oct 2005 14:48:36 -0500 > committer Kumar K. 
Gala Mon, 10 Oct 2005 14:48:36 -0500 > > arch/powerpc/kernel/setup.c | 4 ++++ > 1 files changed, 4 insertions(+), 0 deletions(-) > > diff --git a/arch/powerpc/kernel/setup.c b/arch/powerpc/kernel/setup.c > --- a/arch/powerpc/kernel/setup.c > +++ b/arch/powerpc/kernel/setup.c > @@ -293,6 +293,10 @@ unsigned long __init early_init(unsigned > > reloc_got2(offset); > > + /* First zero the BSS -- use memset, some arches don't have ^^^^^^ > + * caches on yet */ > + memset_io(PTRRELOC(&__bss_start), 0, _end - __bss_start); ^^^^^^^^^ The comment is not in sync with the code. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert at linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds From benh at kernel.crashing.org Tue Oct 11 19:48:56 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Tue, 11 Oct 2005 19:48:56 +1000 Subject: [PATCH] powerpc: zero out BSS for all platforms In-Reply-To: <20051011093040.GA5362@suse.de> References: <1129016771.17365.198.camel@gaston> <20051011093040.GA5362@suse.de> Message-ID: <1129024136.17365.202.camel@gaston> On Tue, 2005-10-11 at 11:30 +0200, Olaf Hering wrote: > On Tue, Oct 11, Benjamin Herrenschmidt wrote: > > > On Mon, 2005-10-10 at 14:51 -0500, Kumar Gala wrote: > > > We need to ensure that the BSS is zeroed out for all platforms. > > > Currently only prom_init.c was clearlying out the BSS which only works > > > for PPC_OF platforms. > > > > > > Signed-off-by: Kumar K. Gala > > > > You need to make absolutely certain that we have not written anything to > > the bss yet though... Is that the case ? I usually prefer doing the > > zero'ing in assembly :) > > early_init is called so early, its almost like asm code. Yah, as long as that stuff is still separate between ppc32 and ppc64, that's fine. 
We'll have to be careful with iSeries once we do the merge :) Ben. From anton at samba.org Tue Oct 11 22:18:54 2005 From: anton at samba.org (Anton Blanchard) Date: Tue, 11 Oct 2005 22:18:54 +1000 Subject: [PATCH] ppc64: Add R_PPC64_TOC16 module reloc In-Reply-To: <1129000992.4557.13.camel@localhost.localdomain> References: <200510061523.j96FNXZm009893@d01av01.pok.ibm.com> <1128620515.4547.16.camel@localhost.localdomain> <20051007031801.GN26007@bubble.grove.modra.org> <1129000992.4557.13.camel@localhost.localdomain> Message-ID: <20051011121854.GB5174@krispykreme> From: Peter Bergner Just to be safe, I verified the patch still applies to Linus' tree. There was a little fuzz, so I'm attaching a new patch that applies cleanly with no fuzz. Signed-off-by: Peter Bergner Signed-off-by: Anton Blanchard --- arch/ppc64/kernel/module.c.orig 2005-10-10 21:55:00.404946024 -0400 +++ arch/ppc64/kernel/module.c 2005-10-10 22:03:02.073931824 -0400 @@ -341,6 +341,19 @@ *(unsigned long *)location = my_r2(sechdrs, me); break; + case R_PPC64_TOC16: + /* Subtact TOC pointer */ + value -= my_r2(sechdrs, me); + if (value + 0x8000 > 0xffff) { + printk("%s: bad TOC16 relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + case R_PPC64_TOC16_DS: /* Subtact TOC pointer */ value -= my_r2(sechdrs, me); From paulus at samba.org Tue Oct 11 22:26:55 2005 From: paulus at samba.org (Paul Mackerras) Date: Tue, 11 Oct 2005 22:26:55 +1000 Subject: merge progress Message-ID: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> I have just pushed out a new pile of commits to the powerpc-merge tree on kernel.org. (It may take a little while for them to get mirrored from master.kernel.org to ftp/rsync.kernel.org.) 
The current state is that with ARCH=ppc, the merge tree will build all the ppc32 configs that build in the stock 2.6.14-rc2 tree, and the powermac configs run (I haven't boot-tested any of the others). With ARCH=powerpc, a 32-bit powermac target builds (both UP and SMP) and runs, and a 64-bit pSeries target builds and runs. 64-bit iSeries builds but there is some weird bug that Stephen Rothwell and BenH have been chasing. I haven't got far enough with the merge to be able to build a 64-bit powermac target. With ARCH=ppc64, pSeries, iSeries, powermac and maple all build, and pSeries and iSeries have been boot-tested. I think I have broken BPA/cell because it was using pSeries_smp.c (I think we need to make cell have its own bpa_smp.c or whatever anyway). I have about a week's backlog of mail, resulting from being on vacation last week and from concentrating on pushing the merge along. Tomorrow I'll hopefully get a chance to look at all the patches people have sent in the last week. :) Paul. From Simon.Richter at hogyros.de Tue Oct 11 22:31:23 2005 From: Simon.Richter at hogyros.de (Simon Richter) Date: Tue, 11 Oct 2005 14:31:23 +0200 Subject: merge progress In-Reply-To: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> References: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> Message-ID: <434BB09B.9070605@hogyros.de> Hi, Paul Mackerras wrote: > I have just pushed out a new pile of commits to the powerpc-merge tree > on kernel.org. (It may take a little while for them to get mirrored > from master.kernel.org to ftp/rsync.kernel.org.) Should I fast-forward the APUS patches towards the merge tree, or is it better to apply them in the unmerged tree and merge them back into the merge tree from there? Simon -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 307 bytes Desc: OpenPGP digital signature Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051011/38de347a/attachment.pgp From hch at lst.de Wed Oct 12 00:35:29 2005 From: hch at lst.de (Christoph Hellwig) Date: Tue, 11 Oct 2005 16:35:29 +0200 Subject: [PATCH] ppc64: Add R_PPC64_TOC16 module reloc In-Reply-To: <20051011121854.GB5174@krispykreme> References: <200510061523.j96FNXZm009893@d01av01.pok.ibm.com> <1128620515.4547.16.camel@localhost.localdomain> <20051007031801.GN26007@bubble.grove.modra.org> <1129000992.4557.13.camel@localhost.localdomain> <20051011121854.GB5174@krispykreme> Message-ID: <20051011143529.GB11440@lst.de> On Tue, Oct 11, 2005 at 10:18:54PM +1000, Anton Blanchard wrote: > > From: Peter Bergner > > Just to be safe, I verified the patch still applies to Linus' tree. > There was a little fuzz, so I'm attaching a new patch that applies > cleanly with no fuzz. Which module needs this relocation? From anton at samba.org Wed Oct 12 00:41:26 2005 From: anton at samba.org (Anton Blanchard) Date: Wed, 12 Oct 2005 00:41:26 +1000 Subject: [PATCH] ppc64: Add R_PPC64_TOC16 module reloc In-Reply-To: <20051011143529.GB11440@lst.de> References: <200510061523.j96FNXZm009893@d01av01.pok.ibm.com> <1128620515.4547.16.camel@localhost.localdomain> <20051007031801.GN26007@bubble.grove.modra.org> <1129000992.4557.13.camel@localhost.localdomain> <20051011121854.GB5174@krispykreme> <20051011143529.GB11440@lst.de> Message-ID: <20051011144126.GC5174@krispykreme> > Which module needs this relocation? Why do you ask? It's a valid relocation and it looks like newer gcc versions are starting to use it. 
Anton From hch at lst.de Wed Oct 12 00:45:20 2005 From: hch at lst.de (Christoph Hellwig) Date: Tue, 11 Oct 2005 16:45:20 +0200 Subject: [PATCH] ppc64: Add R_PPC64_TOC16 module reloc In-Reply-To: <20051011144126.GC5174@krispykreme> References: <200510061523.j96FNXZm009893@d01av01.pok.ibm.com> <1128620515.4547.16.camel@localhost.localdomain> <20051007031801.GN26007@bubble.grove.modra.org> <1129000992.4557.13.camel@localhost.localdomain> <20051011121854.GB5174@krispykreme> <20051011143529.GB11440@lst.de> <20051011144126.GC5174@krispykreme> Message-ID: <20051011144520.GA11873@lst.de> On Wed, Oct 12, 2005 at 12:41:26AM +1000, Anton Blanchard wrote: > > > Which module needs this relocation? > > Why do you ask? Its a valid relocation and it looks like newer gcc > versions are starting to use it. I remember cases where people tried to add odd relocations for ia64 because their modules were doing really silly things. If this is one that recent gcc generates it's fine obviously. From olh at suse.de Wed Oct 12 01:03:50 2005 From: olh at suse.de (Olaf Hering) Date: Tue, 11 Oct 2005 17:03:50 +0200 Subject: [PATCH] ppc64: Add R_PPC64_TOC16 module reloc In-Reply-To: <20051011143529.GB11440@lst.de> References: <200510061523.j96FNXZm009893@d01av01.pok.ibm.com> <1128620515.4547.16.camel@localhost.localdomain> <20051007031801.GN26007@bubble.grove.modra.org> <1129000992.4557.13.camel@localhost.localdomain> <20051011121854.GB5174@krispykreme> <20051011143529.GB11440@lst.de> Message-ID: <20051011150350.GA23063@suse.de> On Tue, Oct 11, Christoph Hellwig wrote: > On Tue, Oct 11, 2005 at 10:18:54PM +1000, Anton Blanchard wrote: > > > > From: Peter Bergner > > > > Just to be safe, I verified the patch still applies to Linus' tree. > > There was a little fuzz, so I'm attaching a new patch that applies > > cleanly with no fuzz. > > Which module needs this relocation? scanlog and rtas_flash. Everything that casts a pointer to u32 to make RTAS happy. 
-- short story of a lazy sysadmin: alias appserv=wotan From bergner at vnet.ibm.com Wed Oct 12 00:55:00 2005 From: bergner at vnet.ibm.com (Peter Bergner) Date: Tue, 11 Oct 2005 09:55:00 -0500 Subject: [PATCH] ppc64: Add R_PPC64_TOC16 module reloc In-Reply-To: <20051011144520.GA11873@lst.de> References: <200510061523.j96FNXZm009893@d01av01.pok.ibm.com> <1128620515.4547.16.camel@localhost.localdomain> <20051007031801.GN26007@bubble.grove.modra.org> <1129000992.4557.13.camel@localhost.localdomain> <20051011121854.GB5174@krispykreme> <20051011143529.GB11440@lst.de> <20051011144126.GC5174@krispykreme> <20051011144520.GA11873@lst.de> Message-ID: <1129042500.4213.8.camel@localhost.localdomain> On Tue, 2005-10-11 at 16:45 +0200, Christoph Hellwig wrote: > On Wed, Oct 12, 2005 at 12:41:26AM +1000, Anton Blanchard wrote: > > > > > Which module needs this relocation? > > > > Why do you ask? Its a valid relocation and it looks like newer gcc > > versions are starting to use it. > > I remember cases where people tried to add odd relocation for ia64 > because their modules were doign really silly things. If this is > one that recent gcc generates it's fine obviously. This is a valid gcc generated relocation. The simplified test case came from rtas_flash.ko. Olaf also saw another use from scanlog.ko. 
Peter bergner at vervain:~/olaf> cat foo.c extern char rtas_data_buf[4096]; extern int rtas_call(unsigned int); void validate_flash(void) { rtas_call((unsigned int) (unsigned long)rtas_data_buf); } bergner at vervain:~/olaf> gcc -m64 -O1 -c foo.c bergner at vervain:~/olaf> objdump -dr foo.o foo.o: file format elf64-powerpc Disassembly of section .text: 0000000000000000 <.validate_flash>: 0: 7c 08 02 a6 mflr r0 4: f8 01 00 10 std r0,16(r1) 8: f8 21 ff 91 stdu r1,-112(r1) c: 80 62 00 04 lwz r3,4(r2) e: R_PPC64_TOC16 .toc+0x4 10: 48 00 00 01 bl 10 <.validate_flash+0x10> 10: R_PPC64_REL24 .rtas_call 14: 60 00 00 00 nop 18: 38 21 00 70 addi r1,r1,112 1c: e8 01 00 10 ld r0,16(r1) 20: 7c 08 03 a6 mtlr r0 24: 4e 80 00 20 blr 28: 00 00 00 00 .long 0x0 2c: 00 00 00 01 .long 0x1 30: 80 00 00 00 lwz r0,0(r0) From grave at ipno.in2p3.fr Wed Oct 12 01:57:32 2005 From: grave at ipno.in2p3.fr (Xavier Grave) Date: Tue, 11 Oct 2005 17:57:32 +0200 Subject: Infiniband & chelsio with linux 2.6.14-rc3 Message-ID: <1129046252.14198.22.camel@ipnnarval> Hi, I have compiled a 2.6.14-rc3 kernel for an openpower 710. I have for test purpose a chelsio 10 Gb ethernet board, and also a dual 10Gb infiniband interface with the mellanox chipset. When hotplug load the driver for infiniband I get the following message : ib_mthca: Mellanox InfiniBand HCA driver v0.06 (June 23, 2005) ib_mthca: Initializing 0001:d1:00.0 ib_mthca 0001:d1:00.0: FW reports that HCA-attached memory is not hidden; does g ib_mthca 0001:d1:00.0: INIT_HCA command failed, aborting. ib_mthca: probe of 0001:d1:00.0 failed with error -16 After this I load the ip over infiniband driver and no IP interface comes up... For the chelsio driver : i modprobe cxbg and no more IP interface comes... Does somebody already test this king of hardware with a ppc machine ? Any hint welcome, thanks in advance, xavier -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: This is a digitally signed message part Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051011/9f014712/attachment.pgp From arnd at arndb.de Wed Oct 12 03:18:13 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Tue, 11 Oct 2005 19:18:13 +0200 Subject: libspe for 2.6.13 spufs In-Reply-To: <200509301628.49277.arnd@arndb.de> References: <20050929220009.146368000@localhost> <200509301628.49277.arnd@arndb.de> Message-ID: <200510111918.13991.arnd@arndb.de> On Freedag 30 September 2005 16:28, Arnd Bergmann wrote: > As a companion to the spufs release posted yesterday, this is the > user space libspe library from Dirk Herrendoerfer, together with > the extracted interface documentation. This is a small update to the libspe version posted previously, the changes are: - Add a proper .spec file for building rpms (the one used on www.bsc.es was rather broken) - Include the doxygen source for building the documentation. - rename spe.h to libspe.h in order to prevent conflicts with the file of the same name provided by some gcc versions. - fix the usage of spe_program_handle_t. 
Arnd <>< diff -urN -x CVS libspe-0.9/Doxyfile libspe-0.9.1/Doxyfile --- libspe-0.9/Doxyfile 1970-01-01 01:00:00.000000000 +0100 +++ libspe-0.9.1/Doxyfile 2005-09-29 19:35:03.000000000 +0200 @@ -0,0 +1,267 @@ +# Doxyfile 1.4.1 + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +PROJECT_NAME = libspe +PROJECT_NUMBER = V1.0 +OUTPUT_DIRECTORY = doc/ +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +USE_WINDOWS_ENCODING = NO +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = ./ +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +DETAILS_AT_TOP = NO +INHERIT_DOCS = YES +DISTRIBUTE_GROUP_DOC = NO +TAB_SIZE = 8 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = YES +OPTIMIZE_OUTPUT_JAVA = NO +SUBGROUPING = YES +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +HIDE_UNDOC_MEMBERS = YES +HIDE_UNDOC_CLASSES = YES +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = NO +GENERATE_TESTLIST = NO +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = NO +SHOW_DIRECTORIES = NO +FILE_VERSION_FILTER = 
+#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = ./ +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.idl \ + *.odl \ + *.cs \ + *.php \ + *.php3 \ + *.inc \ + *.m \ + *.mm \ + *.dox \ + *.C \ + *.CC \ + *.C++ \ + *.II \ + *.I++ \ + *.H \ + *.HH \ + *.H++ \ + *.CS \ + *.PHP \ + *.PHP3 \ + *.M \ + *.MM +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +VERBATIM_HEADERS = NO +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = NO +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# configuration options related to the HTML output 
+#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +TREEVIEW_WIDTH = 250 +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = YES +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4wide +EXTRA_PACKAGES = +LATEX_HEADER = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = YES +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_SCHEMA = +XML_DTD = +XML_PROGRAMLISTING = YES +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output 
+#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = YES +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = +MAX_DOT_GRAPH_WIDTH = 1024 +MAX_DOT_GRAPH_HEIGHT = 1024 +MAX_DOT_GRAPH_DEPTH = 1000 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine 
+#--------------------------------------------------------------------------- +SEARCHENGINE = NO diff -urN -x CVS libspe-0.9/Makefile libspe-0.9.1/Makefile --- libspe-0.9/Makefile 2005-09-30 15:32:18.000000000 +0200 +++ libspe-0.9.1/Makefile 2005-10-11 16:08:31.000000000 +0200 @@ -73,18 +73,18 @@ sed -e "s%@BINDIR@%${bindir}%g" <$< >$@ chmod +x $@ -install: spe.h libspe.so elfspe elfspe-register +install: mfc.h libspe.h libspe.so elfspe elfspe-register $(INSTALL_DATA) mfc.h \ $(DESTDIR)$(includedir)/mfc.h - $(INSTALL_DATA) spe.h \ - $(DESTDIR)$(includedir)/spe.h + $(INSTALL_DATA) libspe.h \ + $(DESTDIR)$(includedir)/libspe.h $(INSTALL_PROGRAM) libspe.so \ $(DESTDIR)$(libdir)/$(libspe_SO) $(INSTALL_PROGRAM) libspe.a \ $(DESTDIR)$(libdir)/libspe.a - ln -sf $(DESTDIR)$(libdir)/$(libspe_SO) \ + ln -sf $(libspe_SO) \ $(DESTDIR)$(libdir)/$(libspe_SONAME) - ln -sf $(DESTDIR)$(libdir)/$(libspe_SO) \ + ln -sf $(libspe_SO) \ $(DESTDIR)$(libdir)/libspe.so $(INSTALL_PROGRAM) elfspe \ $(DESTDIR)$(bindir)/elfspe @@ -98,5 +98,5 @@ $(CTAGS) -R . clean: - rm -f libspe.so $(libspe_OBJS) *~ tags elfspe elfspe-register + rm -f libspe.a libspe.so $(libspe_OBJS) *~ tags elfspe elfspe-register make -C tests clean diff -urN -x CVS libspe-0.9/elf_loader.c libspe-0.9.1/elf_loader.c --- libspe-0.9/elf_loader.c 2005-09-30 15:30:18.000000000 +0200 +++ libspe-0.9.1/elf_loader.c 2005-10-11 16:08:31.000000000 +0200 @@ -26,8 +26,9 @@ #include #include -#include "elf_loader.h" +#include "libspe.h" #include "spe.h" +#include "elf_loader.h" #define __PRINTF(fmt, args...) 
{ fprintf(stderr,fmt , ## args); } #ifdef DEBUG @@ -56,7 +57,7 @@ int num_load_seg = 0; void *elf_start; - if (handle->toe_struct) + if (handle->toe_shadow) { printf("CSOF type image loading not supported."); errno = EINVAL; @@ -64,7 +65,7 @@ } - elf_start = handle->start; + elf_start = handle->elf_image; DEBUG_PRINTF ("load_spe_elf(%p, %p)\n", elf_start, ld_buffer); ehdr = (Elf32_Ehdr *) elf_start; diff -urN -x CVS libspe-0.9/elfspe.c libspe-0.9.1/elfspe.c --- libspe-0.9/elfspe.c 2005-09-30 15:30:18.000000000 +0200 +++ libspe-0.9.1/elfspe.c 2005-10-11 16:08:31.000000000 +0200 @@ -28,7 +28,8 @@ #include #include -#include +#include "libspe.h" +#include "spe.h" static void *spe = NULL; @@ -37,27 +38,6 @@ #define LS_ADDR_MASK (LS_SIZE - 1) #endif /* LS_SIZE */ -static void * -mmap_file (const char *filename, off_t size, int rw) -{ - int flags = rw? O_RDWR : O_RDONLY; - int prot = rw? (PROT_READ | PROT_WRITE) : PROT_READ; - size_t ps = getpagesize (); - - void *addr; - int fd; - - if ((fd = open (filename, flags)) < 0) - return NULL; - - addr = mmap (NULL, (size + ps - 1) & ~(ps - 1), prot, MAP_SHARED, fd, 0); - if (addr == MAP_FAILED) - return NULL; - - close (fd); - - return addr; -} static void handler (int signr ) __attribute__ ((noreturn)); static void @@ -157,10 +137,9 @@ int main (int argc, char **argv) { + spe_program_handle_t *handle; spe_gid_t gid; struct spe_regs params; - struct stat st; - void *file; int rc; signal (SIGSEGV, handler); @@ -177,14 +156,8 @@ exit (1); } - if (stat (argv[1], &st) < 0) - { - perror (argv[1]); - exit (1); - } - - file = mmap_file (argv[1], st.st_size, 0); - if (!file) + handle = spe_open_image (argv[1]); + if (!handle) { perror (argv[1]); exit (1); @@ -203,7 +176,7 @@ exit (1); } - spe = spe_setup (gid, file, ¶ms, NULL, SPE_USER_REGS); + spe = spe_setup (gid, handle, ¶ms, NULL, SPE_USER_REGS); if (!spe) { perror ("spe_setup"); @@ -213,6 +186,8 @@ rc = do_spe_run (spe); spe_cleanup (spe); + spe_close_image (handle); + return rc 
& 0xff; } diff -urN -x CVS libspe-0.9/libspe.h libspe-0.9.1/libspe.h --- libspe-0.9/libspe.h 1970-01-01 01:00:00.000000000 +0100 +++ libspe-0.9.1/libspe.h 2005-10-11 10:50:09.000000000 +0200 @@ -0,0 +1,74 @@ +/* + * libspe - A wrapper library to adapt the JSRE SPE usage model to SPUFS + * Copyright (C) 2005 IBM Corp. + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, + * or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _libspe_h_ +#define _libspe_h_ + +typedef void *speid_t; +typedef void *spe_gid_t; + +/* spe_program_handle per CESOF spec + */ +typedef struct spe_program_handle { + unsigned int handle_size; + void *elf_image; + void *toe_shadow; +} spe_program_handle_t; + +/* Flags for spe_create_thread + */ + +#define SPE_PTRACE_ME 0x0001 /* Allow debugger to attach */ +#define SPE_USER_REGS 0x0002 /* 128b user data for r3-5. */ + +#define SPE_CFG_SIGNOTIFY1_OR 0x00000010 +#define SPE_CFG_SIGNOTIFY2_OR 0x00000020 + +/* Flags for + * */ +#define SPE_MAX_THREADS_PER_GROUP 16 /* Sanity value - will be removed in the future */ + +/* APIs for SPE threads. 
+ */ + +extern spe_gid_t spe_create_group (int policy, int priority, int spe_events); +extern int spe_group_max (spe_gid_t spe_gid); + +extern speid_t spe_create_thread (spe_gid_t gid, spe_program_handle_t *handle, + void *argp, void *envp, + unsigned long mask, int flags); + +extern int spe_wait (speid_t speid, int *status, int options); + +extern int spe_kill (speid_t speid, int sig); +extern int spe_get_priority(spe_gid_t gid); +extern int spe_set_priority(spe_gid_t gid, int priority); +extern int spe_get_policy(spe_gid_t gid); +extern spe_gid_t spe_get_group (speid_t speid); +extern int spe_group_defaults(int policy, int priority, int spe_events); + +extern void *spe_get_ls(speid_t speid); +extern void *spe_get_ps(speid_t speid); + +/* APIs for loading SPE images + */ +extern spe_program_handle_t *spe_open_image(const char *filename); +extern int spe_close_image(spe_program_handle_t *program_handle); + +#endif diff -urN -x CVS libspe-0.9/libspe.spec libspe-0.9.1/libspe.spec --- libspe-0.9/libspe.spec 1970-01-01 01:00:00.000000000 +0100 +++ libspe-0.9.1/libspe.spec 2005-10-11 19:09:47.000000000 +0200 @@ -0,0 +1,48 @@ +Name: libspe +Version: 0.9.1 +Release: 1 +License: LGPL +Group: System Environment/Base +Source: %{name}-%{version}.tar.gz +Buildroot: %{_tmppath}/libspe +Summary: SPE Runtime Management Library + +%package -n libspe-devel +Summary: SPE Runtime Management Library +Group: Development/Libraries + +%description +SPE Runtime Management Library for the +Cell Broadband Engine Architecture. + +%description -n libspe-devel +Header and object files for SPE Runtime +Management Library. 
+ +%prep + +%setup + +%build +make prefix=%{_prefix} libdir=%{_libdir} + +%install +rm -rf $RPM_BUILD_ROOT + +mkdir -p $RPM_BUILD_ROOT/%{_bindir} +mkdir -p $RPM_BUILD_ROOT/%{_libdir} +mkdir -p $RPM_BUILD_ROOT/%{_includedir} +%makeinstall + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root) +%{_bindir} +%{_libdir}/*.so.* + +%files -n libspe-devel +%{_libdir}/*.so +%{_libdir}/*.a +%{_includedir} diff -urN -x CVS libspe-0.9/main.dox libspe-0.9.1/main.dox --- libspe-0.9/main.dox 1970-01-01 01:00:00.000000000 +0100 +++ libspe-0.9.1/main.dox 2005-09-30 15:57:01.000000000 +0200 @@ -0,0 +1,22 @@ +/** + * \mainpage SPE Managemenent Library (libspe) + * \author D.Herrendoerfer (d.herrendoerfer at de.ibm.com) + * \section About + * The SPE Management Library consists of PPE interfaces used to manage SPEs. + * These interfaces are similar to those used to manage regular threads and + * processes in a POSIX compliant operating system. + * + * Library name: + * + * libspe + * + * Header file: + * + * + * + * + * + * \section Misc + * This library presents work-in-progress. Some functions are provided for + * completeness of the programming model only. + */ diff -urN -x CVS libspe-0.9/spe.c libspe-0.9.1/spe.c --- libspe-0.9/spe.c 2005-09-30 15:39:19.000000000 +0200 +++ libspe-0.9.1/spe.c 2005-10-11 16:08:31.000000000 +0200 @@ -33,9 +33,10 @@ #include #include -#include -#include -#include +#include "libspe.h" +#include "mfc.h" +#include "spe.h" +#include "elf_loader.h" #include #define __PRINTF(fmt, args...) { fprintf(stderr,fmt , ## args); } @@ -482,6 +483,11 @@ /* APIs for loading SPE-ELF images & files. */ +struct image_handle { + spe_program_handle_t speh; + unsigned int map_size; +}; + /** * \brief spe_open_image - maps spe executable int system memory. 
* @@ -492,33 +498,48 @@ * \returns handle of the program * \retval NULL failure, see \c errno for more info */ -spe_program_handle_t +spe_program_handle_t *spe_open_image(const char *filename) { - spe_program_handle_t *ret; - int binfd, f_stat; + /* allocate an extra integer in the spe handle to keep the mapped size information */ + struct image_handle *ret; + int binfd = -1, f_stat; struct stat statbuf; - - ret=malloc(sizeof(spe_program_handle_t)); + size_t ps = getpagesize (); + + ret = malloc(sizeof(struct image_handle)); if (!ret) return NULL; + ret->speh.handle_size = sizeof(spe_program_handle_t); + ret->speh.toe_shadow = NULL; + binfd = open(filename, O_RDONLY); if (binfd < 0) - return NULL; - + goto ret_err; + f_stat = fstat(binfd, &statbuf); if (f_stat < 0) - return NULL; - - ret->toe_struct = NULL; - ret->prog_size = statbuf.st_size; + goto ret_err; - ret->start = mmap(NULL, statbuf.st_size, PROT_READ , MAP_SHARED, binfd, 0); - if (ret->start == MAP_FAILED) - return NULL; + /* now store the size at the extra allocated space */ + ret->map_size = (statbuf.st_size + ps - 1) & ~(ps - 1); - return ret; + ret->speh.elf_image = mmap(NULL, ret->map_size, PROT_READ, MAP_SHARED, binfd, 0); + if (ret->speh.elf_image == MAP_FAILED) + goto ret_err; + + /* ok */ + close(binfd); + return (spe_program_handle_t *)ret; + + /* err & cleanup */ +ret_err: + if (binfd >= 0) + close(binfd); + + free(ret); + return NULL; } /** @@ -530,15 +551,15 @@ * \retval zero on success * \retval -1 on failure, and sets \c errno. 
*/ -int -spe_close_image(spe_program_handle_t *program_handle) +int +spe_close_image(spe_program_handle_t *handle) { - void *start = program_handle->start; - unsigned int prog_size = program_handle->prog_size; - - free(program_handle); + void *elf_start = handle->elf_image; + unsigned int prog_size = ((struct image_handle*)handle)->map_size; + + free(handle); - return munmap(start, prog_size); + return munmap(elf_start, prog_size); } static void * @@ -704,6 +725,7 @@ * @param speid spe store structure * * \returns address of local store. + * \retval NULL on failure. */ void *spe_get_ls (speid_t speid) diff -urN -x CVS libspe-0.9/spe.h libspe-0.9.1/spe.h --- libspe-0.9/spe.h 2005-09-30 15:32:18.000000000 +0200 +++ libspe-0.9.1/spe.h 2005-10-11 16:08:31.000000000 +0200 @@ -20,28 +20,6 @@ #ifndef _spe_h_ #define _spe_h_ -typedef void *speid_t; -typedef void *spe_gid_t; - -typedef struct { - void *start; - void *toe_struct; - unsigned int prog_size; -} spe_program_handle_t; - -/* Flags for spe_create_thread - */ - -#define SPE_PTRACE_ME 0x0001 /* Allow debugger to attach */ -#define SPE_USER_REGS 0x0002 /* 128b user data for r3-5. */ - -#define SPE_CFG_SIGNOTIFY1_OR 0x00000010 -#define SPE_CFG_SIGNOTIFY2_OR 0x00000020 - -/* Flags for - * */ -#define SPE_MAX_THREADS_PER_GROUP 16 /* Sanity value - will be removed in the future */ - /* Low-level SPE execution API. */ extern void *spe_gid_setup(int policy, int priority, int use_events); @@ -50,31 +28,4 @@ extern int do_spe_run (void *spe); extern unsigned int set_npc (void *spe, unsigned int npc); -/* APIs for SPE threads. 
- */ - -extern spe_gid_t spe_create_group (int policy, int priority, int spe_events); -extern int spe_group_max (spe_gid_t spe_gid); - -extern speid_t spe_create_thread (spe_gid_t gid, spe_program_handle_t *handle, - void *argp, void *envp, - unsigned long mask, int flags); - -extern int spe_wait (speid_t speid, int *status, int options); - -extern int spe_kill (speid_t speid, int sig); -extern int spe_get_priority(spe_gid_t gid); -extern int spe_set_priority(spe_gid_t gid, int priority); -extern int spe_get_policy(spe_gid_t gid); -extern spe_gid_t spe_get_group (speid_t speid); -extern int spe_group_defaults(int policy, int priority, int spe_events); - -extern void *spe_get_ls(speid_t speid); -extern void *spe_get_ps(speid_t speid); - -/* APIs for loading SPE images - */ -extern spe_program_handle_t *spe_open_image(const char *filename); -extern int spe_close_image(spe_program_handle_t *program_handle); - #endif diff -urN -x CVS libspe-0.9/tests/start-stop/ppe-start-stop.c libspe-0.9.1/tests/start-stop/ppe-start-stop.c --- libspe-0.9/tests/start-stop/ppe-start-stop.c 2005-09-28 14:31:01.000000000 +0200 +++ libspe-0.9.1/tests/start-stop/ppe-start-stop.c 2005-10-11 16:08:31.000000000 +0200 @@ -21,7 +21,7 @@ #include #include -#include +#include int main(int argc, char* argv[]) From arnd at arndb.de Wed Oct 12 04:54:51 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Tue, 11 Oct 2005 20:54:51 +0200 Subject: merge progress In-Reply-To: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> References: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> Message-ID: <200510112054.52348.arnd@arndb.de> On Dinsdag 11 Oktober 2005 14:26, Paul Mackerras wrote: > With ARCH=ppc64, pSeries, iSeries, powermac and maple all build, and > pSeries and iSeries have been boot-tested. ?I think I have broken > BPA/cell because it was using pSeries_smp.c (I think we need to make > cell have its own bpa_smp.c or whatever anyway). 
I will fix that up when I move BPA over to arch/powerpc/platforms/cell. Do you plan to have the whole merge tree included in 2.6.15? Otherwise I'd rather postpone doing the structural changes on BPA until November, because we're currently freezing our spufs development to 2.6.14 and I'd like to have only one current set of big patches. Arnd <>< From linas at austin.ibm.com Wed Oct 12 09:04:09 2005 From: linas at austin.ibm.com (linas) Date: Tue, 11 Oct 2005 18:04:09 -0500 Subject: [PATCH 20/22] PCI Error Recovery: e100 network device driver In-Reply-To: <20051011001056.GA16634@kroah.com> References: <20051006232032.GA29826@austin.ibm.com> <20051006235729.GU29826@austin.ibm.com> <20051011001056.GA16634@kroah.com> Message-ID: <20051011230409.GS29826@austin.ibm.com> On Mon, Oct 10, 2005 at 05:10:56PM -0700, Greg KH was heard to remark: > On Thu, Oct 06, 2005 at 06:57:29PM -0500, linas wrote: > > +config E100_EEH_RECOVERY > > + bool "Enable PCI bus error recovery" > > + depends on E100 && PPC_PSERIES > > + help > > + If you say Y here, the driver will be able to recover from > > + PCI bus errors on many PowerPC platforms. IBM pSeries users > > + should answer Y. > > Why make a config option for this at all? Who would turn it off? I wanted to have this turned off for anyone who didn't have hardware capable of supporting this, and didn't really think about how to hide this from the menu. I guess it's best to just plain hide this, keep the menus from getting cluttered. > > @@ -2661,6 +2731,9 @@ > > .resume = e100_resume, > > #endif > > .shutdown = e100_shutdown, > > +#ifdef CONFIG_E100_EEH_RECOVERY > > + .err_handler = &e100_err_handler, > > +#endif /* CONFIG_E100_EEH_RECOVERY */ > > No, don't put #ifdefs in the middle of a structure, remember we made > err_handler always present in the .h file for a reason... OK. I'll send revised patches tomorrow, hiding the config, and removing the ifdef.
--linas From benh at kernel.crashing.org Wed Oct 12 10:20:36 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 12 Oct 2005 10:20:36 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <20051011170358.2684347a.akpm@osdl.org> References: <1128404215.31063.32.camel@gaston> <20051011170358.2684347a.akpm@osdl.org> Message-ID: <1129076436.17365.243.camel@gaston> On Tue, 2005-10-11 at 17:03 -0700, Andrew Morton wrote: > This will leave wf_lock held on error. Oops. Will fix along with the other ones you spotted asap. Ben. From benh at kernel.crashing.org Wed Oct 12 10:23:07 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 12 Oct 2005 10:23:07 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <20051011171052.3e1d00b6.akpm@osdl.org> References: <1128404215.31063.32.camel@gaston> <20051011171052.3e1d00b6.akpm@osdl.org> Message-ID: <1129076588.17365.245.camel@gaston> On Tue, 2005-10-11 at 17:10 -0700, Andrew Morton wrote: > > + > > + /* Alloc & initialize state */ > > + wf_smu_sys_fans = kmalloc(sizeof(struct wf_smu_sys_fans_state), > > + GFP_KERNEL); > > + if (wf_smu_sys_fans == NULL) { > > + printk(KERN_WARNING "windfarm: Memory allocation error" > > + " max fan speed\n"); > > + goto fail; > > + } > > + wf_smu_sys_fans->ticks = 1; > > whitespace broke. How so ? You mean the GFP_KERNEL a little bit too much on the right ? :) Ben. From benh at kernel.crashing.org Wed Oct 12 10:23:36 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 12 Oct 2005 10:23:36 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <20051011171315.2fe087e7.akpm@osdl.org> References: <1128404215.31063.32.camel@gaston> <20051011171315.2fe087e7.akpm@osdl.org> Message-ID: <1129076616.17365.247.camel@gaston> > \ > > Someone needs a tab key ;) Or an emacs that stops turning tabs into spaces :) Ben. 
From benh at kernel.crashing.org Wed Oct 12 10:24:51 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 12 Oct 2005 10:24:51 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <20051011171315.2fe087e7.akpm@osdl.org> References: <1128404215.31063.32.camel@gaston> <20051011171315.2fe087e7.akpm@osdl.org> Message-ID: <1129076691.17365.250.camel@gaston> On Tue, 2005-10-11 at 17:13 -0700, Andrew Morton wrote: > Benjamin Herrenschmidt wrote: > > > > +#define BUILD_SHOW_FUNC_FIX(name, data) \ > > +static ssize_t show_##name(struct device *dev, \ > > + struct device_attribute *attr, \ > > + char *buf) \ > > +{ \ > > + ssize_t r; \ > > + s32 val = 0; \ > > + data->ops->get_value(data, &val); \ > > + r = sprintf(buf, "%d.%03d", FIX32TOPRINT(val)); \ > > + return r; \ > > +} \ > > +static DEVICE_ATTR(name,S_IRUGO,show_##name, NULL); > > + > > + > > +#define BUILD_SHOW_FUNC_INT(name, data) \ > > +static ssize_t show_##name(struct device *dev, \ > > + struct device_attribute *attr, \ > > + char *buf) \ > > +{ \ > > + s32 val = 0; \ > > + data->ops->get_value(data, &val); \ > > + return sprintf(buf, "%d", val); \ > > +} \ > > Someone needs a tab key ;) Ahh no, the problem here is that stupid emacs is very bad with tab and multi-line macros and just turns the whole thing into shit, so I used spaces. Sorry, I'm not an emacs guru and don't know how to work around that ... Ben. 
From akpm at osdl.org Wed Oct 12 10:03:58 2005 From: akpm at osdl.org (Andrew Morton) Date: Tue, 11 Oct 2005 17:03:58 -0700 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <1128404215.31063.32.camel@gaston> References: <1128404215.31063.32.camel@gaston> Message-ID: <20051011170358.2684347a.akpm@osdl.org> Benjamin Herrenschmidt wrote: > > +int wf_register_client(struct notifier_block *nb) > +{ > + int rc; > + struct wf_control *ct; > + struct wf_sensor *sr; > + > + down(&wf_lock); > + rc = notifier_chain_register(&wf_client_list, nb); > + if (rc != 0) > + goto bail; > + wf_client_count++; > + list_for_each_entry(ct, &wf_controls, link) > + wf_notify(WF_EVENT_NEW_CONTROL, ct); > + list_for_each_entry(sr, &wf_sensors, link) > + wf_notify(WF_EVENT_NEW_SENSOR, sr); > + if (wf_client_count == 1) > + wf_start_thread(); > + up(&wf_lock); > + bail: > + return rc; > +} This will leave wf_lock held on error. From akpm at osdl.org Wed Oct 12 10:10:52 2005 From: akpm at osdl.org (Andrew Morton) Date: Tue, 11 Oct 2005 17:10:52 -0700 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <1128404215.31063.32.camel@gaston> References: <1128404215.31063.32.camel@gaston> Message-ID: <20051011171052.3e1d00b6.akpm@osdl.org> Benjamin Herrenschmidt wrote: > > + /* First, locate the params for this model */ > + for (i = 0; i < WF_SMU_SYS_FANS_NUM_CONFIGS; i++) > + if (wf_smu_sys_all_params[i].model_id == mach_model) { The loop control is a bit scary here. If you were to do #define WF_SMU_SYS_FANS_NUM_CONFIGS ARRAY_SIZE(wf_smu_sys_all_params) then we wouldn't need duplicated into in two places. 
> + param = &wf_smu_sys_all_params[i]; > + break; > + } > + /* No params found, put fans to max */ > + if (param == NULL) { > + printk(KERN_WARNING "windfarm: System fan config not found " > + "for this machine model, max fan speed\n"); > + goto fail; > + } > + > + /* Alloc & initialize state */ > + wf_smu_sys_fans = kmalloc(sizeof(struct wf_smu_sys_fans_state), > + GFP_KERNEL); > + if (wf_smu_sys_fans == NULL) { > + printk(KERN_WARNING "windfarm: Memory allocation error" > + " max fan speed\n"); > + goto fail; > + } > + wf_smu_sys_fans->ticks = 1; whitespace broke. From akpm at osdl.org Wed Oct 12 10:13:15 2005 From: akpm at osdl.org (Andrew Morton) Date: Tue, 11 Oct 2005 17:13:15 -0700 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <1128404215.31063.32.camel@gaston> References: <1128404215.31063.32.camel@gaston> Message-ID: <20051011171315.2fe087e7.akpm@osdl.org> Benjamin Herrenschmidt wrote: > > +#define BUILD_SHOW_FUNC_FIX(name, data) \ > +static ssize_t show_##name(struct device *dev, \ > + struct device_attribute *attr, \ > + char *buf) \ > +{ \ > + ssize_t r; \ > + s32 val = 0; \ > + data->ops->get_value(data, &val); \ > + r = sprintf(buf, "%d.%03d", FIX32TOPRINT(val)); \ > + return r; \ > +} \ > +static DEVICE_ATTR(name,S_IRUGO,show_##name, NULL); > + > + > +#define BUILD_SHOW_FUNC_INT(name, data) \ > +static ssize_t show_##name(struct device *dev, \ > + struct device_attribute *attr, \ > + char *buf) \ > +{ \ > + s32 val = 0; \ > + data->ops->get_value(data, &val); \ > + return sprintf(buf, "%d", val); \ > +} \ Someone needs a tab key ;) From paulus at samba.org Wed Oct 12 09:41:13 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 12 Oct 2005 09:41:13 +1000 Subject: [PATCH 20/22] PCI Error Recovery: e100 network device driver In-Reply-To: <20051011230409.GS29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> <20051006235729.GU29826@austin.ibm.com> <20051011001056.GA16634@kroah.com> 
<20051011230409.GS29826@austin.ibm.com> Message-ID: <17228.19865.982656.956187@cargo.ozlabs.ibm.com> linas writes: > On Mon, Oct 10, 2005 at 05:10:56PM -0700, Greg KH was heard to remark: > > On Thu, Oct 06, 2005 at 06:57:29PM -0500, linas wrote: > > > +config E100_EEH_RECOVERY > > > + bool "Enable PCI bus error recovery" > > > + depends on E100 && PPC_PSERIES > > > + help > > > + If you say Y here, the driver will be able to recover from > > > + PCI bus errors on many PowerPC platforms. IBM pSeries users > > > + should answer Y. > > > > Why make a config option for this at all? Who would turn it off? > > I wanted to have this turned off for anyone who didn't have > hardware capable of supporting this, and didn't really think > about how to hide this from the menu. I guess its best to > just plain hide this, keep the menus from getting cluttered. I would think we could have one config option to enable PCI bus error recovery generally, and have the code in the drivers enabled by that. I don't think we need an individual config option for each driver to enable PCI error recovery. Regards, Paul. From benh at kernel.crashing.org Wed Oct 12 17:40:18 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 12 Oct 2005 17:40:18 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <434CBC8E.40201@tremplin-utc.net> References: <1128404215.31063.32.camel@gaston> <20051011171052.3e1d00b6.akpm@osdl.org> <1129076588.17365.245.camel@gaston> <434CBC8E.40201@tremplin-utc.net> Message-ID: <1129102818.11300.4.camel@gaston> > I think Andrew was talking about the spaces instead of tabs on the last > line. Yah, found them, send a fixed patch already. Ben. 
From paulus at samba.org Wed Oct 12 19:49:52 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 12 Oct 2005 19:49:52 +1000 Subject: [PATCH 15/22] ppc64: PCI Error Recovery: PPC64 core recovery routines In-Reply-To: <20051006234742.GP29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> <20051006234742.GP29826@austin.ibm.com> Message-ID: <17228.56384.469138.175618@cargo.ozlabs.ibm.com> Linas writes: > + /* We might not have a pci device, if it was a config space read > + * that failed. Find the pci device now. */ > + if (!dev) { > + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { > + if (pci_device_to_OF_node(dev) == event->dn) > + break; > + } > + } Couldn't we just use PCI_DN(event->dn)->pcidev here? Is there some reason why this would not work in some circumstances? It would be nice to avoid this linear search. Paul. From Eric.Piel at tremplin-utc.net Wed Oct 12 17:34:38 2005 From: Eric.Piel at tremplin-utc.net (Eric Piel) Date: Wed, 12 Oct 2005 09:34:38 +0200 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <1129076588.17365.245.camel@gaston> References: <1128404215.31063.32.camel@gaston> <20051011171052.3e1d00b6.akpm@osdl.org> <1129076588.17365.245.camel@gaston> Message-ID: <434CBC8E.40201@tremplin-utc.net> 12.10.2005 02:23, Benjamin Herrenschmidt wrote/a écrit: > On Tue, 2005-10-11 at 17:10 -0700, Andrew Morton wrote: > > >>>+ >>>+ /* Alloc & initialize state */ >>>+ wf_smu_sys_fans = kmalloc(sizeof(struct wf_smu_sys_fans_state), >>>+ GFP_KERNEL); >>>+ if (wf_smu_sys_fans == NULL) { >>>+ printk(KERN_WARNING "windfarm: Memory allocation error" >>>+ " max fan speed\n"); >>>+ goto fail; >>>+ } >>>+ wf_smu_sys_fans->ticks = 1; >> >>whitespace broke. > > > How so ? You mean the GFP_KERNEL a little bit too much on the right ? :) > I think Andrew was talking about the spaces instead of tabs on the last line.
Eric From Eric.Piel at tremplin-utc.net Wed Oct 12 17:34:38 2005 From: Eric.Piel at tremplin-utc.net (Eric Piel) Date: Wed, 12 Oct 2005 09:34:38 +0200 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <1129076588.17365.245.camel@gaston> References: <1128404215.31063.32.camel@gaston> <20051011171052.3e1d00b6.akpm@osdl.org> <1129076588.17365.245.camel@gaston> Message-ID: <434CBC8E.40201@tremplin-utc.net> 12.10.2005 02:23, Benjamin Herrenschmidt wrote/a écrit: > On Tue, 2005-10-11 at 17:10 -0700, Andrew Morton wrote: > > >>>+ >>>+ /* Alloc & initialize state */ >>>+ wf_smu_sys_fans = kmalloc(sizeof(struct wf_smu_sys_fans_state), >>>+ GFP_KERNEL); >>>+ if (wf_smu_sys_fans == NULL) { >>>+ printk(KERN_WARNING "windfarm: Memory allocation error" >>>+ " max fan speed\n"); >>>+ goto fail; >>>+ } >>>+ wf_smu_sys_fans->ticks = 1; >> >>whitespace broke. > > > How so ? You mean the GFP_KERNEL a little bit too much on the right ? :) > I think Andrew was talking about the spaces instead of tabs on the last line. Eric - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From benh at kernel.crashing.org Wed Oct 12 17:40:18 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 12 Oct 2005 17:40:18 +1000 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <434CBC8E.40201@tremplin-utc.net> References: <1128404215.31063.32.camel@gaston> <20051011171052.3e1d00b6.akpm@osdl.org> <1129076588.17365.245.camel@gaston> <434CBC8E.40201@tremplin-utc.net> Message-ID: <1129102818.11300.4.camel@gaston> > I think Andrew was talking about the spaces instead of tabs on the last > line. Yah, found them, send a fixed patch already. Ben.
- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From arnd at arndb.de Thu Oct 13 04:03:58 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Wed, 12 Oct 2005 20:03:58 +0200 Subject: ppc64/cell: local TLB flush with active SPEs Message-ID: <200510122003.59701.arnd@arndb.de> I'm looking for a clean solution to detect the need for global TLB flush when an mm_struct is only used on one logical PowerPC CPU (PPE) and also mapped with the memory flow controller of an SPE on the Cell CPU. Normally, we set bits in mm_struct:cpu_vm_mask for each CPU that accesses the mm and then do global flushes instead of local flushes when CPUs other than the currently running one are marked as used in that mask. When an SPE does DMA to that mm, it also gets local TLB entries that are only flushed with a global tlbie broadcast. The current hack is to always set cpu_vm_mask to all bits set when we map an mm into an SPE to ensure receiving the broadcast, but that is obviously not how it's meant to be used. In particular, it doesn't work in UP configurations where the cpumask contains only one bit. One solution that might be better could be to introduce a new special flag in addition to cpu_vm_mask for this purpose. We already have a bit field in mm_struct for dumpable, so adding another bit there at least does not waste space for other platforms, and it's likely to be in the same cache line as cpu_vm_mask. However, I'm reluctant to add more bit fields to such a prominent place, because it might encourage other people to add more bit fields or think that they are accepted coding practice. Another idea would be to add a new field to mm_context_t, so it stays in the architecture specific code. Again, adding an int here does not waste space because there is currently padding in that place on ppc64.
Or maybe there is a completely different solution. Suggestions? Arnd <>< From ak at suse.de Thu Oct 13 04:08:59 2005 From: ak at suse.de (Andi Kleen) Date: Wed, 12 Oct 2005 20:08:59 +0200 Subject: ppc64/cell: local TLB flush with active SPEs In-Reply-To: <200510122003.59701.arnd@arndb.de> References: <200510122003.59701.arnd@arndb.de> Message-ID: <200510122009.00393.ak@suse.de> On Wednesday 12 October 2005 20:03, Arnd Bergmann wrote: > > Another idea would be to add a new field to mm_context_t, so it stays > in the architecture specific code. Again, adding an int here does > not waste space because there is currently padding in that place on > ppc64. mm_context_t sounds like the right place for this to me. -Andi From paulus at samba.org Wed Oct 12 09:41:13 2005 From: paulus at samba.org (Paul Mackerras) Date: Wed, 12 Oct 2005 09:41:13 +1000 Subject: [PATCH 20/22] PCI Error Recovery: e100 network device driver In-Reply-To: <20051011230409.GS29826@austin.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> <20051006235729.GU29826@austin.ibm.com> <20051011001056.GA16634@kroah.com> <20051011230409.GS29826@austin.ibm.com> Message-ID: <17228.19865.982656.956187@cargo.ozlabs.ibm.com> linas writes: > On Mon, Oct 10, 2005 at 05:10:56PM -0700, Greg KH was heard to remark: > > On Thu, Oct 06, 2005 at 06:57:29PM -0500, linas wrote: > > > +config E100_EEH_RECOVERY > > > + bool "Enable PCI bus error recovery" > > > + depends on E100 && PPC_PSERIES > > > + help > > > + If you say Y here, the driver will be able to recover from > > > + PCI bus errors on many PowerPC platforms. IBM pSeries users > > > + should answer Y. > > > > Why make a config option for this at all? Who would turn it off? > > I wanted to have this turned off for anyone who didn't have > hardware capable of supporting this, and didn't really think > about how to hide this from the menu. I guess its best to > just plain hide this, keep the menus from getting cluttered.
I would think we could have one config option to enable PCI bus error recovery generally, and have the code in the drivers enabled by that. I don't think we need an individual config option for each driver to enable PCI error recovery. Regards, Paul. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo at vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ From becky.bruce at freescale.com Thu Oct 13 07:21:05 2005 From: becky.bruce at freescale.com (Becky Bruce) Date: Wed, 12 Oct 2005 16:21:05 -0500 Subject: Handling of alignment exceptions of load/store multiples Message-ID: <5b0bf3bd6357f0a9ce7a4c22b116496b@freescale.com> Paul & company, I'm currently in the process of merging align.c into arch/powerpc/kernel. I noticed that there's a difference in the handling of alignment exceptions involving ld/st string instructions and ld/st multiple instructions between the 2 architectures right now. The 32-bit code does some handling of these, while the 64-bit version currently just bails out. Should I try to adopt a handling model for these in the merged tree like the 32-bit code, or is there a reason behind not attempting to handle these on the 64-bit side that I'm not aware of? Cheers! -B From mnutter at us.ibm.com Thu Oct 13 08:09:26 2005 From: mnutter at us.ibm.com (Mark Nutter) Date: Wed, 12 Oct 2005 17:09:26 -0500 Subject: ppc64/cell: local TLB flush with active SPEs In-Reply-To: <200510122003.59701.arnd@arndb.de> Message-ID: For reference, the 2.6.3 bring-up kernel always issued global TLBIE. This was a hack, and we very much wanted to improve performance if possible, particularly for the vast majority of PPC applications out there that don't use SPEs. As long as we are thinking about a proper solution, the whole mm->cpu_vm_mask thing is broken, at least as a selector for local -vs- global TLBIE. 
The problem, as I see it, is that memory regions can be shared among processes (via mmap/shmat), with each task bound to different processors. If we are to continue using a cpumask as selector for TLBIE, then we really need a vma->cpu_vma_mask. --- Mark Nutter STI Design Center / IBM email: mnutter at us.ibm.com voice: 512-838-1612 fax: 512-838-1927 11400 Burnet Road Mail Stop 906/3003B Austin, TX 78758 Arnd Bergmann 10/12/2005 01:03 PM To: linuxppc64-dev at ozlabs.org, linux-mm at kvack.org cc: Benjamin Herrenschmidt , Paul Mackerras , Mark Nutter/Austin/IBM at IBMUS, Michael Day/Austin/IBM at IBMUS, Ulrich Weigand Subject: ppc64/cell: local TLB flush with active SPEs I'm looking for a clean solution to detect the need for global TLB flush when an mm_struct is only used on one logical PowerPC CPU (PPE) and also mapped with the memory flow controller of an SPE on the Cell CPU. Normally, we set bits in mm_struct:cpu_vm_mask for each CPU that accesses the mm and then do global flushes instead of local flushes when CPUs other than the currently running one are marked as used in that mask. When an SPE does DMA to that mm, it also gets local TLB entries that are only flushed with a global tlbie broadcast. The current hack is to always set cpu_vm_mask to all bits set when we map an mm into an SPE to ensure receiving the broadcast, but that is obviously not how it's meant to be used. In particular, it doesn't work in UP configurations where the cpumask contains only one bit. One solution that might be better could be to introduce a new special flag in addition to cpu_vm_mask for this purpose. We already have a bit field in mm_struct for dumpable, so adding another bit there at least does not waste space for other platforms, and it's likely to be in the same cache line as cpu_vm_mask. However, I'm reluctant to add more bit fields to such a prominent place, because it might encourage other people to add more bit fields or think that they are accepted coding practice.
Another idea would be to add a new field to mm_context_t, so it stays in the architecture specific code. Again, adding an int here does not waste space because there is currently padding in that place on ppc64. Or maybe there is a completely different solution. Suggestions? Arnd <>< -------------- next part -------------- An HTML attachment was scrubbed... URL: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051012/f625a3c9/attachment.htm From arnd at arndb.de Thu Oct 13 09:45:12 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Thu, 13 Oct 2005 01:45:12 +0200 Subject: ppc64/cell: local TLB flush with active SPEs In-Reply-To: References: Message-ID: <200510130145.15377.arnd@arndb.de> On Dunnersdag 13 Oktober 2005 00:09, Mark Nutter wrote: > As long as we are thinking about a proper solution, the whole > mm->cpu_vm_mask thing is broken, at least as a selector for local -vs- > global TLBIE. The problem, as I see it, is that memory regions can shared > among processes (via mmap/shmat), with each task bound to different > processors. If we are to continue using a cpumask as selector for TLBIE, > then we really need a vma->cpu_vma_mask. > No, because different tasks mapping the same address_space result in distinct virtual (though not necessarily effective) addresses, so the TLB entries are never shared across processes. A TLB entry on ppc64 always maps between the (mm_struct,effective address) tuple and the real address and is local to one CPU or SPU. If we want to be clever, we could optimize the case where an mm_struct has been used on exactly one CPU (the currently running one) and at least one SPU. In that case, doing a TLBIEL plus a separate flush of each of the MFCs that were used (by writing to their TLB_Invalidate_Entry registers) is probably better than a global TLBIE.
Arnd <>< From david at gibson.dropbear.id.au Thu Oct 13 14:28:58 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Thu, 13 Oct 2005 14:28:58 +1000 Subject: Fix use of LOADBASE in merge tree Message-ID: <20051013042858.GF11601@localhost.localdomain> The merge-tree version of LOADBASE actually loads the whole given address from the toc for ppc64. The matching OFF macro adjusts for this, using an offset of 0 for ppc64, but we weren't using that in power4_idle. Signed-off-by: David Gibson Index: working-2.6/arch/powerpc/kernel/idle_power4.S =================================================================== --- working-2.6.orig/arch/powerpc/kernel/idle_power4.S 2005-10-13 11:09:01.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/idle_power4.S 2005-10-13 12:06:03.000000000 +1000 @@ -39,13 +39,13 @@ * can be cleared by CPU init after the fixups are done */ LOADBASE(r3,cur_cpu_spec) - ld r4,cur_cpu_spec at l(r3) + ld r4,OFF(cur_cpu_spec)(r3) ld r4,CPU_SPEC_FEATURES(r4) andi. r0,r4,CPU_FTR_CAN_NAP beqlr /* Now check if user or arch enabled NAP mode */ LOADBASE(r3,powersave_nap) - lwz r4,powersave_nap at l(r3) + lwz r4,OFF(powersave_nap)(r3) cmpwi 0,r4,0 beqlr -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From david at gibson.dropbear.id.au Thu Oct 13 15:46:22 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Thu, 13 Oct 2005 15:46:22 +1000 Subject: Another maple merge tree fix Message-ID: <20051013054622.GH11601@localhost.localdomain> With ARCH=powerpc, a spurious ifdef in prom_init prevented the secondary hold loop being correctly copied down on Maple.
With this patch, Maple boots with ARCH=powerpc Signed-off-by: David Gibson Index: working-2.6/arch/powerpc/kernel/prom_init.c =================================================================== --- working-2.6.orig/arch/powerpc/kernel/prom_init.c 2005-10-13 12:20:34.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/prom_init.c 2005-10-13 15:40:59.000000000 +1000 @@ -2014,13 +2014,11 @@ prom_send_capabilities(); #endif -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_BPA) /* * On pSeries and BPA, copy the CPU hold code */ - if (RELOC(of_platform) & (PLATFORM_PSERIES | PLATFORM_BPA)) + if (RELOC(of_platform) != PLATFORM_POWERMAC) copy_and_flush(0, KERNELBASE + offset, 0x100, 0); -#endif /* * Do early parsing of command line -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From michael at ellerman.id.au Thu Oct 13 18:42:06 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:06 +1000 (EST) Subject: [PATCH 0/10] powerpc: Merges and fixups Message-ID: <1129192924.438846.55228671808.qpush@concordia> Here's a few patches I've had in my tree for too long. There's a couple of easy merge patches, and then some cleanups/fixups we'll need for kexec/kdump soonish. Booted on pSeries LPAR and iSeries with ARCH=powerpc. Built for pSeries and iSeries with ARCH=ppc64, and built for PPC32 common_defconfig + CONFIG_KEXEC. cheers From michael at ellerman.id.au Thu Oct 13 18:42:07 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:07 +1000 (EST) Subject: [PATCH 1/10] powerpc: Merge arch/ppc64/kernel/vio.c into arch/powerpc/sysdev/vio.c In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084207.9F6036855E@ozlabs.org> Merge arch/ppc64/kernel/vio.c into arch/powerpc/sysdev/vio.c, update the Makefiles to make it work, and make ARCH=ppc64 still work. 
Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/Makefile | 3 arch/powerpc/sysdev/vio.c | 261 +++++++++++++++++++++++++++++++++++++++++++ arch/ppc64/Makefile | 1 arch/ppc64/kernel/Makefile | 2 arch/ppc64/kernel/vio.c | 261 ------------------------------------------- 5 files changed, 265 insertions(+), 263 deletions(-) Index: kexec/arch/powerpc/sysdev/Makefile =================================================================== --- kexec.orig/arch/powerpc/sysdev/Makefile +++ kexec/arch/powerpc/sysdev/Makefile @@ -1,3 +1,6 @@ +ifeq ($(CONFIG_PPC_MERGE),y) obj-$(CONFIG_MPIC) += mpic.o indirectpci-$(CONFIG_PPC_PMAC) = indirect_pci.o obj-$(CONFIG_PPC32) += $(indirectpci-y) +endif +obj-$(CONFIG_IBMVIO) += vio.o Index: kexec/arch/powerpc/sysdev/vio.c =================================================================== --- /dev/null +++ kexec/arch/powerpc/sysdev/vio.c @@ -0,0 +1,261 @@ +/* + * IBM PowerPC Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003-2005 IBM Corp. + * Dave Engebretsen engebret at us.ibm.com + * Santiago Leon santil at us.ibm.com + * Hollis Blanchard + * Stephen Rothwell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *, const struct vio_dev *); + +struct vio_dev vio_bus_device = { /* fake "parent" device */ + .name = vio_bus_device.dev.bus_id, + .type = "", + .dev.bus_id = "vio", + .dev.bus = &vio_bus_type, +}; + +static struct vio_bus_ops vio_bus_ops; + +/* + * Convert from struct device to struct vio_dev and pass to driver. + * dev->driver has already been set by generic code because vio_bus_match + * succeeded. 
+ */ +static int vio_bus_probe(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + const struct vio_device_id *id; + int error = -ENODEV; + + if (!viodrv->probe) + return error; + + id = vio_match_device(viodrv->id_table, viodev); + if (id) + error = viodrv->probe(viodev, id); + + return error; +} + +/* convert from struct device to struct vio_dev and pass to driver. */ +static int vio_bus_remove(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + + if (viodrv->remove) + return viodrv->remove(viodev); + + /* driver can't remove */ + return 1; +} + +/** + * vio_register_driver: - Register a new vio driver + * @drv: The vio_driver structure to be registered. + */ +int vio_register_driver(struct vio_driver *viodrv) +{ + printk(KERN_DEBUG "%s: driver %s registering\n", __FUNCTION__, + viodrv->name); + + /* fill in 'struct driver' fields */ + viodrv->driver.name = viodrv->name; + viodrv->driver.bus = &vio_bus_type; + viodrv->driver.probe = vio_bus_probe; + viodrv->driver.remove = vio_bus_remove; + + return driver_register(&viodrv->driver); +} +EXPORT_SYMBOL(vio_register_driver); + +/** + * vio_unregister_driver - Remove registration of vio driver. + * @driver: The vio_driver struct to be removed form registration + */ +void vio_unregister_driver(struct vio_driver *viodrv) +{ + driver_unregister(&viodrv->driver); +} +EXPORT_SYMBOL(vio_unregister_driver); + +/** + * vio_match_device: - Tell if a VIO device has a matching + * VIO device id structure. + * @ids: array of VIO device id structures to search in + * @dev: the VIO device structure to match against + * + * Used by a driver to check whether a VIO device present in the + * system is in its list of supported devices. Returns the matching + * vio_device_id structure or NULL if there is no match. 
+ */ +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *ids, const struct vio_dev *dev) +{ + while (ids->type[0] != '\0') { + if (vio_bus_ops.match(ids, dev)) + return ids; + ids++; + } + return NULL; +} + +/** + * vio_bus_init: - Initialize the virtual IO bus + */ +int __init vio_bus_init(struct vio_bus_ops *ops) +{ + int err; + + vio_bus_ops = *ops; + + err = bus_register(&vio_bus_type); + if (err) { + printk(KERN_ERR "failed to register VIO bus\n"); + return err; + } + + /* + * The fake parent of all vio devices, just to give us + * a nice directory + */ + err = device_register(&vio_bus_device.dev); + if (err) { + printk(KERN_WARNING "%s: device_register returned %i\n", + __FUNCTION__, err); + return err; + } + + return 0; +} + +/* vio_dev refcount hit 0 */ +static void __devinit vio_dev_release(struct device *dev) +{ + if (vio_bus_ops.release_device) + vio_bus_ops.release_device(dev); + kfree(to_vio_dev(dev)); +} + +static ssize_t viodev_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_vio_dev(dev)->name); +} +DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); + +struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev) +{ + /* init generic 'struct device' fields: */ + viodev->dev.parent = &vio_bus_device.dev; + viodev->dev.bus = &vio_bus_type; + viodev->dev.release = vio_dev_release; + + /* register with generic device framework */ + if (device_register(&viodev->dev)) { + printk(KERN_ERR "%s: failed to register device %s\n", + __FUNCTION__, viodev->dev.bus_id); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_name); + + return viodev; +} + +void __devinit vio_unregister_device(struct vio_dev *viodev) +{ + if (vio_bus_ops.unregister_device) + vio_bus_ops.unregister_device(viodev); + device_remove_file(&viodev->dev, &dev_attr_name); + device_unregister(&viodev->dev); +} +EXPORT_SYMBOL(vio_unregister_device); + +static 
dma_addr_t vio_map_single(struct device *dev, void *vaddr, + size_t size, enum dma_data_direction direction) +{ + return iommu_map_single(to_vio_dev(dev)->iommu_table, vaddr, size, + direction); +} + +static void vio_unmap_single(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + iommu_unmap_single(to_vio_dev(dev)->iommu_table, dma_handle, size, + direction); +} + +static int vio_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + return iommu_map_sg(dev, to_vio_dev(dev)->iommu_table, sglist, + nelems, direction); +} + +static void vio_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + iommu_unmap_sg(to_vio_dev(dev)->iommu_table, sglist, nelems, direction); +} + +static void *vio_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + return iommu_alloc_coherent(to_vio_dev(dev)->iommu_table, size, + dma_handle, flag); +} + +static void vio_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + iommu_free_coherent(to_vio_dev(dev)->iommu_table, size, vaddr, + dma_handle); +} + +static int vio_dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +struct dma_mapping_ops vio_dma_ops = { + .alloc_coherent = vio_alloc_coherent, + .free_coherent = vio_free_coherent, + .map_single = vio_map_single, + .unmap_single = vio_unmap_single, + .map_sg = vio_map_sg, + .unmap_sg = vio_unmap_sg, + .dma_supported = vio_dma_supported, +}; + +static int vio_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_device_id *ids = vio_drv->id_table; + + return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); +} + +struct bus_type vio_bus_type = { + .name = "vio", + .match = vio_bus_match, +}; Index: 
kexec/arch/ppc64/kernel/Makefile =================================================================== --- kexec.orig/arch/ppc64/kernel/Makefile +++ kexec/arch/ppc64/kernel/Makefile @@ -43,7 +43,6 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_HVCS) += hvcserver.o -obj-$(CONFIG_IBMVIO) += vio.o $(vio-obj-y) obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_MPIC) += mpic.o @@ -107,7 +106,6 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o -obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_PPC_MAPLE) += maple_setup.o maple_pci.o maple_time.o \ Index: kexec/arch/ppc64/kernel/vio.c =================================================================== --- kexec.orig/arch/ppc64/kernel/vio.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * IBM PowerPC Virtual I/O Infrastructure Support. - * - * Copyright (c) 2003-2005 IBM Corp. - * Dave Engebretsen engebret at us.ibm.com - * Santiago Leon santil at us.ibm.com - * Hollis Blanchard - * Stephen Rothwell - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct vio_device_id *vio_match_device( - const struct vio_device_id *, const struct vio_dev *); - -struct vio_dev vio_bus_device = { /* fake "parent" device */ - .name = vio_bus_device.dev.bus_id, - .type = "", - .dev.bus_id = "vio", - .dev.bus = &vio_bus_type, -}; - -static struct vio_bus_ops vio_bus_ops; - -/* - * Convert from struct device to struct vio_dev and pass to driver. - * dev->driver has already been set by generic code because vio_bus_match - * succeeded. 
- */ -static int vio_bus_probe(struct device *dev) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct vio_driver *viodrv = to_vio_driver(dev->driver); - const struct vio_device_id *id; - int error = -ENODEV; - - if (!viodrv->probe) - return error; - - id = vio_match_device(viodrv->id_table, viodev); - if (id) - error = viodrv->probe(viodev, id); - - return error; -} - -/* convert from struct device to struct vio_dev and pass to driver. */ -static int vio_bus_remove(struct device *dev) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct vio_driver *viodrv = to_vio_driver(dev->driver); - - if (viodrv->remove) - return viodrv->remove(viodev); - - /* driver can't remove */ - return 1; -} - -/** - * vio_register_driver: - Register a new vio driver - * @drv: The vio_driver structure to be registered. - */ -int vio_register_driver(struct vio_driver *viodrv) -{ - printk(KERN_DEBUG "%s: driver %s registering\n", __FUNCTION__, - viodrv->name); - - /* fill in 'struct driver' fields */ - viodrv->driver.name = viodrv->name; - viodrv->driver.bus = &vio_bus_type; - viodrv->driver.probe = vio_bus_probe; - viodrv->driver.remove = vio_bus_remove; - - return driver_register(&viodrv->driver); -} -EXPORT_SYMBOL(vio_register_driver); - -/** - * vio_unregister_driver - Remove registration of vio driver. - * @driver: The vio_driver struct to be removed form registration - */ -void vio_unregister_driver(struct vio_driver *viodrv) -{ - driver_unregister(&viodrv->driver); -} -EXPORT_SYMBOL(vio_unregister_driver); - -/** - * vio_match_device: - Tell if a VIO device has a matching - * VIO device id structure. - * @ids: array of VIO device id structures to search in - * @dev: the VIO device structure to match against - * - * Used by a driver to check whether a VIO device present in the - * system is in its list of supported devices. Returns the matching - * vio_device_id structure or NULL if there is no match. 
- */ -static const struct vio_device_id *vio_match_device( - const struct vio_device_id *ids, const struct vio_dev *dev) -{ - while (ids->type[0] != '\0') { - if (vio_bus_ops.match(ids, dev)) - return ids; - ids++; - } - return NULL; -} - -/** - * vio_bus_init: - Initialize the virtual IO bus - */ -int __init vio_bus_init(struct vio_bus_ops *ops) -{ - int err; - - vio_bus_ops = *ops; - - err = bus_register(&vio_bus_type); - if (err) { - printk(KERN_ERR "failed to register VIO bus\n"); - return err; - } - - /* - * The fake parent of all vio devices, just to give us - * a nice directory - */ - err = device_register(&vio_bus_device.dev); - if (err) { - printk(KERN_WARNING "%s: device_register returned %i\n", - __FUNCTION__, err); - return err; - } - - return 0; -} - -/* vio_dev refcount hit 0 */ -static void __devinit vio_dev_release(struct device *dev) -{ - if (vio_bus_ops.release_device) - vio_bus_ops.release_device(dev); - kfree(to_vio_dev(dev)); -} - -static ssize_t viodev_show_name(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "%s\n", to_vio_dev(dev)->name); -} -DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); - -struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev) -{ - /* init generic 'struct device' fields: */ - viodev->dev.parent = &vio_bus_device.dev; - viodev->dev.bus = &vio_bus_type; - viodev->dev.release = vio_dev_release; - - /* register with generic device framework */ - if (device_register(&viodev->dev)) { - printk(KERN_ERR "%s: failed to register device %s\n", - __FUNCTION__, viodev->dev.bus_id); - return NULL; - } - device_create_file(&viodev->dev, &dev_attr_name); - - return viodev; -} - -void __devinit vio_unregister_device(struct vio_dev *viodev) -{ - if (vio_bus_ops.unregister_device) - vio_bus_ops.unregister_device(viodev); - device_remove_file(&viodev->dev, &dev_attr_name); - device_unregister(&viodev->dev); -} -EXPORT_SYMBOL(vio_unregister_device); - -static 
dma_addr_t vio_map_single(struct device *dev, void *vaddr, - size_t size, enum dma_data_direction direction) -{ - return iommu_map_single(to_vio_dev(dev)->iommu_table, vaddr, size, - direction); -} - -static void vio_unmap_single(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - iommu_unmap_single(to_vio_dev(dev)->iommu_table, dma_handle, size, - direction); -} - -static int vio_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) -{ - return iommu_map_sg(dev, to_vio_dev(dev)->iommu_table, sglist, - nelems, direction); -} - -static void vio_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) -{ - iommu_unmap_sg(to_vio_dev(dev)->iommu_table, sglist, nelems, direction); -} - -static void *vio_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - return iommu_alloc_coherent(to_vio_dev(dev)->iommu_table, size, - dma_handle, flag); -} - -static void vio_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - iommu_free_coherent(to_vio_dev(dev)->iommu_table, size, vaddr, - dma_handle); -} - -static int vio_dma_supported(struct device *dev, u64 mask) -{ - return 1; -} - -struct dma_mapping_ops vio_dma_ops = { - .alloc_coherent = vio_alloc_coherent, - .free_coherent = vio_free_coherent, - .map_single = vio_map_single, - .unmap_single = vio_unmap_single, - .map_sg = vio_map_sg, - .unmap_sg = vio_unmap_sg, - .dma_supported = vio_dma_supported, -}; - -static int vio_bus_match(struct device *dev, struct device_driver *drv) -{ - const struct vio_dev *vio_dev = to_vio_dev(dev); - struct vio_driver *vio_drv = to_vio_driver(drv); - const struct vio_device_id *ids = vio_drv->id_table; - - return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); -} - -struct bus_type vio_bus_type = { - .name = "vio", - .match = vio_bus_match, -}; Index: 
kexec/arch/ppc64/Makefile =================================================================== --- kexec.orig/arch/ppc64/Makefile +++ kexec/arch/ppc64/Makefile @@ -85,6 +85,7 @@ libs-y += arch/ppc64/lib/ core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ core-y += arch/powerpc/mm/ core-y += arch/powerpc/platforms/ +core-y += arch/powerpc/sysdev/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ From michael at ellerman.id.au Thu Oct 13 18:42:10 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:10 +1000 (EST) Subject: [PATCH 2/10] powerpc: Move include/asm-ppc64/vio.h to include/asm-powerpc/vio.h In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084210.1717D68564@ozlabs.org> Move include/asm-ppc64/vio.h to include/asm-powerpc/vio.h, that's it. Signed-off-by: Michael Ellerman --- include/asm-powerpc/vio.h | 106 ++++++++++++++++++++++++++++++++++++++++++++++ include/asm-ppc64/vio.h | 106 ---------------------------------------------- 2 files changed, 106 insertions(+), 106 deletions(-) Index: kexec/include/asm-ppc64/vio.h =================================================================== --- kexec.orig/include/asm-ppc64/vio.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * IBM PowerPC Virtual I/O Infrastructure Support. - * - * Copyright (c) 2003 IBM Corp. - * Dave Engebretsen engebret at us.ibm.com - * Santiago Leon santil at us.ibm.com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _ASM_VIO_H -#define _ASM_VIO_H - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* - * Architecture-specific constants for drivers to - * extract attributes of the device using vio_get_attribute() - */ -#define VETH_MAC_ADDR "local-mac-address" -#define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" - -/* End architecture-specific constants */ - -#define h_vio_signal(ua, mode) \ - plpar_hcall_norets(H_VIO_SIGNAL, ua, mode) - -#define VIO_IRQ_DISABLE 0UL -#define VIO_IRQ_ENABLE 1UL - -struct iommu_table; - -/* - * The vio_dev structure is used to describe virtual I/O devices. - */ -struct vio_dev { - struct iommu_table *iommu_table; /* vio_map_* uses this */ - char *name; - char *type; - uint32_t unit_address; - unsigned int irq; - struct device dev; -}; - -struct vio_driver { - struct list_head node; - char *name; - const struct vio_device_id *id_table; - int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); - int (*remove)(struct vio_dev *dev); - unsigned long driver_data; - struct device_driver driver; -}; - -struct vio_bus_ops { - int (*match)(const struct vio_device_id *id, const struct vio_dev *dev); - void (*unregister_device)(struct vio_dev *); - void (*release_device)(struct device *); -}; - -extern struct dma_mapping_ops vio_dma_ops; -extern struct bus_type vio_bus_type; -extern struct vio_dev vio_bus_device; - -extern int vio_register_driver(struct vio_driver *drv); -extern void vio_unregister_driver(struct vio_driver *drv); - -extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); -extern void __devinit vio_unregister_device(struct vio_dev *dev); - -extern int vio_bus_init(struct vio_bus_ops *); - -#ifdef CONFIG_PPC_PSERIES -struct device_node; - -extern struct vio_dev * __devinit vio_register_device_node( - struct device_node *node_vdev); -extern struct vio_dev *vio_find_node(struct device_node *vnode); -extern const void *vio_get_attribute(struct vio_dev *vdev, 
void *which, - int *length); -extern int vio_enable_interrupts(struct vio_dev *dev); -extern int vio_disable_interrupts(struct vio_dev *dev); -#endif - -static inline struct vio_driver *to_vio_driver(struct device_driver *drv) -{ - return container_of(drv, struct vio_driver, driver); -} - -static inline struct vio_dev *to_vio_dev(struct device *dev) -{ - return container_of(dev, struct vio_dev, dev); -} - -#endif /* _ASM_VIO_H */ Index: kexec/include/asm-powerpc/vio.h =================================================================== --- /dev/null +++ kexec/include/asm-powerpc/vio.h @@ -0,0 +1,106 @@ +/* + * IBM PowerPC Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003 IBM Corp. + * Dave Engebretsen engebret at us.ibm.com + * Santiago Leon santil at us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_VIO_H +#define _ASM_POWERPC_VIO_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Architecture-specific constants for drivers to + * extract attributes of the device using vio_get_attribute() + */ +#define VETH_MAC_ADDR "local-mac-address" +#define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" + +/* End architecture-specific constants */ + +#define h_vio_signal(ua, mode) \ + plpar_hcall_norets(H_VIO_SIGNAL, ua, mode) + +#define VIO_IRQ_DISABLE 0UL +#define VIO_IRQ_ENABLE 1UL + +struct iommu_table; + +/* + * The vio_dev structure is used to describe virtual I/O devices. 
+ */ +struct vio_dev { + struct iommu_table *iommu_table; /* vio_map_* uses this */ + char *name; + char *type; + uint32_t unit_address; + unsigned int irq; + struct device dev; +}; + +struct vio_driver { + struct list_head node; + char *name; + const struct vio_device_id *id_table; + int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); + int (*remove)(struct vio_dev *dev); + unsigned long driver_data; + struct device_driver driver; +}; + +struct vio_bus_ops { + int (*match)(const struct vio_device_id *id, const struct vio_dev *dev); + void (*unregister_device)(struct vio_dev *); + void (*release_device)(struct device *); +}; + +extern struct dma_mapping_ops vio_dma_ops; +extern struct bus_type vio_bus_type; +extern struct vio_dev vio_bus_device; + +extern int vio_register_driver(struct vio_driver *drv); +extern void vio_unregister_driver(struct vio_driver *drv); + +extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); +extern void __devinit vio_unregister_device(struct vio_dev *dev); + +extern int vio_bus_init(struct vio_bus_ops *); + +#ifdef CONFIG_PPC_PSERIES +struct device_node; + +extern struct vio_dev * __devinit vio_register_device_node( + struct device_node *node_vdev); +extern struct vio_dev *vio_find_node(struct device_node *vnode); +extern const void *vio_get_attribute(struct vio_dev *vdev, void *which, + int *length); +extern int vio_enable_interrupts(struct vio_dev *dev); +extern int vio_disable_interrupts(struct vio_dev *dev); +#endif + +static inline struct vio_driver *to_vio_driver(struct device_driver *drv) +{ + return container_of(drv, struct vio_driver, driver); +} + +static inline struct vio_dev *to_vio_dev(struct device *dev) +{ + return container_of(dev, struct vio_dev, dev); +} + +#endif /* _ASM_POWERPC_VIO_H */ From michael at ellerman.id.au Thu Oct 13 18:42:12 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:12 +1000 (EST) Subject: [PATCH 3/10] powerpc: Merge 
include/asm-ppc/kexec.h and include/asm-ppc64/kexec.h In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084212.090686856B@ozlabs.org> Merge include/asm-ppc/kexec.h and include/asm-ppc64/kexec.h. The only thing that's really changed is that we now allocate crash_notes properly on PPC32. Its address is exported via sysfs, so it's not correct for it to be a pointer. I've also removed some of the "we don't use this" comments, because they're wrong (or perhaps were referring only to arch code). Signed-off-by: Michael Ellerman --- arch/ppc/kernel/machine_kexec.c | 2 - include/asm-powerpc/kexec.h | 49 ++++++++++++++++++++++++++++++++++++++++ include/asm-ppc/kexec.h | 40 -------------------------------- include/asm-ppc64/kexec.h | 41 --------------------------------- 4 files changed, 50 insertions(+), 82 deletions(-) Index: kexec/include/asm-powerpc/kexec.h =================================================================== --- /dev/null +++ kexec/include/asm-powerpc/kexec.h @@ -0,0 +1,49 @@ +#ifndef _ASM_POWERPC_KEXEC_H +#define _ASM_POWERPC_KEXEC_H + +/* + * Maximum page that is mapped directly into kernel memory. + * XXX: Since we copy virt we can use any page we allocate + */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* + * Maximum address we can reach in physical address mode. + * XXX: I want to allow initrd in highmem. Otherwise set to rmo on LPAR. + */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control code buffer */ +#ifdef __powerpc64__ +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) +#else +/* TASK_SIZE, probably left over from use_mm ?? 
*/ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE +#endif + +#define KEXEC_CONTROL_CODE_SIZE 4096 + +/* The native architecture */ +#ifdef __powerpc64__ +#define KEXEC_ARCH KEXEC_ARCH_PPC64 +#else +#define KEXEC_ARCH KEXEC_ARCH_PPC +#endif + +#ifndef __ASSEMBLY__ + +#define MAX_NOTE_BYTES 1024 +typedef u32 note_buf_t[MAX_NOTE_BYTES / sizeof(u32)]; + +extern note_buf_t crash_notes[]; + +#ifdef __powerpc64__ +extern void kexec_smp_wait(void); /* get and clear naca physid, wait for + master to copy new code to 0 */ +#else +struct kimage; +extern void machine_kexec_simple(struct kimage *image); +#endif + +#endif /* ! __ASSEMBLY__ */ +#endif /* _ASM_POWERPC_KEXEC_H */ Index: kexec/arch/ppc/kernel/machine_kexec.c =================================================================== --- kexec.orig/arch/ppc/kernel/machine_kexec.c +++ kexec/arch/ppc/kernel/machine_kexec.c @@ -32,7 +32,7 @@ const extern unsigned int relocate_new_k * Provide a dummy crash_notes definition while crash dump arrives to ppc. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. */ -void *crash_notes = NULL; +note_buf_t crash_notes[NR_CPUS]; void machine_shutdown(void) { Index: kexec/include/asm-ppc/kexec.h =================================================================== --- kexec.orig/include/asm-ppc/kexec.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _PPC_KEXEC_H -#define _PPC_KEXEC_H - -#ifdef CONFIG_KEXEC - -/* - * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. - * I.e. Maximum page that is mapped directly into kernel memory, - * and kmap is not required. - * - * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct - * calculation for the amount of memory directly mappable into the - * kernel memory space. 
- */ - -/* Maximum physical address we can use pages from */ -#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) -/* Maximum address we can reach in physical address mode */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) -/* Maximum address we can use for the control code buffer */ -#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE - -#define KEXEC_CONTROL_CODE_SIZE 4096 - -/* The native architecture */ -#define KEXEC_ARCH KEXEC_ARCH_PPC - -#ifndef __ASSEMBLY__ - -extern void *crash_notes; - -struct kimage; - -extern void machine_kexec_simple(struct kimage *image); - -#endif /* __ASSEMBLY__ */ - -#endif /* CONFIG_KEXEC */ - -#endif /* _PPC_KEXEC_H */ Index: kexec/include/asm-ppc64/kexec.h =================================================================== --- kexec.orig/include/asm-ppc64/kexec.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _PPC64_KEXEC_H -#define _PPC64_KEXEC_H - -/* - * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. - * I.e. Maximum page that is mapped directly into kernel memory, - * and kmap is not required. - */ - -/* Maximum physical address we can use pages from */ -/* XXX: since we copy virt we can use any page we allocate */ -#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) - -/* Maximum address we can reach in physical address mode */ -/* XXX: I want to allow initrd in highmem. 
otherwise set to rmo on lpar */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) - -/* Maximum address we can use for the control code buffer */ -/* XXX: unused today, ppc32 uses TASK_SIZE, probably left over from use_mm */ -#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) - -/* XXX: today we don't use this at all, althogh we have a static stack */ -#define KEXEC_CONTROL_CODE_SIZE 4096 - -/* The native architecture */ -#define KEXEC_ARCH KEXEC_ARCH_PPC64 - -#define MAX_NOTE_BYTES 1024 - -#ifndef __ASSEMBLY__ - -typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; - -extern note_buf_t crash_notes[]; - -extern void kexec_smp_wait(void); /* get and clear naca physid, wait for - master to copy new code to 0 */ - -#endif /* __ASSEMBLY__ */ -#endif /* _PPC_KEXEC_H */ - From michael at ellerman.id.au Thu Oct 13 18:42:16 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:16 +1000 (EST) Subject: [PATCH 4/10] powerpc: iseries: Fix a bogus comment In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084216.4AA5A68564@ozlabs.org> A comment in lpevents.c refers to code that's actually in HvCallEvent.h. The code in HvCallEvent.h is pretty obvious, so just remove the comment altogether. Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/iseries/lpevents.c | 6 +----- 1 files changed, 1 insertion(+), 5 deletions(-) Index: kexec/arch/powerpc/platforms/iseries/lpevents.c =================================================================== --- kexec.orig/arch/powerpc/platforms/iseries/lpevents.c +++ kexec/arch/powerpc/platforms/iseries/lpevents.c @@ -184,11 +184,7 @@ void setup_hvlpevent_queue(void) { void *eventStack; - /* - * Allocate a page for the Event Stack. The Hypervisor needs the - * absolute real address, so we subtract out the KERNELBASE and add - * in the absolute real address of the kernel load area. - */ + /* Allocate a page for the Event Stack. 
*/ eventStack = alloc_bootmem_pages(LpEventStackSize); memset(eventStack, 0, LpEventStackSize); From michael at ellerman.id.au Thu Oct 13 18:42:19 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:19 +1000 (EST) Subject: [PATCH 5/10] powerpc: Make early debugging fit on 80 character terminal In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084219.09A8068576@ozlabs.org> There's some debugging in prom.c that wraps nastily on 80 character terminals; reformat it to fit. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: kexec/arch/powerpc/kernel/prom.c =================================================================== --- kexec.orig/arch/powerpc/kernel/prom.c +++ kexec/arch/powerpc/kernel/prom.c @@ -1240,7 +1240,7 @@ static int __init early_init_dt_scan_mem endp = reg + (l / sizeof(cell_t)); - DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", + DBG("memory scan node %s, reg size %ld, data: %x %x %x %x,\n", uname, l, reg[0], reg[1], reg[2], reg[3]); while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { From michael at ellerman.id.au Thu Oct 13 18:42:19 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:19 +1000 (EST) Subject: [PATCH 6/10] powerpc: Add __va_ul() which does __va() but returns unsigned long In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084219.D511668579@ozlabs.org> Currently __va() returns void *, however about half of the callers of __va() actually want an unsigned long as the return value. So we add a __va_ul() macro that does this for us. Purely cosmetic. While we're at it, remove a few redundant casts to void *. 
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 4 ++-- arch/powerpc/mm/mem.c | 2 +- arch/powerpc/oprofile/op_model_power4.c | 2 +- arch/powerpc/platforms/iseries/setup.c | 2 +- arch/powerpc/platforms/pseries/iommu.c | 2 +- include/asm-ppc64/io.h | 2 +- include/asm-ppc64/page.h | 3 ++- 7 files changed, 9 insertions(+), 8 deletions(-) Index: kexec/include/asm-ppc64/page.h =================================================================== --- kexec.orig/include/asm-ppc64/page.h +++ kexec/include/asm-ppc64/page.h @@ -200,7 +200,8 @@ extern u64 ppc64_pft_size; /* Log 2 of #define USER_REGION_ID (0UL) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) -#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) +#define __va_ul(x) (((unsigned long)(x) + KERNELBASE)) +#define __va(x) ((void *)__va_ul(x)) #ifdef CONFIG_DISCONTIGMEM #define page_to_pfn(page) discontigmem_page_to_pfn(page) Index: kexec/arch/powerpc/platforms/iseries/setup.c =================================================================== --- kexec.orig/arch/powerpc/platforms/iseries/setup.c +++ kexec/arch/powerpc/platforms/iseries/setup.c @@ -318,7 +318,7 @@ static void __init iSeries_init_early(vo * a non-zero starting address for it, set it up */ if (naca.xRamDisk) { - initrd_start = (unsigned long)__va(naca.xRamDisk); + initrd_start = __va_ul(naca.xRamDisk); initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE; initrd_below_start_ok = 1; // ramdisk in kernel space ROOT_DEV = Root_RAM0; Index: kexec/arch/powerpc/platforms/pseries/iommu.c =================================================================== --- kexec.orig/arch/powerpc/platforms/pseries/iommu.c +++ kexec/arch/powerpc/platforms/pseries/iommu.c @@ -272,7 +272,7 @@ static void iommu_table_setparms(struct return; } - tbl->it_base = (unsigned long)__va(*basep); + tbl->it_base = __va_ul(*basep); memset((void *)tbl->it_base, 0, *sizep); tbl->it_busno = phb->bus->number; Index: 
kexec/arch/powerpc/kernel/setup_64.c =================================================================== --- kexec.orig/arch/powerpc/kernel/setup_64.c +++ kexec/arch/powerpc/kernel/setup_64.c @@ -509,11 +509,11 @@ static void __init check_for_initrd(void prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL); if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); + initrd_start = __va_ul(*prop); prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL); if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); + initrd_end = __va_ul(*prop); initrd_below_start_ok = 1; } else initrd_start = 0; Index: kexec/arch/powerpc/oprofile/op_model_power4.c =================================================================== --- kexec.orig/arch/powerpc/oprofile/op_model_power4.c +++ kexec/arch/powerpc/oprofile/op_model_power4.c @@ -229,7 +229,7 @@ static unsigned long get_pc(struct pt_re /* Were we in our exception vectors or SLB real mode miss handler? */ if (pc < 0x1000000UL) - return (unsigned long)__va(pc); + return __va_ul(pc); /* Not sure where we were */ if (pc < KERNELBASE) Index: kexec/arch/powerpc/mm/mem.c =================================================================== --- kexec.orig/arch/powerpc/mm/mem.c +++ kexec/arch/powerpc/mm/mem.c @@ -252,7 +252,7 @@ void __init mem_init(void) unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; num_physpages = max_pfn; /* RAM is assumed contiguous */ - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + high_memory = __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES for_each_online_node(nid) { Index: kexec/include/asm-ppc64/io.h =================================================================== --- kexec.orig/include/asm-ppc64/io.h +++ kexec/include/asm-ppc64/io.h @@ -238,7 +238,7 @@ static inline unsigned long virt_to_phys */ static inline void * phys_to_virt(unsigned long address) { - return (void *)__va(address); + return __va(address); } /* From 
michael at ellerman.id.au Thu Oct 13 18:42:21 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:21 +1000 (EST) Subject: [PATCH 7/10] powerpc: Add a is_kernel_addr() macro In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084221.5332268579@ozlabs.org> There's a bunch of code that compares an address with KERNELBASE to see if it's a "kernel address", ie. >= KERNELBASE. Replace all of them with an is_kernel_addr() macro that does the same thing. This will save us some pain when we change KERNELBASE, and also makes the code more readable IMHO. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/slb.c | 6 +++--- arch/powerpc/mm/stab.c | 6 +++--- arch/powerpc/oprofile/op_model_power4.c | 4 ++-- arch/powerpc/oprofile/op_model_rs64.c | 3 +-- arch/ppc64/xmon/xmon.c | 4 ++-- include/asm-ppc64/page.h | 2 ++ include/asm-ppc64/pgtable.h | 2 +- 9 files changed, 16 insertions(+), 15 deletions(-) Index: kexec/arch/powerpc/mm/stab.c =================================================================== --- kexec.orig/arch/powerpc/mm/stab.c +++ kexec/arch/powerpc/mm/stab.c @@ -122,7 +122,7 @@ static int __ste_allocate(unsigned long unsigned long offset; /* Kernel or user address? */ - if (ea >= KERNELBASE) { + if (is_kernel_addr(ea)) { vsid = get_kernel_vsid(ea); } else { if ((ea >= TASK_SIZE_USER64) || (! 
mm)) @@ -133,7 +133,7 @@ static int __ste_allocate(unsigned long stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); - if (ea < KERNELBASE) { + if (!is_kernel_addr(ea)) { offset = __get_cpu_var(stab_cache_ptr); if (offset < NR_STAB_CACHE_ENTRIES) __get_cpu_var(stab_cache[offset++]) = stab_entry; @@ -190,7 +190,7 @@ void switch_stab(struct task_struct *tsk entry++, ste++) { unsigned long ea; ea = ste->esid_data & ESID_MASK; - if (ea < KERNELBASE) { + if (!is_kernel_addr(ea)) { ste->esid_data = 0; } } Index: kexec/arch/powerpc/kernel/prom_init.c =================================================================== --- kexec.orig/arch/powerpc/kernel/prom_init.c +++ kexec/arch/powerpc/kernel/prom_init.c @@ -1936,7 +1936,7 @@ static void __init prom_check_initrd(uns if (r3 && r4 && r4 != 0xdeadbeef) { unsigned long val; - RELOC(prom_initrd_start) = (r3 >= KERNELBASE) ? __pa(r3) : r3; + RELOC(prom_initrd_start) = is_kernel_addr(r3) ? __pa(r3) : r3; RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; val = RELOC(prom_initrd_start); Index: kexec/arch/powerpc/kernel/setup_64.c =================================================================== --- kexec.orig/arch/powerpc/kernel/setup_64.c +++ kexec/arch/powerpc/kernel/setup_64.c @@ -523,7 +523,7 @@ static void __init check_for_initrd(void /* If we were passed an initrd, set the ROOT_DEV properly if the values * look sensible. If not, clear initrd reference. 
*/ - if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + if (is_kernel_addr(initrd_start) && is_kernel_addr(initrd_end) && initrd_end > initrd_start) ROOT_DEV = Root_RAM0; else Index: kexec/arch/powerpc/mm/slb.c =================================================================== --- kexec.orig/arch/powerpc/mm/slb.c +++ kexec/arch/powerpc/mm/slb.c @@ -111,14 +111,14 @@ void switch_slb(struct task_struct *tsk, else unmapped_base = TASK_UNMAPPED_BASE_USER64; - if (pc >= KERNELBASE) + if (is_kernel_addr(pc)) return; slb_allocate(pc); if (GET_ESID(pc) == GET_ESID(stack)) return; - if (stack >= KERNELBASE) + if (is_kernel_addr(stack)) return; slb_allocate(stack); @@ -126,7 +126,7 @@ void switch_slb(struct task_struct *tsk, || (GET_ESID(stack) == GET_ESID(unmapped_base))) return; - if (unmapped_base >= KERNELBASE) + if (is_kernel_addr(unmapped_base)) return; slb_allocate(unmapped_base); } Index: kexec/arch/powerpc/oprofile/op_model_power4.c =================================================================== --- kexec.orig/arch/powerpc/oprofile/op_model_power4.c +++ kexec/arch/powerpc/oprofile/op_model_power4.c @@ -232,7 +232,7 @@ static unsigned long get_pc(struct pt_re return __va_ul(pc); /* Not sure where we were */ - if (pc < KERNELBASE) + if (!is_kernel_addr(pc)) /* function descriptor madness */ return *((unsigned long *)kernel_unknown_bucket); @@ -244,7 +244,7 @@ static int get_kernel(unsigned long pc) int is_kernel; if (!mmcra_has_sihv) { - is_kernel = (pc >= KERNELBASE); + is_kernel = is_kernel_addr(pc); } else { unsigned long mmcra = mfspr(SPRN_MMCRA); is_kernel = ((mmcra & MMCRA_SIPR) == 0); Index: kexec/arch/ppc64/xmon/xmon.c =================================================================== --- kexec.orig/arch/ppc64/xmon/xmon.c +++ kexec/arch/ppc64/xmon/xmon.c @@ -1032,7 +1032,7 @@ static long check_bp_loc(unsigned long a unsigned int instr; addr &= ~3; - if (addr < KERNELBASE) { + if (!is_kernel_addr(addr)) { printf("Breakpoints may only be 
placed at kernel addresses\n"); return 0; } @@ -1082,7 +1082,7 @@ bpt_cmds(void) dabr.address = 0; dabr.enabled = 0; if (scanhex(&dabr.address)) { - if (dabr.address < KERNELBASE) { + if (!is_kernel_addr(dabr.address)) { printf(badaddr); break; } Index: kexec/include/asm-ppc64/page.h =================================================================== --- kexec.orig/include/asm-ppc64/page.h +++ kexec/include/asm-ppc64/page.h @@ -203,6 +203,8 @@ extern u64 ppc64_pft_size; /* Log 2 of #define __va_ul(x) (((unsigned long)(x) + KERNELBASE)) #define __va(x) ((void *)__va_ul(x)) +#define is_kernel_addr(x) ((x) >= KERNELBASE) + #ifdef CONFIG_DISCONTIGMEM #define page_to_pfn(page) discontigmem_page_to_pfn(page) #define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn) Index: kexec/include/asm-ppc64/pgtable.h =================================================================== --- kexec.orig/include/asm-ppc64/pgtable.h +++ kexec/include/asm-ppc64/pgtable.h @@ -212,7 +212,7 @@ static inline pte_t pfn_pte(unsigned lon #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) +#define pmd_set(pmdp, ptep) ({BUG_ON(!is_kernel_addr((u64)ptep)); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) == 0) #define pmd_present(pmd) (pmd_val(pmd) != 0) Index: kexec/arch/powerpc/oprofile/op_model_rs64.c =================================================================== --- kexec.orig/arch/powerpc/oprofile/op_model_rs64.c +++ kexec/arch/powerpc/oprofile/op_model_rs64.c @@ -178,7 +178,6 @@ static void rs64_handle_interrupt(struct int val; int i; unsigned long pc = mfspr(SPRN_SIAR); - int is_kernel = (pc >= KERNELBASE); /* set the PMM bit (see comment below) */ mtmsrd(mfmsr() | MSR_PMM); @@ -187,7 +186,7 @@ static void rs64_handle_interrupt(struct val = ctr_read(i); if (val < 
0) { if (ctr[i].enabled) { - oprofile_add_pc(pc, is_kernel, i); + oprofile_add_pc(pc, is_kernel_addr(pc), i); ctr_write(i, reset_value[i]); } else { ctr_write(i, 0); From michael at ellerman.id.au Thu Oct 13 18:42:22 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:22 +1000 (EST) Subject: [PATCH 8/10] powerpc: Set entry point and text address in linker script In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084222.249E96857F@ozlabs.org> Currently we set the kernel entry point and the address of the text section in the Makefile, using CONFIG_KERNEL_START. But we've already got the definition of KERNELBASE in the linker script, so we can just use KERNELBASE directly. That means if we ever change KERNELBASE there's one less place to change it. And we can set the entry point with ENTRY(). Apart from linux_banner, there are zero differences from "readelf -a vmlinux" before and after this patch. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 5 +---- arch/powerpc/kernel/vmlinux.lds.S | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) Index: kexec/arch/powerpc/Makefile =================================================================== --- kexec.orig/arch/powerpc/Makefile +++ kexec/arch/powerpc/Makefile @@ -12,9 +12,6 @@ # Rewritten by Cort Dougan and Paul Mackerras # -# This must match PAGE_OFFSET in include/asm-powerpc/page.h. 
-KERNELLOAD := $(CONFIG_KERNEL_START) - HAS_BIARCH := $(call cc-option-yn, -m32) ifeq ($(CONFIG_PPC64),y) @@ -57,7 +54,7 @@ override LD += -m elf$(SZ)ppc override CC += -m$(SZ) endif -LDFLAGS_vmlinux := -Ttext $(KERNELLOAD) -Bstatic -e $(KERNELLOAD) +LDFLAGS_vmlinux := -Bstatic # The -Iarch/$(ARCH)/include is temporary while we are merging CPPFLAGS += -Iarch/$(ARCH) -Iarch/$(ARCH)/include Index: kexec/arch/powerpc/kernel/vmlinux.lds.S =================================================================== --- kexec.orig/arch/powerpc/kernel/vmlinux.lds.S +++ kexec/arch/powerpc/kernel/vmlinux.lds.S @@ -6,6 +6,8 @@ #endif #include +ENTRY(_stext) + #ifdef CONFIG_PPC64 OUTPUT_ARCH(powerpc:common64) jiffies = jiffies_64; @@ -21,6 +23,7 @@ SECTIONS *(.exit.data) } + . = KERNELBASE; /* Read-only sections, merged into text segment: */ #ifdef CONFIG_PPC32 From michael at ellerman.id.au Thu Oct 13 18:42:25 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:25 +1000 (EST) Subject: [PATCH 9/10] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084225.B2B3368586@ozlabs.org> There's a few places already, and soon will be more, where we synthesise branch instructions at runtime. Rather than doing it by hand in each case, it would make sense to have one implementation. 
Signed-off-by: Michael Ellerman --- include/asm-powerpc/system.h | 37 +++++++++++++++++++++++++++++++++++++ 1 files changed, 37 insertions(+) Index: kexec/include/asm-powerpc/system.h =================================================================== --- kexec.orig/include/asm-powerpc/system.h +++ kexec/include/asm-powerpc/system.h @@ -356,5 +356,42 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) +static inline void make_instruction(unsigned long addr, unsigned int instr) +{ + unsigned int *p; + p = (unsigned int *)addr; + *p = instr; + asm ("dcbst 0, %0; sync; icbi 0,%0; isync" : : "r" (p)); +} + +static inline void make_branch(unsigned long addr, unsigned long target, + int link, int absolute) +{ + unsigned int instruction; + + instruction = 0x48000000; + instruction |= link & 0x01; + instruction |= (absolute & 0x01) << 1; + + if (link) + target = target - addr; + + instruction |= target & 0x03fffffC; + + make_instruction(addr, instruction); +} + +static inline void make_function_call(unsigned long addr, void * func) +{ + unsigned long func_addr; + +#ifdef CONFIG_PPC64 + func_addr = *(unsigned long *)func; +#else + func_addr = (unsigned long)func; +#endif + make_branch(addr, func_addr, 1, 0); +} + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ From michael at ellerman.id.au Thu Oct 13 18:42:31 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 13 Oct 2005 18:42:31 +1000 (EST) Subject: [PATCH 10/10] powerpc: Seperate usage of KERNELBASE and PAGE_OFFSET In-Reply-To: <1129192924.438846.55228671808.qpush@concordia> Message-ID: <20051013084231.5174868570@ozlabs.org> This patch tries to separate usage of KERNELBASE and PAGE_OFFSET. PAGE_OFFSET == 0xC00..00 and always will be. It's the quantity you subtract from a virtual kernel address to get a physical one. KERNELBASE == 0xC00..00 + SOMETHING, where SOMETHING tends to be 0, but might not be. 
It points to the start of the kernel text + data in virtual memory. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 4 ++-- arch/powerpc/kernel/lparmap.c | 6 +++--- arch/powerpc/mm/hash_utils_64.c | 6 +++--- arch/powerpc/mm/slb.c | 4 ++-- arch/powerpc/mm/slb_low.S | 6 +++--- arch/powerpc/mm/stab.c | 10 +++++----- arch/powerpc/mm/tlb_64.c | 2 +- arch/ppc64/kernel/machine_kexec.c | 5 ++--- include/asm-ppc64/page.h | 6 +++--- 9 files changed, 24 insertions(+), 25 deletions(-) Index: kexec/arch/powerpc/mm/stab.c =================================================================== --- kexec.orig/arch/powerpc/mm/stab.c +++ kexec/arch/powerpc/mm/stab.c @@ -40,7 +40,7 @@ static int make_ste(unsigned long stab, unsigned long entry, group, old_esid, castout_entry, i; unsigned int global_entry; struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET; vsid_data = vsid << STE_VSID_SHIFT; esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; @@ -83,7 +83,7 @@ static int make_ste(unsigned long stab, } /* Dont cast out the first kernel segment */ - if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) + if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET) break; castout_entry = (castout_entry + 1) & 0xf; @@ -248,7 +248,7 @@ void stabs_alloc(void) panic("Unable to allocate segment table for CPU %d.\n", cpu); - newstab += KERNELBASE; + newstab = __va_ul(newstab); memset((void *)newstab, 0, PAGE_SIZE); @@ -265,13 +265,13 @@ void stabs_alloc(void) */ void stab_initialize(unsigned long stab) { - unsigned long vsid = get_kernel_vsid(KERNELBASE); + unsigned long vsid = get_kernel_vsid(PAGE_OFFSET); if (cpu_has_feature(CPU_FTR_SLB)) { slb_initialize(); } else { asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(KERNELBASE), vsid); + make_ste(stab, GET_ESID(PAGE_OFFSET), vsid); /* Order update */ asm 
volatile("sync":::"memory"); Index: kexec/arch/ppc64/kernel/machine_kexec.c =================================================================== --- kexec.orig/arch/ppc64/kernel/machine_kexec.c +++ kexec/arch/ppc64/kernel/machine_kexec.c @@ -171,9 +171,8 @@ void kexec_copy_flush(struct kimage *ima * including ones that were in place on the original copy */ for (i = 0; i < nr_segments; i++) - flush_icache_range(ranges[i].mem + KERNELBASE, - ranges[i].mem + KERNELBASE + - ranges[i].memsz); + flush_icache_range(__va_ul(ranges[i].mem), + __va_ul(ranges[i].mem + ranges[i].memsz)); } #ifdef CONFIG_SMP Index: kexec/arch/powerpc/mm/hash_utils_64.c =================================================================== --- kexec.orig/arch/powerpc/mm/hash_utils_64.c +++ kexec/arch/powerpc/mm/hash_utils_64.c @@ -239,7 +239,7 @@ void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < lmb.memory.cnt; i++) { - base = lmb.memory.region[i].base + KERNELBASE; + base = __va_ul(lmb.memory.region[i].base); size = lmb.memory.region[i].size; DBG("creating mapping for region: %lx : %lx\n", base, size); @@ -276,8 +276,8 @@ void __init htab_initialize(void) * for either 4K or 16MB pages. 
*/ if (tce_alloc_start) { - tce_alloc_start += KERNELBASE; - tce_alloc_end += KERNELBASE; + tce_alloc_start = __va_ul(tce_alloc_start); + tce_alloc_end = __va_ul(tce_alloc_end); if (base + size >= tce_alloc_start) tce_alloc_start = base + size + 1; Index: kexec/arch/powerpc/mm/slb.c =================================================================== --- kexec.orig/arch/powerpc/mm/slb.c +++ kexec/arch/powerpc/mm/slb.c @@ -55,7 +55,7 @@ static void slb_flush_and_rebolt(void) ksp_flags |= SLB_VSID_L; ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); - if ((ksp_esid_data & ESID_MASK) == KERNELBASE) + if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET) ksp_esid_data &= ~SLB_ESID_V; /* We need to do this all in asm, so we're sure we don't touch @@ -145,7 +145,7 @@ void slb_initialize(void) asm volatile("isync":::"memory"); asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); - create_slbe(KERNELBASE, flags, 0); + create_slbe(PAGE_OFFSET, flags, 0); create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); /* We don't bolt the stack for the time being - we're in boot, * so the stack is in the bolted segment. 
By the time it goes Index: kexec/include/asm-ppc64/page.h =================================================================== --- kexec.orig/include/asm-ppc64/page.h +++ kexec/include/asm-ppc64/page.h @@ -196,14 +196,14 @@ extern u64 ppc64_pft_size; /* Log 2 of #define VMALLOCBASE ASM_CONST(0xD000000000000000) #define VMALLOC_REGION_ID (VMALLOCBASE >> REGION_SHIFT) -#define KERNEL_REGION_ID (KERNELBASE >> REGION_SHIFT) +#define KERNEL_REGION_ID (PAGE_OFFSET >> REGION_SHIFT) #define USER_REGION_ID (0UL) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) -#define __va_ul(x) (((unsigned long)(x) + KERNELBASE)) +#define __va_ul(x) (((unsigned long)(x) + PAGE_OFFSET)) #define __va(x) ((void *)__va_ul(x)) -#define is_kernel_addr(x) ((x) >= KERNELBASE) +#define is_kernel_addr(x) ((x) >= PAGE_OFFSET) #ifdef CONFIG_DISCONTIGMEM #define page_to_pfn(page) discontigmem_page_to_pfn(page) Index: kexec/arch/powerpc/kernel/entry_64.S =================================================================== --- kexec.orig/arch/powerpc/kernel/entry_64.S +++ kexec/arch/powerpc/kernel/entry_64.S @@ -674,7 +674,7 @@ _GLOBAL(enter_rtas) /* Setup our real return addr */ SET_REG_TO_LABEL(r4,.rtas_return_loc) - SET_REG_TO_CONST(r9,KERNELBASE) + SET_REG_TO_CONST(r9,PAGE_OFFSET) sub r4,r4,r9 mtlr r4 @@ -702,7 +702,7 @@ _GLOBAL(enter_rtas) _STATIC(rtas_return_loc) /* relocation is off at this point */ mfspr r4,SPRN_SPRG3 /* Get PACA */ - SET_REG_TO_CONST(r5, KERNELBASE) + SET_REG_TO_CONST(r5, PAGE_OFFSET) sub r4,r4,r5 /* RELOC the PACA base pointer */ mfmsr r6 Index: kexec/arch/powerpc/mm/slb_low.S =================================================================== --- kexec.orig/arch/powerpc/mm/slb_low.S +++ kexec/arch/powerpc/mm/slb_low.S @@ -66,12 +66,12 @@ _GLOBAL(slb_allocate) srdi r9,r3,60 /* get region */ srdi r3,r3,28 /* get esid */ - cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ + cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ rldimi r10,r3,28,0 /* r10= ESID<<28 | 
entry */ oris r10,r10,SLB_ESID_V at h /* r10 |= SLB_ESID_V */ - /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */ + /* r3 = esid, r10 = esid_data, cr7 = <> PAGE_OFFSET */ blt cr7,0f /* user or kernel? */ @@ -114,7 +114,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) ld r9,PACACONTEXTID(r13) rldimi r3,r9,USER_ESID_BITS,0 -9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */ +9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <> PAGE_OFFSET */ ASM_VSID_SCRAMBLE(r3,r9) rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ Index: kexec/arch/powerpc/kernel/lparmap.c =================================================================== --- kexec.orig/arch/powerpc/kernel/lparmap.c +++ kexec/arch/powerpc/kernel/lparmap.c @@ -16,8 +16,8 @@ const struct LparMap __attribute__((__se .xSegmentTableOffs = STAB0_PAGE, .xEsids = { - { .xKernelEsid = GET_ESID(KERNELBASE), - .xKernelVsid = KERNEL_VSID(KERNELBASE), }, + { .xKernelEsid = GET_ESID(PAGE_OFFSET), + .xKernelVsid = KERNEL_VSID(PAGE_OFFSET), }, { .xKernelEsid = GET_ESID(VMALLOCBASE), .xKernelVsid = KERNEL_VSID(VMALLOCBASE), }, }, @@ -25,7 +25,7 @@ const struct LparMap __attribute__((__se .xRanges = { { .xPages = HvPagesToMap, .xOffset = 0, - .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT), + .xVPN = KERNEL_VSID(PAGE_OFFSET) << (SID_SHIFT - PAGE_SHIFT), }, }, }; Index: kexec/arch/powerpc/mm/tlb_64.c =================================================================== --- kexec.orig/arch/powerpc/mm/tlb_64.c +++ kexec/arch/powerpc/mm/tlb_64.c @@ -149,7 +149,7 @@ void hpte_update(struct mm_struct *mm, u batch->mm = mm; batch->large = pte_huge(pte); } - if (addr < KERNELBASE) { + if (!is_kernel_addr(addr)) { vsid = get_vsid(mm->context.id, addr); WARN_ON(vsid == 0); } else From linas at austin.ibm.com Fri Oct 14 02:03:39 2005 From: linas at austin.ibm.com (linas) Date: Thu, 13 Oct 2005 11:03:39 -0500 Subject: [PATCH 15/22] ppc64: PCI Error Recovery: PPC64 core recovery routines 
In-Reply-To: <17228.56384.469138.175618@cargo.ozlabs.ibm.com> References: <20051006232032.GA29826@austin.ibm.com> <20051006234742.GP29826@austin.ibm.com> <17228.56384.469138.175618@cargo.ozlabs.ibm.com> Message-ID: <20051013160339.GT29826@austin.ibm.com> On Wed, Oct 12, 2005 at 07:49:52PM +1000, Paul Mackerras was heard to remark: > Linas writes: > > > + /* We might not have a pci device, if it was a config space read > > + * that failed. Find the pci device now. */ > > + if (!dev) { > > + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { > > + if (pci_device_to_OF_node(dev) == event->dn) > > + break; > > + } > > + } > > Couldn't we just use PCI_DN(event->dn)->pcidev here? Is there some > reason why this would not work in some circumstances? It would be > nice to avoid this linear search. Funny that you mention this, I just chopped this out yesterday; it's cruft left over from back-when. The reason I chopped this out is due to a bug regarding the handling of multi-function devices with the "new style" firmware interfaces. With the new interfaces (i.e. those using ibm,get-config-addr-info), every function on a pci card is labelled as a "partitionable endpoint". By contrast, the current code assumes a "PE" is associated with a pci card. As a result of this mismatch, the handling of multi-function cards on systems with the new-style firmware is flubbed. (In particular, the setup and use of config-space is muffed, resulting in crashes due to access of i/o space that wasn't correctly set up). I'm trying several different approaches to fixing this. 1) consolidating multiple pci functions (multiple PE's) into a single "pci card" and treating the thing as a unit. 2) Treating each PE as completely distinct, and handling each distinctly. Each approach seems to have problems. Cross my fingers, hope to have something working later today; however, I'm irritated that I even need to solve this problem.
--linas From olof at lixom.net Thu Oct 13 23:27:14 2005 From: olof at lixom.net (Olof Johansson) Date: Thu, 13 Oct 2005 08:27:14 -0500 Subject: [PATCH 9/10] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <20051013084225.B2B3368586@ozlabs.org> References: <1129192924.438846.55228671808.qpush@concordia> <20051013084225.B2B3368586@ozlabs.org> Message-ID: <20051013132714.GS23053@austin.ibm.com> Looks good. I have just a couple of nitpicks below. :) On Thu, Oct 13, 2005 at 06:42:25PM +1000, Michael Ellerman wrote: > +static inline void make_instruction(unsigned long addr, unsigned int instr) This really is more of a "store instruction" function, but whatever. ;-) Goes with the theme of the rest. > +static inline void make_function_call(unsigned long addr, void * func) > +{ > + unsigned long func_addr; > + > +#ifdef CONFIG_PPC64 > + func_addr = *(unsigned long *)func; I'm sure someone will at some point in the future ask why it needs to be dereferenced like this only on ppc64. A small comment to the effect of it being needed because of the function descriptors could be useful. > +#else > + func_addr = (unsigned long)func; > +#endif > + make_branch(addr, func_addr, 1, 0); Hmm. Maybe some enums/defines for BRANCH_ABSOLUTE, BRANCH_RELATIVE could enhance readability when used. -Olof From sfr at canb.auug.org.au Fri Oct 14 02:35:22 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Fri, 14 Oct 2005 02:35:22 +1000 Subject: [PATCH 8/10] powerpc: Set entry point and text address in linker script In-Reply-To: <20051013084222.249E96857F@ozlabs.org> References: <1129192924.438846.55228671808.qpush@concordia> <20051013084222.249E96857F@ozlabs.org> Message-ID: <20051014023522.0632e029.sfr@canb.auug.org.au> On Thu, 13 Oct 2005 18:42:22 +1000 (EST) Michael Ellerman wrote: > > Currently we set the kernel entry point and the address of the text > section in the Makefile, using CONFIG_KERNEL_START. 
> > But we've already got <asm/page.h> in the linker script, so we can just > use KERNELBASE directly. That means if we ever change KERNELBASE there's > one less place to change it. So you should probably remove CONFIG_KERNEL_START from Kconfig if it is no longer used. -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051014/434d2d58/attachment.pgp From schwab at suse.de Fri Oct 14 08:28:33 2005 From: schwab at suse.de (Andreas Schwab) Date: Fri, 14 Oct 2005 00:28:33 +0200 Subject: pmac_show_cpuinfo broken Message-ID: Apparently with the new device tree flattening the unflattened device tree has lost the device-tree device, so that pmac_show_cpuinfo no longer shows the model and compatible properties. Signed-off-by: schwab at suse.de --- linux-2.6.14-rc4/arch/ppc64/kernel/pmac_setup.c.~1~ 2005-10-11 20:05:47.000000000 +0200 +++ linux-2.6.14-rc4/arch/ppc64/kernel/pmac_setup.c 2005-10-13 23:26:11.000000000 +0200 @@ -115,7 +115,7 @@ static void __pmac pmac_show_cpuinfo(str /* find motherboard type */ seq_printf(m, "machine\t\t: "); - np = find_devices("device-tree"); + np = find_path_device("/"); if (np != NULL) { pp = (char *) get_property(np, "model", NULL); if (pp != NULL) Andreas. -- Andreas Schwab, SuSE Labs, schwab at suse.de SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 "And now for something completely different."
From michael at ellerman.id.au Fri Oct 14 10:57:28 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Fri, 14 Oct 2005 10:57:28 +1000 Subject: [PATCH 8/10] powerpc: Set entry point and text address in linker script In-Reply-To: <20051014023522.0632e029.sfr@canb.auug.org.au> References: <1129192924.438846.55228671808.qpush@concordia> <20051013084222.249E96857F@ozlabs.org> <20051014023522.0632e029.sfr@canb.auug.org.au> Message-ID: <200510141057.30901.michael@ellerman.id.au> On Fri, 14 Oct 2005 02:35, Stephen Rothwell wrote: > On Thu, 13 Oct 2005 18:42:22 +1000 (EST) Michael Ellerman wrote: > > Currently we set the kernel entry point and the address of the text > > section in the Makefile, using CONFIG_KERNEL_START. > > > > But we've already got <asm/page.h> in the linker script, so we can just > > use KERNELBASE directly. That means if we ever change KERNELBASE there's > > one less place to change it. > > So you should probably remove CONFIG_KERNEL_START from Kconfig if it is no > longer used. Unfortunately it's not quite that simple. Some of the PPC platforms use CONFIG_KERNEL_START to set KERNELBASE to something different, so we still need the config variable for them. cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed...
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051014/43e9e0b6/attachment.pgp From anton at samba.org Sat Oct 15 12:14:48 2005 From: anton at samba.org (Anton Blanchard) Date: Sat, 15 Oct 2005 12:14:48 +1000 Subject: [PATCH] ppc64: make dma_addr_t 64 bits Message-ID: <20051015021448.GA6745@krispykreme> From: Stephen Rothwell There has been a need expressed for dma_addr_t to be 64 bits on PPC64. This patch does that. I have built it for pSeries and iSeries and booted a virtual only iSeries partition. Signed-off-by: Anton Blanchard --- diff -ruN linus/include/asm-ppc64/scatterlist.h linus-dma64/include/asm-ppc64/scatterlist.h --- linus/include/asm-ppc64/scatterlist.h 2005-06-27 16:08:08.000000000 +1000 +++ linus-dma64/include/asm-ppc64/scatterlist.h 2005-07-08 16:45:07.000000000 +1000 @@ -19,7 +19,7 @@ unsigned int length; /* For TCE support */ - u32 dma_address; + dma_addr_t dma_address; u32 dma_length; }; diff -ruN linus/include/asm-ppc64/types.h linus-dma64/include/asm-ppc64/types.h --- linus/include/asm-ppc64/types.h 2005-06-27 16:08:08.000000000 +1000 +++ linus-dma64/include/asm-ppc64/types.h 2005-07-08 16:41:08.000000000 +1000 @@ -63,7 +63,7 @@ typedef __vector128 vector128; -typedef u32 dma_addr_t; +typedef u64 dma_addr_t; typedef u64 dma64_addr_t; typedef struct { From paulus at samba.org Sat Oct 15 16:44:16 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 15 Oct 2005 16:44:16 +1000 Subject: [PATCH 7/10] powerpc: Add a is_kernel_addr() macro In-Reply-To: <20051013084221.5332268579@ozlabs.org> References: <1129192924.438846.55228671808.qpush@concordia> <20051013084221.5332268579@ozlabs.org> Message-ID: <17232.42304.277414.765635@cargo.ozlabs.ibm.com> Michael Ellerman writes: > There's a bunch of code that compares an address with KERNELBASE to see if > it's a "kernel address", ie. >= KERNELBASE. 
Replace all of them with an > is_kernel_addr() macro that does the same thing. This will save us some pain > when we change KERNELBASE, and also makes the code more readable IMHO. You don't seem to define an is_kernel_addr() for ppc. Did you try compiling any ppc32 configs? Paul. From paulus at samba.org Sat Oct 15 16:48:46 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 15 Oct 2005 16:48:46 +1000 Subject: [PATCH 8/10] powerpc: Set entry point and text address in linker script In-Reply-To: <20051013084222.249E96857F@ozlabs.org> References: <1129192924.438846.55228671808.qpush@concordia> <20051013084222.249E96857F@ozlabs.org> Message-ID: <17232.42574.14366.399317@cargo.ozlabs.ibm.com> Michael Ellerman writes: > Currently we set the kernel entry point and the address of the text > section in the Makefile, using CONFIG_KERNEL_START. > > But we've already got <asm/page.h> in the linker script, so we can just > use KERNELBASE directly. That means if we ever change KERNELBASE there's > one less place to change it. Well yes, on ppc64 we have asm/page.h, but not on ppc32, not yet anyway. We need to get a merged page.h. On ppc32, CONFIG_KERNEL_START affects both PAGE_OFFSET and KERNELBASE, whereas I think you're talking about changing only KERNELBASE, not PAGE_OFFSET. > Apart from linux_banner, there are zero differences from "readelf -a vmlinux" > before and after this patch. Did you test any ppc32 configs? Paul.
From michael at ellerman.id.au Sun Oct 16 21:34:19 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Sun, 16 Oct 2005 21:34:19 +1000 Subject: [PATCH 7/10] powerpc: Add a is_kernel_addr() macro In-Reply-To: <17232.42304.277414.765635@cargo.ozlabs.ibm.com> References: <1129192924.438846.55228671808.qpush@concordia> <20051013084221.5332268579@ozlabs.org> <17232.42304.277414.765635@cargo.ozlabs.ibm.com> Message-ID: <200510162134.21796.michael@ellerman.id.au> On Sat, 15 Oct 2005 16:44, Paul Mackerras wrote: > Michael Ellerman writes: > > There's a bunch of code that compares an address with KERNELBASE to see > > if it's a "kernel address", ie. >= KERNELBASE. Replace all of them with > > an is_kernel_addr() macro that does the same thing. This will save us > > some pain when we change KERNELBASE, and also makes the code more > > readable IMHO. > > You don't seem to define an is_kernel_addr() for ppc. Did you try > compiling any ppc32 configs? No I didn't, I'll do that tomorrow and resend. -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051016/61761dd7/attachment.pgp From michael at ellerman.id.au Sun Oct 16 21:38:57 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Sun, 16 Oct 2005 21:38:57 +1000 Subject: [PATCH 8/10] powerpc: Set entry point and text address in linker script In-Reply-To: <17232.42574.14366.399317@cargo.ozlabs.ibm.com> References: <1129192924.438846.55228671808.qpush@concordia> <20051013084222.249E96857F@ozlabs.org> <17232.42574.14366.399317@cargo.ozlabs.ibm.com> Message-ID: <200510162139.02394.michael@ellerman.id.au> On Sat, 15 Oct 2005 16:48, Paul Mackerras wrote: > Michael Ellerman writes: > > Currently we set the kernel entry point and the address of the text > > section in the Makefile, using CONFIG_KERNEL_START. > > > > But we've already got <asm/page.h> in the linker script, so we can just > > use KERNELBASE directly. That means if we ever change KERNELBASE there's > > one less place to change it. > > Well yes, on ppc64 we have asm/page.h, but not on ppc32, not yet > anyway. We need to get a merged page.h. Dang, you're right. I'll have a stab at merging page.h sometime. > On ppc32, CONFIG_KERNEL_START affects both PAGE_OFFSET and KERNELBASE, > whereas I think you're talking about changing only KERNELBASE, not > PAGE_OFFSET. For Kdump I'm changing KERNELBASE only. I hadn't really thought about PPC32, but it should be pretty straightforward to make something that could work for both 32 & 64. I'd suggest PAGE_OFFSET := CONFIG_KERNEL_START, and then KERNELBASE := PAGE_OFFSET, except for Kdump where KERNELBASE := PAGE_OFFSET + 32 MB. > > Apart from linux_banner, there are zero differences from "readelf -a > > vmlinux" before and after this patch. > > Did you test any ppc32 configs? Got me again.
-- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051016/c8dfb09e/attachment.pgp From arnd at arndb.de Mon Oct 17 09:50:47 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Mon, 17 Oct 2005 01:50:47 +0200 Subject: [patch 0/8] spufs release for 2.6.13 In-Reply-To: <20050929220009.146368000@localhost> References: <20050929220009.146368000@localhost> Message-ID: <200510170150.48175.arnd@arndb.de> On Freedag 30 September 2005 00:00, Arnd Bergmann wrote: > This is the current patch set that we have been testing on the Cell > Blade with the 2.6.13 kernel. Parts of this have already been included > in 2.6.14-rc or are superseded by recent developments that I have > already posted for -rc2. > > The patch set is not meant for review or even for inclusion in a > mainline kernel but rather as a reference of what we are running > ourselves. It also provides a base level for our friends at > http://www.bsc.es/projects/deepcomputing/linuxoncell/ so they > can build a distribution kernel from it. Here is a small update that fixes a few problems with memory management on the 2.6.13 spufs tree: - Do global TLB flush all the time for now. - Enable the use of hugetlb mappings. - Use all eight slb entries instead of only one. - Shut up the debugging output. Everybody using the previous patch set should also apply this patch.
--- --- linux-2.6.13.orig/include/asm-ppc64/cputable.h +++ linux-2.6.13/include/asm-ppc64/cputable.h @@ -148,7 +148,7 @@ PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) #define CPU_FTR_PPCAS_ARCH_V2_BASE (CPU_FTR_SLB | \ - CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \ + CPU_FTR_NOEXECUTE | \ CPU_FTR_NODSISRALIGN | CPU_FTR_CTRL) /* iSeries doesn't support large pages */ Index: linux-2.6.13/arch/ppc64/kernel/spu_base.c =================================================================== --- linux-2.6.13.orig/arch/ppc64/kernel/spu_base.c +++ linux-2.6.13/arch/ppc64/kernel/spu_base.c @@ -20,7 +20,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#define DEBUG 1 +#undef DEBUG #include #include @@ -72,6 +72,7 @@ int spu_handle_data_seg(struct spu *spu) struct spu_priv2 __iomem *priv2; struct mm_struct *mm; unsigned long ea; + u64 esid, vsid; pr_debug("%s\n", __FUNCTION__); @@ -92,14 +93,18 @@ int spu_handle_data_seg(struct spu *spu) mm = current->mm; - if (spu->slb_replace >= 8) - spu->slb_replace = 0; + esid = (ea & ESID_MASK) | SLB_ESID_V; + vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | SLB_VSID_USER; + if (in_hugepage_area(mm->context, ea)) + vsid |= SLB_VSID_L; out_be64(&priv2->slb_index_W, spu->slb_replace); - out_be64(&priv2->slb_vsid_RW, - (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) - | SLB_VSID_USER); - out_be64(&priv2->slb_esid_RW, (ea & ESID_MASK) | SLB_ESID_V); + out_be64(&priv2->slb_vsid_RW, vsid); + out_be64(&priv2->slb_esid_RW, esid); + + spu->slb_replace++; + if (spu->slb_replace >= 8) + spu->slb_replace = 0; spu_restart_dma(spu); From sleddog at us.ibm.com Mon Oct 17 11:18:41 2005 From: sleddog at us.ibm.com (Dave C Boutcher) Date: Sun, 16 Oct 2005 20:18:41 -0500 Subject: [PATCH] ppc64: make dma_addr_t 64 bits In-Reply-To: <20051015021448.GA6745@krispykreme> References: <20051015021448.GA6745@krispykreme> Message-ID: <20051017011841.GA15004@cs.umn.edu> On Sat, Oct 15, 2005 at 12:14:48PM +1000, Anton Blanchard wrote: > > From: 
Stephen Rothwell > > There has been a need expressed for dma_addr_t to be 64 bits on PPC64. > This patch does that. I have built it for pSeries and iSeries and booted > a virtual only iSeries partition. FYI, I did some vscsi and veth testing with this patch. Works for me. Acked-by: Dave Boutcher -- Dave Boutcher From michael at ellerman.id.au Mon Oct 17 21:48:37 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:37 +1000 (EST) Subject: [PATCH 0/11] powerpc: Merges and fixups Message-ID: <1129549716.320145.129733893202.qpush@concordia> Here's a few patches I've had in my tree for too long. There's a couple of easy merge patches, and then some cleanups/fixups we'll need for kexec/kdump soonish. I've renamed the instruction functions to create_foo(), which I think I like a little better than make_foo(). I've also added a comment about function descriptors on PPC64 as Olof suggested. And added an enum for readability. Not to mention fixing the logic bug that was lurking in create_branch(), oops. I've merged page.h, hopefully I haven't broken too many things. It can probably use some further cleanup, but we'll leave that for a little while. is_kernel_addr() is now defined in the merged page.h, so it should work on PPC32 also. Booted on pSeries LPAR and iSeries with ARCH=powerpc and ARCH=ppc64. This *seems* to build ok on the current merge-tree for PPC32, although there's other broken stuff getting in the way. On top of 3a5f8c5f788d68e325d9fe3c26f4df5a5aee838a I can build it for ppc, and it looks ok (but fails elsewhere) for 32-bit powerpc. 
cheers From michael at ellerman.id.au Mon Oct 17 21:48:38 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:38 +1000 (EST) Subject: [PATCH 2/11] powerpc: Move include/asm-ppc64/vio.h to include/asm-powerpc/vio.h In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114838.780EA685AE@ozlabs.org> Move include/asm-ppc64/vio.h to include/asm-powerpc/vio.h, that's it. Signed-off-by: Michael Ellerman --- include/asm-powerpc/vio.h | 106 ++++++++++++++++++++++++++++++++++++++++++++++ include/asm-ppc64/vio.h | 106 ---------------------------------------------- 2 files changed, 106 insertions(+), 106 deletions(-) Index: kexec/include/asm-ppc64/vio.h =================================================================== --- kexec.orig/include/asm-ppc64/vio.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * IBM PowerPC Virtual I/O Infrastructure Support. - * - * Copyright (c) 2003 IBM Corp. - * Dave Engebretsen engebret at us.ibm.com - * Santiago Leon santil at us.ibm.com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _ASM_VIO_H -#define _ASM_VIO_H - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* - * Architecture-specific constants for drivers to - * extract attributes of the device using vio_get_attribute() - */ -#define VETH_MAC_ADDR "local-mac-address" -#define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" - -/* End architecture-specific constants */ - -#define h_vio_signal(ua, mode) \ - plpar_hcall_norets(H_VIO_SIGNAL, ua, mode) - -#define VIO_IRQ_DISABLE 0UL -#define VIO_IRQ_ENABLE 1UL - -struct iommu_table; - -/* - * The vio_dev structure is used to describe virtual I/O devices. 
- */ -struct vio_dev { - struct iommu_table *iommu_table; /* vio_map_* uses this */ - char *name; - char *type; - uint32_t unit_address; - unsigned int irq; - struct device dev; -}; - -struct vio_driver { - struct list_head node; - char *name; - const struct vio_device_id *id_table; - int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); - int (*remove)(struct vio_dev *dev); - unsigned long driver_data; - struct device_driver driver; -}; - -struct vio_bus_ops { - int (*match)(const struct vio_device_id *id, const struct vio_dev *dev); - void (*unregister_device)(struct vio_dev *); - void (*release_device)(struct device *); -}; - -extern struct dma_mapping_ops vio_dma_ops; -extern struct bus_type vio_bus_type; -extern struct vio_dev vio_bus_device; - -extern int vio_register_driver(struct vio_driver *drv); -extern void vio_unregister_driver(struct vio_driver *drv); - -extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); -extern void __devinit vio_unregister_device(struct vio_dev *dev); - -extern int vio_bus_init(struct vio_bus_ops *); - -#ifdef CONFIG_PPC_PSERIES -struct device_node; - -extern struct vio_dev * __devinit vio_register_device_node( - struct device_node *node_vdev); -extern struct vio_dev *vio_find_node(struct device_node *vnode); -extern const void *vio_get_attribute(struct vio_dev *vdev, void *which, - int *length); -extern int vio_enable_interrupts(struct vio_dev *dev); -extern int vio_disable_interrupts(struct vio_dev *dev); -#endif - -static inline struct vio_driver *to_vio_driver(struct device_driver *drv) -{ - return container_of(drv, struct vio_driver, driver); -} - -static inline struct vio_dev *to_vio_dev(struct device *dev) -{ - return container_of(dev, struct vio_dev, dev); -} - -#endif /* _ASM_VIO_H */ Index: kexec/include/asm-powerpc/vio.h =================================================================== --- /dev/null +++ kexec/include/asm-powerpc/vio.h @@ -0,0 +1,106 @@ +/* + * IBM PowerPC Virtual 
I/O Infrastructure Support. + * + * Copyright (c) 2003 IBM Corp. + * Dave Engebretsen engebret at us.ibm.com + * Santiago Leon santil at us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_VIO_H +#define _ASM_POWERPC_VIO_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Architecture-specific constants for drivers to + * extract attributes of the device using vio_get_attribute() + */ +#define VETH_MAC_ADDR "local-mac-address" +#define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" + +/* End architecture-specific constants */ + +#define h_vio_signal(ua, mode) \ + plpar_hcall_norets(H_VIO_SIGNAL, ua, mode) + +#define VIO_IRQ_DISABLE 0UL +#define VIO_IRQ_ENABLE 1UL + +struct iommu_table; + +/* + * The vio_dev structure is used to describe virtual I/O devices. 
+ */ +struct vio_dev { + struct iommu_table *iommu_table; /* vio_map_* uses this */ + char *name; + char *type; + uint32_t unit_address; + unsigned int irq; + struct device dev; +}; + +struct vio_driver { + struct list_head node; + char *name; + const struct vio_device_id *id_table; + int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); + int (*remove)(struct vio_dev *dev); + unsigned long driver_data; + struct device_driver driver; +}; + +struct vio_bus_ops { + int (*match)(const struct vio_device_id *id, const struct vio_dev *dev); + void (*unregister_device)(struct vio_dev *); + void (*release_device)(struct device *); +}; + +extern struct dma_mapping_ops vio_dma_ops; +extern struct bus_type vio_bus_type; +extern struct vio_dev vio_bus_device; + +extern int vio_register_driver(struct vio_driver *drv); +extern void vio_unregister_driver(struct vio_driver *drv); + +extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); +extern void __devinit vio_unregister_device(struct vio_dev *dev); + +extern int vio_bus_init(struct vio_bus_ops *); + +#ifdef CONFIG_PPC_PSERIES +struct device_node; + +extern struct vio_dev * __devinit vio_register_device_node( + struct device_node *node_vdev); +extern struct vio_dev *vio_find_node(struct device_node *vnode); +extern const void *vio_get_attribute(struct vio_dev *vdev, void *which, + int *length); +extern int vio_enable_interrupts(struct vio_dev *dev); +extern int vio_disable_interrupts(struct vio_dev *dev); +#endif + +static inline struct vio_driver *to_vio_driver(struct device_driver *drv) +{ + return container_of(drv, struct vio_driver, driver); +} + +static inline struct vio_dev *to_vio_dev(struct device *dev) +{ + return container_of(dev, struct vio_dev, dev); +} + +#endif /* _ASM_POWERPC_VIO_H */ From michael at ellerman.id.au Mon Oct 17 21:48:37 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:37 +1000 (EST) Subject: [PATCH 1/11] powerpc: Merge 
arch/ppc64/kernel/vio.c into arch/powerpc/sysdev/vio.c In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114837.DEEFB685A9@ozlabs.org> Merge arch/ppc64/kernel/vio.c into arch/powerpc/sysdev/vio.c, update the Makefiles to make it work, and make ARCH=ppc64 still work. Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/Makefile | 3 arch/powerpc/sysdev/vio.c | 261 +++++++++++++++++++++++++++++++++++++++++++ arch/ppc64/Makefile | 1 arch/ppc64/kernel/Makefile | 1 arch/ppc64/kernel/vio.c | 261 ------------------------------------------- 5 files changed, 265 insertions(+), 262 deletions(-) Index: kexec/arch/powerpc/sysdev/Makefile =================================================================== --- kexec.orig/arch/powerpc/sysdev/Makefile +++ kexec/arch/powerpc/sysdev/Makefile @@ -1,3 +1,6 @@ +ifeq ($(CONFIG_PPC_MERGE),y) obj-$(CONFIG_MPIC) += mpic.o indirectpci-$(CONFIG_PPC_PMAC) = indirect_pci.o obj-$(CONFIG_PPC32) += $(indirectpci-y) +endif +obj-$(CONFIG_IBMVIO) += vio.o Index: kexec/arch/powerpc/sysdev/vio.c =================================================================== --- /dev/null +++ kexec/arch/powerpc/sysdev/vio.c @@ -0,0 +1,261 @@ +/* + * IBM PowerPC Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003-2005 IBM Corp. + * Dave Engebretsen engebret at us.ibm.com + * Santiago Leon santil at us.ibm.com + * Hollis Blanchard + * Stephen Rothwell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *, const struct vio_dev *); + +struct vio_dev vio_bus_device = { /* fake "parent" device */ + .name = vio_bus_device.dev.bus_id, + .type = "", + .dev.bus_id = "vio", + .dev.bus = &vio_bus_type, +}; + +static struct vio_bus_ops vio_bus_ops; + +/* + * Convert from struct device to struct vio_dev and pass to driver. + * dev->driver has already been set by generic code because vio_bus_match + * succeeded. + */ +static int vio_bus_probe(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + const struct vio_device_id *id; + int error = -ENODEV; + + if (!viodrv->probe) + return error; + + id = vio_match_device(viodrv->id_table, viodev); + if (id) + error = viodrv->probe(viodev, id); + + return error; +} + +/* convert from struct device to struct vio_dev and pass to driver. */ +static int vio_bus_remove(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + + if (viodrv->remove) + return viodrv->remove(viodev); + + /* driver can't remove */ + return 1; +} + +/** + * vio_register_driver: - Register a new vio driver + * @drv: The vio_driver structure to be registered. + */ +int vio_register_driver(struct vio_driver *viodrv) +{ + printk(KERN_DEBUG "%s: driver %s registering\n", __FUNCTION__, + viodrv->name); + + /* fill in 'struct driver' fields */ + viodrv->driver.name = viodrv->name; + viodrv->driver.bus = &vio_bus_type; + viodrv->driver.probe = vio_bus_probe; + viodrv->driver.remove = vio_bus_remove; + + return driver_register(&viodrv->driver); +} +EXPORT_SYMBOL(vio_register_driver); + +/** + * vio_unregister_driver - Remove registration of vio driver. 
+ * @driver: The vio_driver struct to be removed form registration + */ +void vio_unregister_driver(struct vio_driver *viodrv) +{ + driver_unregister(&viodrv->driver); +} +EXPORT_SYMBOL(vio_unregister_driver); + +/** + * vio_match_device: - Tell if a VIO device has a matching + * VIO device id structure. + * @ids: array of VIO device id structures to search in + * @dev: the VIO device structure to match against + * + * Used by a driver to check whether a VIO device present in the + * system is in its list of supported devices. Returns the matching + * vio_device_id structure or NULL if there is no match. + */ +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *ids, const struct vio_dev *dev) +{ + while (ids->type[0] != '\0') { + if (vio_bus_ops.match(ids, dev)) + return ids; + ids++; + } + return NULL; +} + +/** + * vio_bus_init: - Initialize the virtual IO bus + */ +int __init vio_bus_init(struct vio_bus_ops *ops) +{ + int err; + + vio_bus_ops = *ops; + + err = bus_register(&vio_bus_type); + if (err) { + printk(KERN_ERR "failed to register VIO bus\n"); + return err; + } + + /* + * The fake parent of all vio devices, just to give us + * a nice directory + */ + err = device_register(&vio_bus_device.dev); + if (err) { + printk(KERN_WARNING "%s: device_register returned %i\n", + __FUNCTION__, err); + return err; + } + + return 0; +} + +/* vio_dev refcount hit 0 */ +static void __devinit vio_dev_release(struct device *dev) +{ + if (vio_bus_ops.release_device) + vio_bus_ops.release_device(dev); + kfree(to_vio_dev(dev)); +} + +static ssize_t viodev_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_vio_dev(dev)->name); +} +DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); + +struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev) +{ + /* init generic 'struct device' fields: */ + viodev->dev.parent = &vio_bus_device.dev; + viodev->dev.bus = 
&vio_bus_type; + viodev->dev.release = vio_dev_release; + + /* register with generic device framework */ + if (device_register(&viodev->dev)) { + printk(KERN_ERR "%s: failed to register device %s\n", + __FUNCTION__, viodev->dev.bus_id); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_name); + + return viodev; +} + +void __devinit vio_unregister_device(struct vio_dev *viodev) +{ + if (vio_bus_ops.unregister_device) + vio_bus_ops.unregister_device(viodev); + device_remove_file(&viodev->dev, &dev_attr_name); + device_unregister(&viodev->dev); +} +EXPORT_SYMBOL(vio_unregister_device); + +static dma_addr_t vio_map_single(struct device *dev, void *vaddr, + size_t size, enum dma_data_direction direction) +{ + return iommu_map_single(to_vio_dev(dev)->iommu_table, vaddr, size, + direction); +} + +static void vio_unmap_single(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + iommu_unmap_single(to_vio_dev(dev)->iommu_table, dma_handle, size, + direction); +} + +static int vio_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + return iommu_map_sg(dev, to_vio_dev(dev)->iommu_table, sglist, + nelems, direction); +} + +static void vio_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + iommu_unmap_sg(to_vio_dev(dev)->iommu_table, sglist, nelems, direction); +} + +static void *vio_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + return iommu_alloc_coherent(to_vio_dev(dev)->iommu_table, size, + dma_handle, flag); +} + +static void vio_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + iommu_free_coherent(to_vio_dev(dev)->iommu_table, size, vaddr, + dma_handle); +} + +static int vio_dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +struct dma_mapping_ops vio_dma_ops = { + .alloc_coherent = vio_alloc_coherent, + 
.free_coherent = vio_free_coherent, + .map_single = vio_map_single, + .unmap_single = vio_unmap_single, + .map_sg = vio_map_sg, + .unmap_sg = vio_unmap_sg, + .dma_supported = vio_dma_supported, +}; + +static int vio_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_device_id *ids = vio_drv->id_table; + + return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); +} + +struct bus_type vio_bus_type = { + .name = "vio", + .match = vio_bus_match, +}; Index: kexec/arch/ppc64/kernel/Makefile =================================================================== --- kexec.orig/arch/ppc64/kernel/Makefile +++ kexec/arch/ppc64/kernel/Makefile @@ -53,7 +53,6 @@ obj-$(CONFIG_BOOTX_TEXT) += btext.o endif obj-$(CONFIG_HVCS) += hvcserver.o -obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_XICS) += xics.o ifneq ($(CONFIG_PPC_MERGE),y) obj-$(CONFIG_MPIC) += mpic.o Index: kexec/arch/ppc64/kernel/vio.c =================================================================== --- kexec.orig/arch/ppc64/kernel/vio.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * IBM PowerPC Virtual I/O Infrastructure Support. - * - * Copyright (c) 2003-2005 IBM Corp. - * Dave Engebretsen engebret at us.ibm.com - * Santiago Leon santil at us.ibm.com - * Hollis Blanchard - * Stephen Rothwell - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct vio_device_id *vio_match_device( - const struct vio_device_id *, const struct vio_dev *); - -struct vio_dev vio_bus_device = { /* fake "parent" device */ - .name = vio_bus_device.dev.bus_id, - .type = "", - .dev.bus_id = "vio", - .dev.bus = &vio_bus_type, -}; - -static struct vio_bus_ops vio_bus_ops; - -/* - * Convert from struct device to struct vio_dev and pass to driver. - * dev->driver has already been set by generic code because vio_bus_match - * succeeded. - */ -static int vio_bus_probe(struct device *dev) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct vio_driver *viodrv = to_vio_driver(dev->driver); - const struct vio_device_id *id; - int error = -ENODEV; - - if (!viodrv->probe) - return error; - - id = vio_match_device(viodrv->id_table, viodev); - if (id) - error = viodrv->probe(viodev, id); - - return error; -} - -/* convert from struct device to struct vio_dev and pass to driver. */ -static int vio_bus_remove(struct device *dev) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct vio_driver *viodrv = to_vio_driver(dev->driver); - - if (viodrv->remove) - return viodrv->remove(viodev); - - /* driver can't remove */ - return 1; -} - -/** - * vio_register_driver: - Register a new vio driver - * @drv: The vio_driver structure to be registered. - */ -int vio_register_driver(struct vio_driver *viodrv) -{ - printk(KERN_DEBUG "%s: driver %s registering\n", __FUNCTION__, - viodrv->name); - - /* fill in 'struct driver' fields */ - viodrv->driver.name = viodrv->name; - viodrv->driver.bus = &vio_bus_type; - viodrv->driver.probe = vio_bus_probe; - viodrv->driver.remove = vio_bus_remove; - - return driver_register(&viodrv->driver); -} -EXPORT_SYMBOL(vio_register_driver); - -/** - * vio_unregister_driver - Remove registration of vio driver. 
- * @driver: The vio_driver struct to be removed form registration - */ -void vio_unregister_driver(struct vio_driver *viodrv) -{ - driver_unregister(&viodrv->driver); -} -EXPORT_SYMBOL(vio_unregister_driver); - -/** - * vio_match_device: - Tell if a VIO device has a matching - * VIO device id structure. - * @ids: array of VIO device id structures to search in - * @dev: the VIO device structure to match against - * - * Used by a driver to check whether a VIO device present in the - * system is in its list of supported devices. Returns the matching - * vio_device_id structure or NULL if there is no match. - */ -static const struct vio_device_id *vio_match_device( - const struct vio_device_id *ids, const struct vio_dev *dev) -{ - while (ids->type[0] != '\0') { - if (vio_bus_ops.match(ids, dev)) - return ids; - ids++; - } - return NULL; -} - -/** - * vio_bus_init: - Initialize the virtual IO bus - */ -int __init vio_bus_init(struct vio_bus_ops *ops) -{ - int err; - - vio_bus_ops = *ops; - - err = bus_register(&vio_bus_type); - if (err) { - printk(KERN_ERR "failed to register VIO bus\n"); - return err; - } - - /* - * The fake parent of all vio devices, just to give us - * a nice directory - */ - err = device_register(&vio_bus_device.dev); - if (err) { - printk(KERN_WARNING "%s: device_register returned %i\n", - __FUNCTION__, err); - return err; - } - - return 0; -} - -/* vio_dev refcount hit 0 */ -static void __devinit vio_dev_release(struct device *dev) -{ - if (vio_bus_ops.release_device) - vio_bus_ops.release_device(dev); - kfree(to_vio_dev(dev)); -} - -static ssize_t viodev_show_name(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "%s\n", to_vio_dev(dev)->name); -} -DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); - -struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev) -{ - /* init generic 'struct device' fields: */ - viodev->dev.parent = &vio_bus_device.dev; - viodev->dev.bus = 
&vio_bus_type; - viodev->dev.release = vio_dev_release; - - /* register with generic device framework */ - if (device_register(&viodev->dev)) { - printk(KERN_ERR "%s: failed to register device %s\n", - __FUNCTION__, viodev->dev.bus_id); - return NULL; - } - device_create_file(&viodev->dev, &dev_attr_name); - - return viodev; -} - -void __devinit vio_unregister_device(struct vio_dev *viodev) -{ - if (vio_bus_ops.unregister_device) - vio_bus_ops.unregister_device(viodev); - device_remove_file(&viodev->dev, &dev_attr_name); - device_unregister(&viodev->dev); -} -EXPORT_SYMBOL(vio_unregister_device); - -static dma_addr_t vio_map_single(struct device *dev, void *vaddr, - size_t size, enum dma_data_direction direction) -{ - return iommu_map_single(to_vio_dev(dev)->iommu_table, vaddr, size, - direction); -} - -static void vio_unmap_single(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - iommu_unmap_single(to_vio_dev(dev)->iommu_table, dma_handle, size, - direction); -} - -static int vio_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) -{ - return iommu_map_sg(dev, to_vio_dev(dev)->iommu_table, sglist, - nelems, direction); -} - -static void vio_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) -{ - iommu_unmap_sg(to_vio_dev(dev)->iommu_table, sglist, nelems, direction); -} - -static void *vio_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - return iommu_alloc_coherent(to_vio_dev(dev)->iommu_table, size, - dma_handle, flag); -} - -static void vio_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - iommu_free_coherent(to_vio_dev(dev)->iommu_table, size, vaddr, - dma_handle); -} - -static int vio_dma_supported(struct device *dev, u64 mask) -{ - return 1; -} - -struct dma_mapping_ops vio_dma_ops = { - .alloc_coherent = vio_alloc_coherent, - 
.free_coherent = vio_free_coherent, - .map_single = vio_map_single, - .unmap_single = vio_unmap_single, - .map_sg = vio_map_sg, - .unmap_sg = vio_unmap_sg, - .dma_supported = vio_dma_supported, -}; - -static int vio_bus_match(struct device *dev, struct device_driver *drv) -{ - const struct vio_dev *vio_dev = to_vio_dev(dev); - struct vio_driver *vio_drv = to_vio_driver(drv); - const struct vio_device_id *ids = vio_drv->id_table; - - return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); -} - -struct bus_type vio_bus_type = { - .name = "vio", - .match = vio_bus_match, -}; Index: kexec/arch/ppc64/Makefile =================================================================== --- kexec.orig/arch/ppc64/Makefile +++ kexec/arch/ppc64/Makefile @@ -85,6 +85,7 @@ libs-y += arch/ppc64/lib/ core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ core-y += arch/powerpc/mm/ core-y += arch/powerpc/platforms/ +core-y += arch/powerpc/sysdev/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ From michael at ellerman.id.au Mon Oct 17 21:48:39 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:39 +1000 (EST) Subject: [PATCH 3/11] powerpc: Merge include/asm-ppc/kexec.h and include/asm-ppc64/kexec.h In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114839.8D340685B0@ozlabs.org> Merge include/asm-ppc/kexec.h and include/asm-ppc64/kexec.h. The only thing that's really changed is that we now allocate crash_notes properly on PPC32. Its address is exported via sysfs, so it's not correct for it to be a pointer. I've also removed some of the "we don't use this" comments, because they're wrong (or perhaps were referring only to arch code). 
Signed-off-by: Michael Ellerman --- arch/ppc/kernel/machine_kexec.c | 2 - include/asm-powerpc/kexec.h | 49 ++++++++++++++++++++++++++++++++++++++++ include/asm-ppc/kexec.h | 40 -------------------------------- include/asm-ppc64/kexec.h | 41 --------------------------------- 4 files changed, 50 insertions(+), 82 deletions(-) Index: kexec/include/asm-powerpc/kexec.h =================================================================== --- /dev/null +++ kexec/include/asm-powerpc/kexec.h @@ -0,0 +1,49 @@ +#ifndef _ASM_POWERPC_KEXEC_H +#define _ASM_POWERPC_KEXEC_H + +/* + * Maximum page that is mapped directly into kernel memory. + * XXX: Since we copy virt we can use any page we allocate + */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* + * Maximum address we can reach in physical address mode. + * XXX: I want to allow initrd in highmem. Otherwise set to rmo on LPAR. + */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control code buffer */ +#ifdef __powerpc64__ +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) +#else +/* TASK_SIZE, probably left over from use_mm ?? */ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE +#endif + +#define KEXEC_CONTROL_CODE_SIZE 4096 + +/* The native architecture */ +#ifdef __powerpc64__ +#define KEXEC_ARCH KEXEC_ARCH_PPC64 +#else +#define KEXEC_ARCH KEXEC_ARCH_PPC +#endif + +#ifndef __ASSEMBLY__ + +#define MAX_NOTE_BYTES 1024 +typedef u32 note_buf_t[MAX_NOTE_BYTES / sizeof(u32)]; + +extern note_buf_t crash_notes[]; + +#ifdef __powerpc64__ +extern void kexec_smp_wait(void); /* get and clear naca physid, wait for + master to copy new code to 0 */ +#else +struct kimage; +extern void machine_kexec_simple(struct kimage *image); +#endif + +#endif /* ! 
__ASSEMBLY__ */ +#endif /* _ASM_POWERPC_KEXEC_H */ Index: kexec/arch/ppc/kernel/machine_kexec.c =================================================================== --- kexec.orig/arch/ppc/kernel/machine_kexec.c +++ kexec/arch/ppc/kernel/machine_kexec.c @@ -32,7 +32,7 @@ const extern unsigned int relocate_new_k * Provide a dummy crash_notes definition while crash dump arrives to ppc. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. */ -void *crash_notes = NULL; +note_buf_t crash_notes[NR_CPUS]; void machine_shutdown(void) { Index: kexec/include/asm-ppc/kexec.h =================================================================== --- kexec.orig/include/asm-ppc/kexec.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _PPC_KEXEC_H -#define _PPC_KEXEC_H - -#ifdef CONFIG_KEXEC - -/* - * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. - * I.e. Maximum page that is mapped directly into kernel memory, - * and kmap is not required. - * - * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct - * calculation for the amount of memory directly mappable into the - * kernel memory space. 
- */ - -/* Maximum physical address we can use pages from */ -#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) -/* Maximum address we can reach in physical address mode */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) -/* Maximum address we can use for the control code buffer */ -#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE - -#define KEXEC_CONTROL_CODE_SIZE 4096 - -/* The native architecture */ -#define KEXEC_ARCH KEXEC_ARCH_PPC - -#ifndef __ASSEMBLY__ - -extern void *crash_notes; - -struct kimage; - -extern void machine_kexec_simple(struct kimage *image); - -#endif /* __ASSEMBLY__ */ - -#endif /* CONFIG_KEXEC */ - -#endif /* _PPC_KEXEC_H */ Index: kexec/include/asm-ppc64/kexec.h =================================================================== --- kexec.orig/include/asm-ppc64/kexec.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _PPC64_KEXEC_H -#define _PPC64_KEXEC_H - -/* - * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. - * I.e. Maximum page that is mapped directly into kernel memory, - * and kmap is not required. - */ - -/* Maximum physical address we can use pages from */ -/* XXX: since we copy virt we can use any page we allocate */ -#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) - -/* Maximum address we can reach in physical address mode */ -/* XXX: I want to allow initrd in highmem. 
otherwise set to rmo on lpar */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) - -/* Maximum address we can use for the control code buffer */ -/* XXX: unused today, ppc32 uses TASK_SIZE, probably left over from use_mm */ -#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) - -/* XXX: today we don't use this at all, althogh we have a static stack */ -#define KEXEC_CONTROL_CODE_SIZE 4096 - -/* The native architecture */ -#define KEXEC_ARCH KEXEC_ARCH_PPC64 - -#define MAX_NOTE_BYTES 1024 - -#ifndef __ASSEMBLY__ - -typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; - -extern note_buf_t crash_notes[]; - -extern void kexec_smp_wait(void); /* get and clear naca physid, wait for - master to copy new code to 0 */ - -#endif /* __ASSEMBLY__ */ -#endif /* _PPC_KEXEC_H */ - From michael at ellerman.id.au Mon Oct 17 21:48:40 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:40 +1000 (EST) Subject: [PATCH 4/11] powerpc: Merge include/asm-ppc/page.h and include/asm-ppc64/page.h In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114840.73939685A6@ozlabs.org> Merge include/asm-ppc/page.h and include/asm-ppc64/page.h. OK, this is my first attempt at a "real" merge. It's a bit crufty, but I haven't got any more cycles for it at the moment. Comments are welcome, though. Signed-off-by: Michael Ellerman --- include/asm-powerpc/page.h | 359 +++++++++++++++++++++++++++++++++++++++++++++ include/asm-ppc/page.h | 173 --------------------- include/asm-ppc64/page.h | 256 -------------------------------- 3 files changed, 359 insertions(+), 429 deletions(-) Index: kexec/include/asm-powerpc/page.h =================================================================== --- /dev/null +++ kexec/include/asm-powerpc/page.h @@ -0,0 +1,359 @@ +#ifndef _ASM_POWERPC_PAGE_H +#define _ASM_POWERPC_PAGE_H + +/* + * Copyright (c) 2001,2005 IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include /* for ASM_CONST */ + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) + +/* + * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So on PPC32 + * if we assign PAGE_MASK to a long long it gets extended the way want + * (i.e. with 1s in the high bits) + */ +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#ifdef __powerpc64__ +#define SID_SHIFT 28 +#define SID_MASK 0xfffffffffUL +#define ESID_MASK 0xfffffffff0000000UL +#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) + +#define HPAGE_SHIFT 24 +#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) + +#ifdef CONFIG_HUGETLB_PAGE + +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#define HTLB_AREA_SHIFT 40 +#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) +#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) + +#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ + - (1U << GET_ESID(addr))) & 0xffff) +#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ + - (1U << GET_HTLB_AREA(addr))) & 0xffff) + +#define ARCH_HAS_HUGEPAGE_ONLY_RANGE +#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE +#define ARCH_HAS_SETCLEAR_HUGE_PTE + +#define touches_hugepage_low_range(mm, addr, len) \ + 
(LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) +#define touches_hugepage_high_range(mm, addr, len) \ + (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) + +#define __within_hugepage_low_range(addr, len, segmask) \ + ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) +#define within_hugepage_low_range(addr, len) \ + __within_hugepage_low_range((addr), (len), \ + current->mm->context.low_htlb_areas) +#define __within_hugepage_high_range(addr, len, zonemask) \ + ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) +#define within_hugepage_high_range(addr, len) \ + __within_hugepage_high_range((addr), (len), \ + current->mm->context.high_htlb_areas) + +#define is_hugepage_only_range(mm, addr, len) \ + (touches_hugepage_high_range((mm), (addr), (len)) || \ + touches_hugepage_low_range((mm), (addr), (len))) +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + +#define in_hugepage_area(context, addr) \ + (cpu_has_feature(CPU_FTR_16M_PAGE) && \ + ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ + ( ((addr) < 0x100000000L) && \ + ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) + +#else /* !CONFIG_HUGETLB_PAGE */ + +#define in_hugepage_area(mm, addr) 0 + +#endif /* CONFIG_HUGETLB_PAGE */ +#endif /* __powerpc64__ */ + +#ifdef __KERNEL__ + +#ifdef __powerpc64__ +#define PAGE_OFFSET ASM_CONST(0xC000000000000000) +#else +#define PAGE_OFFSET CONFIG_KERNEL_START +#endif /* __powerpc64__ */ + +#define KERNELBASE PAGE_OFFSET +#define VMALLOCBASE ASM_CONST(0xD000000000000000) + +#ifndef __ASSEMBLY__ + +#ifdef __powerpc64__ +#include + +#define REGION_SIZE 4UL +#define REGION_SHIFT 60UL +#define REGION_MASK (((1UL<> PAGE_SHIFT; + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); + return 32 - lz; +} +#endif /* !__powerpc64__ */ + +#endif /* __ASSEMBLY__ */ + +#ifdef __powerpc64__ +#define VMALLOC_REGION_ID (VMALLOCBASE >> REGION_SHIFT) +#define KERNEL_REGION_ID (PAGE_OFFSET >> REGION_SHIFT) +#define USER_REGION_ID (0UL) +#define 
REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) + +#ifdef MODULE +#define __page_aligned __attribute__((__aligned__(PAGE_SIZE))) +#else +#define __page_aligned \ + __attribute__((__aligned__(PAGE_SIZE), \ + __section__(".data.page_aligned"))) +#endif /* MODULE */ +#endif /* __powerpc64__ */ + +/* + * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, + * and needs to be executable. This means the whole heap ends + * up being executable. + */ +#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +/* + * This is the default if a program doesn't have a PT_GNU_STACK + * program header entry. The PPC64 ELF ABI has a non executable stack + * stack by default, so in the absense of a PT_GNU_STACK program header + * we turn execute permission off. + */ +#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifdef __powerpc64__ +#define VM_DATA_DEFAULT_FLAGS \ + (test_thread_flag(TIF_32BIT) ? \ + VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) + +#define VM_STACK_DEFAULT_FLAGS \ + (test_thread_flag(TIF_32BIT) ? 
\ + VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) +#else +#define VM_DATA_DEFAULT_FLAGS (VM_DATA_DEFAULT_FLAGS32) +#endif /* __powerpc64__ */ + +#ifdef CONFIG_APUS +extern unsigned long ppc_memstart; +extern unsigned long ppc_pgstart; +extern unsigned long ppc_memoffset; +#define PPC_MEMSTART ppc_memstart +#define PPC_PGSTART ppc_pgstart +#define PPC_MEMOFFSET ppc_memoffset +#else /* !CONFIG_APUS */ +#define PPC_MEMSTART 0 +#define PPC_PGSTART 0 +#define PPC_MEMOFFSET PAGE_OFFSET +#endif /* CONFIG_APUS */ + +#if defined(CONFIG_APUS) && !defined(MODULE) +/* map phys->virtual and virtual->phys for RAM pages */ +static inline unsigned long ___pa(unsigned long v) +{ + unsigned long p; + asm volatile ("1: addis %0, %1, %2;" + ".section \".vtop_fixup\",\"aw\";" + ".align 1;" + ".long 1b;" + ".previous;" + : "=r" (p) + : "b" (v), "K" (((-PAGE_OFFSET) >> 16) & 0xffff)); + + return p; +} +static inline void* ___va(unsigned long p) +{ + unsigned long v; + asm volatile ("1: addis %0, %1, %2;" + ".section \".ptov_fixup\",\"aw\";" + ".align 1;" + ".long 1b;" + ".previous;" + : "=r" (v) + : "b" (p), "K" (((PAGE_OFFSET) >> 16) & 0xffff)); + + return (void*) v; +} +#else +#define ___pa(vaddr) ((vaddr) - PPC_MEMOFFSET) +#define ___va(paddr) ((paddr) + PPC_MEMOFFSET) +#endif /* defined(CONFIG_APUS) && !defined(MODULE) */ + +/* Convert virtual address to physical. */ +#define __pa(x) ___pa((unsigned long)(x)) + +/* Convert physical address to virtual. 
*/ +#define __va(x) ((void *)___va((unsigned long)(x))) + +#ifdef CONFIG_DISCONTIGMEM +#define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn) +#define page_to_pfn(page) discontigmem_page_to_pfn(page) +#define pfn_valid(pfn) discontigmem_pfn_valid(pfn) +#endif + +#ifdef CONFIG_FLATMEM +#define pfn_to_page(pfn) (mem_map + ((pfn) - PPC_PGSTART)) +#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PPC_PGSTART) +#define pfn_valid(pfn) (((pfn) - PPC_PGSTART) < max_mapnr) +#endif + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT) + +#endif /* __KERNEL __ */ + +#ifdef __powerpc64__ +#include /* for get_order() */ +#endif + +#endif /* _ASM_POWERPC_PAGE_H */ Index: kexec/include/asm-ppc/page.h =================================================================== --- kexec.orig/include/asm-ppc/page.h +++ /dev/null @@ -1,173 +0,0 @@ -#ifndef _PPC_PAGE_H -#define _PPC_PAGE_H - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) - -/* - * Subtle: this is an int (not an unsigned long) and so it - * gets extended to 64 bits the way want (i.e. with 1s). -- paulus - */ -#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) - -#ifdef __KERNEL__ -#include - -/* This must match what is in arch/ppc/Makefile */ -#define PAGE_OFFSET CONFIG_KERNEL_START -#define KERNELBASE PAGE_OFFSET - -#ifndef __ASSEMBLY__ - -/* - * The basic type of a PTE - 64 bits for those CPUs with > 32 bit - * physical addressing. For now this just the IBM PPC440. 
- */ -#ifdef CONFIG_PTE_64BIT -typedef unsigned long long pte_basic_t; -#define PTE_SHIFT (PAGE_SHIFT - 3) /* 512 ptes per page */ -#define PTE_FMT "%16Lx" -#else -typedef unsigned long pte_basic_t; -#define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page */ -#define PTE_FMT "%.8lx" -#endif - -/* align addr on a size boundary - adjust address up/down if needed */ -#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) -#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) - -/* align addr on a size boundary - adjust address up if needed */ -#define _ALIGN(addr,size) _ALIGN_UP(addr,size) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) - - -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS -/* - * These are used to make use of C type-checking.. - */ -typedef struct { pte_basic_t pte; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; -typedef struct { unsigned long pgd; } pgd_t; -typedef struct { unsigned long pgprot; } pgprot_t; - -#define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#else -/* - * .. 
while these make it easier on the compiler - */ -typedef pte_basic_t pte_t; -typedef unsigned long pmd_t; -typedef unsigned long pgd_t; -typedef unsigned long pgprot_t; - -#define pte_val(x) (x) -#define pmd_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) - -#define __pte(x) (x) -#define __pmd(x) (x) -#define __pgd(x) (x) -#define __pgprot(x) (x) - -#endif - -struct page; -extern void clear_pages(void *page, int order); -static inline void clear_page(void *page) { clear_pages(page, 0); } -extern void copy_page(void *to, void *from); -extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); -extern void copy_user_page(void *to, void *from, unsigned long vaddr, - struct page *pg); - -#ifndef CONFIG_APUS -#define PPC_MEMSTART 0 -#define PPC_PGSTART 0 -#define PPC_MEMOFFSET PAGE_OFFSET -#else -extern unsigned long ppc_memstart; -extern unsigned long ppc_pgstart; -extern unsigned long ppc_memoffset; -#define PPC_MEMSTART ppc_memstart -#define PPC_PGSTART ppc_pgstart -#define PPC_MEMOFFSET ppc_memoffset -#endif - -#if defined(CONFIG_APUS) && !defined(MODULE) -/* map phys->virtual and virtual->phys for RAM pages */ -static inline unsigned long ___pa(unsigned long v) -{ - unsigned long p; - asm volatile ("1: addis %0, %1, %2;" - ".section \".vtop_fixup\",\"aw\";" - ".align 1;" - ".long 1b;" - ".previous;" - : "=r" (p) - : "b" (v), "K" (((-PAGE_OFFSET) >> 16) & 0xffff)); - - return p; -} -static inline void* ___va(unsigned long p) -{ - unsigned long v; - asm volatile ("1: addis %0, %1, %2;" - ".section \".ptov_fixup\",\"aw\";" - ".align 1;" - ".long 1b;" - ".previous;" - : "=r" (v) - : "b" (p), "K" (((PAGE_OFFSET) >> 16) & 0xffff)); - - return (void*) v; -} -#else -#define ___pa(vaddr) ((vaddr)-PPC_MEMOFFSET) -#define ___va(paddr) ((paddr)+PPC_MEMOFFSET) -#endif - -extern int page_is_ram(unsigned long pfn); - -#define __pa(x) ___pa((unsigned long)(x)) -#define __va(x) ((void *)(___va((unsigned long)(x)))) - -#define pfn_to_page(pfn) 
(mem_map + ((pfn) - PPC_PGSTART)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PPC_PGSTART) -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT) - -#define pfn_valid(pfn) (((pfn) - PPC_PGSTART) < max_mapnr) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) - -/* Pure 2^n version of get_order */ -extern __inline__ int get_order(unsigned long size) -{ - int lz; - - size = (size-1) >> PAGE_SHIFT; - asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); - return 32 - lz; -} - -#endif /* __ASSEMBLY__ */ - -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#endif /* __KERNEL__ */ -#endif /* _PPC_PAGE_H */ Index: kexec/include/asm-ppc64/page.h =================================================================== --- kexec.orig/include/asm-ppc64/page.h +++ /dev/null @@ -1,256 +0,0 @@ -#ifndef _PPC64_PAGE_H -#define _PPC64_PAGE_H - -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include /* for ASM_CONST */ - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#define SID_SHIFT 28 -#define SID_MASK 0xfffffffffUL -#define ESID_MASK 0xfffffffff0000000UL -#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) - -#define HPAGE_SHIFT 24 -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) - -#ifdef CONFIG_HUGETLB_PAGE - -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) - -#define HTLB_AREA_SHIFT 40 -#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) -#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) - -#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ - - (1U << GET_ESID(addr))) & 0xffff) -#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ - - (1U << GET_HTLB_AREA(addr))) & 0xffff) - -#define ARCH_HAS_HUGEPAGE_ONLY_RANGE -#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE -#define ARCH_HAS_SETCLEAR_HUGE_PTE - -#define touches_hugepage_low_range(mm, addr, len) \ - (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) -#define touches_hugepage_high_range(mm, addr, len) \ - (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) - -#define __within_hugepage_low_range(addr, len, segmask) \ - ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) -#define within_hugepage_low_range(addr, len) \ - __within_hugepage_low_range((addr), (len), \ - current->mm->context.low_htlb_areas) -#define __within_hugepage_high_range(addr, len, zonemask) \ - ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) -#define within_hugepage_high_range(addr, len) \ - __within_hugepage_high_range((addr), (len), \ - current->mm->context.high_htlb_areas) - -#define is_hugepage_only_range(mm, addr, len) \ - (touches_hugepage_high_range((mm), (addr), (len)) || \ - touches_hugepage_low_range((mm), (addr), (len))) -#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA - -#define 
in_hugepage_area(context, addr) \ - (cpu_has_feature(CPU_FTR_16M_PAGE) && \ - ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ - ( ((addr) < 0x100000000L) && \ - ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) - -#else /* !CONFIG_HUGETLB_PAGE */ - -#define in_hugepage_area(mm, addr) 0 - -#endif /* !CONFIG_HUGETLB_PAGE */ - -/* align addr on a size boundary - adjust address up/down if needed */ -#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) -#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) - -/* align addr on a size boundary - adjust address up if needed */ -#define _ALIGN(addr,size) _ALIGN_UP(addr,size) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -#include - -#undef STRICT_MM_TYPECHECKS - -#define REGION_SIZE 4UL -#define REGION_SHIFT 60UL -#define REGION_MASK (((1UL<> REGION_SHIFT) -#define KERNEL_REGION_ID (KERNELBASE >> REGION_SHIFT) -#define USER_REGION_ID (0UL) -#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) - -#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) - -#ifdef CONFIG_DISCONTIGMEM -#define page_to_pfn(page) discontigmem_page_to_pfn(page) -#define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn) -#define pfn_valid(pfn) discontigmem_pfn_valid(pfn) -#endif -#ifdef CONFIG_FLATMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) -#define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif - -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) - -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) - -/* - * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, - * and needs to be executable. This means the whole heap ends - * up being executable. 
- */ -#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ - VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) - -/* - * This is the default if a program doesn't have a PT_GNU_STACK - * program header entry. The PPC64 ELF ABI has a non executable stack - * stack by default, so in the absense of a PT_GNU_STACK program header - * we turn execute permission off. - */ -#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_STACK_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ - VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) - -#endif /* __KERNEL__ */ - -#include - -#endif /* _PPC64_PAGE_H */ From michael at ellerman.id.au Mon Oct 17 21:48:41 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:41 +1000 (EST) Subject: [PATCH 5/11] powerpc: iseries: Fix a bogus comment In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114841.DCD26685B3@ozlabs.org> A comment in lpevents.c refers to code that's actually in HvCallEvent.h. The code in HvCallEvent.h is pretty obvious, so just remove the comment altogether. Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/iseries/lpevents.c | 6 +----- 1 files changed, 1 insertion(+), 5 deletions(-) Index: kexec/arch/powerpc/platforms/iseries/lpevents.c =================================================================== --- kexec.orig/arch/powerpc/platforms/iseries/lpevents.c +++ kexec/arch/powerpc/platforms/iseries/lpevents.c @@ -184,11 +184,7 @@ void setup_hvlpevent_queue(void) { void *eventStack; - /* - * Allocate a page for the Event Stack. 
The Hypervisor needs the - * absolute real address, so we subtract out the KERNELBASE and add - * in the absolute real address of the kernel load area. - */ + /* Allocate a page for the Event Stack. */ eventStack = alloc_bootmem_pages(LpEventStackSize); memset(eventStack, 0, LpEventStackSize); From michael at ellerman.id.au Mon Oct 17 21:48:43 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:43 +1000 (EST) Subject: [PATCH 6/11] powerpc: Make early debugging fit on 80 character terminal In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114843.6936B685B3@ozlabs.org> There's some debugging in prom.c that wraps nastily on 80 character terminals; reformat it to fit. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: kexec/arch/powerpc/kernel/prom.c =================================================================== --- kexec.orig/arch/powerpc/kernel/prom.c +++ kexec/arch/powerpc/kernel/prom.c @@ -1240,7 +1240,7 @@ static int __init early_init_dt_scan_mem endp = reg + (l / sizeof(cell_t)); - DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", + DBG("memory scan node %s, reg size %ld, data: %x %x %x %x,\n", uname, l, reg[0], reg[1], reg[2], reg[3]); while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { From michael at ellerman.id.au Mon Oct 17 21:48:44 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:44 +1000 (EST) Subject: [PATCH 7/11] powerpc: Add __va_ul() which does __va() but returns unsigned long In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114844.6F9D5685B9@ozlabs.org> Currently __va() returns void *, however about half of the callers of __va() actually want an unsigned long as the return value. So we add a __va_ul() macro that does this for us. Purely cosmetic. While we're at it, remove a few redundant casts to void *.
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 4 ++-- arch/powerpc/mm/mem.c | 2 +- arch/powerpc/oprofile/op_model_power4.c | 2 +- arch/powerpc/platforms/iseries/setup.c | 2 +- arch/powerpc/platforms/pseries/iommu.c | 2 +- include/asm-powerpc/page.h | 3 ++- include/asm-ppc64/io.h | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) Index: kexec/include/asm-powerpc/page.h =================================================================== --- kexec.orig/include/asm-powerpc/page.h +++ kexec/include/asm-powerpc/page.h @@ -331,7 +331,8 @@ static inline void* ___va(unsigned long #define __pa(x) ___pa((unsigned long)(x)) /* Convert physical address to virtual. */ -#define __va(x) ((void *)___va((unsigned long)(x))) +#define __va_ul(x) (___va((unsigned long)(x))) +#define __va(x) ((void *)__va_ul(x)) #ifdef CONFIG_DISCONTIGMEM #define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn) Index: kexec/arch/powerpc/platforms/iseries/setup.c =================================================================== --- kexec.orig/arch/powerpc/platforms/iseries/setup.c +++ kexec/arch/powerpc/platforms/iseries/setup.c @@ -318,7 +318,7 @@ static void __init iSeries_init_early(vo * a non-zero starting address for it, set it up */ if (naca.xRamDisk) { - initrd_start = (unsigned long)__va(naca.xRamDisk); + initrd_start = __va_ul(naca.xRamDisk); initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE; initrd_below_start_ok = 1; // ramdisk in kernel space ROOT_DEV = Root_RAM0; Index: kexec/arch/powerpc/platforms/pseries/iommu.c =================================================================== --- kexec.orig/arch/powerpc/platforms/pseries/iommu.c +++ kexec/arch/powerpc/platforms/pseries/iommu.c @@ -272,7 +272,7 @@ static void iommu_table_setparms(struct return; } - tbl->it_base = (unsigned long)__va(*basep); + tbl->it_base = __va_ul(*basep); memset((void *)tbl->it_base, 0, *sizep); tbl->it_busno = phb->bus->number; Index: kexec/arch/powerpc/kernel/setup_64.c 
=================================================================== --- kexec.orig/arch/powerpc/kernel/setup_64.c +++ kexec/arch/powerpc/kernel/setup_64.c @@ -509,11 +509,11 @@ static void __init check_for_initrd(void prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL); if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); + initrd_start = __va_ul(*prop); prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL); if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); + initrd_end = __va_ul(*prop); initrd_below_start_ok = 1; } else initrd_start = 0; Index: kexec/arch/powerpc/oprofile/op_model_power4.c =================================================================== --- kexec.orig/arch/powerpc/oprofile/op_model_power4.c +++ kexec/arch/powerpc/oprofile/op_model_power4.c @@ -229,7 +229,7 @@ static unsigned long get_pc(struct pt_re /* Were we in our exception vectors or SLB real mode miss handler? */ if (pc < 0x1000000UL) - return (unsigned long)__va(pc); + return __va_ul(pc); /* Not sure where we were */ if (pc < KERNELBASE) Index: kexec/arch/powerpc/mm/mem.c =================================================================== --- kexec.orig/arch/powerpc/mm/mem.c +++ kexec/arch/powerpc/mm/mem.c @@ -252,7 +252,7 @@ void __init mem_init(void) unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; num_physpages = max_pfn; /* RAM is assumed contiguous */ - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + high_memory = __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES for_each_online_node(nid) { Index: kexec/include/asm-ppc64/io.h =================================================================== --- kexec.orig/include/asm-ppc64/io.h +++ kexec/include/asm-ppc64/io.h @@ -238,7 +238,7 @@ static inline unsigned long virt_to_phys */ static inline void * phys_to_virt(unsigned long address) { - return (void *)__va(address); + return __va(address); } /* From michael at ellerman.id.au Mon Oct 17 
21:48:45 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:45 +1000 (EST) Subject: [PATCH 8/11] powerpc: Add a is_kernel_addr() macro In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114845.0FC09685BE@ozlabs.org> There's a bunch of code that compares an address with KERNELBASE to see if it's a "kernel address", ie. >= KERNELBASE. Replace all of them with an is_kernel_addr() macro that does the same thing. This will save us some pain when we change KERNELBASE, and also makes the code more readable IMHO. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/slb.c | 6 +++--- arch/powerpc/mm/stab.c | 6 +++--- arch/powerpc/oprofile/op_model_power4.c | 4 ++-- arch/powerpc/oprofile/op_model_rs64.c | 3 +-- arch/ppc64/xmon/xmon.c | 4 ++-- include/asm-powerpc/page.h | 6 ++++++ include/asm-ppc64/pgtable.h | 2 +- 9 files changed, 20 insertions(+), 15 deletions(-) Index: kexec/arch/powerpc/mm/stab.c =================================================================== --- kexec.orig/arch/powerpc/mm/stab.c +++ kexec/arch/powerpc/mm/stab.c @@ -122,7 +122,7 @@ static int __ste_allocate(unsigned long unsigned long offset; /* Kernel or user address? */ - if (ea >= KERNELBASE) { + if (is_kernel_addr(ea)) { vsid = get_kernel_vsid(ea); } else { if ((ea >= TASK_SIZE_USER64) || (! 
mm)) @@ -133,7 +133,7 @@ static int __ste_allocate(unsigned long stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); - if (ea < KERNELBASE) { + if (!is_kernel_addr(ea)) { offset = __get_cpu_var(stab_cache_ptr); if (offset < NR_STAB_CACHE_ENTRIES) __get_cpu_var(stab_cache[offset++]) = stab_entry; @@ -190,7 +190,7 @@ void switch_stab(struct task_struct *tsk entry++, ste++) { unsigned long ea; ea = ste->esid_data & ESID_MASK; - if (ea < KERNELBASE) { + if (!is_kernel_addr(ea)) { ste->esid_data = 0; } } Index: kexec/arch/powerpc/kernel/prom_init.c =================================================================== --- kexec.orig/arch/powerpc/kernel/prom_init.c +++ kexec/arch/powerpc/kernel/prom_init.c @@ -1936,7 +1936,7 @@ static void __init prom_check_initrd(uns if (r3 && r4 && r4 != 0xdeadbeef) { unsigned long val; - RELOC(prom_initrd_start) = (r3 >= KERNELBASE) ? __pa(r3) : r3; + RELOC(prom_initrd_start) = is_kernel_addr(r3) ? __pa(r3) : r3; RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; val = RELOC(prom_initrd_start); Index: kexec/arch/powerpc/kernel/setup_64.c =================================================================== --- kexec.orig/arch/powerpc/kernel/setup_64.c +++ kexec/arch/powerpc/kernel/setup_64.c @@ -523,7 +523,7 @@ static void __init check_for_initrd(void /* If we were passed an initrd, set the ROOT_DEV properly if the values * look sensible. If not, clear initrd reference. 
*/ - if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + if (is_kernel_addr(initrd_start) && is_kernel_addr(initrd_end) && initrd_end > initrd_start) ROOT_DEV = Root_RAM0; else Index: kexec/arch/powerpc/mm/slb.c =================================================================== --- kexec.orig/arch/powerpc/mm/slb.c +++ kexec/arch/powerpc/mm/slb.c @@ -111,14 +111,14 @@ void switch_slb(struct task_struct *tsk, else unmapped_base = TASK_UNMAPPED_BASE_USER64; - if (pc >= KERNELBASE) + if (is_kernel_addr(pc)) return; slb_allocate(pc); if (GET_ESID(pc) == GET_ESID(stack)) return; - if (stack >= KERNELBASE) + if (is_kernel_addr(stack)) return; slb_allocate(stack); @@ -126,7 +126,7 @@ void switch_slb(struct task_struct *tsk, || (GET_ESID(stack) == GET_ESID(unmapped_base))) return; - if (unmapped_base >= KERNELBASE) + if (is_kernel_addr(unmapped_base)) return; slb_allocate(unmapped_base); } Index: kexec/arch/powerpc/oprofile/op_model_power4.c =================================================================== --- kexec.orig/arch/powerpc/oprofile/op_model_power4.c +++ kexec/arch/powerpc/oprofile/op_model_power4.c @@ -232,7 +232,7 @@ static unsigned long get_pc(struct pt_re return __va_ul(pc); /* Not sure where we were */ - if (pc < KERNELBASE) + if (!is_kernel_addr(pc)) /* function descriptor madness */ return *((unsigned long *)kernel_unknown_bucket); @@ -244,7 +244,7 @@ static int get_kernel(unsigned long pc) int is_kernel; if (!mmcra_has_sihv) { - is_kernel = (pc >= KERNELBASE); + is_kernel = is_kernel_addr(pc); } else { unsigned long mmcra = mfspr(SPRN_MMCRA); is_kernel = ((mmcra & MMCRA_SIPR) == 0); Index: kexec/arch/ppc64/xmon/xmon.c =================================================================== --- kexec.orig/arch/ppc64/xmon/xmon.c +++ kexec/arch/ppc64/xmon/xmon.c @@ -1032,7 +1032,7 @@ static long check_bp_loc(unsigned long a unsigned int instr; addr &= ~3; - if (addr < KERNELBASE) { + if (!is_kernel_addr(addr)) { printf("Breakpoints may only be 
placed at kernel addresses\n"); return 0; } @@ -1082,7 +1082,7 @@ bpt_cmds(void) dabr.address = 0; dabr.enabled = 0; if (scanhex(&dabr.address)) { - if (dabr.address < KERNELBASE) { + if (!is_kernel_addr(dabr.address)) { printf(badaddr); break; } Index: kexec/include/asm-powerpc/page.h =================================================================== --- kexec.orig/include/asm-powerpc/page.h +++ kexec/include/asm-powerpc/page.h @@ -106,6 +106,12 @@ #define KERNELBASE PAGE_OFFSET #define VMALLOCBASE ASM_CONST(0xD000000000000000) +/* + * Don't compare things with KERNELBASE or PAGE_OFFSET to test for + * "kernelness", use is_kernel_addr() - it should do what you want. + */ +#define is_kernel_addr(x) ((x) >= PAGE_OFFSET) + #ifndef __ASSEMBLY__ #ifdef __powerpc64__ Index: kexec/include/asm-ppc64/pgtable.h =================================================================== --- kexec.orig/include/asm-ppc64/pgtable.h +++ kexec/include/asm-ppc64/pgtable.h @@ -212,7 +212,7 @@ static inline pte_t pfn_pte(unsigned lon #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) +#define pmd_set(pmdp, ptep) ({BUG_ON(!is_kernel_addr((u64)ptep)); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) == 0) #define pmd_present(pmd) (pmd_val(pmd) != 0) Index: kexec/arch/powerpc/oprofile/op_model_rs64.c =================================================================== --- kexec.orig/arch/powerpc/oprofile/op_model_rs64.c +++ kexec/arch/powerpc/oprofile/op_model_rs64.c @@ -178,7 +178,6 @@ static void rs64_handle_interrupt(struct int val; int i; unsigned long pc = mfspr(SPRN_SIAR); - int is_kernel = (pc >= KERNELBASE); /* set the PMM bit (see comment below) */ mtmsrd(mfmsr() | MSR_PMM); @@ -187,7 +186,7 @@ static void rs64_handle_interrupt(struct val = ctr_read(i); if 
(val < 0) { if (ctr[i].enabled) { - oprofile_add_pc(pc, is_kernel, i); + oprofile_add_pc(pc, is_kernel_addr(pc), i); ctr_write(i, reset_value[i]); } else { ctr_write(i, 0); From michael at ellerman.id.au Mon Oct 17 21:48:45 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:45 +1000 (EST) Subject: [PATCH 9/11] powerpc: Set entry point and text address in linker script In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114845.21DE9685C3@ozlabs.org> Currently we set the kernel entry point and the address of the text section in the Makefile, using CONFIG_KERNEL_START. But we've already got <asm/page.h> in the linker script, so we can just use KERNELBASE directly. That means if we ever change KERNELBASE there's one less place to change it. And we can set the entry point with ENTRY(). Apart from linux_banner, there are zero differences from "readelf -a vmlinux" before and after this patch. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 5 +---- arch/powerpc/kernel/vmlinux.lds.S | 7 +++---- 2 files changed, 4 insertions(+), 8 deletions(-) Index: kexec/arch/powerpc/Makefile =================================================================== --- kexec.orig/arch/powerpc/Makefile +++ kexec/arch/powerpc/Makefile @@ -12,9 +12,6 @@ # Rewritten by Cort Dougan and Paul Mackerras # -# This must match PAGE_OFFSET in include/asm-powerpc/page.h.
-KERNELLOAD := $(CONFIG_KERNEL_START) - HAS_BIARCH := $(call cc-option-yn, -m32) ifeq ($(CONFIG_PPC64),y) @@ -59,7 +56,7 @@ override LD += -m elf$(SZ)ppc override CC += -m$(SZ) endif -LDFLAGS_vmlinux := -Ttext $(KERNELLOAD) -Bstatic -e $(KERNELLOAD) +LDFLAGS_vmlinux := -Bstatic # The -Iarch/$(ARCH)/include is temporary while we are merging CPPFLAGS += -Iarch/$(ARCH) -Iarch/$(ARCH)/include Index: kexec/arch/powerpc/kernel/vmlinux.lds.S =================================================================== --- kexec.orig/arch/powerpc/kernel/vmlinux.lds.S +++ kexec/arch/powerpc/kernel/vmlinux.lds.S @@ -1,11 +1,9 @@ #include -#ifdef CONFIG_PPC64 #include -#else -#define PAGE_SIZE 4096 -#endif #include +ENTRY(_stext) + #ifdef CONFIG_PPC64 OUTPUT_ARCH(powerpc:common64) jiffies = jiffies_64; @@ -21,6 +19,7 @@ SECTIONS *(.exit.data) } + . = KERNELBASE; /* Read-only sections, merged into text segment: */ #ifdef CONFIG_PPC32 From michael at ellerman.id.au Mon Oct 17 21:48:47 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:47 +1000 (EST) Subject: [PATCH 10/11] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114847.B8EF6685BE@ozlabs.org> There's a few places already, and soon will be more, where we synthesise branch instructions at runtime. Rather than doing it by hand in each case, it would make sense to have one implementation. 
Signed-off-by: Michael Ellerman --- include/asm-powerpc/system.h | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 52 insertions(+) Index: kexec/include/asm-powerpc/system.h =================================================================== --- kexec.orig/include/asm-powerpc/system.h +++ kexec/include/asm-powerpc/system.h @@ -356,5 +356,57 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) +typedef enum { + BRANCH_RELATIVE, + BRANCH_RELATIVE_LINK, + BRANCH_ABSOLUTE, + BRANCH_ABSOLUTE_LINK +} branch_t; + +static inline void create_instruction(unsigned long addr, unsigned int instr) +{ + unsigned int *p; + p = (unsigned int *)addr; + *p = instr; + asm ("dcbst 0, %0; sync; icbi 0,%0; isync" : : "r" (p)); +} + +static inline void create_branch(unsigned long addr, unsigned long target, + branch_t type) +{ + unsigned int instruction; + + instruction = 0x48000000; /* Basic branch opcode */ + + if (BRANCH_ABSOLUTE == type || BRANCH_ABSOLUTE_LINK == type) + instruction |= 0x02; /* set AA (absolute address) */ + else + target = target - addr; + + if (BRANCH_RELATIVE_LINK == type || BRANCH_ABSOLUTE_LINK == type) + instruction |= 0x01; /* set LK (set link register) */ + + instruction |= target & 0x03FFFFFC; + + create_instruction(addr, instruction); +} + +static inline void create_function_call(unsigned long addr, void * func) +{ + unsigned long func_addr; + +#ifdef CONFIG_PPC64 + /* + * On PPC64 the function pointer actually points to the function's + * descriptor. The first entry in the descriptor is the address + * of the function text. 
+ */ + func_addr = *(unsigned long *)func; +#else + func_addr = (unsigned long)func; +#endif + create_branch(addr, func_addr, BRANCH_RELATIVE_LINK); +} + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ From michael at ellerman.id.au Mon Oct 17 21:48:49 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 17 Oct 2005 21:48:49 +1000 (EST) Subject: [PATCH 11/11] powerpc: Seperate usage of KERNELBASE and PAGE_OFFSET In-Reply-To: <1129549716.320145.129733893202.qpush@concordia> Message-ID: <20051017114849.40EFD685D2@ozlabs.org> This patch tries to separate usage of KERNELBASE and PAGE_OFFSET. PAGE_OFFSET == 0xC00..00 and always will be. It's the quantity you subtract from a virtual kernel address to get a physical one. KERNELBASE == 0xC00..00 + SOMETHING, where SOMETHING tends to be 0, but might not be. It points to the start of the kernel text + data in virtual memory. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 4 ++-- arch/powerpc/kernel/lparmap.c | 6 +++--- arch/powerpc/mm/hash_utils_64.c | 6 +++--- arch/powerpc/mm/slb.c | 4 ++-- arch/powerpc/mm/slb_low.S | 6 +++--- arch/powerpc/mm/stab.c | 10 +++++----- arch/powerpc/mm/tlb_64.c | 2 +- arch/ppc64/kernel/machine_kexec.c | 5 ++--- 8 files changed, 21 insertions(+), 22 deletions(-) Index: kexec/arch/powerpc/mm/stab.c =================================================================== --- kexec.orig/arch/powerpc/mm/stab.c +++ kexec/arch/powerpc/mm/stab.c @@ -40,7 +40,7 @@ static int make_ste(unsigned long stab, unsigned long entry, group, old_esid, castout_entry, i; unsigned int global_entry; struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET; vsid_data = vsid << STE_VSID_SHIFT; esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; @@ -83,7 +83,7 @@ static int make_ste(unsigned long stab, } /* Dont cast out the first kernel segment */ - if
((castout_ste->esid_data & ESID_MASK) != KERNELBASE) + if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET) break; castout_entry = (castout_entry + 1) & 0xf; @@ -248,7 +248,7 @@ void stabs_alloc(void) panic("Unable to allocate segment table for CPU %d.\n", cpu); - newstab += KERNELBASE; + newstab = __va_ul(newstab); memset((void *)newstab, 0, PAGE_SIZE); @@ -265,13 +265,13 @@ void stabs_alloc(void) */ void stab_initialize(unsigned long stab) { - unsigned long vsid = get_kernel_vsid(KERNELBASE); + unsigned long vsid = get_kernel_vsid(PAGE_OFFSET); if (cpu_has_feature(CPU_FTR_SLB)) { slb_initialize(); } else { asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(KERNELBASE), vsid); + make_ste(stab, GET_ESID(PAGE_OFFSET), vsid); /* Order update */ asm volatile("sync":::"memory"); Index: kexec/arch/ppc64/kernel/machine_kexec.c =================================================================== --- kexec.orig/arch/ppc64/kernel/machine_kexec.c +++ kexec/arch/ppc64/kernel/machine_kexec.c @@ -171,9 +171,8 @@ void kexec_copy_flush(struct kimage *ima * including ones that were in place on the original copy */ for (i = 0; i < nr_segments; i++) - flush_icache_range(ranges[i].mem + KERNELBASE, - ranges[i].mem + KERNELBASE + - ranges[i].memsz); + flush_icache_range(__va_ul(ranges[i].mem), + __va_ul(ranges[i].mem + ranges[i].memsz)); } #ifdef CONFIG_SMP Index: kexec/arch/powerpc/mm/hash_utils_64.c =================================================================== --- kexec.orig/arch/powerpc/mm/hash_utils_64.c +++ kexec/arch/powerpc/mm/hash_utils_64.c @@ -239,7 +239,7 @@ void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < lmb.memory.cnt; i++) { - base = lmb.memory.region[i].base + KERNELBASE; + base = __va_ul(lmb.memory.region[i].base); size = lmb.memory.region[i].size; DBG("creating mapping for region: %lx : %lx\n", base, size); @@ -276,8 +276,8 @@ void __init htab_initialize(void) * for either 4K or 
16MB pages. */ if (tce_alloc_start) { - tce_alloc_start += KERNELBASE; - tce_alloc_end += KERNELBASE; + tce_alloc_start = __va_ul(tce_alloc_start); + tce_alloc_end = __va_ul(tce_alloc_end); if (base + size >= tce_alloc_start) tce_alloc_start = base + size + 1; Index: kexec/arch/powerpc/mm/slb.c =================================================================== --- kexec.orig/arch/powerpc/mm/slb.c +++ kexec/arch/powerpc/mm/slb.c @@ -55,7 +55,7 @@ static void slb_flush_and_rebolt(void) ksp_flags |= SLB_VSID_L; ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); - if ((ksp_esid_data & ESID_MASK) == KERNELBASE) + if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET) ksp_esid_data &= ~SLB_ESID_V; /* We need to do this all in asm, so we're sure we don't touch @@ -145,7 +145,7 @@ void slb_initialize(void) asm volatile("isync":::"memory"); asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); - create_slbe(KERNELBASE, flags, 0); + create_slbe(PAGE_OFFSET, flags, 0); create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); /* We don't bolt the stack for the time being - we're in boot, * so the stack is in the bolted segment. 
By the time it goes Index: kexec/arch/powerpc/kernel/entry_64.S =================================================================== --- kexec.orig/arch/powerpc/kernel/entry_64.S +++ kexec/arch/powerpc/kernel/entry_64.S @@ -674,7 +674,7 @@ _GLOBAL(enter_rtas) /* Setup our real return addr */ SET_REG_TO_LABEL(r4,.rtas_return_loc) - SET_REG_TO_CONST(r9,KERNELBASE) + SET_REG_TO_CONST(r9,PAGE_OFFSET) sub r4,r4,r9 mtlr r4 @@ -702,7 +702,7 @@ _GLOBAL(enter_rtas) _STATIC(rtas_return_loc) /* relocation is off at this point */ mfspr r4,SPRN_SPRG3 /* Get PACA */ - SET_REG_TO_CONST(r5, KERNELBASE) + SET_REG_TO_CONST(r5, PAGE_OFFSET) sub r4,r4,r5 /* RELOC the PACA base pointer */ mfmsr r6 Index: kexec/arch/powerpc/mm/slb_low.S =================================================================== --- kexec.orig/arch/powerpc/mm/slb_low.S +++ kexec/arch/powerpc/mm/slb_low.S @@ -66,12 +66,12 @@ _GLOBAL(slb_allocate) srdi r9,r3,60 /* get region */ srdi r3,r3,28 /* get esid */ - cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ + cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ oris r10,r10,SLB_ESID_V at h /* r10 |= SLB_ESID_V */ - /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */ + /* r3 = esid, r10 = esid_data, cr7 = <> PAGE_OFFSET */ blt cr7,0f /* user or kernel? 
*/ @@ -114,7 +114,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) ld r9,PACACONTEXTID(r13) rldimi r3,r9,USER_ESID_BITS,0 -9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */ +9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <> PAGE_OFFSET */ ASM_VSID_SCRAMBLE(r3,r9) rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ Index: kexec/arch/powerpc/kernel/lparmap.c =================================================================== --- kexec.orig/arch/powerpc/kernel/lparmap.c +++ kexec/arch/powerpc/kernel/lparmap.c @@ -16,8 +16,8 @@ const struct LparMap __attribute__((__se .xSegmentTableOffs = STAB0_PAGE, .xEsids = { - { .xKernelEsid = GET_ESID(KERNELBASE), - .xKernelVsid = KERNEL_VSID(KERNELBASE), }, + { .xKernelEsid = GET_ESID(PAGE_OFFSET), + .xKernelVsid = KERNEL_VSID(PAGE_OFFSET), }, { .xKernelEsid = GET_ESID(VMALLOCBASE), .xKernelVsid = KERNEL_VSID(VMALLOCBASE), }, }, @@ -25,7 +25,7 @@ const struct LparMap __attribute__((__se .xRanges = { { .xPages = HvPagesToMap, .xOffset = 0, - .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT), + .xVPN = KERNEL_VSID(PAGE_OFFSET) << (SID_SHIFT - PAGE_SHIFT), }, }, }; Index: kexec/arch/powerpc/mm/tlb_64.c =================================================================== --- kexec.orig/arch/powerpc/mm/tlb_64.c +++ kexec/arch/powerpc/mm/tlb_64.c @@ -149,7 +149,7 @@ void hpte_update(struct mm_struct *mm, u batch->mm = mm; batch->large = pte_huge(pte); } - if (addr < KERNELBASE) { + if (!is_kernel_addr(addr)) { vsid = get_vsid(mm->context.id, addr); WARN_ON(vsid == 0); } else From segher at kernel.crashing.org Tue Oct 18 00:44:29 2005 From: segher at kernel.crashing.org (Segher Boessenkool) Date: Mon, 17 Oct 2005 16:44:29 +0200 Subject: [PATCH 10/11] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <20051017114847.B8EF6685BE@ozlabs.org> References: <20051017114847.B8EF6685BE@ozlabs.org> Message-ID: <309f5f701666369cdd618d06f664439e@kernel.crashing.org> > 
+typedef enum { > + BRANCH_RELATIVE, > + BRANCH_RELATIVE_LINK, > + BRANCH_ABSOLUTE, > + BRANCH_ABSOLUTE_LINK > +} branch_t; Why not say BRANCH_RELATIVE = 0, BRANCH_RELATIVE_LINK = 1, etc., and then later just OR this value into the insn? > + asm ("dcbst 0, %0; sync; icbi 0,%0; isync" : : "r" (p)); sequence should be dcbst; sync; icbi; sync; isync to ensure the insn cache invalidation is global before continuing. > + if (BRANCH_ABSOLUTE == type || BRANCH_ABSOLUTE_LINK == type) > + instruction |= 0x02; /* set AA (absolute address) */ > + else > + target = target - addr; > + > + if (BRANCH_RELATIVE_LINK == type || BRANCH_ABSOLUTE_LINK == type) > + instruction |= 0x01; /* set LK (set link register) */ This would be instruction |= type; if ((type & 2) == 0) target -= addr; Segher From sleddog at us.ibm.com Tue Oct 18 00:36:44 2005 From: sleddog at us.ibm.com (Dave Boutcher) Date: Mon, 17 Oct 2005 09:36:44 -0500 Subject: [PATCH 0/3] ibmvscsis scsi target Message-ID: <20051017143644.GA9992@cs.umn.edu> James, Here's the ibmvscsis SCSI target submitted for inclusion in 2.6.15. This driver meets a couple of akpm's criteria for worthiness, in that it's actually been shipping for a while in a distro kernel, and (given the posts when I broke compatibility) is being used. This version is basically the same as the recent RFC version I sent out, with a few bug fixes. It addresses a comment from Anton about using gratuitously small max_sectors limits, and has a few other miscellaneous fixes. The only other significant comment generated by the RFC was from Christoph, and requested that this work be combined with the sgtg work that Mike Christie and Tomonori Fujita are working on. I definitely will start contributing to that work, and will convert this driver to their framework when it becomes complete. I would rather not keep this driver out of mainline for the amount of time that may take.
This device driver provides the SCSI target side of the "virtual SCSI" on IBM Power5 systems. The initiator side has been in mainline for a while now (drivers/scsi/ibmvscsi/ibmvscsi.c.) Targets already exist for AIX and OS/400. -- Dave B From sleddog at us.ibm.com Tue Oct 18 00:37:30 2005 From: sleddog at us.ibm.com (Dave Boutcher) Date: Mon, 17 Oct 2005 09:37:30 -0500 Subject: [PATCH 1/3] ibmvscsis scsi target driver In-Reply-To: <20051017143644.GA9992@cs.umn.edu> References: <20051017020534.GA29968@hound.rchland.ibm.com> Message-ID: <20051017143730.GB9992@cs.umn.edu> ibmvscsis Main SCSI target module. This driver provides a SCSI target for IBM Power5 systems. Signed-off-by: Dave Boutcher Signed-off-by: Santiago Leon Signed-off-by: Linda Xie diff -uNr linux-2.6.14-rc4/drivers/scsi/ibmvscsi/ibmvscsis.c linux-2.6.14-rc4-ibmvscsis-test/drivers/scsi/ibmvscsi/ibmvscsis.c --- linux-2.6.14-rc4/drivers/scsi/ibmvscsi/ibmvscsis.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.14-rc4-ibmvscsis-test/drivers/scsi/ibmvscsi/ibmvscsis.c 2005-10-16 20:31:10.000000000 -0500 @@ -0,0 +1,3301 @@ +/************************************************************************ + + IBM eServer i/pSeries Virtual SCSI Target Driver + Copyright (C) 2003-2005 Dave Boutcher (boutcher at us.ibm.com) IBM Corp. + Santiago Leon (santil at us.ibm.com) IBM Corp. + Linda Xie (lxie at us.ibm.com) IBM Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA + + ********************************************************************** + This driver is a SCSI target that interoperates according to the PAPR + (POWER Architecture Platform Requirements) document. Currently it is + specific to POWER logical partitioning, however in the future it would + be nice to extend this to other virtualized environments. + + The architecture defines virtual adapters, whose configuration is + reported in the Open Firmware device tree. There are a number of + power hypervisor calls (such as h_reg_crq, to register the inter-OS + queue) that support the virtual adapters. + + Messages are sent between partitions on a "Command/Response Queue" + (CRQ), which is just a buffer of 16 byte entries in the receiver's memory. + Senders cannot access the buffer directly, but send messages by + making a hypervisor call and passing in the 16 bytes. The hypervisor + puts the message in the next 16 byte space in round-robin fashion, + turns on the high order bit of the message (the valid bit), and + generates an interrupt to the receiver (if interrupts are turned on.) + The receiver just turns off the valid bit when they have copied out + the message. + + The VSCSI client builds a SCSI Remote Protocol (SRP) Information Unit + (IU) (as defined in the T10 standard available at www.t10.org), gets + a DMA address for the message, and sends it to the target as the + payload of a CRQ message. The target DMAs the SRP IU and processes it, + including doing any additional data transfers. When it is done, it + DMAs the SRP response back to the same address as the request came from + and sends a CRQ message back to inform the client that the request has + completed.
+ + This target interoperates not only with the Linux client (ibmvscsi.c) + but also with AIX and OS/400 clients. Thus, while the implementation + can be changed, the underlying behaviour (protocol) is fixed. + + Configuration of the target is done via sysfs. The target driver + maps either block devices (e.g. IDE CD drive, loopback file, etc) to + SCSI LUNs, in which case it emulates the SCSI protocol and issues + kernel block device calls, or maps real SCSI devices, in which case + the SCSI commands are just passed on to the real SCSI device. +************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "viosrp.h" + +#define IBMVSCSIS_VERSION "1.0.0" + +#define DEFAULT_TIMEOUT (30*HZ) +#define TARGET_MAX_NAME_LEN 128 +#define INITIAL_SRP_LIMIT 16 +#define TARGETS_PER_BUS (64) +#define BUS_PER_ADAPTER (8) +#define DMA_BUFFER_CACHE_SIZE (16) +#define DMA_BUFFER_INIT_COUNT (4) +#define DMA_BUFFER_INIT_LEN (PAGE_SIZE*16) +#define MODE_SENSE_BUFFER_SIZE (512) +#define REFCOUNT_TIMEOUT_MS (250) /* 1/4 second */ +#define DEFAULT_MAX_SECTORS (512) /* 256 kb */ + +/* + * The following are lifted from usb.h + */ +static int ibmvscsis_debug = 0; +#define dbg(format, arg...) \ + do {\ + if (ibmvscsis_debug) printk(KERN_WARNING __FILE__ ": " \ + format , ## arg);\ + } while(0) +#define err(format, arg...) printk(KERN_ERR "ibmvscsis: " format , ## arg) +#define info(format, arg...) printk(KERN_INFO "ibmvscsis: " format , ## arg) +#define warn(format, arg...) printk(KERN_WARNING "ibmvscsis: " format , ## arg) + +/* + * Given an 8 byte LUN, return the first level bus/target/lun. 
+ * Today this doesn't support multi-level LUNs + */ +#define GETBUS(x) ((int)((((u64)(x)) >> 53) & 0x0007)) +#define GETTARGET(x) ((int)((((u64)(x)) >> 56) & 0x003f)) +#define GETLUN(x) ((int)((((u64)(x)) >> 48) & 0x001f)) + +/* + * sysfs attributes macro + */ +#define ATTR(_type, _name, _mode) \ + struct attribute vscsi_##_type##_##_name##_attr = { \ + .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE \ + }; + +/* + * Hypervisor calls. + */ +#define h_copy_rdma(l, sa, sb, da, db) \ + plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db) +#define h_send_crq(ua, l, h) \ + plpar_hcall_norets(H_SEND_CRQ, ua, l, h) +#define h_reg_crq(ua, tok, sz)\ + plpar_hcall_norets(H_REG_CRQ, ua, tok, sz); +#define h_free_crq(ua) \ + plpar_hcall_norets(H_FREE_CRQ, ua); + +MODULE_DESCRIPTION("IBM Virtual SCSI Target"); +MODULE_AUTHOR("Dave Boutcher"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(IBMVSCSIS_VERSION); + +/* + * These are fixed for the system and come from the Open Firmware device tree. + * We just store them here to save getting them every time. + */ +static char system_id[64] = ""; +static char partition_name[97] = "UNKNOWN"; +static unsigned int partition_number = -1; + +/* + * SCSI defined structure for inquiry data + */ +struct inquiry_data { + u8 qual_type; + u8 rmb_reserve; + u8 version; + u8 aerc_naca_hisup_format; + u8 addl_len; + u8 sccs_reserved; + u8 bque_encserv_vs_multip_mchngr_reserved; + u8 reladr_reserved_linked_cmdqueue_vs; + char vendor[8]; + char product[16]; + char revision[4]; + char vendor_specific[20]; + char reserved1[2]; + char version_descriptor[16]; + char reserved2[22]; + char unique[158]; +}; + +/* + * an RPA command/response transport queue. 
This is our structure + * that points to the actual queue (not architected by firmware) + */ +struct crq_queue { + struct viosrp_crq *msgs; + int size, cur; + dma_addr_t msg_token; + spinlock_t lock; +}; + +enum iue_flags { + V_IN_USE = 0, + V_DIOVER = 1, + V_WRITE = 2, + V_LINKED = 3, + V_ABORTED = 4, + V_FLYING = 5, + V_BARRIER = 6, + V_PARSED = 7, + V_DONE = 8, +}; + +/* + * This structure tracks our fundamental unit of work. Whenever + * an SRP Information Unit (IU) arrives, we track all the good stuff + * here + */ +struct iu_entry { + union viosrp_iu *iu; + struct server_adapter *adapter; + struct list_head next; + dma_addr_t iu_token; + struct { + dma_addr_t remote_token; + char *data_buffer; + dma_addr_t data_token; + long data_len; + struct vdev *vd; + unsigned long flags; + char *sense; + int data_out_residual_count; + int data_in_residual_count; + int ioerr; + int timeout; + struct scsi_request* sreq; + struct iu_entry *child[2]; + struct iu_entry *parent; + unsigned char child_status; + int rw; + long lba; + long len; + } req; +}; + +/* + * a pool of ius for use + */ +struct iu_pool { + spinlock_t lock; + struct list_head iu_entries; + struct iu_entry *list; + union viosrp_iu *iu_storage; + dma_addr_t iu_token; + u32 size; +}; + +/* + * Represents a single device that someone told us about + * that we treat as a LUN + */ +struct vdev { + struct list_head list; + char direct_scsi; + atomic_t refcount; + int disabled; + u64 lun; + struct kobject kobj; + char device_name[TARGET_MAX_NAME_LEN]; + struct { + struct block_device *bdev; + long blocksize; + long sectsize; + long lastlba; + unsigned char scsi_type; + int ro; + int removable; + int changed; + } b; + struct { + struct scsi_device *sdev; + } s; +}; + +/* + * Represents a bus. target #'s in SCSI are 6 bits long, + * so you can have 64 targets per bus + */ +struct vbus { + struct vdev *vdev[TARGETS_PER_BUS]; + atomic_t num_targets; + struct kobject kobj; + int bus_num; +}; + +/* + * Cached buffer. 
This is a data buffer that we have issued + * dma_map_foo on. Rather than do this every time we need a + * data buffer, keep a cache of mapped buffers around. + */ +struct dma_buffer { + dma_addr_t token; + char *addr; + size_t len; +}; + +/* all driver data associated with a host adapter */ +struct server_adapter { + struct device *dev; + struct vio_dev *dma_dev; + struct crq_queue queue; + struct work_struct crq_task; + struct tasklet_struct endio_tasklet; + struct iu_pool pool; + spinlock_t lock; + struct bio *bio_done; + struct bio *bio_donetail; + struct list_head cmd_queue; + struct vbus *vbus[BUS_PER_ADAPTER]; + int nvdevs; + int next_rsp_delta; + unsigned long liobn; + unsigned long riobn; + + atomic_t num_buses; + int max_sectors; + struct kobject stats_kobj; + DECLARE_BITMAP(dma_buffer_use, DMA_BUFFER_CACHE_SIZE); + struct dma_buffer dma_buffer[DMA_BUFFER_CACHE_SIZE]; + + /* Statistics only */ + atomic_t iu_count; /* number allocated */ + atomic_t bio_count; /* number allocated */ + atomic_t crq_processed; + atomic_t interrupts; + atomic_t read_processed; + atomic_t write_processed; + atomic_t buffers_allocated; + atomic_t errors; +}; + +/* + * We use the following struct, list, and lock to keep track of the scsi + * devices and their mapping to targets in the vscsis adapters. + */ +struct scsi_dev_node { + struct list_head node; + struct scsi_device *sdev; + struct vdev *vdev; +}; + +/* The state of a request */ +enum ibmvscsis_iue_state { + FREE_IU, + INFLIGHT, + RETRY, + RETRY_SPLIT_BUF, +}; + +static LIST_HEAD(scsi_dev_list); +static spinlock_t sdev_list_lock = SPIN_LOCK_UNLOCKED; + +/* ============================================================== + * Utility Routines + * ============================================================== + */ +/* + * return an 8 byte lun given a bus, target, lun. + * Today this only supports single level luns. 
+ */ +u64 make_lun(unsigned int bus, unsigned int target, unsigned int lun) +{ + u16 result = (0x8000 | + ((target & 0x003f) << 8) | + ((bus & 0x0007) << 5) | (lun & 0x001f)); + return ((u64) result) << 48; +} + +/* + * Get the control byte from a SCSI CDB + */ +static u8 getcontrolbyte(u8 * cdb) +{ + return cdb[COMMAND_SIZE(cdb[0]) - 1]; +} + +/* + * Get the "link" bit from a SCSI CDB + */ +static u8 getlink(struct iu_entry *iue) +{ + return (getcontrolbyte(iue->iu->srp.cmd.cdb) & 0x01); +} + +static int data_out_desc_size(struct srp_cmd *cmd) +{ + switch (cmd->data_out_format) { + case SRP_NO_BUFFER: + return 0; + case SRP_DIRECT_BUFFER: + return sizeof(struct memory_descriptor); + case SRP_INDIRECT_BUFFER: + return sizeof(struct indirect_descriptor) + + ((cmd->data_out_count - + 1) * sizeof(struct memory_descriptor)); + default: + err("client error. Invalid data_out_format %d\n", + cmd->data_out_format); + return 0; + } +} + +/* + * Given an SRP, figure out the "data in" or "data out" length + */ +static int vscsis_data_length(struct srp_cmd *cmd, int out) +{ + struct memory_descriptor *md; + struct indirect_descriptor *id; + int offset = cmd->additional_cdb_len * 4; + int switch_value; + + if (out) + switch_value = cmd->data_out_format; + else { + switch_value = cmd->data_in_format; + offset += data_out_desc_size(cmd); + } + + switch (switch_value) { + case SRP_NO_BUFFER: + return 0; + case SRP_DIRECT_BUFFER: + md = (struct memory_descriptor *)(cmd->additional_data + + offset); + return md->length; + case SRP_INDIRECT_BUFFER: + id = (struct indirect_descriptor *)(cmd->additional_data + + offset); + return id->total_length; + default: + err("invalid data format\n"); + return 0; + } +} + +/* + * Helper function to create a direct buffer descriptor from an indirect + * buffer descriptor of length 1 + */ +static void make_direct_buffer(struct srp_cmd *cmd) +{ + struct indirect_descriptor *id = (struct indirect_descriptor *) + (cmd->additional_data); + struct 
memory_descriptor *md = (struct memory_descriptor *) + (cmd->additional_data); + unsigned int length = id->list[0].length; + unsigned int address = id->list[0].virtual_address; + + if (cmd->data_out_format == SRP_INDIRECT_BUFFER) + cmd->data_out_format = SRP_DIRECT_BUFFER; + if (cmd->data_in_format == SRP_INDIRECT_BUFFER) + cmd->data_in_format = SRP_DIRECT_BUFFER; + + md->length = length; + md->virtual_address = address; + cmd->data_in_count = cmd->data_out_count = 0; +} + +/* + * Find the vdev structure from the LUN field in an SRP IUE + * Note that this routine bumps a refcount field in the vdev. + * Normally this is done when free_iu is called. + */ +static struct vdev *find_vscsis_vdev(struct iu_entry *iue) +{ + u16 *lun = (u16 *) & iue->iu->srp.cmd.lun; + u32 bus = (lun[0] & 0x00E0) >> 5; + u32 target = (lun[0] & 0x3F00) >> 8; + u32 slun = (lun[0] & 0x001F); + struct vdev *vd = NULL; + unsigned long flags; + + /* If asking for a lun other than 0, return nope */ + if (slun) + return NULL; + + /* Only from SRP CMD */ + if (iue->iu->srp.generic.type != SRP_CMD_TYPE) + return NULL; + + /* if not a recognized LUN format, return NULL */ + if ((lun[0] & 0xC000) != 0x8000) + return NULL; + + spin_lock_irqsave(&iue->adapter->lock, flags); + if (iue->adapter->vbus[bus] == NULL) + goto out_unlock; + + vd = iue->adapter->vbus[bus]->vdev[target]; + + if ((vd == NULL) || (vd->disabled)) { + vd = NULL; + goto out_unlock; + } + + if (vd) + atomic_inc(&vd->refcount); + +out_unlock: + spin_unlock_irqrestore(&iue->adapter->lock, flags); + return vd; +} + +/* ============================================================== + * Information Unit (IU) Pool Routines + * ============================================================== + */ +/* + * We keep a pool of IUs, this routine builds the pool. The pool is + * per-adapter. The size of the pool is negotiated as part of the SRP + * login, where we negotiate the number of requests (IUs) the client + * can send us. 
This routine is not synchronized, since it happens + * only at probe time. + */ +static int initialize_iu_pool(struct server_adapter *adapter, int size) +{ + struct iu_pool *pool = &adapter->pool; + int i; + + pool->size = size; + pool->lock = SPIN_LOCK_UNLOCKED; + INIT_LIST_HEAD(&pool->iu_entries); + + pool->list = kmalloc(pool->size * sizeof(*pool->list), GFP_KERNEL); + if (!pool->list) { + err("Error: Cannot allocate memory for IU list\n"); + return -ENOMEM; + } + memset(pool->list, 0, pool->size * sizeof(*pool->list)); + + pool->iu_storage = + dma_alloc_coherent(adapter->dev, + pool->size * sizeof(*pool->iu_storage), + &pool->iu_token, GFP_KERNEL); + if (!pool->iu_storage) { + err("Error: Cannot allocate memory for IU pool\n"); + kfree(pool->list); + return -ENOMEM; + } + + for (i = 0; i < pool->size; ++i) { + pool->list[i].iu = pool->iu_storage + i; + pool->list[i].iu_token = + pool->iu_token + sizeof(*pool->iu_storage) * i; + pool->list[i].adapter = adapter; + list_add_tail(&pool->list[i].next, &pool->iu_entries); + } + + return 0; +} + +/* + * Free the pool we allocated in initialize_iu_pool + */ +static void release_iu_pool(struct server_adapter *adapter) +{ + struct iu_pool *pool = &adapter->pool; + int i, in_use = 0; + for (i = 0; i < pool->size; ++i) + if (test_bit(V_IN_USE, &pool->list[i].req.flags)) + ++in_use; + if (in_use) + err("Releasing event pool with %d IUs still in use!\n", in_use); + + kfree(pool->list); + dma_free_coherent(adapter->dev, + pool->size * sizeof(*pool->iu_storage), + pool->iu_storage, pool->iu_token); +} + +/* + * Get an IU from the pool. Return NULL if the pool is empty. This + * routine is syncronized by the adapter lock. 
The routine sets all the + * important fields to 0 + */ +static struct iu_entry *get_iu(struct server_adapter *adapter) +{ + struct iu_entry *e; + unsigned long flags; + + spin_lock_irqsave(&adapter->pool.lock, flags); + if (!list_empty(&adapter->pool.iu_entries)) { + e = list_entry(adapter->pool.iu_entries.next, struct iu_entry, + next); + list_del(adapter->pool.iu_entries.next); + + if (test_bit(V_IN_USE, &e->req.flags)) + err("Found in-use iu in free pool!"); + + memset(&e->req, 0, sizeof(e->req)); + + __set_bit(V_IN_USE, &e->req.flags); + } else + e = NULL; + + spin_unlock_irqrestore(&adapter->pool.lock, flags); + atomic_inc(&adapter->iu_count); + return e; +} + +/* + * Return an IU to the pool. This routine is synchronized by the + * adapter lock + */ +static void free_iu(struct iu_entry *iue) +{ + /* iue's with parents are kmalloc'ed, not picked from the pool */ + if (iue->req.parent) { + kfree(iue); + return; + } + + if (iue->req.vd) + atomic_dec(&iue->req.vd->refcount); + + if (!test_bit(V_IN_USE, &iue->req.flags)) + warn("Internal error, freeing iue twice!\n"); + else { + __clear_bit(V_IN_USE, &iue->req.flags); + list_add_tail(&iue->next, &iue->adapter->pool.iu_entries); + } + atomic_dec(&iue->adapter->iu_count); +} + +/* + * Allocates two iue's and splits the buffer descriptors between them + */ +static int split_iu(struct iu_entry* iue) +{ + int length = 0, i, child1i = 0, count; + struct iu_entry *child1, *child2; + struct iu_entry *child_iue; + struct srp_cmd *child_cmd; + struct srp_cmd *cmd = &iue->iu->srp.cmd; + struct indirect_descriptor *child_id; + struct indirect_descriptor *id = (struct indirect_descriptor *) + (cmd->additional_data); + + if (cmd->data_out_format && cmd->data_in_format) { + err("Don't support bidirectional buffers yet\n"); + return -EPERM; + } + + dbg("splitting %p len %lx incount %x outcount %x lba %lx\n", iue, + iue->req.len, cmd->data_in_count, cmd->data_out_count, + iue->req.lba); + + if (iue->req.len < PAGE_SIZE) { + 
err("Can't split buffers less than a page\n"); + return -EPERM; + } + + child1 = kmalloc(sizeof(struct iu_entry) + sizeof(union viosrp_iu), + GFP_KERNEL); + if (child1 == NULL) + return -ENOMEM; + + child2 = kmalloc(sizeof(struct iu_entry) + sizeof(union viosrp_iu), + GFP_KERNEL); + if (child2 == NULL) { + free_iu(child1); + return -ENOMEM; + } + + child1->iu = (union viosrp_iu *)((char*)child1 + sizeof(*child1)); + child2->iu = (union viosrp_iu *)((char*)child2 + sizeof(*child2)); + child1->adapter = child2->adapter = iue->adapter; + memset(&child1->req, 0, sizeof(child1->req)); + memset(&child2->req, 0, sizeof(child2->req)); + memset(&child1->iu->srp.cmd, 0, sizeof(struct srp_cmd)); + memset(&child2->iu->srp.cmd, 0, sizeof(struct srp_cmd)); + __set_bit(V_IN_USE, &child1->req.flags); + __set_bit(V_IN_USE, &child2->req.flags); + + /* Split a direct buffer */ + if (cmd->data_out_format == SRP_DIRECT_BUFFER || + cmd->data_in_format == SRP_DIRECT_BUFFER) { + struct memory_descriptor *md = (struct memory_descriptor *) + (cmd->additional_data); + struct memory_descriptor *ch1_md = (struct memory_descriptor *) + (child1->iu->srp.cmd.additional_data); + struct memory_descriptor *ch2_md = (struct memory_descriptor *) + (child2->iu->srp.cmd.additional_data); + + int npages = (md->length - 1) / PAGE_SIZE + 1; + ch1_md->length = ((npages + 1) / 2) * PAGE_SIZE; + ch2_md->length = md->length - ch1_md->length; + ch1_md->virtual_address = md->virtual_address; + ch2_md->virtual_address = md->virtual_address + ch1_md->length; + child1->req.len = ch1_md->length; + child2->req.len = ch2_md->length; + goto splitted; + } + + child_iue = child1; + child_cmd = &child1->iu->srp.cmd; + child_id = (struct indirect_descriptor *) (child_cmd->additional_data); + count = iue->req.rw ? 
cmd->data_out_count : cmd->data_in_count; + + for (i = 0; i < count ; i++) { + child_id->list[i - child1i].length = id->list[i].length; + child_id->list[i - child1i].virtual_address = + id->list[i].virtual_address; + if (iue->req.rw) + child_cmd->data_out_count++; + else + child_cmd->data_in_count++; + + child_id->total_length += id->list[i].length; + length += id->list[i].length; + child_iue->req.len += id->list[i].length; + if (!child1i && (length >= iue->req.len / 2 || + i >= count - 2)) { + child_iue = child2; + child_cmd = &child2->iu->srp.cmd; + child_id = (struct indirect_descriptor *) + (child_cmd->additional_data); + child1i = i + 1; + } + } + +splitted: + child1->iu->srp.cmd.data_out_format = iue->iu->srp.cmd.data_out_format; + child1->iu->srp.cmd.data_in_format = iue->iu->srp.cmd.data_in_format; + child2->iu->srp.cmd.data_out_format = iue->iu->srp.cmd.data_out_format; + child2->iu->srp.cmd.data_in_format = iue->iu->srp.cmd.data_in_format; + + if (child1->iu->srp.cmd.data_out_count == 1 || + child1->iu->srp.cmd.data_in_count == 1) + make_direct_buffer(&child1->iu->srp.cmd); + if (child2->iu->srp.cmd.data_out_count == 1 || + child2->iu->srp.cmd.data_in_count == 1) + make_direct_buffer(&child2->iu->srp.cmd); + + child1->req.rw = child2->req.rw = iue->req.rw; + __set_bit(V_PARSED, &child1->req.flags); + __set_bit(V_PARSED, &child2->req.flags); + child1->req.lba = iue->req.lba; + child2->req.lba = iue->req.lba + (child1->req.len >> 9); + + iue->req.child[0] = child1; + iue->req.child[1] = child2; + child1->req.parent = child2->req.parent = iue; + child1->req.vd = child2->req.vd = iue->req.vd; + + return 0; +} + +/* ============================================================== + * Data buffer cache routines. Note that we don't NEED a + * data cache, but this eliminates mapping and unmapping DMA + * addresses for data buffers on every request, which can be quite + * expensive on a PPC64 system. 
santi hates these routines (that + * just do first-fit allocation) but they are Good Enough (tm) until + * he writes something more elegant. + * ============================================================== + */ +/* + * Get some data buffers to start. This doesn't lock the adapter structure! + */ +static void init_data_buffer(struct server_adapter *adapter) +{ + int i; + + for (i = 0; i < DMA_BUFFER_INIT_COUNT; i++) { + if (adapter->dma_buffer[i].addr == NULL) { + adapter->dma_buffer[i].addr = + dma_alloc_coherent(adapter->dev, + DMA_BUFFER_INIT_LEN, + &adapter->dma_buffer[i]. + token, + GFP_KERNEL); + adapter->dma_buffer[i].len = DMA_BUFFER_INIT_LEN; + atomic_inc(&adapter->buffers_allocated); + } + } +} + +/* + * Get a memory buffer that includes a mapped DMA address. Just use first-fit + */ +static void get_data_buffer(char **buffer, dma_addr_t * data_token, size_t len, + struct server_adapter *adapter) +{ + int i; + + for (i = 0; i < DMA_BUFFER_CACHE_SIZE; i++) { + if ((adapter->dma_buffer[i].addr) && + (adapter->dma_buffer[i].len >= len) && + (!test_and_set_bit(i, adapter->dma_buffer_use))) { + *buffer = adapter->dma_buffer[i].addr; + *data_token = adapter->dma_buffer[i].token; + return; + } + } + + /* Couldn't get a buffer! Try and get a new one */ + *buffer = dma_alloc_coherent(adapter->dev, len, data_token, GFP_KERNEL); + atomic_inc(&adapter->buffers_allocated); + return; +} + +/* + * Free a memory buffer that includes a mapped DMA address. 
+ */ +static void free_data_buffer(char *buffer, dma_addr_t data_token, size_t len, + struct server_adapter *adapter) +{ + int i; + + /* First see if this buffer is already in the cache */ + for (i = 0; i < DMA_BUFFER_CACHE_SIZE; i++) { + if (adapter->dma_buffer[i].addr == buffer) { + if (adapter->dma_buffer[i].token != data_token) + err("Inconsistent data buffer pool info!\n"); + if (!test_and_clear_bit(i, adapter->dma_buffer_use)) + err("Freeing data buffer twice!\n"); + return; + } + } + + /* See if there is an empty slot in our list */ + for (i = 0; i < DMA_BUFFER_CACHE_SIZE; i++) { + if (!test_and_set_bit(i, adapter->dma_buffer_use)) { + if (adapter->dma_buffer[i].addr == NULL) { + adapter->dma_buffer[i].addr = buffer; + adapter->dma_buffer[i].token = data_token; + adapter->dma_buffer[i].len = len; + smp_mb__before_clear_bit(); + clear_bit(i, adapter->dma_buffer_use); + return; + } else + clear_bit(i, adapter->dma_buffer_use); + } + } + + /* Now see if there is a smaller buffer we should throw out */ + for (i = 0; i < DMA_BUFFER_CACHE_SIZE; i++) { + if (!test_and_set_bit(i, adapter->dma_buffer_use)) { + if (adapter->dma_buffer[i].len < len) { + dma_free_coherent(adapter->dev, + adapter->dma_buffer[i].len, + adapter->dma_buffer[i].addr, + adapter->dma_buffer[i].token); + + atomic_dec(&adapter->buffers_allocated); + + adapter->dma_buffer[i].addr = buffer; + adapter->dma_buffer[i].token = data_token; + adapter->dma_buffer[i].len = len; + smp_mb__before_clear_bit(); + clear_bit(i, adapter->dma_buffer_use); + return; + } else + clear_bit(i, adapter->dma_buffer_use); + } + } + + /* No space to cache this. 
Give it back to the kernel */ + dma_free_coherent(adapter->dev, len, buffer, data_token); + atomic_dec(&adapter->buffers_allocated); +} + +/* + * Release all the data buffers + */ +static void release_data_buffer(struct server_adapter *adapter) +{ + int i; + int free_in_use = 0; + + for (i = 0; i < DMA_BUFFER_CACHE_SIZE; i++) { + if (adapter->dma_buffer[i].addr != NULL) { + if (test_bit(i, adapter->dma_buffer_use)) + free_in_use++; + dma_free_coherent(adapter->dev, + adapter->dma_buffer[i].len, + adapter->dma_buffer[i].addr, + adapter->dma_buffer[i].token); + + atomic_dec(&adapter->buffers_allocated); + } + } + + if (free_in_use) + err("Freeing %d in-use data buffers\n", free_in_use); + return; +} + +/* ============================================================== + * Inter-OS send and receive routines + * ============================================================== + */ +/* + * Get a CRQ from the inter-partition queue. + */ +static struct viosrp_crq *crq_queue_next_crq(struct crq_queue *queue) +{ + struct viosrp_crq *crq; + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + crq = &queue->msgs[queue->cur]; + if (crq->valid & 0x80) { + if (++queue->cur == queue->size) + queue->cur = 0; + } + else + crq = NULL; + spin_unlock_irqrestore(&queue->lock, flags); + + return crq; +} + +/* + * Send an IU to another partition using the CRQ. 
+ */ +static int send_iu(struct iu_entry *iue, u64 length, u8 format) +{ + long rc, rc1; + union { + struct viosrp_crq cooked; + u64 raw[2]; + } crq; + + /* First copy the SRP */ + rc = h_copy_rdma(length, + iue->adapter->liobn, + iue->iu_token, + iue->adapter->riobn, iue->req.remote_token); + + if (rc) + err("Send_iu: Error %ld transferring data to client\n", rc); + + crq.cooked.valid = 0x80; + crq.cooked.format = format; + crq.cooked.reserved = 0x00; + crq.cooked.timeout = 0x00; + crq.cooked.IU_length = length; + crq.cooked.IU_data_ptr = iue->iu->srp.generic.tag; + + if (rc == 0) + crq.cooked.status = 0x99; /* Just needs to be non-zero */ + else + crq.cooked.status = 0x00; + + rc1 = h_send_crq(iue->adapter->dma_dev->unit_address, + crq.raw[0], + crq.raw[1]); + + if (rc1) { + err("Error %ld sending response to client\n", rc1); + return rc1; + } + + return rc; +} + +/* + * Send data to a single SRP memory descriptor + * Returns amount of data sent, or negative value on error + */ +static long send_md_data(dma_addr_t stoken, int len, + struct memory_descriptor *md, + struct server_adapter *adapter) +{ + int tosend; + long rc; + + if (len < md->length) + tosend = len; + else + tosend = md->length; + + rc = h_copy_rdma(tosend, + adapter->liobn, + stoken, adapter->riobn, md->virtual_address); + + if (rc != H_Success) { + err("send_md_data: Error %ld transferring data to client\n", + rc); + return -EIO; + } + + return tosend; +} + +/* Send data to a list of memory descriptors + */ +static int send_md_list(int num_entries, int tosendlen, + dma_addr_t stoken, + struct memory_descriptor *md, + struct iu_entry *iue) +{ + int i, thislen, bytes; + int sentlen = 0; + + for (i = 0; ((i < num_entries) && (tosendlen)); i++) { + if (tosendlen > md[i].length) + thislen = md[i].length; + else + thislen = tosendlen; + + bytes = send_md_data(stoken + sentlen, thislen, + md + i, iue->adapter); + if (bytes < 0) + return bytes; + + if (bytes != thislen) + warn("Error: Tried to send %d, 
sent %d\n", thislen, + bytes); + + sentlen += bytes; + tosendlen -= bytes; + } + return sentlen; +} + +/* + * Send data to the SRP data_in buffers + * Returns amount of data sent, or negative value on error + */ +static long send_cmd_data(dma_addr_t stoken, int len, struct iu_entry *iue) +{ + struct srp_cmd *cmd = &iue->iu->srp.cmd; + struct memory_descriptor *md = NULL, *ext_list = NULL; + struct indirect_descriptor *id = NULL; + dma_addr_t data_token; + int offset = 0; + int sentlen = 0; + int num_md, rc; + + offset = cmd->additional_cdb_len * 4 + data_out_desc_size(cmd); + + switch (cmd->data_in_format) { + case SRP_NO_BUFFER: + return 0; + case SRP_DIRECT_BUFFER: + md = (struct memory_descriptor *)(cmd->additional_data + + offset); + sentlen = send_md_data(stoken, len, md, iue->adapter); + len -= sentlen; + if (len) { + __set_bit(V_DIOVER, &iue->req.flags); + iue->req.data_in_residual_count = len; + } + return sentlen; + } + + if (cmd->data_in_format != SRP_INDIRECT_BUFFER) { + err("client error Invalid data_in_format %d\n", + cmd->data_in_format); + return 0; + } + + id = (struct indirect_descriptor *)(cmd->additional_data + offset); + num_md = id->head.length / sizeof(struct memory_descriptor); + + if (num_md == cmd->data_in_count) + md = &id->list[0]; + + else { + ext_list = dma_alloc_coherent(iue->adapter->dev, + id->head.length, + &data_token, + GFP_KERNEL); + if (!ext_list) { + err("Error dma_alloc_coherent indirect table!\n"); + return 0; + } + + /* get indirect memory descriptor table from initiator */ + rc = h_copy_rdma(id->head.length, + iue->adapter->riobn, + id->head.virtual_address, + iue->adapter->liobn, + data_token); + if (rc != H_Success) { + err("Error copying indirect table rc %d\n", rc); + return 0; + } + + md = (struct memory_descriptor *)ext_list; + } + + /* Work through the memory descriptor list */ + sentlen = send_md_list(num_md, len, stoken, md, iue); + if (sentlen < 0 ) + return sentlen; + + len -= sentlen; + + if (len) { + 
__set_bit(V_DIOVER, &iue->req.flags); + iue->req.data_in_residual_count = len; + } + + if (ext_list) + dma_free_coherent(iue->adapter->dev, + id->head.length, ext_list, data_token); + + return sentlen; +} + +/* + * Get data from the other partition from a single SRP memory descriptor + * Returns amount of data received, or negative value on error + */ +static long get_md_data(dma_addr_t ttoken, int len, + struct memory_descriptor *md, + struct server_adapter *adapter) +{ + int toget; + long rc; + + if (len < md->length) + toget = len; + else + toget = md->length; + + rc = h_copy_rdma(toget, + adapter->riobn, + md->virtual_address, adapter->liobn, ttoken); + + if (rc != H_Success) { + err("get_md_data: Error %ld transferring data from client\n", + rc); + return -EIO; + } + + return toget; +} + +static int get_md_list(int num_entries, int togetlen, + dma_addr_t stoken, + struct memory_descriptor *md, + struct iu_entry *iue) +{ + int i, thislen, bytes; + int gotlen = 0; + + for (i = 0; ((i < num_entries) && (togetlen)); i++) { + if (togetlen > md[i].length) + thislen = md[i].length; + else + thislen = togetlen; + + bytes = get_md_data(stoken + gotlen, thislen, md + i, + iue->adapter); + if (bytes < 0) + return bytes; + + if (bytes != thislen) + err("Partial data got from client (%d/%d)\n", + bytes, thislen); + + gotlen += bytes; + togetlen -= bytes; + } + + return gotlen; +} + +/* + * Get data from an SRP data in area. 
+ * Returns amount of data received, or negative value on error
+ *
+ * The data-out descriptor is read from cmd->additional_data, past the
+ * additional CDB bytes (additional_cdb_len * 4).  A direct descriptor is
+ * fetched with a single get_md_data() call.  For an indirect descriptor
+ * the list embedded in the IU is used when it holds data_out_count
+ * entries; otherwise the table is first copied from the client with
+ * h_copy_rdma() into a temporary coherent buffer.
+ *
+ * Note: an invalid data_out_format, or a failure to allocate or copy the
+ * indirect table, is logged and reported as 0 bytes rather than as a
+ * negative value.  Callers detect it by comparing the result with the
+ * length they asked for.
+ */
+static long get_cmd_data(dma_addr_t stoken, int len, struct iu_entry *iue)
+{
+	struct srp_cmd *cmd = &iue->iu->srp.cmd;
+	struct memory_descriptor *md, *ext_list;
+	struct indirect_descriptor *id;
+	dma_addr_t data_token;
+	int offset;
+	int num_md, rc;
+	int gotlen;
+
+	offset = cmd->additional_cdb_len * 4;
+
+	switch (cmd->data_out_format) {
+	case SRP_NO_BUFFER:
+		return 0;
+	case SRP_DIRECT_BUFFER:
+		md = (struct memory_descriptor *)(cmd->additional_data
+						  + offset);
+		return get_md_data(stoken, len, md, iue->adapter);
+	}
+
+	if (cmd->data_out_format != SRP_INDIRECT_BUFFER) {
+		err("client error: Invalid data_out_format %d\n",
+		    cmd->data_out_format);
+		return 0;
+	}
+
+	id = (struct indirect_descriptor *)(cmd->additional_data + offset);
+
+	num_md = id->head.length / sizeof(struct memory_descriptor);
+
+	/* Whole descriptor list is embedded in the IU: use it in place */
+	if (num_md == cmd->data_out_count)
+		return get_md_list(num_md, len, stoken, &id->list[0], iue);
+
+	/* get indirect table */
+	ext_list = dma_alloc_coherent(iue->adapter->dev,
+				      id->head.length,
+				      &data_token,
+				      GFP_KERNEL);
+	if (!ext_list) {
+		err("Error dma_alloc_coherent indirect table!\n");
+		return 0;
+	}
+
+	/* get indirect memory descriptor table */
+	rc = h_copy_rdma(id->head.length,
+			 iue->adapter->riobn,
+			 id->head.virtual_address,
+			 iue->adapter->liobn,
+			 data_token);
+	if (rc != H_Success) {
+		err("Error copying indirect table rc %d\n", rc);
+		gotlen = 0;
+	} else
+		gotlen = get_md_list(num_md, len, stoken, ext_list, iue);
+
+	/* the temporary table is released on both paths */
+	dma_free_coherent(iue->adapter->dev,
+			  id->head.length,
+			  ext_list, data_token);
+
+	return gotlen;
+}
+
+/*
+ * Send an SRP response that includes sense data
+ *
+ * iue:    the request being answered; its IU buffer is overwritten with
+ *         the response
+ * status: sense key (NO_SENSE for success), or the SCSI status to report
+ *         when iue->req.sense supplies the sense data
+ * asc:    additional sense code placed in generated sense data
+ *
+ * If the request is a child of a split request, no response is sent for
+ * it: the child is detached from its parent, the first non-zero child
+ * status is remembered, and once both children are gone the parent is
+ * answered (or, if the parent was aborted, its credit is carried over in
+ * next_rsp_delta).
+ *
+ * Always returns 0.
+ */
+static long send_rsp(struct iu_entry *iue,
+		     unsigned char status,
+		     unsigned char asc)
+{
+	u8 *sense = iue->iu->srp.rsp.sense_and_response_data;
+	u64 tag = iue->iu->srp.generic.tag;
+	union viosrp_iu *iu = iue->iu;
+	unsigned long flags;
+
+	if (status != NO_SENSE)
+		atomic_inc(&iue->adapter->errors);
+
+	if (iue->req.parent) {
+		struct iu_entry *parent = iue->req.parent;
+
+		if (parent->req.child[0] == iue)
+			parent->req.child[0] = NULL;
+		else if (parent->req.child[1] == iue)
+			parent->req.child[1] = NULL;
+		else
+			err("parent %p doesn't know child!\n", iue->req.parent);
+
+		/* get only the first error */
+		if (status && !parent->req.child_status)
+			parent->req.child_status = status;
+
+		/* all children are done, send response */
+		if (!parent->req.child[0] && !parent->req.child[1]) {
+			if (!test_bit(V_ABORTED, &parent->req.flags))
+				send_rsp(parent, parent->req.child_status,
+					 0x00);
+			else {
+				/*
+				 * next_rsp_delta is protected by the adapter
+				 * lock everywhere else; take it here too.
+				 */
+				spin_lock_irqsave(&iue->adapter->lock, flags);
+				iue->adapter->next_rsp_delta++;
+				spin_unlock_irqrestore(&iue->adapter->lock,
+						       flags);
+			}
+
+			__set_bit(V_DONE, &parent->req.flags);
+			kblockd_schedule_work(&iue->adapter->crq_task);
+		}
+		return 0;
+	}
+
+	/* If the linked bit is on and status is good */
+	if (test_bit(V_LINKED, &iue->req.flags) && (status == NO_SENSE))
+		status = 0x10;
+
+	/* tag was saved above because the IU is reused for the response */
+	memset(iu, 0, sizeof(struct srp_rsp));
+	iu->srp.rsp.type = SRP_RSP_TYPE;
+	spin_lock_irqsave(&iue->adapter->lock, flags);
+	iu->srp.rsp.request_limit_delta = 1 + iue->adapter->next_rsp_delta;
+	iue->adapter->next_rsp_delta = 0;
+	spin_unlock_irqrestore(&iue->adapter->lock, flags);
+	iu->srp.rsp.tag = tag;
+
+	iu->srp.rsp.diover = test_bit(V_DIOVER, &iue->req.flags) ? 1 : 0;
+
+	iu->srp.rsp.data_in_residual_count = iue->req.data_in_residual_count;
+	iu->srp.rsp.data_out_residual_count = iue->req.data_out_residual_count;
+
+	iu->srp.rsp.rspvalid = 0;
+
+	iu->srp.rsp.response_data_list_length = 0;
+
+	if (status && !iue->req.sense) {
+		/* no sense data supplied: build fixed-format sense here */
+		iu->srp.rsp.status = SAM_STAT_CHECK_CONDITION;
+		iu->srp.rsp.snsvalid = 1;
+		iu->srp.rsp.sense_data_list_length = 18;
+
+		/* Valid bit and 'current errors' */
+		sense[0] = (0x1 << 7 | 0x70);
+
+		/* Sense key */
+		sense[2] = status;
+
+		/* Additional sense length */
+		sense[7] = 0xa;	/* 10 bytes */
+
+		/* Additional sense code */
+		sense[12] = asc;
+	} else {
+		if (iue->req.sense) {
+			iu->srp.rsp.snsvalid = 1;
+			iu->srp.rsp.sense_data_list_length =
+			    SCSI_SENSE_BUFFERSIZE;
+			memcpy(sense, iue->req.sense, SCSI_SENSE_BUFFERSIZE);
+		}
+		iu->srp.rsp.status = status;
+	}
+
+	send_iu(iue, sizeof(iu->srp.rsp), VIOSRP_SRP_FORMAT);
+
+	return 0;
+}
+
+/* ==============================================================
+ * Block device endio routines (top and bottom)
+ * ==============================================================
+ */
+/*
+ * Complete a block request: answer the client (unless the request was
+ * aborted), then release the data buffer and the iu_entry.
+ */
+static void finish_iue(struct iu_entry *iue)
+{
+	int bytes;
+	unsigned long flags;
+	struct server_adapter *adapter = iue->adapter;
+
+	/* Send back the SRP and data if this request was NOT
+	 * aborted
+	 */
+	if (test_bit(V_ABORTED, &iue->req.flags)) {
+		spin_lock_irqsave(&adapter->lock, flags);
+		adapter->next_rsp_delta++;
+		spin_unlock_irqrestore(&adapter->lock, flags);
+		goto out;
+	}
+
+	if (iue->req.ioerr) {
+		err("Block operation failed\n");
+		send_rsp(iue, HARDWARE_ERROR, 0x00);
+		goto out;
+	}
+
+	if (test_bit(V_WRITE, &iue->req.flags)) {
+		send_rsp(iue, NO_SENSE, 0x00);
+		goto out;
+	}
+
+	/* return data if this was a read */
+	bytes = send_cmd_data(iue->req.data_token,
+			      iue->req.data_len,
+			      iue);
+	if (bytes != iue->req.data_len) {
+		err("Error sending data on response (tried %ld, sent %d)\n",
+		    iue->req.data_len, bytes);
+		send_rsp(iue, ABORTED_COMMAND, 0x00);
+	} else
+		send_rsp(iue, NO_SENSE, 0x00);
+
+out:
+	free_data_buffer(iue->req.data_buffer,
+			 iue->req.data_token, iue->req.data_len,
+			 adapter);
+	spin_lock_irqsave(&adapter->lock, flags);
+	free_iu(iue);
+	spin_unlock_irqrestore(&adapter->lock, flags);
+}
+
+/*
+ * the routine that gets called on end_io of our bios.  We basically
+ * schedule the processing to be done in our task, since we don't want
+ * do things like RDMA in someone else's interrupt handler
+ *
+ * Each iu request may result in multiple bio requests.  only proceed
+ * when all the bio requests have done.
+ *
+ * Returns 1 while the bio still has bytes outstanding (bi_size != 0),
+ * 0 once the bio has been queued on the adapter's done list.
+ */
+static int ibmvscsis_end_io(struct bio *bio, unsigned int nbytes, int error)
+{
+	struct iu_entry *iue = (struct iu_entry *)bio->bi_private;
+	struct server_adapter *adapter = iue->adapter;
+	unsigned long flags;
+
+	if (bio->bi_size)
+		return 1;
+
+	/* remember the failure; finish_iue() reports it to the client */
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		iue->req.ioerr = 1;
+
+	/* Add the bio to the done queue */
+	spin_lock_irqsave(&adapter->lock, flags);
+	if (adapter->bio_donetail) {
+		adapter->bio_donetail->bi_next = bio;
+		adapter->bio_donetail = bio;
+	} else
+		adapter->bio_done = adapter->bio_donetail = bio;
+	bio->bi_next = NULL;
+	spin_unlock_irqrestore(&adapter->lock, flags);
+
+	/* Schedule the task */
+	tasklet_schedule(&adapter->endio_tasklet);
+
+	return 0;
+}
+
+/*
+ * Process BH buffer completions.
When the end_io routine gets called
+ * we queue the bio on an internal queue and start a task to process them
+ *
+ * data is the server_adapter pointer cast to unsigned long.  Each bio on
+ * the adapter's done list is unlinked under the adapter lock, its
+ * iu_entry is removed from the in-flight list and completed with
+ * finish_iue(); the crq task is scheduled once the list is drained.
+ */
+static void endio_task(unsigned long data)
+{
+	struct server_adapter *adapter = (struct server_adapter *)data;
+	struct iu_entry *iue;
+	struct bio *bio;
+	unsigned long flags;
+
+	do {
+		iue = NULL;
+		spin_lock_irqsave(&adapter->lock, flags);
+		bio = adapter->bio_done;
+		if (bio) {
+			if (bio == adapter->bio_donetail)
+				adapter->bio_donetail = NULL;
+			adapter->bio_done = bio->bi_next;
+			bio->bi_next = NULL;
+
+			/* Remove this iue from the in-flight list */
+			iue = (struct iu_entry *)bio->bi_private;
+			if (!test_bit(V_IN_USE, &iue->req.flags)) {
+				err("Internal error! freed iue in bio!!!\n");
+				spin_unlock_irqrestore(&adapter->lock, flags);
+				return;
+			}
+
+			list_del(&iue->next);
+		}
+
+		spin_unlock_irqrestore(&adapter->lock, flags);
+
+		/* completion itself runs without the adapter lock held */
+		if (iue) {
+			finish_iue(iue);
+			bio_put(bio);
+			atomic_dec(&adapter->bio_count);
+		}
+	} while (bio);
+	kblockd_schedule_work(&adapter->crq_task);
+}
+
+/* ==============================================================
+ * SCSI Command Emulation Routines
+ * ==============================================================
+ */
+/*
+ * Process an inquiry SCSI Command
+ *
+ * Builds the reply in a temporary coherent buffer and sends it to the
+ * client.  With a valid device the standard page, the supported-VPD-pages
+ * page and the serial number page are handled; any other page is
+ * rejected with ILLEGAL_REQUEST.  Without a device the peripheral
+ * qualifier/type byte is set to 0x7F.
+ *
+ * Returns RETRY when the buffer cannot be allocated, FREE_IU otherwise.
+ * The temporary buffer is released on every path.
+ */
+static int process_inquiry(struct iu_entry *iue)
+{
+	struct inquiry_data *id;
+	dma_addr_t data_token;
+	u8 *raw_id;
+	int bytes;
+	unsigned long flags;
+
+	id = dma_alloc_coherent(iue->adapter->dev, sizeof(*id), &data_token,
+				GFP_KERNEL);
+
+	if (id == NULL) {
+		err("Not able to get inquiry buffer, retrying later\n");
+		return RETRY;
+	}
+
+	raw_id = (u8 *)id;
+	memset(id, 0, sizeof(*id));
+
+	/* If we have a valid device */
+	if (iue->req.vd) {
+		/* Standard inquiry page */
+		if ((iue->iu->srp.cmd.cdb[1] == 0x00) &&
+		    (iue->iu->srp.cmd.cdb[2] == 0x00)) {
+			dbg("  inquiry returning device\n");
+			id->qual_type = iue->req.vd->b.scsi_type;
+			id->rmb_reserve =
+			    iue->req.vd->b.removable ? 0x80 : 0x00;
+			id->version = 0x84;	/* ISO/IE */
+			id->aerc_naca_hisup_format = 0x22;/* naca & fmt 0x02 */
+			id->addl_len = sizeof(*id) - 4;
+			id->bque_encserv_vs_multip_mchngr_reserved = 0x00;
+			id->reladr_reserved_linked_cmdqueue_vs = 0x02;/*CMDQ*/
+			/*
+			 * The literals are space padded to the full copy
+			 * length (8 and 16 bytes) so memcpy never reads
+			 * past the end of the string constant.
+			 */
+			memcpy(id->vendor, "IBM     ", 8);
+			/* Don't even ask about the next bit.  AIX uses
+			 * hardcoded device naming to recognize device types
+			 * and their client won't work unless we use VOPTA and
+			 * VDASD.
+			 */
+			if (id->qual_type == TYPE_ROM)
+				memcpy(id->product, "VOPTA blkdev    ", 16);
+			else
+				memcpy(id->product, "VDASD blkdev    ", 16);
+			memcpy(id->revision, "0001", 4);
+			snprintf(id->unique,sizeof(id->unique),
+				 "IBM-VSCSI-%s-P%d-%x-%d-%d-%d\n",
+				 system_id,
+				 partition_number,
+				 iue->adapter->dma_dev->unit_address,
+				 GETBUS(iue->req.vd->lun),
+				 GETTARGET(iue->req.vd->lun),
+				 GETLUN(iue->req.vd->lun));
+		} else if ((iue->iu->srp.cmd.cdb[1] == 0x01) &&
+			   (iue->iu->srp.cmd.cdb[2] == 0x00)) {
+			/* Supported VPD pages */
+			id->qual_type = iue->req.vd->b.scsi_type;
+			raw_id[1] = 0x80; /* page */
+			raw_id[2] = 0x00; /* reserved */
+			raw_id[3] = 0x03; /* length */
+			raw_id[4] = 0x00; /* page 0 */
+			raw_id[5] = 0x80; /* serial number page */
+		} else if ((iue->iu->srp.cmd.cdb[1] == 0x01) &&
+			   (iue->iu->srp.cmd.cdb[2] == 0x80)) {
+			/* serial number page */
+			id->qual_type = iue->req.vd->b.scsi_type;
+			raw_id[1] = 0x80; /* page */
+			raw_id[2] = 0x00; /* reserved */
+			snprintf((char *)(raw_id+4),
+				 sizeof(*id)-4,
+				 "IBM-VSCSI-%s-P%d-%x-%d-%d-%d\n",
+				 system_id,
+				 partition_number,
+				 iue->adapter->dma_dev->unit_address,
+				 GETBUS(iue->req.vd->lun),
+				 GETTARGET(iue->req.vd->lun),
+				 GETLUN(iue->req.vd->lun));
+			raw_id[3] = strlen((char *)raw_id+4);
+		} else {
+			/* Some unsupported data */
+			err("unknown inquiry page %d %d\n",
+			    iue->iu->srp.cmd.cdb[1],
+			    iue->iu->srp.cmd.cdb[2]);
+			send_rsp(iue, ILLEGAL_REQUEST, 0x24);
+			/* nothing is sent, but the buffer must still go */
+			dma_free_coherent(iue->adapter->dev, sizeof(*id), id,
+					  data_token);
+			return FREE_IU;
+		}
+	} else {
+		dbg("  inquiry returning no device\n");
+		id->qual_type = 0x7F;	/* Not supported, no device */
+	}
+
+	if (test_bit(V_ABORTED, &iue->req.flags)) {
+		spin_lock_irqsave(&iue->adapter->lock, flags);
+		iue->adapter->next_rsp_delta++;
+		spin_unlock_irqrestore(&iue->adapter->lock, flags);
+		dma_free_coherent(iue->adapter->dev, sizeof(*id), id,
+				  data_token);
+		return FREE_IU;
+	}
+
+	bytes = send_cmd_data(data_token, sizeof(*id), iue);
+
+	dma_free_coherent(iue->adapter->dev, sizeof(*id), id, data_token);
+
+	if (bytes < 0)
+		send_rsp(iue, HARDWARE_ERROR, 0x00);
+	else
+		send_rsp(iue, NO_SENSE, 0x00);
+
+	return FREE_IU;
+}
+
+/*
+ * Handle an I/O.  Called by WRITE6, WRITE10, etc
+ *
+ * cmd: name used in the debug message only
+ * rw:  READ or WRITE
+ * lba: starting block, as taken from the CDB
+ * len: transfer length in bytes
+ *
+ * Returns INFLIGHT once a bio has been submitted, FREE_IU when a
+ * response has already been sent (or the request was aborted), RETRY
+ * when no bio could be allocated, and RETRY_SPLIT_BUF when the request
+ * exceeds the queue's max_sectors or no data buffer could be obtained.
+ * The parsed rw/lba/len are saved in iue->req and V_PARSED is set so a
+ * retry can re-enter with the same values.
+ */
+static int process_rw(char *cmd, int rw, struct iu_entry *iue, long lba,
+		      long len)
+{
+	char *buffer;
+	struct bio *bio;
+	int bytes;
+	int num_biovec;
+	int cur_biovec;
+	unsigned long flags;	/* spin_lock_irqsave() wants unsigned long */
+
+	dbg("%s lba %ld, len %ld\n",cmd,lba,len);
+
+	if (rw == WRITE)
+		atomic_inc(&iue->adapter->write_processed);
+	else if (rw == READ)
+		atomic_inc(&iue->adapter->read_processed);
+	else {
+		err("Major internal error...rw not read or write\n");
+		send_rsp(iue, HARDWARE_ERROR, 0x00);
+		return FREE_IU;
+	}
+
+	if (len == 0) {
+		warn("Zero length I/O\n");
+		send_rsp(iue, ILLEGAL_REQUEST, 0x20);
+		return FREE_IU;
+	}
+
+	/* Writing to a read-only device */
+	if ((rw == WRITE) && (iue->req.vd->b.ro)) {
+		warn("WRITE to read-only device\n");
+		send_rsp(iue, DATA_PROTECT, 0x27);
+		return FREE_IU;
+	}
+
+	iue->req.rw = rw;
+	iue->req.lba = lba;
+	iue->req.len = len;
+	__set_bit(V_PARSED, &iue->req.flags);
+
+	if (bdev_get_queue(iue->req.vd->b.bdev)->max_sectors < (len >> 9))
+		return RETRY_SPLIT_BUF;
+
+	get_data_buffer(&buffer, &iue->req.data_token, len, iue->adapter);
+	iue->req.data_buffer = buffer;
+	iue->req.data_len = len;
+
+	if (buffer == NULL || dma_mapping_error(iue->req.data_token)) {
+		err("Not able to get %lu pages for buffer, retrying later\n",
+		    len / PAGE_SIZE);
+
+		return RETRY_SPLIT_BUF;
+	}
+
+	/* if reladr */
+	if (iue->iu->srp.cmd.cdb[1] & 0x01)
+		lba = lba + iue->req.vd->b.lastlba;
+
+	/* If this command is linked, Keep this lba */
+	if (test_bit(V_LINKED, &iue->req.flags))
+		iue->req.vd->b.lastlba = lba;
+	else
+		iue->req.vd->b.lastlba = 0;
+
+	if (rw == WRITE) {
+		__set_bit(V_WRITE, &iue->req.flags);
+		/* Get the data */
+		bytes = get_cmd_data(iue->req.data_token, len, iue);
+		if (bytes != len) {
+			err("Error transferring data\n");
+			free_data_buffer(buffer, iue->req.data_token, len,
+					 iue->adapter);
+			send_rsp(iue, HARDWARE_ERROR, 0x00);
+			return FREE_IU;
+		}
+	}
+
+	/* one bio_vec per blocksize-sized piece of the buffer */
+	num_biovec = (len - 1) / iue->req.vd->b.blocksize + 1;
+
+	bio = bio_alloc(GFP_KERNEL, num_biovec);
+	if (!bio) {
+		err("Not able to allocate a bio, retrying later\n");
+
+		free_data_buffer(buffer, iue->req.data_token, len,
+				 iue->adapter);
+
+		return RETRY;
+	}
+
+	if (test_bit(V_ABORTED, &iue->req.flags)) {
+		spin_lock_irqsave(&iue->adapter->lock, flags);
+		iue->adapter->next_rsp_delta++;
+		free_data_buffer(buffer, iue->req.data_token, len,
+				 iue->adapter);
+		spin_unlock_irqrestore(&iue->adapter->lock, flags);
+		bio_put(bio);
+		return FREE_IU;
+	}
+
+	atomic_inc(&iue->adapter->bio_count);
+	bio->bi_size = len;
+	bio->bi_bdev = iue->req.vd->b.bdev;
+	bio->bi_sector = lba * (iue->req.vd->b.sectsize >> 9);
+	bio->bi_end_io = &ibmvscsis_end_io;
+	bio->bi_private = iue;
+	bio->bi_rw = (rw == WRITE) ? 1 : 0;
+	bio->bi_rw |= 1 << BIO_RW_SYNC;
+	bio->bi_phys_segments = 1;
+	bio->bi_hw_segments = 1;
+	if (bdev_get_queue(bio->bi_bdev)->ordered != QUEUE_ORDERED_NONE
+	    && test_bit(V_BARRIER, &iue->req.flags))
+		bio->bi_rw |= 1 << BIO_RW_BARRIER;
+
+
+	/* This all assumes that the buffers we get are page-aligned */
+	for (cur_biovec = 0; cur_biovec < num_biovec; cur_biovec++) {
+		long thislen;
+
+		if (len > iue->req.vd->b.blocksize)
+			thislen = iue->req.vd->b.blocksize;
+		else
+			thislen = len;
+
+		bio->bi_io_vec[cur_biovec].bv_page = virt_to_page(buffer);
+		bio->bi_io_vec[cur_biovec].bv_len = thislen;
+		bio->bi_io_vec[cur_biovec].bv_offset =
+		    (unsigned long)buffer & (PAGE_SIZE-1);
+		bio->bi_vcnt++;
+
+		len -= thislen;
+		buffer += thislen;
+	}
+	generic_make_request(bio);
+	return INFLIGHT;
+}
+
+/*
+ * Process a READ6
+ *
+ * The LBA is the low 21 bits of the first four CDB bytes read as one
+ * u32; the length byte is in sectors and is converted to bytes.
+ */
+static int process_read6(struct iu_entry *iue)
+{
+	long lba = (*((u32 *) (iue->iu->srp.cmd.cdb))) & 0x001FFFFF;
+	long len = iue->iu->srp.cmd.cdb[4];
+
+	/* Length of 0 indicates 256 */
+	if (len == 0)
+		len = 256;
+
+	len = len * iue->req.vd->b.sectsize;
+
+	return process_rw("Read6", READ, iue, lba, len);
+}
+
+/*
+ * Process a {READ,WRITE}{6,10,12}
+ *
+ * In each case the LBA and transfer length are read straight out of the
+ * CDB, the length is scaled from sectors to bytes, and the request is
+ * handed to process_rw().
+ */
+static int process_read10(struct iu_entry *iue)
+{
+	long lba = *((u32 *) (iue->iu->srp.cmd.cdb + 2));
+	long len =
+	    *((u16 *) (iue->iu->srp.cmd.cdb + 7)) * iue->req.vd->b.sectsize;
+
+	return process_rw("Read10", READ, iue, lba, len);
+}
+
+static int process_read12(struct iu_entry *iue)
+{
+	long lba = *((u32 *) (iue->iu->srp.cmd.cdb + 2));
+	long len =
+	    *((u32 *) (iue->iu->srp.cmd.cdb + 6)) * iue->req.vd->b.sectsize;
+
+	return process_rw("Read12", READ, iue, lba, len);
+}
+
+static int process_write6(struct iu_entry *iue)
+{
+	long lba = (*((u32 *) (iue->iu->srp.cmd.cdb))) & 0x001FFFFF;
+	long len = iue->iu->srp.cmd.cdb[4];
+
+	/* Length of 0 indicates 256 */
+	if (len == 0)
+		len = 256;
+
+	len = len * iue->req.vd->b.sectsize;
+
+	return process_rw("Write6", WRITE, iue, lba, len);
+}
+
+/*
+ * Process a WRITE10: LBA from CDB bytes 2-5, length (in sectors) from
+ * CDB bytes 7-8, converted to bytes before calling process_rw().
+ */
+static int process_write10(struct iu_entry *iue)
+{
+	long lba = *((u32 *) (iue->iu->srp.cmd.cdb + 2));
+	long len =
+	    *((u16 *) (iue->iu->srp.cmd.cdb + 7)) * iue->req.vd->b.sectsize;
+
+	return process_rw("Write10", WRITE, iue, lba, len);
+}
+
+/*
+ * Process a WRITE12: LBA from CDB bytes 2-5, length (in sectors) from
+ * CDB bytes 6-9, converted to bytes before calling process_rw().
+ */
+static int process_write12(struct iu_entry *iue)
+{
+	long lba = *((u32 *) (iue->iu->srp.cmd.cdb + 2));
+	long len =
+	    *((u32 *) (iue->iu->srp.cmd.cdb + 6)) * iue->req.vd->b.sectsize;
+
+	return process_rw("Write12", WRITE, iue, lba, len);
+}
+
+/*
+ * Handle Read Capacity
+ *
+ * Reports the device's sector size and the number of its last block
+ * (device size / sector size - 1) through a temporary coherent buffer.
+ *
+ * Returns RETRY when the buffer cannot be allocated, FREE_IU otherwise.
+ * A transfer that fails outright (negative result) is answered with
+ * HARDWARE_ERROR, as process_inquiry() does; a short transfer is only
+ * logged.
+ */
+static int process_read_capacity(struct iu_entry *iue)
+{
+	struct read_capacity_data {
+		u32 blocks;
+		u32 blocksize;
+	} *cap;
+	dma_addr_t data_token;
+	int bytes;
+	unsigned long flags;
+
+	cap = dma_alloc_coherent(iue->adapter->dev, sizeof(*cap), &data_token,
+				 GFP_KERNEL);
+
+	if (cap == NULL) {
+		err("Not able to get capacity buffer, retrying later\n");
+		return RETRY;
+	}
+
+	/* return block size and last valid block */
+	cap->blocksize = iue->req.vd->b.sectsize;
+	cap->blocks =
+	    iue->req.vd->b.bdev->bd_inode->i_size / cap->blocksize - 1;
+
+	dbg("capacity %ld bytes, %d blocks, %d blocksize\n",
+	    (long)iue->req.vd->b.bdev->bd_inode->i_size,
+	    cap->blocks,
+	    cap->blocksize);
+
+
+	if (test_bit(V_ABORTED, &iue->req.flags)) {
+		spin_lock_irqsave(&iue->adapter->lock, flags);
+		iue->adapter->next_rsp_delta++;
+		spin_unlock_irqrestore(&iue->adapter->lock, flags);
+		dma_free_coherent(iue->adapter->dev, sizeof(*cap), cap,
+				  data_token);
+		return FREE_IU;
+	}
+
+	bytes = send_cmd_data(data_token, sizeof(*cap), iue);
+
+	dma_free_coherent(iue->adapter->dev, sizeof(*cap), cap, data_token);
+
+	/* sizeof yields size_t; cast so the argument matches %ld */
+	if (bytes != sizeof(*cap))
+		err("Error sending read capacity data. bytes %d, wanted %ld\n",
+		    bytes, (long)sizeof(*cap));
+
+	if (bytes < 0)
+		send_rsp(iue, HARDWARE_ERROR, 0x00);
+	else
+		send_rsp(iue, NO_SENSE, 0x00);
+
+	return FREE_IU;
+}
+
+/*
+ * Process Mode Sense
+ *
+ * Handles page codes 0 and 0x3f (header plus block descriptor, 12 bytes)
+ * and page 0x08 (the same plus a cache page, 12 + 0x12 bytes).  Any other
+ * page, or a page 0x08 request whose CDB byte 4 is neither 4 nor 0x20,
+ * is rejected with ILLEGAL_REQUEST.
+ *
+ * The reply buffer is cleared before it is filled in, so the bytes of the
+ * cache page that are not set explicitly go out as zero instead of
+ * whatever the allocation happened to contain.
+ *
+ * Returns RETRY when the buffer cannot be allocated, FREE_IU otherwise.
+ */
+static int process_mode_sense(struct iu_entry *iue)
+{
+	dma_addr_t data_token;
+	int bytes;
+	unsigned long flags;
+
+	u8 *mode = dma_alloc_coherent(iue->adapter->dev, MODE_SENSE_BUFFER_SIZE,
+				      &data_token, GFP_KERNEL);
+
+	if (mode == NULL) {
+		err("Not able to get mode buffer, retrying later\n");
+		return RETRY;
+	}
+
+	memset(mode, 0, MODE_SENSE_BUFFER_SIZE);
+
+	/* which page */
+	switch (iue->iu->srp.cmd.cdb[2]) {
+	case 0:
+	case 0x3f:
+		mode[1] = 0x00;	/* Default medium */
+		if (iue->req.vd->b.ro)
+			mode[2] = 0x80;	/* device specific */
+		else
+			mode[2] = 0x00;	/* device specific */
+
+		/* note the DPOFUA bit is set to zero! */
+		mode[3] = 0x08;	/* block descriptor length */
+		*((u32 *) & mode[4]) =
+		    iue->req.vd->b.bdev->bd_inode->i_size
+		    / iue->req.vd->b.sectsize - 1;
+
+		*((u32 *) & mode[8]) = iue->req.vd->b.sectsize;
+		bytes = mode[0] = 12;	/* length */
+		break;
+
+	case 0x08: /* Cache page */
+		/* length should be 4 */
+		if (iue->iu->srp.cmd.cdb[4] != 4
+		    && iue->iu->srp.cmd.cdb[4] != 0x20) {
+			send_rsp(iue, ILLEGAL_REQUEST, 0x20);
+			dma_free_coherent(iue->adapter->dev,
+					  MODE_SENSE_BUFFER_SIZE,
+					  mode, data_token);
+			return FREE_IU;
+		}
+
+		mode[1] = 0x00;	/* Default medium */
+		if (iue->req.vd->b.ro)
+			mode[2] = 0x80;	/* device specific */
+		else
+			mode[2] = 0x00;	/* device specific */
+
+		/* note the DPOFUA bit is set to zero! */
+		mode[3] = 0x08;	/* block descriptor length */
+		*((u32 *) & mode[4]) =
+		    iue->req.vd->b.bdev->bd_inode->i_size
+		    / iue->req.vd->b.sectsize - 1;
+		*((u32 *) & mode[8]) = iue->req.vd->b.sectsize;
+
+		/* Cache page */
+		mode[12] = 0x08; /* page */
+		mode[13] = 0x12; /* page length */
+		mode[14] = 0x01; /* no cache (0x04 for read/write cache) */
+
+		bytes = mode[0] = 12 + mode[13];	/* length */
+		break;
+	default:
+		warn("Request for unknown mode page %d\n",
+		     iue->iu->srp.cmd.cdb[2]);
+		send_rsp(iue, ILLEGAL_REQUEST, 0x20);
+		dma_free_coherent(iue->adapter->dev,
+				  MODE_SENSE_BUFFER_SIZE, mode, data_token);
+		return FREE_IU;
+	}
+
+	if (test_bit(V_ABORTED, &iue->req.flags)) {
+		spin_lock_irqsave(&iue->adapter->lock, flags);
+		iue->adapter->next_rsp_delta++;
+		spin_unlock_irqrestore(&iue->adapter->lock, flags);
+		dma_free_coherent(iue->adapter->dev,
+				  MODE_SENSE_BUFFER_SIZE, mode, data_token);
+		return FREE_IU;
+	}
+
+	bytes = send_cmd_data(data_token, bytes, iue);
+
+	dma_free_coherent(iue->adapter->dev,
+			  MODE_SENSE_BUFFER_SIZE, mode, data_token);
+
+	/* a failed transfer is reported the same way process_inquiry() does */
+	if (bytes < 0)
+		send_rsp(iue, HARDWARE_ERROR, 0x00);
+	else
+		send_rsp(iue, NO_SENSE, 0x00);
+
+	return FREE_IU;
+}
+
+/*
+ * Report LUNS command.
+ */ +static int process_reportLUNs(struct iu_entry *iue) +{ + int listsize = vscsis_data_length(&iue->iu->srp.cmd, 0); + dma_addr_t data_token; + int index = 2; /* Start after the two entries (length and LUN0) */ + int bus; + int target; + int bytes; + unsigned long flags; + + u64 *lunlist = dma_alloc_coherent(iue->adapter->dev, listsize, + &data_token, GFP_KERNEL); + + if (lunlist == NULL) { + err("Not able to get lunlist buffer, retrying later\n"); + return RETRY; + } + + memset(lunlist, 0, listsize); + + /* work out list size in units of u64 */ + listsize = listsize / 8; + + if (listsize < 1) { + send_rsp(iue, ILLEGAL_REQUEST, 0x20); + return FREE_IU; + } + + spin_lock_irqsave(&iue->adapter->lock, flags); + + /* send lunlist of size 1 when requesting lun is not all zeros */ + if (iue->iu->srp.cmd.lun != 0x0LL) { + *lunlist = ((u64) 1 * 8) << 32; + goto send_lunlist; + } + + /* return the total number of luns plus LUN0 in bytes */ + *lunlist = (((u64) ((iue->adapter->nvdevs + 1) * 8)) << 32); + + dbg("reporting %d luns\n", iue->adapter->nvdevs + 1); + /* loop through the bus */ + for (bus = 0; bus < BUS_PER_ADAPTER; bus++) { + /* If this bus exists */ + if (!iue->adapter->vbus[bus]) + continue; + /* loop through the targets */ + for (target = 0; target < TARGETS_PER_BUS; target++) { + if (!iue->adapter->vbus[bus]->vdev[target]) + continue; + /* If the target exists */ + if ((index < listsize) && + (!iue->adapter->vbus[bus]-> + vdev[target]->disabled)) { + lunlist[index++] = + iue->adapter->vbus[bus]->vdev[target]->lun; + dbg(" lun %16.16lx\n", + iue->adapter->vbus[bus]->vdev[target]->lun); + } + + } + } + + send_lunlist: + spin_unlock_irqrestore(&iue->adapter->lock, flags); + + if (test_bit(V_ABORTED, &iue->req.flags)) { + spin_lock_irqsave(&iue->adapter->lock, flags); + iue->adapter->next_rsp_delta++; + spin_unlock_irqrestore(&iue->adapter->lock, flags); + dma_free_coherent(iue->adapter->dev, listsize * 8, lunlist, + data_token); + return FREE_IU; + } + + bytes 
= send_cmd_data(data_token, (index * 8), iue); + + dma_free_coherent(iue->adapter->dev, listsize * 8, + lunlist, data_token); + + if (bytes != (index * 8)) { + err("Error sending report luns data. bytes %d, wanted %d\n", + bytes, index * 4); + send_rsp(iue, ABORTED_COMMAND, 0x00); + } else + send_rsp(iue, NO_SENSE, 0x00); + + return FREE_IU; +} + +/* For unrecognized SCSI commands, try passing them + * through + */ +static int try_passthru(struct iu_entry *iue) +{ + request_queue_t *q = bdev_get_queue(iue->req.vd->b.bdev); + struct request *rq; + char *buffer; + int dodlen = vscsis_data_length(&iue->iu->srp.cmd, 1); + int didlen = vscsis_data_length(&iue->iu->srp.cmd, 0); + int bytes, len, rw; + int err = 0; + + if (dodlen && didlen) + return -EIO; + + if (dodlen) + rw = WRITE; + else + rw = READ; + + len = dodlen + didlen; + + if (len) { + get_data_buffer(&buffer, &iue->req.data_token, len, iue->adapter); + if (!buffer) { + err("Unable to get data buffer of len %d\n",len); + return -ENOMEM; + } + + if (dodlen) { + bytes = get_cmd_data(iue->req.data_token, len, iue); + if (bytes != len) { + err("Error transferring data\n"); + free_data_buffer(buffer, + iue->req.data_token, + len, + iue->adapter); + return -ENOMEM; + } + } + } else + buffer = NULL; + + rq = blk_get_request(q, rw, __GFP_WAIT); + rq->flags |= REQ_BLOCK_PC; + rq->data = buffer; + rq->data_len = len; + rq->timeout = iue->req.timeout; + + memcpy(rq->cmd, iue->iu->srp.cmd.cdb, BLK_MAX_CDB); + err = blk_execute_rq(q, iue->req.vd->b.bdev->bd_disk, rq, + ELEVATOR_INSERT_BACK); + blk_put_request(rq); + if ((err == 0) && (rw == READ) && (len)) { + bytes = send_cmd_data(iue->req.data_token, + iue->req.data_len, + iue); + if (bytes != iue->req.data_len) { + err("Error sending data " + "on response " + "(tried %ld, sent %d\n", + iue->req.data_len, bytes); + free_data_buffer(buffer, + iue->req.data_token, + len, + iue->adapter); + err = -EIO; + } + } + + if (buffer) + free_data_buffer(buffer, + 
iue->req.data_token, + len, + iue->adapter); + + return err; +} + +static void reset_changed(struct iu_entry *iue) +{ + if (iue->req.vd->b.changed) { + bd_set_size(iue->req.vd->b.bdev, + (loff_t)get_capacity(iue->req.vd->b.bdev->bd_disk) + <<9); + iue->req.vd->b.changed = 0; + } +} + +/* + * Process an IU when the target is a block device + */ +static int process_cmd_block(struct iu_entry *iue) +{ + union viosrp_iu *iu = iue->iu; + unsigned long flags; + + if (test_bit(V_PARSED, &iue->req.flags)) + return process_rw("pre-parsed", iue->req.rw, iue, iue->req.lba, + iue->req.len); + + if (iu->srp.cmd.cdb[0] == INQUIRY) { + dbg("INQUIRY lun %16.16lx\n", iue->iu->srp.cmd.lun); + return process_inquiry(iue); + } + + if (iue->req.vd && + iue->req.vd->b.removable && + check_disk_change(iue->req.vd->b.bdev)) { + if (iue->req.vd->b.changed) { + dbg("Media changed not ready!...cmd 0x%2.2x\n", + iu->srp.cmd.cdb[0]); + send_rsp(iue, NOT_READY, 0x3a); + return FREE_IU; + } + iue->req.vd->b.changed = 1; + dbg("Media changed attention!...cmd 0x%2.2x\n", + iu->srp.cmd.cdb[0]); + send_rsp(iue, UNIT_ATTENTION, 0x3a); + return FREE_IU; + } + + switch (iu->srp.cmd.cdb[0]) { + case REPORT_LUNS: + dbg("REPORT LUNS lun %16.16lx\n", iue->iu->srp.cmd.lun); + return process_reportLUNs(iue); + case READ_CAPACITY: + dbg("READ CAPACITY lun %16.16lx\n", iue->iu->srp.cmd.lun); + return process_read_capacity(iue); + case MODE_SENSE: + dbg("MODE SENSE lun %16.16lx\n", iue->iu->srp.cmd.lun); + return process_mode_sense(iue); + case TEST_UNIT_READY: + /* we already know the device exists */ + dbg("TEST UNIT READY lun %16.16lx\n", iue->iu->srp.cmd.lun); + if (!test_bit(V_ABORTED, &iue->req.flags)) { + reset_changed(iue); + send_rsp(iue, NO_SENSE, 0x00); + } + else { + spin_lock_irqsave(&iue->adapter->lock, flags); + iue->adapter->next_rsp_delta++; + spin_unlock_irqrestore(&iue->adapter->lock, flags); + } + return FREE_IU; + case START_STOP: + dbg("START_STOP lun %16.16lx\n", iue->iu->srp.cmd.lun); + + 
if (!test_bit(V_ABORTED, &iue->req.flags)) { + reset_changed(iue); + if ((iu->srp.cmd.cdb[5] & 0x03) == 0x02) { + /* Unload! */ + if ((iue->req.vd) && + ioctl_by_bdev(iue->req.vd->b.bdev, + CDROMEJECT, 0) == 0) + send_rsp(iue, NO_SENSE, 0x00); + else + send_rsp(iue, HARDWARE_ERROR, 0x00); + } else if ((iu->srp.cmd.cdb[4] & 0x03) == 0x03) { + iue->req.vd->b.changed = 0; + if ((iue->req.vd) && + ioctl_by_bdev(iue->req.vd->b.bdev, + CDROMCLOSETRAY, 0) == 0) + send_rsp(iue, NO_SENSE, 0x00); + else + send_rsp(iue, HARDWARE_ERROR, 0x00); + } else + send_rsp(iue, NO_SENSE, 0x00); + } else { + spin_lock_irqsave(&iue->adapter->lock, flags); + iue->adapter->next_rsp_delta++; + spin_unlock_irqrestore(&iue->adapter->lock, flags); + } + return FREE_IU; + case READ_6: + return process_read6(iue); + case READ_10: + return process_read10(iue); + case READ_12: + return process_read12(iue); + case WRITE_6: + return process_write6(iue); + case WRITE_10: + case WRITE_VERIFY: + return process_write10(iue); + case WRITE_12: + case WRITE_VERIFY_12: + return process_write12(iue); + default: + dbg("unknown command 0x%2.2x\n`",iu->srp.cmd.cdb[0]); + if (try_passthru(iue) == 0) { + dbg("Successfully passed through command 0x%2.2x!\n", + iu->srp.cmd.cdb[0]); + send_rsp(iue, NO_SENSE, 0x00); + return FREE_IU; + } + + dbg("Unsupported SCSI Command 0x%2.2x\n", iu->srp.cmd.cdb[0]); + + if (!test_bit(V_ABORTED, &iue->req.flags)) + send_rsp(iue, ILLEGAL_REQUEST, 0x20); + else { + spin_lock_irqsave(&iue->adapter->lock, flags); + iue->adapter->next_rsp_delta++; + spin_unlock_irqrestore(&iue->adapter->lock, flags); + } + return FREE_IU; + } +} + +/* ============================================================== + * SCSI Redirection Routines + * ============================================================== + */ +/* + * Callback when the scsi command issued by process_cmd_scsi() is completed + */ +static void scsi_cmd_done(struct scsi_cmnd *cmd) +{ + struct iu_entry *iue = (struct 
iu_entry*)cmd->sc_request-> + upper_private_data; + struct server_adapter *adapter = iue->adapter; + unsigned long flags; + int bytes; + + dbg("scsi_cmd_done got cmd %p iue %p\n", cmd, iue); + + spin_lock_irqsave(&adapter->lock, flags); + list_del(&iue->next); + spin_unlock_irqrestore(&adapter->lock, flags); + + if (test_bit(V_ABORTED, &iue->req.flags)) { + dbg("scsi_cmd_done: aborted tag %16.16x\n", cmd->tag); + spin_lock_irqsave(&iue->adapter->lock, flags); + iue->adapter->next_rsp_delta++; + spin_unlock_irqrestore(&iue->adapter->lock, flags); + goto out; + } + + if(!test_bit(V_WRITE, &iue->req.flags)) { + bytes = send_cmd_data(iue->req.data_token, + iue->req.data_len, iue); + if(bytes != iue->req.data_len) { + err("Error sending data on response (%ld, sent %d)\n", + iue->req.data_len, bytes); + send_rsp(iue, ABORTED_COMMAND, 0x00); + goto out; + } + } + + if (cmd->result) + iue->req.sense = cmd->sense_buffer; + + send_rsp(iue, cmd->result, 0x00); + +out: scsi_release_request(iue->req.sreq); + if (iue->req.data_len) { + free_data_buffer(iue->req.data_buffer, iue->req.data_token, + iue->req.data_len, adapter); + } + spin_lock_irqsave(&adapter->lock, flags); + free_iu(iue); + spin_unlock_irqrestore(&adapter->lock, flags); +} + +/* + * Process an IU when the target is a scsi device + */ +static int process_cmd_scsi(struct iu_entry *iue) +{ + union viosrp_iu *iu = iue->iu; + struct scsi_request *req; + char *buffer = NULL; + int len = 0; + + dbg("%x %x %16.16lx[%d:%d:%d][%s] link %d iue %p\n", + iu->srp.cmd.cdb[0], + iu->srp.cmd.cdb[1], + iue->iu->srp.cmd.lun, + GETBUS(iue->iu->srp.cmd.lun), + GETTARGET(iue->iu->srp.cmd.lun), + GETLUN(iue->iu->srp.cmd.lun), + iue->req.vd->device_name, + test_bit(V_LINKED, &iue->req.flags), iue); + + req = scsi_allocate_request(iue->req.vd->s.sdev, GFP_KERNEL); + if (req == NULL) { + err("Not able to get scsi_request, retrying later\n"); + return RETRY; + } + + memcpy(req->sr_cmnd, iu->srp.cmd.cdb, sizeof(iu->srp.cmd.cdb)); + + 
req->sr_cmd_len = sizeof(iu->srp.cmd.cdb); + if (iu->srp.cmd.data_out_format && iu->srp.cmd.data_in_format) { + err("Invalid bidirectional buffer\n"); + send_rsp(iue, ABORTED_COMMAND, 0x00); + scsi_release_request(req); + return FREE_IU; + } else if (iu->srp.cmd.data_out_format) { /* write */ + atomic_inc(&iue->adapter->write_processed); + req->sr_data_direction = DMA_TO_DEVICE; + len = vscsis_data_length(&iue->iu->srp.cmd, 1); + __set_bit(V_WRITE, &iue->req.flags); + if (iue->req.vd->b.ro) { + warn("WRITE to read-only device\n"); + send_rsp(iue, DATA_PROTECT, 0x27); + scsi_release_request(req); + return FREE_IU; + } + } else if (iu->srp.cmd.data_in_format) { /* read */ + atomic_inc(&iue->adapter->read_processed); + req->sr_data_direction = DMA_FROM_DEVICE; + len = vscsis_data_length(&iue->iu->srp.cmd, 0); + } else { + dbg("No buffer command\n"); + req->sr_data_direction = DMA_NONE; + goto nobuf; + } + + get_data_buffer(&buffer, &iue->req.data_token, len, iue->adapter); + iue->req.data_buffer = buffer; + iue->req.data_len = len; + + if (test_bit(V_WRITE, &iue->req.flags)) { + int bytes = get_cmd_data(iue->req.data_token, len, iue); + + if (bytes != len) { + err("Error transferring data\n"); + free_data_buffer(buffer, iue->req.data_token, len, + iue->adapter); + scsi_release_request(req); + send_rsp(iue, HARDWARE_ERROR, 0x00); + return FREE_IU; + } + } + +nobuf: req->sr_use_sg = 0; + req->sr_bufflen = len; + req->sr_buffer = buffer; + req->sr_sense_buffer[0] = 0; + req->sr_request->flags = + test_bit(V_BARRIER, &iue->req.flags) ? REQ_HARDBARRIER : 0; + req->upper_private_data = (void*)iue; + iue->req.sreq = req; + dbg("sending %s of %d bytes, buffer %p, timeout=%d\n", + test_bit(V_WRITE, &iue->req.flags) ? 
"write" : "read", len, buffer, + iue->req.timeout); + + scsi_do_req(req, iu->srp.cmd.cdb, buffer, len, scsi_cmd_done, + iue->req.timeout, 3); + + return INFLIGHT; + +} + +/* ============================================================== + * SRP Processing Routines + * ============================================================== + */ +/* + * Process an incoming SRP Login request + */ +static void process_login(struct iu_entry *iue) +{ + union viosrp_iu *iu = iue->iu; + u64 tag = iu->srp.generic.tag; + + /* TODO handle case that requested size is wrong and + * buffer format is wrong + */ + memset(iu, 0, sizeof(struct srp_login_rsp)); + iu->srp.login_rsp.type = SRP_LOGIN_RSP_TYPE; + iu->srp.login_rsp.request_limit_delta = INITIAL_SRP_LIMIT; + iu->srp.login_rsp.tag = tag; + iu->srp.login_rsp.max_initiator_to_target_iulen = sizeof(union srp_iu); + iu->srp.login_rsp.max_target_to_initiator_iulen = sizeof(union srp_iu); + /* direct and indirect */ + iu->srp.login_rsp.supported_buffer_formats = 0x0006; + iu->srp.login_rsp.multi_channel_result = 0x00; + + send_iu(iue, sizeof(iu->srp.login_rsp), VIOSRP_SRP_FORMAT); +} + +/* + * Process an incoming device_reset request + */ +static void process_device_reset(struct iu_entry *iue) +{ + struct iu_entry *tmp_iue; + unsigned long flags; + union viosrp_iu *iu = iue->iu; + u64 lun = iu->srp.tsk_mgmt.lun; + + info("device reset for lun %16.16lx\n", lun); + + spin_lock_irqsave(&iue->adapter->lock, flags); + + list_for_each_entry(tmp_iue, &iue->adapter->cmd_queue, next) + if (tmp_iue->iu->srp.cmd.lun == lun) + __set_bit(V_ABORTED, &tmp_iue->req.flags); + + spin_unlock_irqrestore(&iue->adapter->lock, flags); + send_rsp(iue, NO_SENSE, 0x00); +} + +/* + * Process an incoming abort request + */ +static void process_abort(struct iu_entry *iue) +{ + struct iu_entry *tmp_iue; + unsigned long flags; + union viosrp_iu *iu = iue->iu; + u64 tag = iu->srp.tsk_mgmt.managed_task_tag; + unsigned char status = ABORTED_COMMAND; + + info("aborting 
task with tag %16.16lx, lun %16.16lx\n", + tag, iu->srp.tsk_mgmt.lun); + + spin_lock_irqsave(&iue->adapter->lock, flags); + + list_for_each_entry(tmp_iue, &iue->adapter->cmd_queue, next) { + if (tmp_iue->iu->srp.cmd.tag != tag) + continue; + + __set_bit(V_ABORTED, &tmp_iue->req.flags); + status = NO_SENSE; + break; + } + + spin_unlock_irqrestore(&iue->adapter->lock, flags); + + if (status == NO_SENSE) + info("abort successful\n"); + else + info("unable to abort cmd\n"); + + send_rsp(iue, status, 0x14); +} + +/* + * Process an incoming task management request + */ +static void process_tsk_mgmt(struct iu_entry *iue) +{ + union viosrp_iu *iu = iue->iu; + + if (iu->srp.tsk_mgmt.task_mgmt_flags == 0x01) + process_abort(iue); + else if (iu->srp.tsk_mgmt.task_mgmt_flags == 0x08) + process_device_reset(iue); + else + send_rsp(iue, ILLEGAL_REQUEST, 0x20); +} + +/* + * Process an incoming SRP command + */ +static int process_cmd(struct iu_entry *iue) +{ + union viosrp_iu *iu = iue->iu; + + if (!test_bit(V_PARSED, &iue->req.flags)) + iue->req.vd = find_vscsis_vdev(iue); + + if ((iue->req.vd == NULL) && + (iu->srp.cmd.cdb[0] != REPORT_LUNS) && + (iu->srp.cmd.cdb[0] != INQUIRY)) { + dbg("Cmd %2.2x for unknown LUN %16.16lx\n", + iu->srp.cmd.cdb[0], iue->iu->srp.cmd.lun); + send_rsp(iue, ABORTED_COMMAND, 0x14); + return FREE_IU; + } + + if (getlink(iue)) + __set_bit(V_LINKED, &iue->req.flags); + + switch (iu->srp.cmd.task_attribute) { + case SRP_ORDERED_TASK: + __set_bit(V_BARRIER, &iue->req.flags); + case SRP_SIMPLE_TASK: + break; + default: + __set_bit(V_BARRIER, &iue->req.flags); + warn("Task attribute %d not supported, assuming barrier\n", + iu->srp.cmd.task_attribute); + } + + if (!iue->req.vd || !iue->req.vd->direct_scsi) + return process_cmd_block(iue); + else + return process_cmd_scsi(iue); +} + +/* + * Respond to the adapter_info request + */ +u16 send_adapter_info(struct iu_entry *iue, + dma_addr_t remote_buffer, u16 length) +{ + dma_addr_t data_token; + struct 
mad_adapter_info_data *info = + dma_alloc_coherent(iue->adapter->dev, sizeof(*info), &data_token, + GFP_KERNEL); + + dbg("in send_adapter_info\n "); + if (info != NULL) { + int rc; + + /* Get remote info */ + rc = h_copy_rdma(sizeof(*info), + iue->adapter->riobn, + remote_buffer, + iue->adapter->liobn, + data_token); + if (rc == H_Success) { + info("Client connect: %s (%d)\n", + info->partition_name, + info->partition_number); + } + + memset(info, 0, sizeof(*info)); + + dbg("building adapter_info\n "); + strcpy(info->srp_version, "16.a"); + strncpy(info->partition_name, partition_name, + sizeof(info->partition_name)); + info->partition_number = partition_number; + info->mad_version = 1; + info->os_type = 2; + info->port_max_txu[0] = iue->adapter->max_sectors << 9; + + /* Send our info to remote */ + rc = h_copy_rdma(sizeof(*info), + iue->adapter->liobn, + data_token, + iue->adapter->riobn, + remote_buffer); + + dma_free_coherent(iue->adapter->dev, + sizeof(*info), info, data_token); + + if (rc != H_Success) { + err("Error sending adapter info rc %d\n",rc); + return 1; + } + } else { + dbg("bad dma_alloc_coherent in adapter_info\n "); + return 1; + } + return 0; + +} + +/* + * Process our queue of incoming commands + */ +static void run_cmd_queue(struct server_adapter *adapter) +{ + struct iu_entry *curr_iue; + struct list_head *next = NULL; + unsigned long flags; + spin_lock_irqsave(&adapter->lock, flags); + + next = list_empty(&adapter->cmd_queue) ? NULL : adapter->cmd_queue.next; + while (next) { + curr_iue = list_entry(next, struct iu_entry, next); + next = next->next == &adapter->cmd_queue ? 
NULL : next->next; + if (test_bit(V_FLYING, &curr_iue->req.flags)) { + if (test_bit(V_DONE, &curr_iue->req.flags)) { + list_del(&curr_iue->next); + free_iu(curr_iue); + } + continue; + } + if (test_bit(V_ABORTED, &curr_iue->req.flags)) { + adapter->next_rsp_delta++; + list_del(&curr_iue->next); + free_iu(curr_iue); + } else { + int rc; + __set_bit(V_FLYING, &curr_iue->req.flags); + spin_unlock_irqrestore(&adapter->lock, flags); + dbg("process_cmd sending %p\n", curr_iue); + rc = process_cmd(curr_iue); + spin_lock_irqsave(&adapter->lock, flags); + + /* if the iue is not in any list, we're racing with + endio, so we lost the cmd_queue */ + if (curr_iue->next.next == LIST_POISON1) + goto out; + + next = curr_iue->next.next == &adapter->cmd_queue + ? NULL : curr_iue->next.next; + + switch (rc) { + case FREE_IU: + list_del(&curr_iue->next); + free_iu(curr_iue); + break; + case INFLIGHT: + if (!test_bit(V_IN_USE, &curr_iue->req.flags)) + /* this means that the request finished + before process_cmd() returned, so we + lost a handle of the cmd_queue list */ + goto out; + break; + case RETRY_SPLIT_BUF: + if (!split_iu(curr_iue)) { + list_add(&curr_iue->req.child[1]->next, + &curr_iue->next); + list_add(&curr_iue->req.child[0]->next, + &curr_iue->next); + next = curr_iue->next.next; + break; + } + case RETRY: + __clear_bit(V_FLYING, &curr_iue->req.flags); + kblockd_schedule_work(&adapter->crq_task); + + /* if a barrier fails, we don't want anything + new to go through, retry when new cmd arrives + or when workqueue runs */ + if (test_bit(V_BARRIER, &curr_iue->req.flags)) + goto out; + break; + default: + err("Invalid return code %i from process_cmd\n", + rc); + } + } + } + +out: + spin_unlock_irqrestore(&adapter->lock, flags); +} + +/* + * Process an incoming information unit. 
+ */ +static void process_iu(struct viosrp_crq *crq, struct server_adapter *adapter) +{ + struct iu_entry *iue = get_iu(adapter); + union viosrp_iu *iu; + long rc; + unsigned long flags; + + if (iue == NULL) { + warn("Error getting IU from pool, other side exceeded limit\n"); + return; + } + + iue->req.remote_token = crq->IU_data_ptr; + iue->req.timeout= crq->timeout ? crq->timeout*HZ : DEFAULT_TIMEOUT; + + rc = h_copy_rdma(crq->IU_length, + iue->adapter->riobn, + iue->req.remote_token, adapter->liobn, iue->iu_token); + + iu = iue->iu; + + if (rc) { + err("process_iu: Error %ld transferring data from client\n", + rc); + } + + if (crq->format == VIOSRP_MAD_FORMAT) { + switch (iu->mad.empty_iu.common.type) { + case VIOSRP_EMPTY_IU_TYPE: + warn("Unsupported EMPTY MAD IU\n"); + break; + case VIOSRP_ERROR_LOG_TYPE: + warn("Unsupported ERROR LOG MAD IU\n"); + iu->mad.error_log.common.status = 1; + send_iu(iue, sizeof(iu->mad.error_log), + VIOSRP_MAD_FORMAT); + break; + case VIOSRP_ADAPTER_INFO_TYPE: + iu->mad.adapter_info.common.status = + send_adapter_info(iue, + iu->mad.adapter_info.buffer, + iu->mad.adapter_info.common. 
+ length); + + send_iu(iue, sizeof(iu->mad.adapter_info), + VIOSRP_MAD_FORMAT); + break; + case VIOSRP_HOST_CONFIG_TYPE: + iu->mad.host_config.common.status = 1; + send_iu(iue, sizeof(iu->mad.host_config), + VIOSRP_MAD_FORMAT); + break; + default: + warn("Unsupported MAD type %d\n", iu->srp.generic.type); + } + } else { + switch (iu->srp.generic.type) { + case SRP_LOGIN_REQ_TYPE: + dbg("SRP LOGIN\n"); + process_login(iue); + break; + case SRP_LOGIN_RSP_TYPE: + warn("Unsupported LOGIN_RSP SRP IU\n"); + break; + case SRP_I_LOGOUT_TYPE: + warn("Unsupported I_LOGOUT SRP IU\n"); + break; + case SRP_T_LOGOUT_TYPE: + warn("Unsupported T_LOGOUT SRP IU\n"); + break; + case SRP_TSK_MGMT_TYPE: + process_tsk_mgmt(iue); + break; + case SRP_CMD_TYPE: + spin_lock_irqsave(&adapter->lock, flags); + list_add_tail(&iue->next, &adapter->cmd_queue); + spin_unlock_irqrestore(&adapter->lock, flags); + run_cmd_queue(adapter); + return; + break; + case SRP_RSP_TYPE: + warn("Unsupported RSP SRP IU\n"); + break; + case SRP_CRED_REQ_TYPE: + warn("Unsupported CRED_REQ SRP IU\n"); + break; + case SRP_CRED_RSP_TYPE: + warn("Unsupported CRED_RSP SRP IU\n"); + break; + case SRP_AER_REQ_TYPE: + warn("Unsupported AER_REQ SRP IU\n"); + break; + case SRP_AER_RSP_TYPE: + warn("Unsupported AER_RSP SRP IU\n"); + break; + default: + warn("Unsupported SRP type %d\n", iu->srp.generic.type); + } + } + + spin_lock_irqsave(&adapter->lock, flags); + free_iu(iue); + spin_unlock_irqrestore(&adapter->lock, flags); +} + +/* ============================================================== + * CRQ Processing Routines + * ============================================================== + */ + +/* + * Handle a CRQ event + */ +static void handle_crq(struct viosrp_crq *crq, struct server_adapter *adapter) +{ + switch (crq->valid) { + case 0xC0: /* initialization */ + switch (crq->format) { + case 0x01: + h_send_crq(adapter->dma_dev->unit_address, + 0xC002000000000000, 0); + break; + case 0x02: + break; + default: + 
err("Client error: Unknwn msg format %d\n", + crq->format); + } + return; + case 0xFF: /* transport event */ + return; + case 0x80: /* real payload */ + { + switch (crq->format) { + case VIOSRP_SRP_FORMAT: + case VIOSRP_MAD_FORMAT: + process_iu(crq, adapter); + break; + case VIOSRP_OS400_FORMAT: + warn("Unsupported OS400 format CRQ\n"); + break; + + case VIOSRP_AIX_FORMAT: + warn("Unsupported AIX format CRQ\n"); + break; + + case VIOSRP_LINUX_FORMAT: + warn("Unsupported LINUX format CRQ\n"); + break; + + case VIOSRP_INLINE_FORMAT: + warn("Unsupported _INLINE_ format CRQ\n"); + break; + + default: + err("Client error: Unsupported msg format %d\n", + crq->format); + } + } + break; + default: + err("Client error: unknown message type 0x%02x!?\n", + crq->valid); + return; + } + +} + +/* + * Task to handle CRQs + */ +static void crq_task(void *data) +{ + struct server_adapter *adapter = (struct server_adapter *)data; + struct viosrp_crq *crq; + int done = 0; + + while (!done) { + + /* Loop through and process CRQs */ + while ((crq = crq_queue_next_crq(&adapter->queue)) != NULL) { + atomic_inc(&adapter->crq_processed); + handle_crq(crq, adapter); + crq->valid = 0x00; + } + + vio_enable_interrupts(adapter->dma_dev); + if ((crq = crq_queue_next_crq(&adapter->queue)) != NULL) { + vio_disable_interrupts(adapter->dma_dev); + handle_crq(crq, adapter); + crq->valid = 0x00; + } else + done = 1; + } + run_cmd_queue(adapter); +} + +/* + * Handle the interrupt that occurs when something is placed on our CRQ + */ +static irqreturn_t handle_interrupt(int irq, void *dev_instance, + struct pt_regs *regs) +{ + struct server_adapter *adapter = (struct server_adapter *)dev_instance; + + vio_disable_interrupts(adapter->dma_dev); + + atomic_inc(&adapter->interrupts); + + kblockd_schedule_work(&adapter->crq_task); + + return IRQ_HANDLED; +} + +/* + * Initialize our CRQ + * return zero on success, non-zero on failure + */ +static int initialize_crq_queue(struct crq_queue *queue, + struct 
server_adapter *adapter) +{ + int rc; + + queue->msgs = (struct viosrp_crq *)get_zeroed_page(GFP_KERNEL); + if (!queue->msgs) + goto malloc_failed; + queue->size = PAGE_SIZE / sizeof(*queue->msgs); + + queue->msg_token = dma_map_single(adapter->dev, queue->msgs, + queue->size * sizeof(*queue->msgs), + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(queue->msg_token)) + goto map_failed; + + rc = h_reg_crq(adapter->dma_dev->unit_address, queue->msg_token, + PAGE_SIZE); + + /* If the adapter was left active for some reason (like kexec) + * try freeing and re-registering + */ + if (rc == H_Resource) { + do { + rc = h_free_crq(adapter->dma_dev->unit_address); + } while ((rc == H_Busy) || (H_isLongBusy(rc))); + rc = h_reg_crq(adapter->dma_dev->unit_address, queue->msg_token, + PAGE_SIZE); + } + + if ((rc != H_Success) && (rc != 2)) { + err("Error 0x%x opening virtual adapter\n", rc); + goto reg_crq_failed; + } + + if (request_irq + (adapter->dma_dev->irq, &handle_interrupt, SA_INTERRUPT, + "ibmvscsis", adapter) != 0) + goto req_irq_failed; + + vio_enable_interrupts(adapter->dma_dev); + + h_send_crq(adapter->dma_dev->unit_address, 0xC001000000000000, 0); + + queue->cur = 0; + queue->lock = SPIN_LOCK_UNLOCKED; + + return 0; + + req_irq_failed: + do { + rc = h_free_crq(adapter->dma_dev->unit_address); + } while ((rc == H_Busy) || (H_isLongBusy(rc))); + + reg_crq_failed: + dma_unmap_single(adapter->dev, queue->msg_token, + queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); + map_failed: + free_page((unsigned long)queue->msgs); + malloc_failed: + return -ENOMEM; +} + +/* + * Release the CRQ + */ +static void release_crq_queue(struct crq_queue *queue, + struct server_adapter *adapter) +{ + int rc; + + info("releasing adapter\n"); + free_irq(adapter->dma_dev->irq, adapter); + do { + rc = h_free_crq(adapter->dma_dev->unit_address); + } while ((rc == H_Busy) || (H_isLongBusy(rc))); + dma_unmap_single(adapter->dev, queue->msg_token, + queue->size * sizeof(*queue->msgs), 
DMA_BIDIRECTIONAL); + free_page((unsigned long)queue->msgs); +} + +/* ============================================================== + * Shared Device Management + * ============================================================== + */ +/* + * Add a block device as a SCSI LUN + */ +static int activate_device(struct vdev *vdev) +{ + struct block_device *bdev; + char *name = vdev->device_name; + int ro = vdev->b.ro; + unsigned long flags; + struct scsi_dev_node *tmp_sdn; + + bdev = open_bdev_excl(name, ro, activate_device); + if (IS_ERR(bdev)) + return PTR_ERR(bdev);; + + spin_lock_irqsave(&sdev_list_lock, flags); + list_for_each_entry(tmp_sdn, &scsi_dev_list, node) { + struct scsi_device *sdev = tmp_sdn->sdev; + /* if the block device is a known scsi_device and + device is not a partition */ + if (sdev->request_queue == bdev->bd_disk->queue && + bdev == bdev->bd_contains) { + vdev->s.sdev = sdev; + tmp_sdn->vdev = vdev; + spin_unlock_irqrestore(&sdev_list_lock, flags); + close_bdev_excl(bdev); + vdev->direct_scsi = (char)1; + vdev->disabled = 0; + info("Activating %s (scsi %d:%d:%d:%d) as LUN 0x%lx\n", + name, sdev->host->host_no, sdev->channel, + sdev->id, sdev->lun, vdev->lun); + return 0; + } + } + spin_unlock_irqrestore(&sdev_list_lock, flags); + + vdev->direct_scsi = 0; + vdev->b.bdev = bdev; + vdev->disabled = 0; + vdev->b.sectsize = bdev_hardsect_size(bdev); + vdev->b.blocksize = bdev->bd_block_size; + if (bdev->bd_disk->flags & GENHD_FL_CD) + vdev->b.scsi_type = TYPE_ROM; /* CD/DVD */ + else + vdev->b.scsi_type = TYPE_DISK; /* disk */ + + if (bdev->bd_disk->flags & GENHD_FL_REMOVABLE) { + vdev->b.removable = 1; /* rmb bit of inquiry */ + vdev->b.changed = 1; + } else + vdev->b.removable = 0; + + info("Activating block device %s as %s %s LUN 0x%lx sector size %ld\n", + name, ro ? "read only " : "", + vdev->b.scsi_type ? 
"CD" : "disk", vdev->lun, + vdev->b.sectsize); + + return 0; +} + +static void deactivate_scsi_device(struct vdev *vdev) +{ + struct scsi_dev_node *tmp_sdn; + + vdev->disabled = 1; + vdev->s.sdev = NULL; + + list_for_each_entry(tmp_sdn, &scsi_dev_list, node) + if (tmp_sdn->vdev == vdev) + tmp_sdn->vdev = NULL; +} + +static void deactivate_device(struct vdev *vdev) +{ + info("Deactivating block device, LUN 0x%lx\n", vdev->lun); + + /* Wait while any users of this device finish. Note there should + * be no new users, since we have marked this disabled + * + * We just poll here, since we are blocking write + */ + while (atomic_read(&vdev->refcount)) { + msleep(REFCOUNT_TIMEOUT_MS); + } + + vdev->disabled = 1; + + if (!vdev->direct_scsi) + close_bdev_excl(vdev->b.bdev); + else + deactivate_scsi_device(vdev); +} + +/* + * Callback when a scsi_device gets added to the system + */ +static int add_scsi_device(struct class_device *cdev) +{ + struct scsi_device *sdev = to_scsi_device(cdev->dev); + struct scsi_dev_node * sdevnode = + kmalloc(sizeof(struct scsi_dev_node), GFP_ATOMIC); + unsigned long flags; + + dbg("add_scsi_device got %p, %d:%d:%d:%d, sdn=%p\n", sdev, + sdev->host->host_no, sdev->channel, sdev->id, sdev->lun, sdevnode); + + sdevnode->sdev = sdev; + sdevnode->vdev = NULL; + + spin_lock_irqsave(&sdev_list_lock, flags); + list_add_tail(&sdevnode->node, &scsi_dev_list); + spin_unlock_irqrestore(&sdev_list_lock, flags); + return 0; +} + +/* + * Callback when a scsi_device gets removed from the system + */ +static void rem_scsi_device(struct class_device *cdev) +{ + struct scsi_dev_node *tmp_sdn; + struct scsi_device *sdev = to_scsi_device(cdev->dev); + unsigned long flags; + + spin_lock_irqsave(&sdev_list_lock, flags); + list_for_each_entry(tmp_sdn, &scsi_dev_list, node) { + if (sdev == tmp_sdn->sdev) { + if (tmp_sdn->vdev && !tmp_sdn->vdev->disabled) + deactivate_scsi_device(tmp_sdn->vdev); + list_del(&tmp_sdn->node); + kfree(tmp_sdn); + goto out; + } + } + + 
warn("rem_scsi_device: Couldn't find scsi_device %p %d:%d:%d:%d\n", + sdev, sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); +out: spin_unlock_irqrestore(&sdev_list_lock, flags); + return; +} + +/* ============================================================== + * Module load and unload + * ============================================================== + */ +static int ibmvscsis_probe(struct vio_dev *dev, const struct vio_device_id *id) +{ + struct server_adapter *adapter; + int rc; + unsigned int *dma_window; + unsigned int dma_window_property_size; + + adapter = kmalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + err("couldn't allocate adapter memory\n"); + return -ENOMEM; + } + memset(adapter, 0, sizeof(*adapter)); + adapter->dma_dev = dev; + adapter->dev = &dev->dev; + adapter->dev->driver_data = adapter; + adapter->next_rsp_delta = 0; + adapter->lock = SPIN_LOCK_UNLOCKED; + + dma_window = + (unsigned int *)vio_get_attribute(dev, "ibm,my-dma-window", + &dma_window_property_size); + if ((!dma_window) || (dma_window_property_size != 40)) { + err("Couldn't get ibm,my-dma-window property\n"); + return -EIO; + } + + adapter->liobn = dma_window[0]; + adapter->riobn = dma_window[5]; + + INIT_WORK(&adapter->crq_task, crq_task, adapter); + + tasklet_init(&adapter->endio_tasklet, + endio_task, (unsigned long)adapter); + + INIT_LIST_HEAD(&adapter->cmd_queue); + + /* Initialize the buffer cache */ + init_data_buffer(adapter); + + /* Arbitrarily support 16 IUs right now */ + rc = initialize_iu_pool(adapter, INITIAL_SRP_LIMIT); + if (rc) { + kfree(adapter); + return rc; + } + + rc = initialize_crq_queue(&adapter->queue, adapter); + if (rc != 0) { + kfree(adapter); + return rc; + } + + return 0; +} + +static int ibmvscsis_remove(struct vio_dev *dev) +{ + int bus; + int target; + unsigned long flags; + struct server_adapter *adapter = + (struct server_adapter *)dev->dev.driver_data; + + spin_lock_irqsave(&adapter->lock, flags); + + /* + * Loop through the bus + 
*/ + for (bus = 0; bus < BUS_PER_ADAPTER; bus++) { + /* If this bus exists */ + if (adapter->vbus[bus]) { + /* loop through the targets */ + for (target = 0; target < TARGETS_PER_BUS; target++) { + /* If the target exists */ + struct vdev *vdev = + adapter->vbus[bus]->vdev[target]; + if (vdev && !vdev ->disabled) + deactivate_device(vdev); + } + } + } + + spin_unlock_irqrestore(&adapter->lock, flags); + release_crq_queue(&adapter->queue, adapter); + + release_iu_pool(adapter); + + release_data_buffer(adapter); + + kfree(adapter); + + return 0; +} + +static struct class_interface vscsis_interface = { + .add = add_scsi_device, + .remove = rem_scsi_device, +}; + +static struct vio_device_id ibmvscsis_device_table[] __devinitdata = { + {"v-scsi-host", "IBM,v-scsi-host"}, + {"",""} +}; + +MODULE_DEVICE_TABLE(vio, ibmvscsis_device_table); + +static struct vio_driver ibmvscsis_driver = { + .name = "ibmvscsis", + .id_table = ibmvscsis_device_table, + .probe = ibmvscsis_probe, + .remove = ibmvscsis_remove, +}; + +static int mod_init(void) +{ + struct device_node *rootdn; + char *ppartition_name; + char *psystem_id; + char *pmodel; + unsigned int *p_number_ptr; + int rc; + + /* Retrieve information about this partition */ + rootdn = find_path_device("/"); + if (rootdn) { + pmodel = get_property(rootdn, "model", NULL); + psystem_id = get_property(rootdn, "system-id", NULL); + if (pmodel && psystem_id) + snprintf(system_id,sizeof(system_id), + "%s-%s", + pmodel, psystem_id); + ppartition_name = + get_property(rootdn, "ibm,partition-name", NULL); + if (ppartition_name) + strncpy(partition_name, ppartition_name, + sizeof(partition_name)); + p_number_ptr = + (unsigned int *)get_property(rootdn, "ibm,partition-no", + NULL); + if (p_number_ptr) + partition_number = *p_number_ptr; + } + + info("initialized version "IBMVSCSIS_VERSION"\n"); + + rc = vio_register_driver(&ibmvscsis_driver); + + if (rc) { + warn("rc %d from vio_register_driver\n", rc); + return rc; + } + + rc = 
scsi_register_interface(&vscsis_interface); + + if (rc) + warn("rc %d from scsi_register_interface\n", rc); + + return rc; +} + +static void mod_exit(void) +{ + info("terminated\n"); + + scsi_unregister_interface(&vscsis_interface); + vio_unregister_driver(&ibmvscsis_driver); +} + +module_init(mod_init); +module_exit(mod_exit); From sleddog at us.ibm.com Tue Oct 18 00:37:56 2005 From: sleddog at us.ibm.com (Dave Boutcher) Date: Mon, 17 Oct 2005 09:37:56 -0500 Subject: [PATCH 3/3] ibmvscsis scsi target config and include changes In-Reply-To: <20051017143644.GA9992@cs.umn.edu> References: <20051017020534.GA29968@hound.rchland.ibm.com> Message-ID: <20051017143756.GD9992@cs.umn.edu> ibmvscsis config file include file changes Signed-off-by: Dave Boutcher Signed-off-by: Santiago Leon Signed-off-by: Linda Xie diff -uNr linux-2.6.13-rc7/drivers/scsi/ibmvscsi/ibmvscsi.h linux-2.6.13-rc7-ibmvscsis/drivers/scsi/ibmvscsi/ibmvscsi.h --- linux-2.6.13-rc7/drivers/scsi/ibmvscsi/ibmvscsi.h 2005-09-06 15:56:35.231844303 -0500 +++ linux-2.6.13-rc7-ibmvscsis/drivers/scsi/ibmvscsi/ibmvscsi.h 2005-09-06 16:02:41.279485577 -0500 @@ -33,6 +33,7 @@ #include #include #include +#include #include "viosrp.h" struct scsi_cmnd; diff -uNr linux-2.6.13-rc7/drivers/scsi/ibmvscsi/Makefile linux-2.6.13-rc7-ibmvscsis/drivers/scsi/ibmvscsi/Makefile --- linux-2.6.13-rc7/drivers/scsi/ibmvscsi/Makefile 2004-10-18 16:54:37.000000000 -0500 +++ linux-2.6.13-rc7-ibmvscsis/drivers/scsi/ibmvscsi/Makefile 2005-09-06 16:02:41.279485577 -0500 @@ -3,3 +3,5 @@ ibmvscsic-y += ibmvscsi.o ibmvscsic-$(CONFIG_PPC_ISERIES) += iseries_vscsi.o ibmvscsic-$(CONFIG_PPC_PSERIES) += rpa_vscsi.o + +obj-$(CONFIG_SCSI_IBMVSCSIS) += ibmvscsis.o diff -uNr linux-2.6.13-rc7/drivers/scsi/ibmvscsi/srp.h linux-2.6.13-rc7-ibmvscsis/drivers/scsi/ibmvscsi/srp.h --- linux-2.6.13-rc7/drivers/scsi/ibmvscsi/srp.h 2005-09-06 15:57:27.855492658 -0500 +++ linux-2.6.13-rc7-ibmvscsis/drivers/scsi/ibmvscsi/srp.h 2005-09-06 16:02:41.297483076 -0500 @@ 
-53,6 +53,13 @@ SRP_INDIRECT_BUFFER = 0x02 }; +enum srp_task_attributes { + SRP_SIMPLE_TASK = 0, + SRP_HEAD_TASK = 1, + SRP_ORDERED_TASK = 2, + SRP_ACA_TASK = 4 +}; + struct memory_descriptor { u64 virtual_address; u32 memory_handle; @@ -174,7 +181,7 @@ u32 data_out_residual_count; u32 sense_data_list_length; u32 response_data_list_length; - u8 sense_and_response_data[18]; + u8 sense_and_response_data[SCSI_SENSE_BUFFERSIZE]; }; struct srp_cred_req { diff -uNr linux-2.6.13-rc7/drivers/scsi/Kconfig linux-2.6.13-rc7-ibmvscsis/drivers/scsi/Kconfig --- linux-2.6.13-rc7/drivers/scsi/Kconfig 2005-09-06 15:57:27.674385086 -0500 +++ linux-2.6.13-rc7-ibmvscsis/drivers/scsi/Kconfig 2005-09-06 16:02:41.278485716 -0500 @@ -831,6 +831,16 @@ To compile this driver as a module, choose M here: the module will be called ibmvscsic. +config SCSI_IBMVSCSIS + tristate "IBM Virtual SCSI Server support" + depends on PPC_PSERIES + help + This is the IBM Virtual SCSI Server which can be configured using + the ppc64-utils package available at + http://techsupport.services.ibm.com/server/lopdiags + To compile this driver as a module, choose M here: the + module will be called ibmvscsis. 
+ config SCSI_INITIO tristate "Initio 9100U(W) support" depends on PCI && SCSI From sleddog at us.ibm.com Tue Oct 18 00:37:44 2005 From: sleddog at us.ibm.com (Dave Boutcher) Date: Mon, 17 Oct 2005 09:37:44 -0500 Subject: [PATCH 2/3] ibmvscsis scsi target sysfs interfaces In-Reply-To: <20051017143644.GA9992@cs.umn.edu> References: <20051017020534.GA29968@hound.rchland.ibm.com> Message-ID: <20051017143744.GC9992@cs.umn.edu> ibmvscsis target sysfs configuration interfaces Signed-off-by: Dave Boutcher Signed-off-by: Santiago Leon Signed-off-by: Linda Xie --- linux-2.6.14-rc4-ibmvscsis-test/drivers/scsi/ibmvscsi/ibmvscsis.c 2005-10-16 20:31:10.000000000 -0500 +++ linux-2.6.14-rc4-ibmvscsis/drivers/scsi/ibmvscsi/ibmvscsis.c 2005-10-16 20:37:18.000000000 -0500 @@ -3125,6 +3125,455 @@ } /* ============================================================== + * SYSFS Routines + * ============================================================== + */ +static struct class_interface vscsis_interface = { + .add = add_scsi_device, + .remove = rem_scsi_device, +}; + +static struct kobj_type ktype_vscsi_target; +static struct kobj_type ktype_vscsi_bus; +static struct kobj_type ktype_vscsi_stats; + +static void vscsi_target_release(struct kobject *kobj) { + struct vdev *tmpdev = + container_of(kobj,struct vdev,kobj); + kfree(tmpdev); +} + +static void vscsi_bus_release(struct kobject *kobj) { + struct vbus *tmpbus = + container_of(kobj,struct vbus,kobj); + kfree(tmpbus); +} + +static void set_num_targets(struct vbus* vbus, long value) +{ + struct device *dev = + container_of(vbus->kobj.parent, struct device , kobj); + struct server_adapter *adapter = + (struct server_adapter *)dev->driver_data; + int cur_num_targets = atomic_read(&vbus->num_targets); + unsigned long flags; + struct vdev *tmpdev; + + /* Growing */ + if (cur_num_targets < value) { + int i; + for (i = cur_num_targets; i < value; i++) { + tmpdev = (struct vdev *)kmalloc(sizeof(struct vdev), + GFP_KERNEL); + if (!tmpdev) { + 
err("Couldn't allocate target memory %d\n", i); + return; + } + memset(tmpdev, 0, sizeof(struct vdev)); + + tmpdev->lun = make_lun(vbus->bus_num, i, 0); + tmpdev->b.blocksize = PAGE_CACHE_SIZE; + tmpdev->b.sectsize = 512; + tmpdev->disabled = 1; + + tmpdev->kobj.parent = &vbus->kobj; + sprintf(tmpdev->kobj.name, "target%d", i); + tmpdev->kobj.ktype = &ktype_vscsi_target; + kobject_register(&tmpdev->kobj); + + spin_lock_irqsave(&adapter->lock, flags); + if (vbus->vdev[i]) { + /* Race!!! */ + spin_unlock_irqrestore(&adapter->lock, flags); + kobject_unregister(&tmpdev->kobj); + continue; + } + + adapter->nvdevs++; + atomic_inc(&vbus->num_targets); + vbus->vdev[i] = tmpdev; + spin_unlock_irqrestore(&adapter->lock, flags); + } + } else { /* shrinking */ + int i; + for (i = cur_num_targets - 1; i >= value; i--) + { + if (!vbus->vdev[i]->disabled) { + err("Can't remove active target %d\n", i); + return; + } + + spin_lock_irqsave(&adapter->lock, flags); + tmpdev = vbus->vdev[i]; + vbus->vdev[i] = NULL; + spin_unlock_irqrestore(&adapter->lock, flags); + + if (tmpdev) + kobject_unregister(&tmpdev->kobj); + + adapter->nvdevs--; + atomic_dec(&vbus->num_targets); + } + } +} + +static void set_num_buses(struct device *dev, long value) +{ + struct server_adapter *adapter = + (struct server_adapter *)dev->driver_data; + int cur_num_buses = atomic_read(&adapter->num_buses); + unsigned long flags; + struct vbus *tmpbus; + + if (cur_num_buses < value) { /* growing */ + int i; + for (i = cur_num_buses; i < value; i++) { + tmpbus = (struct vbus *) kmalloc(sizeof(struct vbus), + GFP_KERNEL); + if (!tmpbus) { + err("Couldn't allocate bus %d memory\n", i); + return; + } + + memset(tmpbus, 0, sizeof(struct vbus)); + tmpbus->bus_num = i; + tmpbus->kobj.parent = &dev->kobj; + sprintf(tmpbus->kobj.name, "bus%d", i); + tmpbus->kobj.ktype = &ktype_vscsi_bus; + kobject_register(&tmpbus->kobj); + + spin_lock_irqsave(&adapter->lock, flags); + + if (adapter->vbus[i] != NULL) { + /* Race condition! 
*/ + spin_unlock_irqrestore(&adapter->lock, flags); + kobject_unregister(&tmpbus->kobj); + continue; + } + + adapter->vbus[i] = tmpbus; + + atomic_inc(&adapter->num_buses); + spin_unlock_irqrestore(&adapter->lock, flags); + + set_num_targets(adapter->vbus[i], 1); + } + + } else if (cur_num_buses > value) { /* shrinking */ + int i, j, active_target; + for (i = cur_num_buses - 1; i >= value; i--) { + active_target = -1; + for (j = 0; j < TARGETS_PER_BUS; j++) { + if (adapter->vbus[i]->vdev[j] && + !adapter->vbus[i]->vdev[j]->disabled) { + active_target = j; + break; + } + } + if (active_target != -1) { + err("Can't remove bus%d, target%d active\n", + i, active_target); + return ; + } + + set_num_targets(adapter->vbus[i], 0); + + spin_lock_irqsave(&adapter->lock, flags); + atomic_dec(&adapter->num_buses); + tmpbus = adapter->vbus[i]; + adapter->vbus[i] = NULL; + spin_unlock_irqrestore(&adapter->lock, flags); + + /* If we race this could already be NULL */ + if (tmpbus) + kobject_unregister(&tmpbus->kobj); + } + } +} + +/* Target sysfs stuff */ +static ATTR(target, device, 0644); +static ATTR(target, active, 0644); +static ATTR(target, ro, 0644); + +static ssize_t vscsi_target_show(struct kobject * kobj, + struct attribute * attr, char * buf) +{ + struct vdev *vdev = container_of(kobj, struct vdev, kobj); + struct device *dev = container_of(kobj->parent->parent, + struct device, kobj); + struct server_adapter *adapter = + (struct server_adapter *)dev->driver_data; + unsigned long flags; + ssize_t returned; + + spin_lock_irqsave(&adapter->lock, flags); + + if (attr == &vscsi_target_device_attr) + returned = sprintf(buf, "%s\n", vdev->device_name); + else if (attr == &vscsi_target_active_attr) + returned = sprintf(buf, "%d\n", !vdev->disabled); + else if (attr == &vscsi_target_ro_attr) + returned = sprintf(buf, "%d\n", vdev->b.ro); + else { + returned = -EFAULT; + BUG(); + } + + spin_unlock_irqrestore(&adapter->lock, flags); + + return returned; +} + +static ssize_t 
vscsi_target_store(struct kobject * kobj, + struct attribute * attr, + const char * buf, size_t count) +{ + struct vdev *vdev = container_of(kobj, struct vdev, kobj); + struct device *dev = container_of(kobj->parent->parent, + struct device, kobj); + struct server_adapter *adapter = + (struct server_adapter *)dev->driver_data; + long flags; + long value = simple_strtol(buf, NULL, 10); + + if (attr != &vscsi_target_active_attr && !vdev->disabled) { + err("Error: Can't modify properties while target is active.\n"); + return -EPERM; + } + + if (attr == &vscsi_target_device_attr) { + int i; + spin_lock_irqsave(&adapter->lock, flags); + i = strlcpy(vdev->device_name, buf, TARGET_MAX_NAME_LEN); + for (; i >= 0; i--) + if (vdev->device_name[i] == '\n') + vdev->device_name[i] = '\0'; + spin_unlock_irqrestore(&adapter->lock, flags); + } else if (attr == &vscsi_target_active_attr) { + if (value) { + int rc; + if (!vdev->disabled) { + warn("Warning: Target was already active\n"); + return -EINVAL; + } + rc = activate_device(vdev); + if (rc) { + err("Error opening device=%d\n", rc); + return rc; + } + } else { + if (!vdev->disabled) + deactivate_device(vdev); + } + } else if (attr == &vscsi_target_ro_attr) + vdev->b.ro = value > 0 ? 
1 : 0; + else + BUG(); + + return count; +} + +static struct attribute * vscsi_target_attrs[] = { + &vscsi_target_device_attr, + &vscsi_target_active_attr, + &vscsi_target_ro_attr, + NULL, +}; + +static struct sysfs_ops vscsi_target_ops = { + .show = vscsi_target_show, + .store = vscsi_target_store, +}; + +static struct kobj_type ktype_vscsi_target = { + .release = vscsi_target_release, + .sysfs_ops = &vscsi_target_ops, + .default_attrs = vscsi_target_attrs, +}; + +/* Bus sysfs stuff */ +static ssize_t vscsi_bus_show(struct kobject * kobj, + struct attribute * attr, char * buf) +{ + struct vbus *vbus = container_of(kobj, struct vbus, kobj); + return sprintf(buf, "%d\n", atomic_read(&vbus->num_targets)); +} + +static ssize_t vscsi_bus_store(struct kobject * kobj, struct attribute * attr, +const char * buf, size_t count) +{ + struct vbus *vbus = container_of(kobj, struct vbus, kobj); + long value = simple_strtol(buf, NULL, 10); + + if (value < 0 || value > TARGETS_PER_BUS) + return -EINVAL; + + set_num_targets(vbus, value); + + return count; +} + +static ATTR(bus, num_targets, 0644); + +static struct attribute * vscsi_bus_attrs[] = { + &vscsi_bus_num_targets_attr, + NULL, +}; + +static struct sysfs_ops vscsi_bus_ops = { + .show = vscsi_bus_show, + .store = vscsi_bus_store, +}; + +static struct kobj_type ktype_vscsi_bus = { + .release = vscsi_bus_release, + .sysfs_ops = &vscsi_bus_ops, + .default_attrs = vscsi_bus_attrs, +}; + +/* Device attributes */ +static ssize_t vscsi_dev_bus_show(struct device * dev, + struct device_attribute *attr, + char * buf) +{ + struct server_adapter *adapter = + (struct server_adapter *)dev->driver_data; + + return sprintf(buf, "%d\n", atomic_read(&adapter->num_buses)); +} + +static ssize_t vscsi_dev_sector_show(struct device * dev, + struct device_attribute *attr, + char * buf) +{ + struct server_adapter *adapter = + (struct server_adapter *)dev->driver_data; + + return sprintf(buf, "%d\n", adapter->max_sectors); +} + +static ssize_t 
vscsi_dev_bus_store(struct device * dev, + struct device_attribute *attr, + const char * buf, size_t count) +{ + long value = simple_strtol(buf, NULL, 10); + + if (value < 0 || value > BUS_PER_ADAPTER) + return -EINVAL; + + set_num_buses(dev, value); + return count; +} + +static ssize_t vscsi_dev_sector_store(struct device * dev, + struct device_attribute *attr, + const char * buf, size_t count) +{ + long value = simple_strtol(buf, NULL, 10); + struct server_adapter *adapter = + (struct server_adapter *)dev->driver_data; + + if (value <= 8 || value > SCSI_DEFAULT_MAX_SECTORS) + return -EINVAL; + + adapter->max_sectors = value; + + return count; +} + +static ssize_t vscsi_dev_debug_store(struct device * dev, + struct device_attribute *attr, + const char * buf, size_t count) +{ + long value = simple_strtol(buf, NULL, 10); + + ibmvscsis_debug = value; + return count; +} + +static ssize_t vscsi_dev_debug_show(struct device * dev, + struct device_attribute *attr, + char * buf) +{ + return sprintf(buf, "%d\n", ibmvscsis_debug); +} + +static DEVICE_ATTR(debug, 0644, vscsi_dev_debug_show, vscsi_dev_debug_store); +static DEVICE_ATTR(num_buses, 0644, vscsi_dev_bus_show, vscsi_dev_bus_store); +static DEVICE_ATTR(max_sectors, 0644, vscsi_dev_sector_show, + vscsi_dev_sector_store); + +/* Stats kobj stuff */ + +static ATTR(stats, interrupts, 0444); +static ATTR(stats, read_ops, 0444); +static ATTR(stats, write_ops, 0444); +static ATTR(stats, crq_msgs, 0444); +static ATTR(stats, iu_allocs, 0444); +static ATTR(stats, bio_allocs, 0444); +static ATTR(stats, buf_allocs, 0444); +static ATTR(stats, errors, 0444); + +static struct attribute * vscsi_stats_attrs[] = { + &vscsi_stats_interrupts_attr, + &vscsi_stats_read_ops_attr, + &vscsi_stats_write_ops_attr, + &vscsi_stats_crq_msgs_attr, + &vscsi_stats_iu_allocs_attr, + &vscsi_stats_bio_allocs_attr, + &vscsi_stats_buf_allocs_attr, + &vscsi_stats_errors_attr, + NULL, +}; + +static ssize_t vscsi_stats_show(struct kobject * kobj, + struct 
attribute * attr, char * buf) +{ + struct server_adapter *adapter= container_of(kobj, + struct server_adapter, + stats_kobj); + if (attr == &vscsi_stats_interrupts_attr) + return sprintf(buf, "%d\n", + atomic_read(&adapter->interrupts)); + if (attr == &vscsi_stats_read_ops_attr) + return sprintf(buf, "%d\n", + atomic_read(&adapter->read_processed)); + if (attr == &vscsi_stats_write_ops_attr) + return sprintf(buf, "%d\n", + atomic_read(&adapter->write_processed)); + if (attr == &vscsi_stats_crq_msgs_attr) + return sprintf(buf, "%d\n", + atomic_read(&adapter->crq_processed)); + if (attr == &vscsi_stats_iu_allocs_attr) + return sprintf(buf, "%d\n", + atomic_read(&adapter->iu_count)); + if (attr == &vscsi_stats_bio_allocs_attr) + return sprintf(buf, "%d\n", + atomic_read(&adapter->bio_count)); + if (attr == &vscsi_stats_buf_allocs_attr) + return sprintf(buf, "%d\n", + atomic_read(&adapter->buffers_allocated)); + if (attr == &vscsi_stats_errors_attr) + return sprintf(buf, "%d\n", + atomic_read(&adapter->errors)); + + BUG(); + return 0; +} + +static struct sysfs_ops vscsi_stats_ops = { + .show = vscsi_stats_show, + .store = NULL, +}; + +static struct kobj_type ktype_vscsi_stats = { + .release = NULL, + .sysfs_ops = &vscsi_stats_ops, + .default_attrs = vscsi_stats_attrs, +}; + +/* ============================================================== * Module load and unload * ============================================================== */ @@ -3181,6 +3630,17 @@ return rc; } + set_num_buses(&dev->dev, 1); + adapter->max_sectors = DEFAULT_MAX_SECTORS; + device_create_file(&dev->dev, &dev_attr_debug); + device_create_file(&dev->dev, &dev_attr_num_buses); + device_create_file(&dev->dev, &dev_attr_max_sectors); + + adapter->stats_kobj.parent = &dev->dev.kobj; + strcpy(adapter->stats_kobj.name, "stats"); + adapter->stats_kobj.ktype = & ktype_vscsi_stats; + kobject_register(&adapter->stats_kobj); + return 0; } @@ -3208,26 +3668,30 @@ if (vdev && !vdev ->disabled) 
deactivate_device(vdev); } + spin_unlock_irqrestore(&adapter->lock, flags); + set_num_targets(adapter->vbus[bus], 0); + spin_lock_irqsave(&adapter->lock, flags); } } spin_unlock_irqrestore(&adapter->lock, flags); + set_num_buses(adapter->dev, 0); release_crq_queue(&adapter->queue, adapter); release_iu_pool(adapter); release_data_buffer(adapter); + kobject_unregister(&adapter->stats_kobj); + device_remove_file(&dev->dev, &dev_attr_debug); + device_remove_file(&dev->dev, &dev_attr_num_buses); + device_remove_file(&dev->dev, &dev_attr_max_sectors); + kfree(adapter); return 0; } -static struct class_interface vscsis_interface = { - .add = add_scsi_device, - .remove = rem_scsi_device, -}; - static struct vio_device_id ibmvscsis_device_table[] __devinitdata = { {"v-scsi-host", "IBM,v-scsi-host"}, {"",""} From sleddog at us.ibm.com Tue Oct 18 03:58:29 2005 From: sleddog at us.ibm.com (Dave C Boutcher) Date: Mon, 17 Oct 2005 12:58:29 -0500 Subject: [PATCH 0/3] ibmvscsis scsi target In-Reply-To: <20051017143644.GA9992@cs.umn.edu> References: <20051017143644.GA9992@cs.umn.edu> Message-ID: <20051017175829.GA12958@cs.umn.edu> On Mon, Oct 17, 2005 at 09:36:44AM -0500, Dave Boutcher wrote: > Here's the ibmvscsis SCSI target submitted for inclusion in 2.4.15. > This driver meets a couple of akpm's criteria for worthiness, in that > its actually been shipping for a while in a distro kernel, and (given > the posts when I broke compatibility) is being used. BTW, s/2.4.15/2.6.15/. Living in the past again... 
-- Dave Boutcher From paulus at samba.org Mon Oct 17 21:01:42 2005 From: paulus at samba.org (Paul Mackerras) Date: Mon, 17 Oct 2005 21:01:42 +1000 Subject: merge progress In-Reply-To: <434BB09B.9070605@hogyros.de> References: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> <434BB09B.9070605@hogyros.de> Message-ID: <17235.33942.754076.419504@cargo.ozlabs.ibm.com> Simon Richter writes: > Should I fast-forward the APUS patches towards the merge tree, or is it > better to apply them in the unmerged tree and merge them back into the > merge tree from there? Hmmm, probably should work off the merge tree. However, we decided at OLS that platforms would require a device tree before being merged. Have you looked at creating a suitable flattened device tree blob with dtc? (If not, you can continue to compile with ARCH=ppc for now.) Paul. From kumar.gala at freescale.com Tue Oct 18 08:39:02 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Mon, 17 Oct 2005 17:39:02 -0500 Subject: merge progress In-Reply-To: <17235.33942.754076.419504@cargo.ozlabs.ibm.com> References: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> <434BB09B.9070605@hogyros.de> <17235.33942.754076.419504@cargo.ozlabs.ibm.com> Message-ID: > Hmmm, probably should work off the merge tree. However, we decided at > OLS that platforms would require a device tree before being merged. > Have you looked at creating a suitable flattened device tree blob with > dtc? (If not, you can continue to compile with ARCH=ppc for now.) That would assume the ARCH=ppc builds in the merge tree. Paul, stop breaking ARCH=ppc just because you're annoyed at me for killing 970 and POWER4 in the cputable :) syscalls has issues in ARCH=ppc. 
- kumar From Simon.Richter at hogyros.de Tue Oct 18 08:19:07 2005 From: Simon.Richter at hogyros.de (Simon Richter) Date: Tue, 18 Oct 2005 00:19:07 +0200 Subject: merge progress In-Reply-To: <17235.33942.754076.419504@cargo.ozlabs.ibm.com> References: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> <434BB09B.9070605@hogyros.de> <17235.33942.754076.419504@cargo.ozlabs.ibm.com> Message-ID: <4354235B.8090608@hogyros.de> Hi, Paul Mackerras schrieb: > Hmmm, probably should work off the merge tree. Good, then I'll fast-forward them there. > However, we decided at > OLS that platforms would require a device tree before being merged. > Have you looked at creating a suitable flattened device tree blob with > dtc? (If not, you can continue to compile with ARCH=ppc for now.) In fact my current plan is to move away from m68k bootinfo (which APUS borrows so you can use the same bootloader) towards a flattened dev tree passed in from the bootloader. For that, however, we need a new bootloader first, which is dependent on AmigaOS binutils/gcc. So I guess it will be ARCH=ppc until the new bootloader is ready. Should APUS still get a special treatment then, or should I try to emulate enough of OF in the bootloader so it is possible to build CHRP images that work on APUS? Simon -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 374 bytes Desc: OpenPGP digital signature Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051018/a695f5d7/attachment.pgp From paulus at samba.org Tue Oct 18 14:44:40 2005 From: paulus at samba.org (Paul Mackerras) Date: Tue, 18 Oct 2005 14:44:40 +1000 Subject: Handling of alignment exceptions of load/store multiples In-Reply-To: <5b0bf3bd6357f0a9ce7a4c22b116496b@freescale.com> References: <5b0bf3bd6357f0a9ce7a4c22b116496b@freescale.com> Message-ID: <17236.32184.79015.66205@cargo.ozlabs.ibm.com> Becky Bruce writes: > I'm currently in the process of merging align.c into > arch/powerpc/kernel. I noticed that there's a difference in the > handling of alignment exceptions involving ld/st string instructions > and ld/st multiple instructions between the 2 architectures right now. > The 32-bit code does some handling of these, while the 64-bit version > currently just bails out. > > Should I try to adopt a handling model for these in the merged tree > like the 32-bit code, or is there a reason behind not attempting to > handle these on the 64-bit side that I'm not aware of? I think we might as well include the code for handling them, since we have it. On the 64-bit side, I think they left that stuff out because (AFAICT) all the 64-bit cpus handle misaligned ld/st string in hardware, and misaligned ld/st multiple are deprecated by the architecture (implementations are allowed to produce boundedly undefined results). Regards, Paul. 
From michael at ellerman.id.au Tue Oct 18 15:39:42 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Tue, 18 Oct 2005 15:39:42 +1000 Subject: [PATCH 10/11] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <309f5f701666369cdd618d06f664439e@kernel.crashing.org> References: <20051017114847.B8EF6685BE@ozlabs.org> <309f5f701666369cdd618d06f664439e@kernel.crashing.org> Message-ID: <200510181539.42184.michael@ellerman.id.au> On Tue, 18 Oct 2005 00:44, Segher Boessenkool wrote: > > +typedef enum { > > + BRANCH_RELATIVE, > > + BRANCH_RELATIVE_LINK, > > + BRANCH_ABSOLUTE, > > + BRANCH_ABSOLUTE_LINK > > +} branch_t; > > Why not say BRANCH_RELATIVE = 0, BRANCH_RELATIVE_LINK = 1, etc., > and then later just or this value into the insn? I thought that might be too tricky, but perhaps you're right. > > + asm ("dcbst 0, %0; sync; icbi 0,%0; isync" : : "r" (p)); > > sequence should be dcbst; sync; icbi; sync; isync to ensure > the insn cache invalidation is global before continuing. Oops, thanks. I just copied that from somewhere. I'll fix it up. cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person From geert at linux-m68k.org Tue Oct 18 17:50:26 2005 From: geert at linux-m68k.org (Geert Uytterhoeven) Date: Tue, 18 Oct 2005 09:50:26 +0200 (CEST) Subject: merge progress In-Reply-To: <4354235B.8090608@hogyros.de> References: <17227.44943.714106.911471@cargo.ozlabs.ibm.com> <434BB09B.9070605@hogyros.de> <17235.33942.754076.419504@cargo.ozlabs.ibm.com> <4354235B.8090608@hogyros.de> Message-ID: On Tue, 18 Oct 2005, Simon Richter wrote: > Paul Mackerras schrieb: > > Hmmm, probably should work off the merge tree. > > Good, then I'll fast-forward them there. 
> > > However, we decided at > > OLS that platforms would require a device tree before being merged. > > Have you looked at creating a suitable flattened device tree blob with > > dtc? (If not, you can continue to compile with ARCH=ppc for now.) > > In fact my current plan is to move away from m68k bootinfo (which APUS > borrows so you can use the same bootloader) towards a flattened dev tree Is requiring a flattened device tree really the right approach to take, for APUS? APUS borrows from/shares with m68k a lot of code. BTW, why do you want to move away from m68k bootinfo (apart from using a flattened device tree)? > passed in from the bootloader. For that, however, we need a new > bootloader first, which is dependant on AmigaOS binutils/gcc. > > So I guess it will be ARCH=ppc until the new bootloader is ready. Happy hacking! Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert at linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds From becky.bruce at freescale.com Wed Oct 19 00:19:28 2005 From: becky.bruce at freescale.com (Becky Bruce) Date: Tue, 18 Oct 2005 09:19:28 -0500 Subject: Handling of alignment exceptions of load/store multiples In-Reply-To: <17236.32184.79015.66205@cargo.ozlabs.ibm.com> References: <5b0bf3bd6357f0a9ce7a4c22b116496b@freescale.com> <17236.32184.79015.66205@cargo.ozlabs.ibm.com> Message-ID: <1a8fa2a06a11595831d7487a51438cb2@freescale.com> Ahhhhh..... that explains it. Leaving it in shouldn't hurt - I'll do as you suggest. "Boundedly undefined" results - my favorite! Thanks! -B On Oct 17, 2005, at 11:44 PM, Paul Mackerras wrote: > Becky Bruce writes: > >> I'm currently in the process of merging align.c into >> arch/powerpc/kernel. 
I noticed that there's a difference in the >> handling of alignment exceptions involving ld/st string instructions >> and ld/st multiple instructions between the 2 architectures right now. >> The 32-bit code does some handling of these, while the 64-bit version >> currently just bails out. >> >> Should I try to adopt a handling model for these in the merged tree >> like the 32-bit code, or is there a reason behind not attempting to >> handle these on the 64-bit side that I'm not aware of? > > I think we might as well include the code for handling them, since we > have it. On the 64-bit side, I think they left that stuff out because > (AFAICT) all the 64-bit cpus handle misaligned ld/st string in > hardware, and misaligned ld/st multiple are deprecated by the > architecture (implementations are allowed to produce boundedly > undefined results). > > Regards, > Paul. From ananth at in.ibm.com Wed Oct 19 06:47:16 2005 From: ananth at in.ibm.com (Ananth N Mavinakayanahalli) Date: Tue, 18 Oct 2005 16:47:16 -0400 Subject: [PATCH] Remove duplicate declarations in arch/ppc64/mm/tlb.c Message-ID: <20051018204716.GA9981@in.ibm.com> From: Ananth N Mavinakayanahalli Looks like pte_freelist_cur and pte_freelist_forced_free are both declared twice in the same file (mismerge?). Patch removes one set. 
Signed-off-by: Ananth N Mavinakayanahalli --- arch/ppc64/mm/tlb.c | 3 --- 1 files changed, 3 deletions(-) Index: linux-2.6.14-rc4/arch/ppc64/mm/tlb.c =================================================================== --- linux-2.6.14-rc4.orig/arch/ppc64/mm/tlb.c 2005-10-10 21:19:19.000000000 -0400 +++ linux-2.6.14-rc4/arch/ppc64/mm/tlb.c 2005-10-18 16:37:48.000000000 -0400 @@ -48,9 +48,6 @@ struct pte_freelist_batch pgtable_free_t tables[0]; }; -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -unsigned long pte_freelist_forced_free; - #define PTE_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ / sizeof(pgtable_free_t)) From olof at lixom.net Wed Oct 19 06:56:08 2005 From: olof at lixom.net (Olof Johansson) Date: Tue, 18 Oct 2005 15:56:08 -0500 Subject: [PATCH 10/11] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <200510181539.42184.michael@ellerman.id.au> References: <20051017114847.B8EF6685BE@ozlabs.org> <309f5f701666369cdd618d06f664439e@kernel.crashing.org> <200510181539.42184.michael@ellerman.id.au> Message-ID: <20051018205608.GD6924@austin.ibm.com> On Tue, Oct 18, 2005 at 03:39:42PM +1000, Michael Ellerman wrote: > On Tue, 18 Oct 2005 00:44, Segher Boessenkool wrote: > > > +typedef enum { > > > + BRANCH_RELATIVE, > > > + BRANCH_RELATIVE_LINK, > > > + BRANCH_ABSOLUTE, > > > + BRANCH_ABSOLUTE_LINK > > > +} branch_t; > > > > Why not say BRANCH_RELATIVE = 0, BRANCH_RELATIVE_LINK = 1, etc., > > and then later just or this value into the insn? > > I thought that might be too tricky, but perhaps you're right. I agree with you, Segher's code is obfuscated. Your current coding is much easier to read for someone that doesn't know the instruction bitfield layout by heart. The saved instructions on doing it Segher's way are not critical since it's not during a runtime hot path that this is called. Go for readability instead. 
Actually documenting Segher's code so it's as easy to understand will make it just as large as your current one (i.e. your 7 lines, his 3 lines). -Olof From linas at austin.ibm.com Wed Oct 19 06:57:10 2005 From: linas at austin.ibm.com (linas) Date: Tue, 18 Oct 2005 15:57:10 -0500 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <1129076691.17365.250.camel@gaston> References: <1128404215.31063.32.camel@gaston> <20051011171315.2fe087e7.akpm@osdl.org> <1129076691.17365.250.camel@gaston> Message-ID: <20051018205710.GC29826@austin.ibm.com> On Wed, Oct 12, 2005 at 10:24:51AM +1000, Benjamin Herrenschmidt was heard to remark: > > > +#define BUILD_SHOW_FUNC_FIX(name, data) \ > > > +static ssize_t show_##name(struct device *dev, \ > > > + struct device_attribute *attr, \ > > > + char *buf) \ > > > +{ \ > > Ahh no, the problem here is that stupid emacs is very bad with tab > and multi-line macros and just turns the whole thing into shit, so > I used spaces. Sorry, I'm not an emacs guru and don't know how to > work around that ... Anyone who has tabstops set to 3 instead of 8 will see broken-ness for macros like this no matter what. Suggest that the right policy is to use only spaces, and never tabs, inside of macros. --linas From geoffrey.levand at am.sony.com Wed Oct 19 07:11:43 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Tue, 18 Oct 2005 14:11:43 -0700 Subject: install BOOTIMAGE In-Reply-To: <20051006063823.GA13103@suse.de> References: <434460ED.9070605@am.sony.com> <20051006063823.GA13103@suse.de> Message-ID: <4355650F.5060606@am.sony.com> Olaf Hering wrote: > On Wed, Oct 05, Geoff Levand wrote: > >>With the way the current arch/ppc64/boot/Makefile (2.6.13) calls >>install.sh, it seems 'make install' will always install vmlinux. >> >>It seems the proper behavior is for install.sh to install >>BOOTIMAGE, which for all but iSeries is a kind of zImage. Does >>this change below cause problems for anyone? 
> > > I dont think booting the zimage from the local hard disk should be > prefered over the plain vmlinux. > Sorry for the late reply, I was out on holiday. It seems queer to me to pass BOOTIMAGE to install.sh just so we can copy vmlinux to BOOTIMAGE. In the case of BOOTIMAGE=zImage, we get vmlinux copied to a file named zImage -- very queer. I guess what I want is for this to work when I build on my PC: $ make INSTALL_PATH=/target/g5/boot install And I get the proper image file zImage.vmode in /target/g5/boot/zImage.vmode, not vmlinux named /target/g5/boot/zImage.vmode. -Geoff From akpm at osdl.org Wed Oct 19 07:23:57 2005 From: akpm at osdl.org (Andrew Morton) Date: Tue, 18 Oct 2005 14:23:57 -0700 Subject: [PATCH] ppc64: Thermal control for SMU based machines In-Reply-To: <20051018205710.GC29826@austin.ibm.com> References: <1128404215.31063.32.camel@gaston> <20051011171315.2fe087e7.akpm@osdl.org> <1129076691.17365.250.camel@gaston> <20051018205710.GC29826@austin.ibm.com> Message-ID: <20051018142357.7eebedfe.akpm@osdl.org> linas wrote: > > On Wed, Oct 12, 2005 at 10:24:51AM +1000, Benjamin Herrenschmidt was heard to remark: > > > > +#define BUILD_SHOW_FUNC_FIX(name, data) \ > > > > +static ssize_t show_##name(struct device *dev, \ > > > > + struct device_attribute *attr, \ > > > > + char *buf) \ > > > > +{ \ > > > > Ahh no, the problem here is that stupid emacs is very bad with tab > > and multi-line macros and just turns the whole thing into shit, so > > I used spaces. Sorry, I'm not an emacs guru and don't know how to > > work around that ... > > Anyone who has tabstops set to 3 instead of 8 wtf? Anyone who has tabstops set to three gets their kernel license revoked. > will see broken-ness for > macros like this no matter what. Suggest that the right policy is to use > only spaces, and neve tabs, inside of macros. It works OK for everyone else... 
From olh at suse.de Wed Oct 19 09:55:53 2005 From: olh at suse.de (Olaf Hering) Date: Wed, 19 Oct 2005 01:55:53 +0200 Subject: install BOOTIMAGE In-Reply-To: <4355650F.5060606@am.sony.com> References: <434460ED.9070605@am.sony.com> <20051006063823.GA13103@suse.de> <4355650F.5060606@am.sony.com> Message-ID: <20051018235553.GA9315@suse.de> On Tue, Oct 18, Geoff Levand wrote: > It seems queer to me to pass BOOTIMAGE to install.sh just so we can > copy vmlinux to BOOTIMAGE. In the case of BOOTIMAGE=zImage, we get > vmlinux copied to a file named zImage -- very queer. > > I guess what I want is for this to work when I build on my PC: > > $ make INSTALL_PATH=/target/g5/boot install > > And I get the proper image file zImage.vmode in > /target/g5/boot/zImage.vmode, not vmlinux named > /target/g5/boot/zImage.vmode. That would be a bug. The zImage contains no info about the uname -r. If one wants to rerun mkinitrd, it will have no info about what modules to include. That's the reason why I changed it initially. -- short story of a lazy sysadmin: alias appserv=wotan From benh at kernel.crashing.org Wed Oct 19 10:59:05 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 19 Oct 2005 10:59:05 +1000 Subject: [PATCH] ppc64: Fix error in vDSO 32 bits date Message-ID: <1129683546.7620.163.camel@gaston> The implementation of __kernel_gettimeofday() in the 32-bit vDSO has a small bug (a typo actually) that will cause it to lose 1 bit of precision. Not terribly bad but worth fixing. 
Signed-off-by: Benjamin Herrenschmidt Index: linux-work/arch/ppc64/kernel/vdso32/gettimeofday.S =================================================================== --- linux-work.orig/arch/ppc64/kernel/vdso32/gettimeofday.S 2005-10-13 13:45:54.000000000 +1000 +++ linux-work/arch/ppc64/kernel/vdso32/gettimeofday.S 2005-10-19 10:59:09.000000000 +1000 @@ -109,7 +109,7 @@ lwz r6,(CFG_TB_TO_XS+4)(r9) mulhwu r4,r7,r5 mulhwu r6,r7,r6 - mullw r6,r7,r5 + mullw r0,r7,r5 addc r6,r6,r0 /* At this point, we have the scaled xsec value in r4 + XER:CA From geoffrey.levand at am.sony.com Wed Oct 19 11:19:38 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Tue, 18 Oct 2005 18:19:38 -0700 Subject: install BOOTIMAGE In-Reply-To: <20051018235553.GA9315@suse.de> References: <20051018235553.GA9315@suse.de> Message-ID: <43559F2A.6070206@am.sony.com> Olaf Hering wrote: > On Tue, Oct 18, Geoff Levand wrote: > >>It seems queer to me to pass BOOTIMAGE to install.sh just so we can >>copy vmlinux to BOOTIMAGE. In the case of BOOTIMAGE=zImage, we get >>vmlinux copied to a file named zImage -- very queer. >> >>I guess what I want is for this to work when I build on my PC: >> >>$ make INSTALL_PATH=/target/g5/boot install >> >>And I get the proper image file zImage.vmode in >>/target/g5/boot/zImage.vmode, not vmlinux named >>/target/g5/boot/zImage.vmode. > > > That would be a bug. > The zImage contains no info about the uname -r. If one wants to rerun > mkinitrd, > it will have no info about what modules to include. > Thats the reason why I changed it initially. > OK, but I don't understand why you do the rename. Why not just install vmlinux to the file ${INSTALL_PATH}/vmlinux? 
-Geoff From geoffrey.levand at am.sony.com Wed Oct 19 11:01:12 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Tue, 18 Oct 2005 18:01:12 -0700 Subject: kgdb for ppc64 now available In-Reply-To: <20050930063234.GA4763@krispykreme> References: <433C22EF.8050007@am.sony.com> <20050930063234.GA4763@krispykreme> Message-ID: <43559AD8.3040904@am.sony.com> Anton Blanchard wrote: > Hi, > >>For those interested, kgdb now supports ppc64 SMP. On powermac only >>an Ethernet connection is supported currently, but generic 8250 UART >>PCI cards will be supported soon. ... > > + return kgdb_handle_exception(0, computeSignal(regs->trap), 0, regs); > > We should use TRAP(regs) instead of regs->trap since the bottom bits may > not be zero for an exception that only saves a partial register set. > Anton, I came back to do a little more work here. I had a problem in that we have this in ptrace.h: * * We use the least-significant bit of the trap field to indicate * whether we have saved the full set of registers, or only a * partial set. A 1 there means the partial set. */ #define FULL_REGS(regs) (((regs)->trap & 1) == 0) #define TRAP(regs) ((regs)->trap & ~0xF) #define CHECK_FULL_REGS(regs) BUG_ON(regs->trap & 1) But I see TRAP(regs) masks off four bits, not just the LSB. This would seem to cause trouble with the 0x2002 trap that someone put into the kgdb trap table: { 0x2002, 0x05 /* SIGTRAP */}, /* debug */ It seems this code was carried over from the ppc32 BookE code though. Any idea what this is? I would guess it's not a valid 64-bit vector so it can be removed. 
-Geoff From michael at ellerman.id.au Wed Oct 19 14:27:01 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Wed, 19 Oct 2005 14:27:01 +1000 (EST) Subject: [PATCH] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <309f5f701666369cdd618d06f664439e@kernel.crashing.org> Message-ID: <20051019042701.73F9A685D8@ozlabs.org> There's a few places already, and soon will be more, where we synthesise branch instructions at runtime. Rather than doing it by hand in each case, it would make sense to have one implementation. Updated to flush caches properly in create_instruction(). Signed-off-by: Michael Ellerman --- include/asm-powerpc/system.h | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 52 insertions(+) Index: kexec/include/asm-powerpc/system.h =================================================================== --- kexec.orig/include/asm-powerpc/system.h +++ kexec/include/asm-powerpc/system.h @@ -356,5 +356,57 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) +typedef enum { + BRANCH_RELATIVE, + BRANCH_RELATIVE_LINK, + BRANCH_ABSOLUTE, + BRANCH_ABSOLUTE_LINK +} branch_t; + +static inline void create_instruction(unsigned long addr, unsigned int instr) +{ + unsigned int *p; + p = (unsigned int *)addr; + *p = instr; + asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (p)); +} + +static inline void create_branch(unsigned long addr, unsigned long target, + branch_t type) +{ + unsigned int instruction; + + instruction = 0x48000000; /* Basic branch opcode */ + + if (BRANCH_ABSOLUTE == type || BRANCH_ABSOLUTE_LINK == type) + instruction |= 0x02; /* set AA (absolute address) */ + else + target = target - addr; + + if (BRANCH_RELATIVE_LINK == type || BRANCH_ABSOLUTE_LINK == type) + instruction |= 0x01; /* set LK (set link register) */ + + instruction |= target & 0x03FFFFFC; + + create_instruction(addr, instruction); +} + +static inline void 
create_function_call(unsigned long addr, void * func) +{ + unsigned long func_addr; + +#ifdef CONFIG_PPC64 + /* + * On PPC64 the function pointer actually points to the function's + * descriptor. The first entry in the descriptor is the address + * of the function text. + */ + func_addr = *(unsigned long *)func; +#else + func_addr = (unsigned long)func; +#endif + create_branch(addr, func_addr, BRANCH_RELATIVE_LINK); +} + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ From david at gibson.dropbear.id.au Wed Oct 19 14:53:32 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Wed, 19 Oct 2005 14:53:32 +1000 Subject: Merge pmc.[ch] Message-ID: <20051019045332.GC27451@localhost.localdomain> Merge ppc64 pmc.[ch] with ppc32 perfmon.[ch] This patch merges the ppc32 and ppc64 versions of the headers and .c files with helper functions for manipulating the performance counting hardware. As a side effect, it removes use of the term "perfmon" from ppc32, thus avoiding confusion with the unrelated performance counter interface from HP Labs also called "perfmon". Built, but not booted, for g5, pSeries, iSeries, and 32-bit Powermac with both ARCH=powerpc and ARCH=ppc{,64} as appropriate. Signed-off-by: David Gibson Index: working-2.6/include/asm-powerpc/pmc.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ working-2.6/include/asm-powerpc/pmc.h 2005-10-19 14:25:42.000000000 +1000 @@ -0,0 +1,46 @@ +/* + * pmc.h + * Copyright (C) 2004 David Gibson, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _POWERPC_PMC_H +#define _POWERPC_PMC_H + +#include + +typedef void (*perf_irq_t)(struct pt_regs *); + +int reserve_pmc_hardware(perf_irq_t new_perf_irq); +void release_pmc_hardware(void); + +#ifdef CONFIG_PPC64 +void power4_enable_pmcs(void); +#endif + +#ifdef CONFIG_FSL_BOOKE +void init_pmc_stop(int ctr); +void set_pmc_event(int ctr, int event); +void set_pmc_user_kernel(int ctr, int user, int kernel); +void set_pmc_marked(int ctr, int mark0, int mark1); +void pmc_start_ctr(int ctr, int enable); +void pmc_start_ctrs(int enable); +void pmc_stop_ctrs(void); +void dump_pmcs(void); + +extern struct op_powerpc_model op_model_fsl_booke; +#endif + +#endif /* _POWERPC_PMC_H */ Index: working-2.6/include/asm-ppc64/pmc.h =================================================================== --- working-2.6.orig/include/asm-ppc64/pmc.h 2005-09-07 15:13:26.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -/* - * pmc.h - * Copyright (C) 2004 David Gibson, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _PPC64_PMC_H -#define _PPC64_PMC_H - -#include - -typedef void (*perf_irq_t)(struct pt_regs *); - -int reserve_pmc_hardware(perf_irq_t new_perf_irq); -void release_pmc_hardware(void); - -void power4_enable_pmcs(void); - -#endif /* _PPC64_PMC_H */ Index: working-2.6/arch/powerpc/kernel/pmc.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ working-2.6/arch/powerpc/kernel/pmc.c 2005-10-19 14:25:42.000000000 +1000 @@ -0,0 +1,112 @@ +/* + * arch/powerpc/kernel/pmc.c + * + * Copyright (C) 2004 David Gibson, IBM Corporation. + * Includes code formerly from arch/ppc/kernel/perfmon.c: + * Author: Andy Fleming + * Copyright (c) 2004 Freescale Semiconductor, Inc + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#if defined(CONFIG_FSL_BOOKE) && !defined(CONFIG_E200) +static void dummy_perf(struct pt_regs *regs) +{ + unsigned int pmgc0 = mfpmr(PMRN_PMGC0); + + pmgc0 &= ~PMGC0_PMIE; + mtpmr(PMRN_PMGC0, pmgc0); +} +#elif defined(CONFIG_PPC64) || defined(CONFIG_6xx) + +#ifndef MMCR0_PMAO +#define MMCR0_PMAO 0 +#endif + +/* Ensure exceptions are disabled */ +static void dummy_perf(struct pt_regs *regs) +{ + unsigned int mmcr0 = mfspr(SPRN_MMCR0); + + mmcr0 &= ~(MMCR0_PMXE|MMCR0_PMAO); + mtspr(SPRN_MMCR0, mmcr0); +} +#else +static void dummy_perf(struct pt_regs *regs) +{ +} +#endif + +static DEFINE_SPINLOCK(pmc_owner_lock); +static void *pmc_owner_caller; /* mostly for debugging */ +perf_irq_t perf_irq = dummy_perf; + +int reserve_pmc_hardware(perf_irq_t new_perf_irq) +{ + int err = 0; + + spin_lock(&pmc_owner_lock); + + if (pmc_owner_caller) { + printk(KERN_WARNING "reserve_pmc_hardware: " + "PMC hardware busy (reserved by caller %p)\n", + pmc_owner_caller); + err = -EBUSY; + goto out; + } + + pmc_owner_caller = __builtin_return_address(0); + perf_irq = new_perf_irq ? : dummy_perf; + + out: + spin_unlock(&pmc_owner_lock); + return err; +} +EXPORT_SYMBOL_GPL(reserve_pmc_hardware); + +void release_pmc_hardware(void) +{ + spin_lock(&pmc_owner_lock); + + WARN_ON(! 
pmc_owner_caller); + + pmc_owner_caller = NULL; + perf_irq = dummy_perf; + + spin_unlock(&pmc_owner_lock); +} +EXPORT_SYMBOL_GPL(release_pmc_hardware); + +#ifdef CONFIG_PPC64 +void power4_enable_pmcs(void) +{ + unsigned long hid0; + + hid0 = mfspr(SPRN_HID0); + hid0 |= 1UL << (63 - 20); + + /* POWER4 requires the following sequence */ + asm volatile( + "sync\n" + "mtspr %1, %0\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "isync" : "=&r" (hid0) : "i" (SPRN_HID0), "0" (hid0): + "memory"); +} +#endif /* CONFIG_PPC64 */ Index: working-2.6/arch/ppc/kernel/perfmon.c =================================================================== --- working-2.6.orig/arch/ppc/kernel/perfmon.c 2005-10-19 14:16:40.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,96 +0,0 @@ -/* kernel/perfmon.c - * PPC 32 Performance Monitor Infrastructure - * - * Author: Andy Fleming - * Copyright (c) 2004 Freescale Semiconductor, Inc - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* A lock to regulate grabbing the interrupt */ -DEFINE_SPINLOCK(perfmon_lock); - -#if defined (CONFIG_FSL_BOOKE) && !defined (CONFIG_E200) -static void dummy_perf(struct pt_regs *regs) -{ - unsigned int pmgc0 = mfpmr(PMRN_PMGC0); - - pmgc0 &= ~PMGC0_PMIE; - mtpmr(PMRN_PMGC0, pmgc0); -} - -#elif defined(CONFIG_6xx) -/* Ensure exceptions are disabled */ -static void dummy_perf(struct pt_regs *regs) -{ - unsigned int mmcr0 = mfspr(SPRN_MMCR0); - - mmcr0 &= ~MMCR0_PMXE; - mtspr(SPRN_MMCR0, mmcr0); -} -#else -static void dummy_perf(struct pt_regs *regs) -{ -} -#endif - -void (*perf_irq)(struct pt_regs *) = dummy_perf; - -/* Grab the interrupt, if it's free. - * Returns 0 on success, -1 if the interrupt is taken already */ -int reserve_pmc_hardware(void (*handler)(struct pt_regs *)) -{ - int err = 0; - - spin_lock(&perfmon_lock); - - if (perf_irq == dummy_perf) - perf_irq = handler; - else { - pr_info("perfmon irq already handled by %p\n", perf_irq); - err = -EBUSY; - } - - spin_unlock(&perfmon_lock); - - return err; -} - -void release_pmc_hardware(void) -{ - spin_lock(&perfmon_lock); - - perf_irq = dummy_perf; - - spin_unlock(&perfmon_lock); -} - -EXPORT_SYMBOL(perf_irq); -EXPORT_SYMBOL(reserve_pmc_hardware); -EXPORT_SYMBOL(release_pmc_hardware); Index: working-2.6/arch/ppc64/kernel/pmc.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/pmc.c 2005-10-19 14:16:41.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,88 +0,0 @@ -/* - * linux/arch/ppc64/kernel/pmc.c - * - * Copyright (C) 2004 David Gibson, IBM Corporation. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include - -#include -#include - -/* Ensure exceptions are disabled */ -static void dummy_perf(struct pt_regs *regs) -{ - unsigned int mmcr0 = mfspr(SPRN_MMCR0); - - mmcr0 &= ~(MMCR0_PMXE|MMCR0_PMAO); - mtspr(SPRN_MMCR0, mmcr0); -} - -static DEFINE_SPINLOCK(pmc_owner_lock); -static void *pmc_owner_caller; /* mostly for debugging */ -perf_irq_t perf_irq = dummy_perf; - -int reserve_pmc_hardware(perf_irq_t new_perf_irq) -{ - int err = 0; - - spin_lock(&pmc_owner_lock); - - if (pmc_owner_caller) { - printk(KERN_WARNING "reserve_pmc_hardware: " - "PMC hardware busy (reserved by caller %p)\n", - pmc_owner_caller); - err = -EBUSY; - goto out; - } - - pmc_owner_caller = __builtin_return_address(0); - perf_irq = new_perf_irq ? : dummy_perf; - - out: - spin_unlock(&pmc_owner_lock); - return err; -} -EXPORT_SYMBOL_GPL(reserve_pmc_hardware); - -void release_pmc_hardware(void) -{ - spin_lock(&pmc_owner_lock); - - WARN_ON(! 
pmc_owner_caller); - - pmc_owner_caller = NULL; - perf_irq = dummy_perf; - - spin_unlock(&pmc_owner_lock); -} -EXPORT_SYMBOL_GPL(release_pmc_hardware); - -void power4_enable_pmcs(void) -{ - unsigned long hid0; - - hid0 = mfspr(SPRN_HID0); - hid0 |= 1UL << (63 - 20); - - /* POWER4 requires the following sequence */ - asm volatile( - "sync\n" - "mtspr %1, %0\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "isync" : "=&r" (hid0) : "i" (SPRN_HID0), "0" (hid0): - "memory"); -} Index: working-2.6/include/asm-ppc/perfmon.h =================================================================== --- working-2.6.orig/include/asm-ppc/perfmon.h 2005-10-19 14:16:42.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -#ifndef __PERFMON_H -#define __PERFMON_H - -extern void (*perf_irq)(struct pt_regs *); - -int reserve_pmc_hardware(void (*handler)(struct pt_regs *)); -void release_pmc_hardware(void); - -#ifdef CONFIG_FSL_BOOKE -void init_pmc_stop(int ctr); -void set_pmc_event(int ctr, int event); -void set_pmc_user_kernel(int ctr, int user, int kernel); -void set_pmc_marked(int ctr, int mark0, int mark1); -void pmc_start_ctr(int ctr, int enable); -void pmc_start_ctrs(int enable); -void pmc_stop_ctrs(void); -void dump_pmcs(void); - -extern struct op_powerpc_model op_model_fsl_booke; -#endif - -#endif /* __PERFMON_H */ Index: working-2.6/arch/powerpc/kernel/Makefile =================================================================== --- working-2.6.orig/arch/powerpc/kernel/Makefile 2005-10-19 14:16:38.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/Makefile 2005-10-19 14:25:42.000000000 +1000 @@ -11,7 +11,7 @@ endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ - signal_32.o + signal_32.o pmc.o obj-$(CONFIG_PPC64) += binfmt_elf32.o sys_ppc32.o obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o Index: working-2.6/arch/ppc/kernel/Makefile 
=================================================================== --- working-2.6.orig/arch/ppc/kernel/Makefile 2005-10-19 14:16:39.000000000 +1000 +++ working-2.6/arch/ppc/kernel/Makefile 2005-10-19 14:25:42.000000000 +1000 @@ -15,7 +15,7 @@ obj-y := entry.o traps.o irq.o idle.o time.o misc.o \ process.o align.o \ setup.o \ - ppc_htab.o perfmon.o + ppc_htab.o obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o obj-$(CONFIG_POWER4) += cpu_setup_power4.o @@ -38,7 +38,7 @@ else obj-y := irq.o idle.o time.o \ - align.o perfmon.o + align.o obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o obj-$(CONFIG_MODULES) += module.o Index: working-2.6/arch/ppc64/kernel/Makefile =================================================================== --- working-2.6.orig/arch/ppc64/kernel/Makefile 2005-10-19 14:16:40.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/Makefile 2005-10-19 14:25:42.000000000 +1000 @@ -17,7 +17,7 @@ udbg.o ioctl32.o \ ptrace32.o rtc.o \ cpu_setup_power4.o \ - iommu.o sysfs.o vdso.o pmc.o firmware.o + iommu.o sysfs.o vdso.o firmware.o obj-y += vdso32/ vdso64/ pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o Index: working-2.6/arch/powerpc/kernel/traps.c =================================================================== --- working-2.6.orig/arch/powerpc/kernel/traps.c 2005-10-19 14:16:39.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/traps.c 2005-10-19 14:25:42.000000000 +1000 @@ -40,9 +40,9 @@ #include #include #include +#include #ifdef CONFIG_PPC32 #include -#include #endif #ifdef CONFIG_PMAC_BACKLIGHT #include @@ -51,7 +51,6 @@ #include #include #include -#include #endif #ifdef CONFIG_PPC64 /* XXX */ Index: working-2.6/arch/powerpc/oprofile/common.c =================================================================== --- working-2.6.orig/arch/powerpc/oprofile/common.c 2005-10-19 14:16:39.000000000 +1000 +++ working-2.6/arch/powerpc/oprofile/common.c 2005-10-19 
14:25:42.000000000 +1000 @@ -22,11 +22,7 @@ #include #include #include -#ifdef __powerpc64__ #include -#else /* __powerpc64__ */ -#include -#endif /* __powerpc64__ */ #include #include Index: working-2.6/arch/powerpc/oprofile/op_model_fsl_booke.c =================================================================== --- working-2.6.orig/arch/powerpc/oprofile/op_model_fsl_booke.c 2005-10-19 14:16:39.000000000 +1000 +++ working-2.6/arch/powerpc/oprofile/op_model_fsl_booke.c 2005-10-19 14:25:42.000000000 +1000 @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include static unsigned long reset_value[OP_MAX_COUNTER]; Index: working-2.6/arch/ppc/kernel/perfmon_fsl_booke.c =================================================================== --- working-2.6.orig/arch/ppc/kernel/perfmon_fsl_booke.c 2005-05-24 14:12:22.000000000 +1000 +++ working-2.6/arch/ppc/kernel/perfmon_fsl_booke.c 2005-10-19 14:25:42.000000000 +1000 @@ -32,7 +32,7 @@ #include #include #include -#include +#include static inline u32 get_pmlca(int ctr); static inline void set_pmlca(int ctr, u32 pmlca); Index: working-2.6/arch/ppc/kernel/traps.c =================================================================== --- working-2.6.orig/arch/ppc/kernel/traps.c 2005-10-19 14:16:40.000000000 +1000 +++ working-2.6/arch/ppc/kernel/traps.c 2005-10-19 14:25:42.000000000 +1000 @@ -41,7 +41,7 @@ #ifdef CONFIG_PMAC_BACKLIGHT #include #endif -#include +#include #ifdef CONFIG_XMON extern int xmon_bpt(struct pt_regs *regs); -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! 
http://www.ozlabs.org/people/dgibson From segher at kernel.crashing.org Wed Oct 19 22:32:55 2005 From: segher at kernel.crashing.org (Segher Boessenkool) Date: Wed, 19 Oct 2005 14:32:55 +0200 Subject: [PATCH 10/11] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <20051018205608.GD6924@austin.ibm.com> References: <20051017114847.B8EF6685BE@ozlabs.org> <309f5f701666369cdd618d06f664439e@kernel.crashing.org> <200510181539.42184.michael@ellerman.id.au> <20051018205608.GD6924@austin.ibm.com> Message-ID: <92d08f1ea1c2489ed43b609062c62f18@kernel.crashing.org> > I agree with you, Segher's code is obfuscated. Your current coding > is much easier to read for someone that doesn't know the instruction > bitfield layout by heart. The saved instructions on doing it Segher's > way are not critical since it's not during a runtime hot path that this > is called. Go for readability instead. Well I think it is much more readable, we'll just have to disagree here. That's just fine, it's not my code :-) > Actually documenting Segher's code so it's as easy to understand will > make it just as large as your current one (i.e. your 7 lines, his 3 > lines). Nah, it removes more comments than it would add ;-) Segher From bgill at freescale.com Thu Oct 20 02:55:28 2005 From: bgill at freescale.com (Becky Bruce) Date: Wed, 19 Oct 2005 11:55:28 -0500 (CDT) Subject: [PATCH] powerpc: merge types.h Message-ID: powerpc: Merge types.h This patch merges types.h into include/asm-powerpc. The only real changes here are that umode_t has become short instead of int in the 64-bit case, which appears to match what most other platforms have done. Also, the 32-bit version was #including linux/config.h - I have removed this as it does not appear necessary. This patch has been built on several different 32 and 64-bit platforms, and booted on mpc8540_ads. 
Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala --- commit 995fc5087d7f02d7e570c9a904a16453e1d8d307 tree 8326faf3664a5c2912ab28f822d9ab7e84fc8a1b parent 4746820969c846263fca3f0b54e0c1af883b67ed author Becky Bruce Wed, 19 Oct 2005 11:12:13 -0500 committer Becky Bruce Wed, 19 Oct 2005 11:12:13 -0500 include/asm-powerpc/types.h | 100 +++++++++++++++++++++++++++++++++++++++++++ include/asm-ppc/types.h | 69 ------------------------------ include/asm-ppc64/types.h | 79 ---------------------------------- 3 files changed, 100 insertions(+), 148 deletions(-) diff --git a/include/asm-powerpc/types.h b/include/asm-powerpc/types.h new file mode 100644 --- /dev/null +++ b/include/asm-powerpc/types.h @@ -0,0 +1,100 @@ +#ifndef _ASM_POWERPC_TYPES_H +#define _ASM_POWERPC_TYPES_H + +#ifndef __ASSEMBLY__ + +/* + * This file is never included by application software unless + * explicitly requested (e.g., via linux/types.h) in which case the + * application is Linux specific so (user-) name space pollution is + * not a major issue. However, for interoperability, libraries still + * need to be careful to avoid a name clashes. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +typedef unsigned short umode_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. 
Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#ifdef __powerpc64__ +typedef __signed__ long __s64; +typedef unsigned long __u64; +#else +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif +#endif /* __powerpc64__ */ + +typedef struct { + __u32 u[4]; +} __attribute((aligned(16))) __vector128; + +#endif /* __ASSEMBLY__ */ + +#ifdef __KERNEL__ +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __powerpc64__ +#define BITS_PER_LONG 64 +#else +#define BITS_PER_LONG 32 +#endif + +#ifndef __ASSEMBLY__ + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +#ifdef __powerpc64__ +typedef signed long s64; +typedef unsigned long u64; +#else +typedef signed long long s64; +typedef unsigned long long u64; +#endif + +typedef __vector128 vector128; + +typedef u32 dma_addr_t; +typedef u64 dma64_addr_t; + +typedef struct { + unsigned long entry; + unsigned long toc; + unsigned long env; +} func_descr_t; + +#ifdef CONFIG_LBD +typedef u64 sector_t; +#define HAVE_SECTOR_T +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_POWERPC_TYPES_H */ diff --git a/include/asm-ppc/types.h b/include/asm-ppc/types.h deleted file mode 100644 --- a/include/asm-ppc/types.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _PPC_TYPES_H -#define _PPC_TYPES_H - -#ifndef __ASSEMBLY__ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) -typedef __signed__ long long 
__s64; -typedef unsigned long long __u64; -#endif - -typedef struct { - __u32 u[4]; -} __vector128; - -/* - * XXX allowed outside of __KERNEL__ for now, until glibc gets - * a proper set of asm headers of its own. -- paulus - */ -typedef unsigned short umode_t; - -#endif /* __ASSEMBLY__ */ - -#ifdef __KERNEL__ -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#define BITS_PER_LONG 32 - -#ifndef __ASSEMBLY__ - -#include - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long long s64; -typedef unsigned long long u64; - -typedef __vector128 vector128; - -/* DMA addresses are 32-bits wide */ -typedef u32 dma_addr_t; -typedef u64 dma64_addr_t; - -#ifdef CONFIG_LBD -typedef u64 sector_t; -#define HAVE_SECTOR_T -#endif - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif diff --git a/include/asm-ppc64/types.h b/include/asm-ppc64/types.h deleted file mode 100644 --- a/include/asm-ppc64/types.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef _PPC64_TYPES_H -#define _PPC64_TYPES_H - -#ifndef __ASSEMBLY__ - -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -typedef unsigned int umode_t; - -/* - * __xx is ok: it doesn't pollute the POSIX namespace. 
Use these in the - * header files exported to user space - */ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -typedef __signed__ long __s64; -typedef unsigned long __u64; - -typedef struct { - __u32 u[4]; -} __attribute((aligned(16))) __vector128; - -#endif /* __ASSEMBLY__ */ - -#ifdef __KERNEL__ -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#define BITS_PER_LONG 64 - -#ifndef __ASSEMBLY__ - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long s64; -typedef unsigned long u64; - -typedef __vector128 vector128; - -typedef u32 dma_addr_t; -typedef u64 dma64_addr_t; - -typedef struct { - unsigned long entry; - unsigned long toc; - unsigned long env; -} func_descr_t; - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif /* _PPC64_TYPES_H */ From agl at us.ibm.com Thu Oct 20 05:46:46 2005 From: agl at us.ibm.com (Adam Litke) Date: Wed, 19 Oct 2005 14:46:46 -0500 Subject: [RFC] hugetlb: Allow update_mmu_cache to handle hugetlb pages Message-ID: <1129751206.8702.49.camel@localhost.localdomain> Hugetlb demand faulting is now in -mm and copy-on-write support will be coming soon. When updating huge ptes, we'll need to call update_mmu_cache() which currently can't handle huge pages. This patch fixes the problem. Is this acceptable? or is there a more elegant way to solve this? 
hash_utils.c | 6 ++++-- hugetlbpage.c | 4 ---- init.c | 17 ++++++++++++----- 3 files changed, 16 insertions(+), 11 deletions(-) diff -upN reference/arch/ppc64/mm/hash_utils.c current/arch/ppc64/mm/hash_utils.c --- reference/arch/ppc64/mm/hash_utils.c +++ current/arch/ppc64/mm/hash_utils.c @@ -343,9 +343,11 @@ int hash_page(unsigned long ea, unsigned local = 1; /* Is this a huge page ? */ - if (unlikely(in_hugepage_area(mm->context, ea))) + if (unlikely(in_hugepage_area(mm->context, ea))) { + spin_lock(&mm->page_table_lock); ret = hash_huge_page(mm, access, ea, vsid, local); - else { + spin_unlock(&mm->page_table_lock); + } else { ptep = find_linux_pte(pgdir, ea); if (ptep == NULL) return 1; diff -upN reference/arch/ppc64/mm/hugetlbpage.c current/arch/ppc64/mm/hugetlbpage.c --- reference/arch/ppc64/mm/hugetlbpage.c +++ current/arch/ppc64/mm/hugetlbpage.c @@ -625,8 +625,6 @@ int hash_huge_page(struct mm_struct *mm, long slot; int err = 1; - spin_lock(&mm->page_table_lock); - ptep = huge_pte_offset(mm, ea); /* Search the Linux page table for a match with va */ @@ -739,7 +737,5 @@ repeat: err = 0; out: - spin_unlock(&mm->page_table_lock); - return err; } diff -upN reference/arch/ppc64/mm/init.c current/arch/ppc64/mm/init.c --- reference/arch/ppc64/mm/init.c +++ current/arch/ppc64/mm/init.c @@ -769,6 +769,7 @@ void update_mmu_cache(struct vm_area_str int local = 0; cpumask_t tmp; unsigned long flags; + int huge = in_hugepage_area(vma->vm_mm->context, ea); /* handle i-cache coherency */ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && @@ -792,9 +793,11 @@ void update_mmu_cache(struct vm_area_str if (pgdir == NULL) return; - ptep = find_linux_pte(pgdir, ea); - if (!ptep) - return; + if (likely(!huge)) { + ptep = find_linux_pte(pgdir, ea); + if (!ptep) + return; + } vsid = get_vsid(vma->vm_mm->context.id, ea); @@ -803,8 +806,12 @@ void update_mmu_cache(struct vm_area_str if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) local = 1; - __hash_page(ea, pte_val(pte) & 
(_PAGE_USER|_PAGE_RW), vsid, ptep, - 0x300, local); + if (likely(!huge)) + __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, + ptep, 0x300, local); + else + hash_huge_page(vma->vm_mm, pte_val(pte), ea, vsid, local); + local_irq_restore(flags); } -- Adam Litke - (agl at us.ibm.com) IBM Linux Technology Center From benh at kernel.crashing.org Thu Oct 20 08:10:01 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Thu, 20 Oct 2005 08:10:01 +1000 Subject: [RFC] hugetlb: Allow update_mmu_cache to handle hugetlb pages In-Reply-To: <1129751206.8702.49.camel@localhost.localdomain> References: <1129751206.8702.49.camel@localhost.localdomain> Message-ID: <1129759801.7620.215.camel@gaston> On Wed, 2005-10-19 at 14:46 -0500, Adam Litke wrote: > Hugetlb demand faulting is now in -mm and copy-on-write support will be > coming soon. When updating huge ptes, we'll need to call > update_mmu_cache() which currently can't handle huge pages. This patch > fixes the problem. Is this acceptable? or is there a more elegant way > to solve this? Well, that piece of code is being shuffled around with the powerpc merge and the 64k pages patch so I'd rather not apply this patch as-is, but I can do the preload for huge pages on top of those. I'll have a look with David. Ben From paulus at samba.org Thu Oct 20 08:34:56 2005 From: paulus at samba.org (Paul Mackerras) Date: Thu, 20 Oct 2005 08:34:56 +1000 Subject: [PATCH] ppc64: update defconfigs Message-ID: <17238.51728.398804.781324@cargo.ozlabs.ibm.com> Just in time for 2.6.14, I hope... 
Signed-off-by: Paul Mackerras --- configs/bpa_defconfig | 79 +++++++++++++++++++++-------- configs/g5_defconfig | 124 +++++++++++++++++++++++++++------------------- configs/iSeries_defconfig | 78 +++++++++++++++++++++------- configs/maple_defconfig | 67 ++++++++++++++++-------- configs/pSeries_defconfig | 95 +++++++++++++++++++++++++---------- defconfig | 100 +++++++++++++++++++++++++++---------- 6 files changed, 380 insertions(+), 163 deletions(-) diff -urN linux-2.6/arch/ppc64/configs/bpa_defconfig test/arch/ppc64/configs/bpa_defconfig --- linux-2.6/arch/ppc64/configs/bpa_defconfig 2005-08-09 11:34:54.000000000 +1000 +++ test/arch/ppc64/configs/bpa_defconfig 2005-10-20 08:29:10.000000000 +1000 @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:12:19 2005 +# Linux kernel version: 2.6.14-rc4 +# Thu Oct 20 08:29:10 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y # CONFIG_POSIX_MQUEUE is not set @@ -36,6 +37,7 @@ CONFIG_KOBJECT_UEVENT=y # CONFIG_IKCONFIG is not set # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -95,6 +97,7 @@ # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set # CONFIG_NUMA is not set CONFIG_SCHED_SMT=y CONFIG_PREEMPT_NONE=y @@ -110,17 +113,18 @@ CONFIG_RTAS_PROC=y CONFIG_RTAS_FLASH=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # 
CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set # @@ -132,8 +136,6 @@ # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -163,8 +165,8 @@ # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set CONFIG_INET_TUNNEL=y -CONFIG_IP_TCPDIAG=y -CONFIG_IP_TCPDIAG_IPV6=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -181,6 +183,7 @@ CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_NETLINK is not set # # IP: Netfilter Configuration @@ -188,11 +191,14 @@ CONFIG_IP_NF_CONNTRACK=y # CONFIG_IP_NF_CT_ACCT is not set # CONFIG_IP_NF_CONNTRACK_MARK is not set +# CONFIG_IP_NF_CONNTRACK_EVENTS is not set CONFIG_IP_NF_CT_PROTO_SCTP=y CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +# CONFIG_IP_NF_NETBIOS_NS is not set CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +# CONFIG_IP_NF_PPTP is not set CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -216,13 +222,16 @@ CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +# CONFIG_IP_NF_MATCH_DCCP is not set CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -240,6 +249,7 @@ CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_TARGET_NOTRACK=m CONFIG_IP_NF_ARPTABLES=m @@ -251,6 +261,12 @@ # # CONFIG_IP6_NF_QUEUE is not set # CONFIG_IP6_NF_IPTABLES is not set +# CONFIG_IP6_NF_TARGET_NFQUEUE is not set + +# +# DCCP Configuration 
(EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set # # SCTP Configuration (EXPERIMENTAL) @@ -278,6 +294,7 @@ # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -292,6 +309,11 @@ # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -322,7 +344,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=131072 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -395,6 +416,7 @@ # # SCSI device support # +# CONFIG_RAID_ATTRS is not set # CONFIG_SCSI is not set # @@ -436,12 +458,18 @@ # CONFIG_ARCNET is not set # +# PHY device support +# +# CONFIG_PHYLIB is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set # @@ -462,15 +490,18 @@ # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set CONFIG_SKGE=m # CONFIG_SK98LIN is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set +# CONFIG_SPIDER_NET is not set # CONFIG_MV643XX_ETH is not set # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set @@ -552,6 +583,7 @@ CONFIG_SERIAL_NONSTANDARD=y # CONFIG_ROCKETPORT is not set # CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set # CONFIG_MOXA_SMARTIO is not set # CONFIG_ISI is not set # CONFIG_SYNCLINK is not set @@ -642,7 +674,6 @@ # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -656,7 +687,6 @@ # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# 
CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -683,12 +713,17 @@ # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -756,10 +791,6 @@ # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -768,6 +799,7 @@ CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -794,13 +826,11 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -# CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -# CONFIG_TMPFS_SECURITY is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -846,6 +876,7 @@ # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -923,6 +954,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=15 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -981,7 +1013,12 @@ # Library routines # # CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=m CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m diff -urN linux-2.6/arch/ppc64/configs/g5_defconfig test/arch/ppc64/configs/g5_defconfig --- linux-2.6/arch/ppc64/configs/g5_defconfig 2005-08-30 16:31:03.000000000 +1000 +++ test/arch/ppc64/configs/g5_defconfig 2005-10-20 08:30:23.000000000 +1000 @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:16:59 2005 +# Linux 
kernel version: 2.6.14-rc4 +# Thu Oct 20 08:30:23 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,6 +38,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -97,6 +99,7 @@ # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set CONFIG_PREEMPT_NONE=y @@ -109,19 +112,20 @@ CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +# CONFIG_HOTPLUG_CPU is not set +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set -# CONFIG_HOTPLUG_CPU is not set # # PCCARD (PCMCIA/CardBus) support @@ -132,8 +136,6 @@ # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -163,8 +165,8 @@ CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=y -CONFIG_IP_TCPDIAG=m -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -175,6 +177,7 @@ # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_NETLINK is not set # # IP: Netfilter Configuration @@ -182,11 +185,14 @@ 
CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CT_ACCT=y CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y CONFIG_IP_NF_CT_PROTO_SCTP=m CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +# CONFIG_IP_NF_NETBIOS_NS is not set CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +# CONFIG_IP_NF_PPTP is not set CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -210,14 +216,18 @@ CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +# CONFIG_IP_NF_MATCH_DCCP is not set CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -235,6 +245,7 @@ CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CONNMARK=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -244,6 +255,11 @@ CONFIG_IP_NF_ARP_MANGLE=m # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -270,6 +286,7 @@ # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -284,6 +301,11 @@ # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -315,7 +337,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 # CONFIG_CDROM_PKTCDVD_WCACHE is not set @@ -395,6 +416,7 @@ # # SCSI device support # +# CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y CONFIG_SCSI_PROC_FS=y 
@@ -422,6 +444,7 @@ CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set # # SCSI low-level drivers @@ -435,10 +458,12 @@ # CONFIG_SCSI_AIC79XX is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set CONFIG_SCSI_SATA=y # CONFIG_SCSI_SATA_AHCI is not set CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_ATA_PIIX is not set +# CONFIG_SCSI_SATA_MV is not set # CONFIG_SCSI_SATA_NV is not set # CONFIG_SCSI_SATA_PROMISE is not set # CONFIG_SCSI_SATA_QSTOR is not set @@ -498,6 +523,7 @@ # CONFIG_FUSION is not set # CONFIG_FUSION_SPI is not set # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support @@ -540,7 +566,6 @@ # CONFIG_ADB_PMU=y CONFIG_PMAC_SMU=y -# CONFIG_PMAC_BACKLIGHT is not set CONFIG_THERM_PM72=y # @@ -558,12 +583,18 @@ # CONFIG_ARCNET is not set # +# PHY device support +# +# CONFIG_PHYLIB is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set CONFIG_SUNGEM=y +# CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set # @@ -585,6 +616,7 @@ # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set CONFIG_TIGON3=m @@ -594,6 +626,7 @@ # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set @@ -760,8 +793,8 @@ # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set CONFIG_I2C_KEYWEST=y +CONFIG_I2C_PMAC_SMU=y # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -775,7 +808,6 @@ # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -802,12 +834,17 @@ # Hardware 
Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -856,6 +893,7 @@ # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set @@ -937,6 +975,7 @@ CONFIG_USB_STORAGE_SDDR09=y CONFIG_USB_STORAGE_SDDR55=y CONFIG_USB_STORAGE_JUMPSHOT=y +# CONFIG_USB_STORAGE_ONETOUCH is not set # # USB Input Devices @@ -956,9 +995,11 @@ # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -983,30 +1024,14 @@ CONFIG_USB_PEGASUS=m CONFIG_USB_RTL8150=m CONFIG_USB_USBNET=m - -# -# USB Host-to-Host Cables -# -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_GENESYS=y -CONFIG_USB_NET1080=y -CONFIG_USB_PL2301=y -CONFIG_USB_KC2190=y - -# -# Intelligent USB Devices/Gadgets -# -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_ZAURUS=y -CONFIG_USB_CDCETHER=y - -# -# USB Network Adapters -# -CONFIG_USB_AX8817X=y +# CONFIG_USB_NET_AX8817X is not set +CONFIG_USB_NET_CDCETHER=m +# CONFIG_USB_NET_GL620A is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_PLUSB is not set +# CONFIG_USB_NET_RNDIS_HOST is not set +# CONFIG_USB_NET_CDC_SUBSET is not set +# CONFIG_USB_NET_ZAURUS is not set CONFIG_USB_MON=y # @@ -1124,16 +1149,12 @@ CONFIG_REISERFS_FS_SECURITY=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# CONFIG_XFS_FS=m CONFIG_XFS_EXPORT=y -# CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set # CONFIG_MINIX_FS is not 
set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y @@ -1141,6 +1162,7 @@ CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -1168,14 +1190,11 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -# CONFIG_DEVPTS_FS_SECURITY is not set CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -1225,6 +1244,7 @@ # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1303,6 +1323,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1360,7 +1381,12 @@ # Library routines # CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m diff -urN linux-2.6/arch/ppc64/configs/iSeries_defconfig test/arch/ppc64/configs/iSeries_defconfig --- linux-2.6/arch/ppc64/configs/iSeries_defconfig 2005-08-30 16:31:03.000000000 +1000 +++ test/arch/ppc64/configs/iSeries_defconfig 2005-10-20 08:30:56.000000000 +1000 @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:17:02 2005 +# Linux kernel version: 2.6.14-rc4 +# Thu Oct 20 08:30:56 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y 
CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -38,6 +39,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -88,6 +90,7 @@ # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set CONFIG_PREEMPT_NONE=y @@ -101,17 +104,16 @@ CONFIG_GENERIC_HARDIRQS=y CONFIG_LPARCFG=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set # @@ -152,8 +154,8 @@ CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=y -CONFIG_IP_TCPDIAG=m -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -164,6 +166,7 @@ # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_NETLINK is not set # # IP: Netfilter Configuration @@ -171,11 +174,14 @@ CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CT_ACCT=y CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y CONFIG_IP_NF_CT_PROTO_SCTP=m CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +# CONFIG_IP_NF_NETBIOS_NS is not set CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +# CONFIG_IP_NF_PPTP is not set CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -199,14 +205,18 @@ CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +# CONFIG_IP_NF_MATCH_DCCP is not set CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m 
CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -224,6 +234,7 @@ CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CONNMARK=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -233,6 +244,11 @@ CONFIG_IP_NF_ARP_MANGLE=m # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -259,6 +275,7 @@ # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -273,6 +290,11 @@ # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -303,7 +325,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -323,6 +344,7 @@ # # SCSI device support # +# CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y CONFIG_SCSI_PROC_FS=y @@ -350,6 +372,7 @@ CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set # # SCSI low-level drivers @@ -363,6 +386,7 @@ # CONFIG_SCSI_AIC79XX is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set # CONFIG_SCSI_SATA is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set @@ -415,6 +439,7 @@ # CONFIG_FUSION is not set # CONFIG_FUSION_SPI is not set # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support @@ -445,12 +470,18 @@ # CONFIG_ARCNET is not set # +# PHY device support +# +# CONFIG_PHYLIB is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is 
not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set # @@ -489,6 +520,7 @@ # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set @@ -498,6 +530,7 @@ # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set @@ -632,7 +665,6 @@ # I2C support # # CONFIG_I2C is not set -# CONFIG_I2C_SENSOR is not set # # Dallas's 1-wire bus @@ -643,12 +675,17 @@ # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -722,16 +759,12 @@ # CONFIG_JFS_DEBUG is not set # CONFIG_JFS_STATISTICS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# CONFIG_XFS_FS=m CONFIG_XFS_EXPORT=y -# CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y @@ -739,6 +772,7 @@ CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -766,14 +800,11 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -CONFIG_DEVPTS_FS_SECURITY=y CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y # CONFIG_HUGETLBFS is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -824,6 +855,7 @@ # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -897,6 +929,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is 
not set @@ -954,7 +987,12 @@ # Library routines # CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m diff -urN linux-2.6/arch/ppc64/configs/maple_defconfig test/arch/ppc64/configs/maple_defconfig --- linux-2.6/arch/ppc64/configs/maple_defconfig 2005-08-30 16:31:03.000000000 +1000 +++ test/arch/ppc64/configs/maple_defconfig 2005-10-20 08:31:24.000000000 +1000 @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:17:04 2005 +# Linux kernel version: 2.6.14-rc4 +# Thu Oct 20 08:31:24 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,6 +38,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -97,6 +99,7 @@ # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set CONFIG_PREEMPT_NONE=y @@ -109,17 +112,18 @@ CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set # @@ -131,8 +135,6 @@ # PCI Hotplug 
Support # # CONFIG_HOTPLUG_PCI is not set -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -163,14 +165,19 @@ # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -196,6 +203,7 @@ # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -210,6 +218,11 @@ # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -240,7 +253,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=8192 # CONFIG_BLK_DEV_INITRD is not set -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -313,6 +325,7 @@ # # SCSI device support # +# CONFIG_RAID_ATTRS is not set # CONFIG_SCSI is not set # @@ -354,12 +367,18 @@ # CONFIG_ARCNET is not set # +# PHY device support +# +# CONFIG_PHYLIB is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set # @@ -398,6 +417,7 @@ # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set @@ -408,6 +428,7 @@ # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set @@ -553,7 +574,6 @@ # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# 
CONFIG_I2C_ISA is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -567,7 +587,6 @@ # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -594,12 +613,17 @@ # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -681,9 +705,11 @@ # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -814,10 +840,6 @@ # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -826,6 +848,7 @@ CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -849,14 +872,11 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -# CONFIG_DEVPTS_FS_SECURITY is not set CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -898,6 +918,7 @@ # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -975,6 +996,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set CONFIG_DEBUG_SLAB=y # CONFIG_DEBUG_SPINLOCK is not set @@ -1034,6 +1056,7 @@ # Library routines # CONFIG_CRC_CCITT=y +# CONFIG_CRC16 is not set CONFIG_CRC32=y # 
CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y diff -urN linux-2.6/arch/ppc64/configs/pSeries_defconfig test/arch/ppc64/configs/pSeries_defconfig --- linux-2.6/arch/ppc64/configs/pSeries_defconfig 2005-08-30 16:31:03.000000000 +1000 +++ test/arch/ppc64/configs/pSeries_defconfig 2005-10-20 08:32:17.000000000 +1000 @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:17:07 2005 +# Linux kernel version: 2.6.14-rc4 +# Thu Oct 20 08:32:17 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -38,6 +39,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CPUSETS=y +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -104,6 +106,7 @@ CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y +# CONFIG_SPARSEMEM_STATIC is not set CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_NODES_SPAN_OTHER_NODES=y CONFIG_NUMA=y @@ -124,19 +127,20 @@ CONFIG_SCANLOG=m CONFIG_LPARCFG=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_HOTPLUG_CPU=y +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y -CONFIG_PCI_NAMES=y # CONFIG_PCI_DEBUG is not set -CONFIG_HOTPLUG_CPU=y # # PCCARD (PCMCIA/CardBus) support @@ -152,8 +156,6 @@ # CONFIG_HOTPLUG_PCI_SHPC is not set CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # 
Networking @@ -183,8 +185,8 @@ CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=y -CONFIG_IP_TCPDIAG=m -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -195,6 +197,9 @@ # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m # # IP: Netfilter Configuration @@ -202,11 +207,15 @@ CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CT_ACCT=y CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y +CONFIG_IP_NF_CONNTRACK_NETLINK=m CONFIG_IP_NF_CT_PROTO_SCTP=m CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +# CONFIG_IP_NF_NETBIOS_NS is not set CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +# CONFIG_IP_NF_PPTP is not set CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -230,14 +239,18 @@ CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +# CONFIG_IP_NF_MATCH_DCCP is not set CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -255,6 +268,7 @@ CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CONNMARK=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -264,6 +278,11 @@ CONFIG_IP_NF_ARP_MANGLE=m # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -290,6 +309,7 @@ # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -304,6 
+324,11 @@ # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -342,7 +367,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -416,6 +440,7 @@ # # SCSI device support # +# CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y CONFIG_SCSI_PROC_FS=y @@ -443,6 +468,7 @@ CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_ISCSI_ATTRS=m +# CONFIG_SCSI_SAS_ATTRS is not set # # SCSI low-level drivers @@ -456,6 +482,7 @@ # CONFIG_SCSI_AIC79XX is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set # CONFIG_SCSI_SATA is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set @@ -517,6 +544,7 @@ # CONFIG_FUSION is not set # CONFIG_FUSION_SPI is not set # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support @@ -547,12 +575,18 @@ # CONFIG_ARCNET is not set # +# PHY device support +# +# CONFIG_PHYLIB is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y CONFIG_VORTEX=y # CONFIG_TYPHOON is not set @@ -581,6 +615,7 @@ # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # CONFIG_VIA_RHINE is not set +# CONFIG_NET_POCKET is not set # # Ethernet (1000 Mbit) @@ -594,6 +629,7 @@ # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set @@ -604,6 +640,7 @@ # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set CONFIG_S2IO=m @@ -789,7 +826,6 @@ # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # 
CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT is not set # CONFIG_I2C_PARPORT_LIGHT is not set @@ -804,7 +840,6 @@ # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -831,12 +866,17 @@ # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -885,6 +925,7 @@ # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set @@ -982,9 +1023,11 @@ # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -1057,7 +1100,8 @@ # InfiniBand support # CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_VERBS=m +# CONFIG_INFINIBAND_USER_MAD is not set +# CONFIG_INFINIBAND_USER_ACCESS is not set CONFIG_INFINIBAND_MTHCA=m # CONFIG_INFINIBAND_MTHCA_DEBUG is not set CONFIG_INFINIBAND_IPOIB=m @@ -1095,16 +1139,12 @@ # CONFIG_JFS_DEBUG is not set # CONFIG_JFS_STATISTICS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# CONFIG_XFS_FS=m CONFIG_XFS_EXPORT=y -# CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y @@ -1112,6 +1152,7 @@ CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -1139,14 +1180,11 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y 
-CONFIG_DEVPTS_FS_XATTR=y -CONFIG_DEVPTS_FS_SECURITY=y CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -1197,6 +1235,7 @@ # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1261,6 +1300,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1320,7 +1360,12 @@ # Library routines # CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m diff -urN linux-2.6/arch/ppc64/defconfig test/arch/ppc64/defconfig --- linux-2.6/arch/ppc64/defconfig 2005-08-30 16:31:03.000000000 +1000 +++ test/arch/ppc64/defconfig 2005-10-20 08:28:33.000000000 +1000 @@ -1,17 +1,17 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6 -# Mon Aug 8 14:16:54 2005 +# Linux kernel version: 2.6.14-rc4 +# Thu Oct 20 08:28:33 2005 # CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_FORCE_MAX_ZONEORDER=13 # @@ -26,6 +26,7 @@ # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,6 +38,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CPUSETS=y +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -106,6 +108,7 @@ CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y +# CONFIG_SPARSEMEM_STATIC 
is not set CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_NODES_SPAN_OTHER_NODES=y # CONFIG_NUMA is not set @@ -126,19 +129,20 @@ CONFIG_SCANLOG=m CONFIG_LPARCFG=y CONFIG_SECCOMP=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_HOTPLUG_CPU=y +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set CONFIG_ISA_DMA_API=y # -# General setup +# Bus Options # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m # CONFIG_PCI_LEGACY_PROC is not set -# CONFIG_PCI_NAMES is not set # CONFIG_PCI_DEBUG is not set -CONFIG_HOTPLUG_CPU=y # # PCCARD (PCMCIA/CardBus) support @@ -154,8 +158,6 @@ # CONFIG_HOTPLUG_PCI_SHPC is not set CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m -CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set # # Networking @@ -185,8 +187,8 @@ CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=y -# CONFIG_IP_TCPDIAG is not set -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y @@ -197,6 +199,9 @@ # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m # # IP: Netfilter Configuration @@ -204,11 +209,15 @@ CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CT_ACCT=y CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y +CONFIG_IP_NF_CONNTRACK_NETLINK=m CONFIG_IP_NF_CT_PROTO_SCTP=m CONFIG_IP_NF_FTP=m CONFIG_IP_NF_IRC=m +# CONFIG_IP_NF_NETBIOS_NS is not set CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m +# CONFIG_IP_NF_PPTP is not set CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_LIMIT=m @@ -232,14 +241,18 @@ CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_REALM=m CONFIG_IP_NF_MATCH_SCTP=m +CONFIG_IP_NF_MATCH_DCCP=m CONFIG_IP_NF_MATCH_COMMENT=m CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m CONFIG_IP_NF_FILTER=m 
CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=m @@ -257,6 +270,7 @@ CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_MARK=m CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CONNMARK=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -266,6 +280,11 @@ CONFIG_IP_NF_ARP_MANGLE=m # +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set @@ -292,6 +311,7 @@ # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -306,6 +326,11 @@ # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -344,7 +369,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set # @@ -422,6 +446,7 @@ # # SCSI device support # +# CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y CONFIG_SCSI_PROC_FS=y @@ -449,6 +474,7 @@ CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_ISCSI_ATTRS=m +# CONFIG_SCSI_SAS_ATTRS is not set # # SCSI low-level drivers @@ -462,10 +488,12 @@ # CONFIG_SCSI_AIC79XX is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set CONFIG_SCSI_SATA=y # CONFIG_SCSI_SATA_AHCI is not set CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_ATA_PIIX is not set +# CONFIG_SCSI_SATA_MV is not set # CONFIG_SCSI_SATA_NV is not set # CONFIG_SCSI_SATA_PROMISE is not set # CONFIG_SCSI_SATA_QSTOR is not set @@ -535,6 +563,7 @@ # CONFIG_FUSION is not set # CONFIG_FUSION_SPI is not set # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) 
support @@ -578,7 +607,6 @@ # CONFIG_ADB_PMU=y CONFIG_PMAC_SMU=y -# CONFIG_PMAC_BACKLIGHT is not set CONFIG_THERM_PM72=y # @@ -596,12 +624,18 @@ # CONFIG_ARCNET is not set # +# PHY device support +# +# CONFIG_PHYLIB is not set + +# # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set CONFIG_SUNGEM=y +# CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y CONFIG_VORTEX=y # CONFIG_TYPHOON is not set @@ -630,6 +664,7 @@ # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # CONFIG_VIA_RHINE is not set +# CONFIG_NET_POCKET is not set # # Ethernet (1000 Mbit) @@ -643,16 +678,19 @@ # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y # CONFIG_BNX2 is not set +# CONFIG_SPIDER_NET is not set # CONFIG_MV643XX_ETH is not set # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set # CONFIG_S2IO is not set @@ -838,8 +876,8 @@ # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_ISA is not set CONFIG_I2C_KEYWEST=y +CONFIG_I2C_PMAC_SMU=y # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT is not set # CONFIG_I2C_PARPORT_LIGHT is not set @@ -854,7 +892,6 @@ # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set # CONFIG_I2C_PCA_ISA is not set -# CONFIG_I2C_SENSOR is not set # # Miscellaneous I2C Chip support @@ -881,12 +918,17 @@ # Hardware Monitoring support # # CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # +# Multimedia Capabilities Port drivers +# + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -939,6 +981,7 @@ # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set @@ -1020,6 
+1063,7 @@ # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # # USB Input Devices @@ -1036,9 +1080,11 @@ # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -1111,7 +1157,8 @@ # InfiniBand support # CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_VERBS=m +# CONFIG_INFINIBAND_USER_MAD is not set +# CONFIG_INFINIBAND_USER_ACCESS is not set CONFIG_INFINIBAND_MTHCA=m # CONFIG_INFINIBAND_MTHCA_DEBUG is not set CONFIG_INFINIBAND_IPOIB=m @@ -1149,16 +1196,12 @@ # CONFIG_JFS_DEBUG is not set # CONFIG_JFS_STATISTICS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# CONFIG_XFS_FS=m CONFIG_XFS_EXPORT=y -# CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y @@ -1166,6 +1209,7 @@ CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=y # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -1192,14 +1236,11 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -CONFIG_DEVPTS_FS_XATTR=y -CONFIG_DEVPTS_FS_SECURITY=y CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_SECURITY=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -1250,6 +1291,7 @@ # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1328,6 +1370,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1387,7 
+1430,12 @@ # Library routines # CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m From paulus at samba.org Thu Oct 20 08:44:07 2005 From: paulus at samba.org (Paul Mackerras) Date: Thu, 20 Oct 2005 08:44:07 +1000 Subject: [PATCH] powerpc: merge types.h In-Reply-To: References: Message-ID: <17238.52279.428407.411558@cargo.ozlabs.ibm.com> Becky Bruce writes: > This patch merges types.h into include/asm-powerpc. The only real changes > here are that umode_t has become short instead of int in the 64-bit case, > which appears to match what most other platforms have done. Ummm, isn't that an ABI change? We don't want to do that unless you're completely absolutely positively sure it couldn't possibly cause any userspace breakage. Regards, Paul. From paulus at samba.org Thu Oct 20 08:53:11 2005 From: paulus at samba.org (Paul Mackerras) Date: Thu, 20 Oct 2005 08:53:11 +1000 Subject: [PATCH] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <20051019042701.73F9A685D8@ozlabs.org> References: <309f5f701666369cdd618d06f664439e@kernel.crashing.org> <20051019042701.73F9A685D8@ozlabs.org> Message-ID: <17238.52823.505817.958071@cargo.ozlabs.ibm.com> Michael Ellerman writes: > +typedef enum { > + BRANCH_RELATIVE, > + BRANCH_RELATIVE_LINK, > + BRANCH_ABSOLUTE, > + BRANCH_ABSOLUTE_LINK > +} branch_t; Just to pursue this thread a bit further... I think your branch_t is folding together two things that are really orthogonal, and this is causing your code to get longer and more confusing than necessary. I would just have defined two separate flags, say BRANCH_ABSOLUTE and BRANCH_LINK (either with #define or enum), and then declared the "type" argument to create_branch as "bitwise". 
Then you can simply OR the type argument into the instruction word, and you can do tests like if (!(type & BRANCH_ABSOLUTE)) quite naturally. This ends up being almost exactly what Segher suggested, but maybe a little less obfuscated. Paul. From becky.bruce at freescale.com Thu Oct 20 09:41:21 2005 From: becky.bruce at freescale.com (Becky Bruce) Date: Wed, 19 Oct 2005 18:41:21 -0500 Subject: [PATCH] powerpc: merge types.h In-Reply-To: <17238.52279.428407.411558@cargo.ozlabs.ibm.com> References: <17238.52279.428407.411558@cargo.ozlabs.ibm.com> Message-ID: <0f2d7ce137c9ab4123647a0c9fedcffe@freescale.com> Gack! While I doubt that should break anything, I can't promise 100% that it won't. I'll resubmit with that wrapped up in a nice shiny ifdef. Thanks for the catch - I wouldn't want irate users picketing my house :) Cheers, B On Oct 19, 2005, at 5:44 PM, Paul Mackerras wrote: > Becky Bruce writes: > >> This patch merges types.h into include/asm-powerpc. The only real >> changes >> here are that umode_t has become short instead of int in the 64-bit >> case, >> which appears to match what most other platforms have done. > > Ummm, isn't that an ABI change? We don't want to do that unless > you're completely absolutely positively sure it couldn't possibly > cause any userspace breakage. > > Regards, > Paul. From bgill at freescale.com Thu Oct 20 09:45:03 2005 From: bgill at freescale.com (Becky Bruce) Date: Wed, 19 Oct 2005 18:45:03 -0500 (CDT) Subject: [PATCH] (modified) Merge types.h Message-ID: powerpc: Merge types.h This patch merges types.h into include/asm-powerpc. The only real change is the removal of the include of linux/config.h from the 32-bit version - it doesn't appear to be necessary. This patch has been built on several different 32 and 64-bit platforms, and booted on mpc8540_ads. 
This patch supersedes the previous patch [PATCH] Merge types.h Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala --- commit 995fc5087d7f02d7e570c9a904a16453e1d8d307 tree 8326faf3664a5c2912ab28f822d9ab7e84fc8a1b parent 4746820969c846263fca3f0b54e0c1af883b67ed author Becky Bruce Wed, 19 Oct 2005 11:12:13 -0500 committer Becky Bruce Wed, 19 Oct 2005 11:12:13 -0500 include/asm-powerpc/types.h | 104 +++++++++++++++++++++++++++++++++++++++++++ include/asm-ppc/types.h | 69 ------------------------------ include/asm-ppc64/types.h | 79 ---------------------------------- 3 files changed, 100 insertions(+), 148 deletions(-) diff --git a/include/asm-powerpc/types.h b/include/asm-powerpc/types.h new file mode 100644 --- /dev/null +++ b/include/asm-powerpc/types.h @@ -0,0 +1,104 @@ +#ifndef _ASM_POWERPC_TYPES_H +#define _ASM_POWERPC_TYPES_H + +#ifndef __ASSEMBLY__ + +/* + * This file is never included by application software unless + * explicitly requested (e.g., via linux/types.h) in which case the + * application is Linux specific so (user-) name space pollution is + * not a major issue. However, for interoperability, libraries still + * need to be careful to avoid a name clashes. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __powerpc64__ +typedef unsigned int umode_t; +#else +typedef unsigned short umode_t; +#endif + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. 
Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#ifdef __powerpc64__ +typedef __signed__ long __s64; +typedef unsigned long __u64; +#else +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif +#endif /* __powerpc64__ */ + +typedef struct { + __u32 u[4]; +} __attribute((aligned(16))) __vector128; + +#endif /* __ASSEMBLY__ */ + +#ifdef __KERNEL__ +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __powerpc64__ +#define BITS_PER_LONG 64 +#else +#define BITS_PER_LONG 32 +#endif + +#ifndef __ASSEMBLY__ + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +#ifdef __powerpc64__ +typedef signed long s64; +typedef unsigned long u64; +#else +typedef signed long long s64; +typedef unsigned long long u64; +#endif + +typedef __vector128 vector128; + +typedef u32 dma_addr_t; +typedef u64 dma64_addr_t; + +typedef struct { + unsigned long entry; + unsigned long toc; + unsigned long env; +} func_descr_t; + +#ifdef CONFIG_LBD +typedef u64 sector_t; +#define HAVE_SECTOR_T +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_POWERPC_TYPES_H */ diff --git a/include/asm-ppc/types.h b/include/asm-ppc/types.h deleted file mode 100644 --- a/include/asm-ppc/types.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _PPC_TYPES_H -#define _PPC_TYPES_H - -#ifndef __ASSEMBLY__ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) -typedef __signed__ long long 
__s64; -typedef unsigned long long __u64; -#endif - -typedef struct { - __u32 u[4]; -} __vector128; - -/* - * XXX allowed outside of __KERNEL__ for now, until glibc gets - * a proper set of asm headers of its own. -- paulus - */ -typedef unsigned short umode_t; - -#endif /* __ASSEMBLY__ */ - -#ifdef __KERNEL__ -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#define BITS_PER_LONG 32 - -#ifndef __ASSEMBLY__ - -#include - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long long s64; -typedef unsigned long long u64; - -typedef __vector128 vector128; - -/* DMA addresses are 32-bits wide */ -typedef u32 dma_addr_t; -typedef u64 dma64_addr_t; - -#ifdef CONFIG_LBD -typedef u64 sector_t; -#define HAVE_SECTOR_T -#endif - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif diff --git a/include/asm-ppc64/types.h b/include/asm-ppc64/types.h deleted file mode 100644 --- a/include/asm-ppc64/types.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef _PPC64_TYPES_H -#define _PPC64_TYPES_H - -#ifndef __ASSEMBLY__ - -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -typedef unsigned int umode_t; - -/* - * __xx is ok: it doesn't pollute the POSIX namespace. 
Use these in the - * header files exported to user space - */ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -typedef __signed__ long __s64; -typedef unsigned long __u64; - -typedef struct { - __u32 u[4]; -} __attribute((aligned(16))) __vector128; - -#endif /* __ASSEMBLY__ */ - -#ifdef __KERNEL__ -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#define BITS_PER_LONG 64 - -#ifndef __ASSEMBLY__ - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long s64; -typedef unsigned long u64; - -typedef __vector128 vector128; - -typedef u32 dma_addr_t; -typedef u64 dma64_addr_t; - -typedef struct { - unsigned long entry; - unsigned long toc; - unsigned long env; -} func_descr_t; - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif /* _PPC64_TYPES_H */ From michael at ellerman.id.au Thu Oct 20 10:49:39 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Thu, 20 Oct 2005 10:49:39 +1000 Subject: [PATCH] powerpc: Add helper functions for synthesising instructions at runtime In-Reply-To: <17238.52823.505817.958071@cargo.ozlabs.ibm.com> References: <309f5f701666369cdd618d06f664439e@kernel.crashing.org> <20051019042701.73F9A685D8@ozlabs.org> <17238.52823.505817.958071@cargo.ozlabs.ibm.com> Message-ID: <200510201049.40154.michael@ellerman.id.au> On Thu, 20 Oct 2005 08:53, Paul Mackerras wrote: > Michael Ellerman writes: > > +typedef enum { > > + BRANCH_RELATIVE, > > + BRANCH_RELATIVE_LINK, > > + BRANCH_ABSOLUTE, > > + BRANCH_ABSOLUTE_LINK > > +} branch_t; > > Just to pursue this thread a bit further... 
> > I think your branch_t is folding together two things that are really > orthogonal, and this is causing your code to get longer and more > confusing than necessary. > > I would just have defined two separate flags, say BRANCH_ABSOLUTE and > BRANCH_LINK (either with #define or enum), and then declared the > "type" argument to create_branch as "bitwise". Then you can simply OR > the type argument into the instruction word, and you can do tests like > if (!(type & BRANCH_ABSOLUTE)) quite naturally. > > This ends up being almost exactly what Segher suggested, but maybe > a little less obfuscated. OK, you're the boss. We're going to have a very nice create_branch() macro at the end of this. On the other hand page.h might be a big pile of merge-poo, but no-one seems interested in it! :D -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person From david at gibson.dropbear.id.au Thu Oct 20 13:23:42 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Thu, 20 Oct 2005 13:23:42 +1000 Subject: [PATCH] Fix Kconfig performance bug Message-ID: <20051020032342.GA11273@localhost.localdomain> Fix kconfig performance bug Roman, I'm pretty sure this patch is correct, but obviously someone more familiar with the kconfig code should check it over. If it's ok (i.e. it makes no change in external behaviour), please apply. When doing its recursive dependency check, scripts/kconfig/conf uses the flag SYMBOL_CHECK_DONE to avoid rechecking a symbol it has already checked. However, that flag is only set at the top level, so if a symbol is first encountered as a dependency of another symbol it will be rechecked every time it is encountered until it's encountered at the top level. 
This patch adjusts the flag setting so that each symbol will only be checked once, regardless of whether it is first encountered at the top level, or while recursing down from another symbol. On complex configurations, this vastly speeds up scripts/kconfig/conf. The config in the powerpc merge tree is particularly bad: this patch reduces the time for 'scripts/kconfig/conf -o arch/powerpc/Kconfig' by a factor of 40 on a G5. That's even including the time to print the config, so the speedup in the actual checking is more likely 2 or 3 orders of magnitude. Signed-off-by: David Gibson Index: working-2.6/scripts/kconfig/symbol.c =================================================================== --- working-2.6.orig/scripts/kconfig/symbol.c 2005-10-20 12:40:45.000000000 +1000 +++ working-2.6/scripts/kconfig/symbol.c 2005-10-20 12:41:43.000000000 +1000 @@ -758,6 +758,8 @@ out: if (sym2) printf(" %s", sym->name); + else + sym->flags |= SYMBOL_CHECK_DONE; sym->flags &= ~SYMBOL_CHECK; return sym2; } Index: working-2.6/scripts/kconfig/zconf.y =================================================================== --- working-2.6.orig/scripts/kconfig/zconf.y 2005-10-20 12:40:45.000000000 +1000 +++ working-2.6/scripts/kconfig/zconf.y 2005-10-20 12:41:43.000000000 +1000 @@ -495,10 +495,9 @@ exit(1); menu_finalize(&rootmenu); for_all_symbols(i, sym) { +/* fprintf(stderr, "Checking %s...\n", sym->name); */ if (!(sym->flags & SYMBOL_CHECKED) && sym_check_deps(sym)) printf("\n"); - else - sym->flags |= SYMBOL_CHECK_DONE; } sym_change_count = 1; Index: working-2.6/scripts/kconfig/zconf.tab.c_shipped =================================================================== --- working-2.6.orig/scripts/kconfig/zconf.tab.c_shipped 2005-10-12 17:10:16.000000000 +1000 +++ working-2.6/scripts/kconfig/zconf.tab.c_shipped 2005-10-20 13:01:50.000000000 +1000 @@ -1935,8 +1935,6 @@ for_all_symbols(i, sym) { if (!(sym->flags & SYMBOL_CHECKED) && sym_check_deps(sym)) printf("\n"); - else - sym->flags |= 
SYMBOL_CHECK_DONE; } sym_change_count = 1; -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From david at gibson.dropbear.id.au Thu Oct 20 14:05:51 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Thu, 20 Oct 2005 14:05:51 +1000 Subject: Prevent ARCH=powerpc make errors due to xmon Message-ID: <20051020040551.GA12038@localhost.localdomain> Paulus, please apply in the merge tree. Non-working 'clean' targets can cause confusion. Currently we don't have xmon in the arch/powerpc, but it's listed in the Makefile. This causes make to fall over on certain targets - notably clean and distclean - even if CONFIG_XMON=n. This patch makes make happy again, obviously it'll need reverting when we come to merge xmon. Signed-off-by: David Gibson Index: working-2.6/arch/powerpc/Makefile =================================================================== --- working-2.6.orig/arch/powerpc/Makefile 2005-10-20 13:24:18.000000000 +1000 +++ working-2.6/arch/powerpc/Makefile 2005-10-20 14:01:21.000000000 +1000 @@ -131,7 +131,7 @@ arch/powerpc/sysdev/ \ arch/powerpc/platforms/ core-$(CONFIG_MATH_EMULATION) += arch/ppc/math-emu/ -core-$(CONFIG_XMON) += arch/powerpc/xmon/ +#core-$(CONFIG_XMON) += arch/powerpc/xmon/ core-$(CONFIG_APUS) += arch/ppc/amiga/ drivers-$(CONFIG_8xx) += arch/ppc/8xx_io/ drivers-$(CONFIG_4xx) += arch/ppc/4xx_io/ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From haveblue at us.ibm.com Thu Oct 20 22:06:18 2005 From: haveblue at us.ibm.com (Dave Hansen) Date: Thu, 20 Oct 2005 14:06:18 +0200 Subject: ppc64 2.6.14-rc5 compile error (boot_cpuid) Message-ID: <1129809979.19516.29.camel@localhost> I swear I've seen this before, but I thought it was fixed. 
arch/ppc64/kernel/time.c: In function `timer_interrupt': arch/ppc64/kernel/time.c:349: error: `boot_cpuid' undeclared (first use in this function) arch/ppc64/kernel/time.c:349: error: (Each undeclared identifier is reported only once arch/ppc64/kernel/time.c:349: error: for each function it appears in.) Config is here: http://www.sr71.net/patches/2.6.14/2.6.14-rc2-git8-mhp1/configs/config-ppc64-abat Signed-off-by: Dave Hansen --- memhotplug-dave/arch/ppc64/kernel/pSeries_setup.c | 1 + memhotplug-dave/arch/ppc64/kernel/time.c | 1 + 2 files changed, 2 insertions(+) diff -puN arch/ppc64/kernel/pSeries_setup.c~no-found-boot_cpuid arch/ppc64/kernel/pSeries_setup.c --- memhotplug/arch/ppc64/kernel/pSeries_setup.c~no-found-boot_cpuid 2005-09-30 12:37:50.000000000 -0700 +++ memhotplug-dave/arch/ppc64/kernel/pSeries_setup.c 2005-09-30 12:37:50.000000000 -0700 @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include diff -puN arch/ppc64/kernel/time.c~no-found-boot_cpuid arch/ppc64/kernel/time.c --- memhotplug/arch/ppc64/kernel/time.c~no-found-boot_cpuid 2005-09-30 12:37:50.000000000 -0700 +++ memhotplug-dave/arch/ppc64/kernel/time.c 2005-09-30 12:37:50.000000000 -0700 @@ -65,6 +65,7 @@ #include #include #include +#include #include #include diff -L sr -puN /dev/null /dev/null _ -- Dave From miltonm at bga.com Fri Oct 21 00:48:02 2005 From: miltonm at bga.com (Milton Miller) Date: Thu, 20 Oct 2005 09:48:02 -0500 Subject: [PATCH] powerpc: merge types.h Message-ID: <27a3dac7c5942564457d59267f2766cc@bga.com> > which appears to match what most other platforms have done. Also, the > 32-bit version was #including linux/config.h - I have removed this as > it > does not appear necessary. 
But it is necessary: > +#ifdef CONFIG_LBD > +typedef u64 sector_t; > +#define HAVE_SECTOR_T > +#endif milton From galak at freescale.com Fri Oct 21 02:43:33 2005 From: galak at freescale.com (Kumar Gala) Date: Thu, 20 Oct 2005 11:43:33 -0500 (CDT) Subject: [PATCH] powerpc: Make sure we have an RTC before trying to adjust it Message-ID: It's valid for ppc_md.set_rtc_time to be NULL. We need to check that it's non-NULL before trying to update the RTC. Signed-off-by: Kumar K. Gala --- commit e597837e138d9526562a68ec2f3c77a47ce1045e tree d5f982c7881efe43276b85ece4a371cb66f64ae5 parent de6cc780f1ec6143474219697c249d7266f56120 author Kumar K. Gala Thu, 20 Oct 2005 11:35:28 -0500 committer Kumar K. Gala Thu, 20 Oct 2005 11:35:28 -0500 arch/powerpc/kernel/time.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -144,7 +144,7 @@ static __inline__ void timer_check_rtc(v * We should have an rtc call that only sets the minutes and * seconds like on Intel to avoid problems with non UTC clocks. */ - if (ntp_synced() && + if (ppc_md.set_rtc_time && ntp_synced() && xtime.tv_sec - last_rtc_update >= 659 && abs((xtime.tv_nsec/1000) - (1000000-1000000/HZ)) < 500000/HZ && jiffies - wall_jiffies == 1) { From galak at freescale.com Fri Oct 21 02:44:03 2005 From: galak at freescale.com (Kumar Gala) Date: Thu, 20 Oct 2005 11:44:03 -0500 (CDT) Subject: [PATCH] powerpc: some prom.c cleanups Message-ID: On !CONFIG_PPC_MULTIPLATFORM _machine is defined as 0. This is ok, but we can't assign a value to _machine then. We may not have CONFIG_PCI available, so only build in support for find_parent_pci_resource(), request_OF_resource(), release_OF_resource() if PCI is enabled. This is probably not the long term fix but works out for now. Make reg_property64 contain 64-bit elements on a 32-bit machine. Mark the deprecated prom.c functions as __deprecated. 
Signed-off-by: Kumar K. Gala --- commit 3f11459b85450aa70da16ab1b1c82ef29498ef1a tree 8b625ab8868872bf5b909d0d55dd704604858048 parent e597837e138d9526562a68ec2f3c77a47ce1045e author Kumar K. Gala Thu, 20 Oct 2005 11:41:18 -0500 committer Kumar K. Gala Thu, 20 Oct 2005 11:41:18 -0500 arch/powerpc/kernel/prom.c | 4 ++++ include/asm-powerpc/prom.h | 14 +++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1133,8 +1133,10 @@ static int __init early_init_dt_scan_cho #ifdef CONFIG_PPC64 systemcfg->platform = *prop; #else +#ifdef CONFIG_PPC_MULTIPLATFORM _machine = *prop; #endif +#endif #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ @@ -1971,6 +1973,7 @@ bus_space_to_resource_flags(unsigned int } } +#ifdef CONFIG_PCI static struct resource *find_parent_pci_resource(struct pci_dev* pdev, struct address_range *range) { @@ -2123,3 +2126,4 @@ int release_OF_resource(struct device_no return 0; } EXPORT_SYMBOL(release_OF_resource); +#endif /* CONFIG_PCI */ diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -104,8 +104,8 @@ struct reg_property32 { }; struct reg_property64 { - unsigned long address; - unsigned long size; + u64 address; + u64 size; }; struct property { @@ -155,12 +155,12 @@ static inline void set_node_proc_entry(s /* OBSOLETE: Old style node lookup */ -extern struct device_node *find_devices(const char *name); -extern struct device_node *find_type_devices(const char *type); -extern struct device_node *find_path_device(const char *path); -extern struct device_node *find_compatible_devices(const char *type, +extern __deprecated struct device_node *find_devices(const char *name); +extern __deprecated struct device_node *find_type_devices(const char *type); +extern __deprecated struct device_node *find_path_device(const 
char *path); +extern __deprecated struct device_node *find_compatible_devices(const char *type, const char *compat); -extern struct device_node *find_all_nodes(void); +extern __deprecated struct device_node *find_all_nodes(void); /* New style node lookup */ extern struct device_node *of_find_node_by_name(struct device_node *from, From galak at freescale.com Fri Oct 21 02:44:46 2005 From: galak at freescale.com (Kumar Gala) Date: Thu, 20 Oct 2005 11:44:46 -0500 (CDT) Subject: [PATCH][RESEND] powerpc: merge include/asm-ppc*/checksum.h into include/asm-powerpc/checksum.h Message-ID: Signed-off-by: Kumar Gala --- commit 5bb6a54bbcd717339f737eba160b6824ec99cd27 tree b23e814e1546aeceaa58ccfafc714a8cf0a13656 parent 1288b50c0dd4747595c5e73dbff02088d8eeefcc author Kumar K. Gala Wed, 21 Sep 2005 14:10:11 -0500 committer Kumar K. Gala Wed, 21 Sep 2005 14:10:11 -0500 include/asm-powerpc/checksum.h | 132 ++++++++++++++++++++++++++++++++++++++++ include/asm-ppc/checksum.h | 107 -------------------------------- include/asm-ppc64/checksum.h | 107 -------------------------------- 3 files changed, 132 insertions(+), 214 deletions(-) diff --git a/include/asm-powerpc/checksum.h b/include/asm-powerpc/checksum.h new file mode 100644 --- /dev/null +++ b/include/asm-powerpc/checksum.h @@ -0,0 +1,132 @@ +#ifndef _ASM_POWERPC_CHECKSUM_H +#define _ASM_POWERPC_CHECKSUM_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. ihl is the number + * of 32-bit words and is always >= 5. 
+ */ +extern unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl); + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +extern unsigned short csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern unsigned int csum_partial(const unsigned char * buff, int len, + unsigned int sum); + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively (if that pointer is not + * NULL), and, for an error on src, zeroes the rest of dst. + * + * Like csum_partial, this must be called with even lengths, + * except for the last fragment. + */ +extern unsigned int csum_partial_copy_generic(const char *src, char *dst, + int len, unsigned int sum, + int *src_err, int *dst_err); +/* + * the same as csum_partial, but copies from src to dst while it + * checksums. + */ +unsigned int csum_partial_copy_nocheck(const char *src, + char *dst, + int len, + unsigned int sum); + +#define csum_partial_copy_from_user(src, dst, len, sum, errp) \ + csum_partial_copy_generic((src), (dst), (len), (sum), (errp), NULL) + +#define csum_partial_copy_nocheck(src, dst, len, sum) \ + csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) + + +/* + * turns a 32-bit partial checksum (e.g. from csum_partial) into a + * 1's complement 16-bit checksum. 
+ */ +static inline unsigned int csum_fold(unsigned int sum) +{ + unsigned int tmp; + + /* swap the two 16-bit halves of sum */ + __asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum)); + /* if there is a carry from adding the two 16-bit halves, + it will carry from the lower half into the upper half, + giving us the correct sum in the upper half. */ + sum = ~(sum + tmp) >> 16; + return sum; +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +static inline unsigned short ip_compute_csum(unsigned char * buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#ifdef __powerpc64__ +static inline u32 csum_tcpudp_nofold(u32 saddr, + u32 daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + unsigned long s = sum; + + s += saddr; + s += daddr; + s += (proto << 16) + len; + s += (s >> 32); + return (u32) s; +} +#else +static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + __asm__("\n\ + addc %0,%0,%1 \n\ + adde %0,%0,%2 \n\ + adde %0,%0,%3 \n\ + addze %0,%0 \n\ + " + : "=r" (sum) + : "r" (daddr), "r"(saddr), "r"((proto<<16)+len), "0"(sum)); + return sum; +} + +#endif +#endif diff --git a/include/asm-ppc/checksum.h b/include/asm-ppc/checksum.h deleted file mode 100644 --- a/include/asm-ppc/checksum.h +++ /dev/null @@ -1,107 +0,0 @@ -#ifdef __KERNEL__ -#ifndef _PPC_CHECKSUM_H -#define _PPC_CHECKSUM_H - - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -extern unsigned int csum_partial(const unsigned char * buff, int len, - unsigned int sum); - -/* - * Computes the checksum of 
a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively (if that pointer is not - * NULL), and, for an error on src, zeroes the rest of dst. - * - * Like csum_partial, this must be called with even lengths, - * except for the last fragment. - */ -extern unsigned int csum_partial_copy_generic(const char *src, char *dst, - int len, unsigned int sum, - int *src_err, int *dst_err); - -#define csum_partial_copy_from_user(src, dst, len, sum, errp) \ - csum_partial_copy_generic((__force void *)(src), (dst), (len), (sum), (errp), NULL) - -/* FIXME: this needs to be written to really do no check -- Cort */ -#define csum_partial_copy_nocheck(src, dst, len, sum) \ - csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) - -/* - * turns a 32-bit partial checksum (e.g. from csum_partial) into a - * 1's complement 16-bit checksum. - */ -static inline unsigned int csum_fold(unsigned int sum) -{ - unsigned int tmp; - - /* swap the two 16-bit halves of sum */ - __asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum)); - /* if there is a carry from adding the two 16-bit halves, - it will carry from the lower half into the upper half, - giving us the correct sum in the upper half. */ - sum = ~(sum + tmp) >> 16; - return sum; -} - -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ -static inline unsigned short ip_compute_csum(unsigned char * buff, int len) -{ - return csum_fold(csum_partial(buff, len, 0)); -} - -/* - * FIXME: I swiped this one from the sparc and made minor modifications. - * It may not be correct. 
-- Cort - */ -static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, - unsigned long daddr, - unsigned short len, - unsigned short proto, - unsigned int sum) -{ - __asm__("\n\ - addc %0,%0,%1 \n\ - adde %0,%0,%2 \n\ - adde %0,%0,%3 \n\ - addze %0,%0 \n\ - " - : "=r" (sum) - : "r" (daddr), "r"(saddr), "r"((proto<<16)+len), "0"(sum)); - return sum; -} - -/* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. ihl is the number - * of 32-bit words and is always >= 5. - */ -extern unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl); - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -extern unsigned short csum_tcpudp_magic(unsigned long saddr, - unsigned long daddr, - unsigned short len, - unsigned short proto, - unsigned int sum); - -#endif -#endif /* __KERNEL__ */ diff --git a/include/asm-ppc64/checksum.h b/include/asm-ppc64/checksum.h deleted file mode 100644 --- a/include/asm-ppc64/checksum.h +++ /dev/null @@ -1,107 +0,0 @@ -#ifndef _PPC64_CHECKSUM_H -#define _PPC64_CHECKSUM_H - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. ihl is the number - * of 32-bit words and is always >= 5. 
- */ -extern unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl); - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -extern unsigned short csum_tcpudp_magic(unsigned long saddr, - unsigned long daddr, - unsigned short len, - unsigned short proto, - unsigned int sum); - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -extern unsigned int csum_partial(const unsigned char * buff, int len, - unsigned int sum); - -/* - * the same as csum_partial, but copies from src to dst while it - * checksums - */ -extern unsigned int csum_partial_copy_generic(const char *src, char *dst, - int len, unsigned int sum, - int *src_err, int *dst_err); -/* - * the same as csum_partial, but copies from src to dst while it - * checksums. - */ - -unsigned int csum_partial_copy_nocheck(const char *src, - char *dst, - int len, - unsigned int sum); - -/* - * turns a 32-bit partial checksum (e.g. from csum_partial) into a - * 1's complement 16-bit checksum. - */ -static inline unsigned int csum_fold(unsigned int sum) -{ - unsigned int tmp; - - /* swap the two 16-bit halves of sum */ - __asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum)); - /* if there is a carry from adding the two 16-bit halves, - it will carry from the lower half into the upper half, - giving us the correct sum in the upper half. 
*/ - sum = ~(sum + tmp) >> 16; - return sum; -} - -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ -static inline unsigned short ip_compute_csum(unsigned char * buff, int len) -{ - return csum_fold(csum_partial(buff, len, 0)); -} - -#define csum_partial_copy_from_user(src, dst, len, sum, errp) \ - csum_partial_copy_generic((src), (dst), (len), (sum), (errp), NULL) - -#define csum_partial_copy_nocheck(src, dst, len, sum) \ - csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) - -static inline u32 csum_tcpudp_nofold(u32 saddr, - u32 daddr, - unsigned short len, - unsigned short proto, - unsigned int sum) -{ - unsigned long s = sum; - - s += saddr; - s += daddr; - s += (proto << 16) + len; - s += (s >> 32); - return (u32) s; -} - -#endif From paulus at samba.org Fri Oct 7 09:24:07 2005 From: paulus at samba.org (Paul Mackerras) Date: Fri, 7 Oct 2005 09:24:07 +1000 Subject: Starting the arch/powerpc merge In-Reply-To: References: <1127865914.6102.0.camel@gaston> Message-ID: <17221.45591.341270.888791@cargo.ozlabs.ibm.com> Giuliano Pochini writes: > Out of curiosity, is there any advantage in using a 32 bits > kernel on ppc64 over a 64 bits kernel ? Speed ? Complexity ? > Compatibility ? Memory ? Not really. The main thing in the past has been that DRI with 32-bit X server and clients would work with a 32-bit kernel but not a 64-bit kernel, but that's fixed now. A 64-bit kernel is faster on most lmbench tests. I guess a 32-bit kernel might end up a little smaller, but that's the only possible advantage I can think of. Paul. 
From paulus at samba.org Fri Oct 21 09:01:40 2005 From: paulus at samba.org (Paul Mackerras) Date: Fri, 21 Oct 2005 09:01:40 +1000 Subject: Starting the arch/powerpc merge In-Reply-To: <17221.45591.341270.888791@cargo.ozlabs.ibm.com> References: <1127865914.6102.0.camel@gaston> <17221.45591.341270.888791@cargo.ozlabs.ibm.com> Message-ID: <17240.8660.61369.647302@cargo.ozlabs.ibm.com> I wrote: > Not really. The main thing in the past has been that DRI with 32-bit > X server and clients would work with a 32-bit kernel but not a 64-bit > kernel, but that's fixed now. A 64-bit kernel is faster on most > lmbench tests. I guess a 32-bit kernel might end up a little smaller, > but that's the only possible advantage I can think of. Oops, sorry, mail system breakage here... Paul. From zippel at linux-m68k.org Fri Oct 21 10:46:30 2005 From: zippel at linux-m68k.org (Roman Zippel) Date: Fri, 21 Oct 2005 02:46:30 +0200 (CEST) Subject: [PATCH] Fix Kconfig performance bug In-Reply-To: <20051020032342.GA11273@localhost.localdomain> References: <20051020032342.GA11273@localhost.localdomain> Message-ID: Hi, On Thu, 20 Oct 2005, David Gibson wrote: > When doing its recursive dependency check, scripts/kconfig/conf uses > the flag SYMBOL_CHECK_DONE to avoid rechecking a symbol it has already > checked. However, that flag is only set at the top level, so if a > symbol is first encountered as a dependency of another symbol it will > be rechecked every time it is encountered until it's encountered at > the top level. You're correct, the check does too much. 
> Index: working-2.6/scripts/kconfig/symbol.c > =================================================================== > --- working-2.6.orig/scripts/kconfig/symbol.c 2005-10-20 12:40:45.000000000 +1000 > +++ working-2.6/scripts/kconfig/symbol.c 2005-10-20 12:41:43.000000000 +1000 > @@ -758,6 +758,8 @@ > out: > if (sym2) > printf(" %s", sym->name); > + else > + sym->flags |= SYMBOL_CHECK_DONE; > sym->flags &= ~SYMBOL_CHECK; > return sym2; > } Actually this way it becomes redundant with SYMBOL_CHECKED, could you merge these two flags? The above check would be also probably better: if (sym2) { printf(" %s", sym->name); if (sym2 == sym) { printf("\n"); sym2 = NULL; } } So that this check will stop when it hits the start symbol and continue looking for more dependency problems, which is what I think I intended with the original code. > Index: working-2.6/scripts/kconfig/zconf.y > =================================================================== > --- working-2.6.orig/scripts/kconfig/zconf.y 2005-10-20 12:40:45.000000000 +1000 > +++ working-2.6/scripts/kconfig/zconf.y 2005-10-20 12:41:43.000000000 +1000 > @@ -495,10 +495,9 @@ > exit(1); > menu_finalize(&rootmenu); > for_all_symbols(i, sym) { > +/* fprintf(stderr, "Checking %s...\n", sym->name); */ One "quilt refresh" missing? :-) bye, Roman From david at gibson.dropbear.id.au Fri Oct 21 11:49:55 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Fri, 21 Oct 2005 11:49:55 +1000 Subject: [PATCH] Fix Kconfig performance bug In-Reply-To: References: <20051020032342.GA11273@localhost.localdomain> Message-ID: <20051021014955.GA12976@localhost.localdomain> On Fri, Oct 21, 2005 at 02:46:30AM +0200, Roman Zippel wrote: > Hi, > > On Thu, 20 Oct 2005, David Gibson wrote: > > > When doing its recursive dependency check, scripts/kconfig/conf uses > > the flag SYMBOL_CHECK_DONE to avoid rechecking a symbol it has already > > checked.
However, that flag is only set at the top level, so if a > > symbol is first encountered as a dependency of another symbol it will > > be rechecked every time it is encountered until it's encountered at > > the top level. > > You're correct, the check does too much. > > > Index: working-2.6/scripts/kconfig/symbol.c > > =================================================================== > > --- working-2.6.orig/scripts/kconfig/symbol.c 2005-10-20 12:40:45.000000000 +1000 > > +++ working-2.6/scripts/kconfig/symbol.c 2005-10-20 12:41:43.000000000 +1000 > > @@ -758,6 +758,8 @@ > > out: > > if (sym2) > > printf(" %s", sym->name); > > + else > > + sym->flags |= SYMBOL_CHECK_DONE; > > sym->flags &= ~SYMBOL_CHECK; > > return sym2; > > } > > Actually this way it becomes redundant with SYMBOL_CHECKED, could you > merge these two flags? The above check would be also probably better: Ok, done. There is now only SYMBOL_CHECKED (seemed a clearer name to me), but its semantics are like those of SYMBOL_CHECK_DONE were. > if (sym2) { > printf(" %s", sym->name); > if (sym2 == sym) { > printf("\n"); > sym2 = NULL; > } > } > > So that this check will stop when it hits the start symbol and continue > looking for more dependency problems, which is I think I intended with the > original code. Erm.. ok. I don't entirely understand what the intent of this is, but applied anyway. > > Index: working-2.6/scripts/kconfig/zconf.y > > =================================================================== > > --- working-2.6.orig/scripts/kconfig/zconf.y 2005-10-20 12:40:45.000000000 +1000 > > +++ working-2.6/scripts/kconfig/zconf.y 2005-10-20 12:41:43.000000000 +1000 > > @@ -495,10 +495,9 @@ > > exit(1); > > menu_finalize(&rootmenu); > > for_all_symbols(i, sym) { > > +/* fprintf(stderr, "Checking %s...\n", sym->name); */ > > One "quilt refresh" missing? :-) Oops. Something like that. Oh..
one caveat, the diffs I have here to zconf.tab.c_shipped are direct edits to match zconf.y - I didn't regenerate the file with bison. I've done that to avoid getting a whole lot of irrelevant changes in the patch because I'm using a different version of bison to that used for the existing zconf.tab.[ch]_shipped Anyway, revised version below: Signed-off-by: David Gibson Index: working-2.6/scripts/kconfig/symbol.c =================================================================== --- working-2.6.orig/scripts/kconfig/symbol.c 2005-10-21 11:29:33.000000000 +1000 +++ working-2.6/scripts/kconfig/symbol.c 2005-10-21 11:41:07.000000000 +1000 @@ -731,14 +731,14 @@ struct symbol *sym2; struct property *prop; - if (sym->flags & SYMBOL_CHECK_DONE) + if (sym->flags & SYMBOL_CHECKED) return NULL; if (sym->flags & SYMBOL_CHECK) { printf("Warning! Found recursive dependency: %s", sym->name); return sym; } - sym->flags |= (SYMBOL_CHECK | SYMBOL_CHECKED); + sym->flags |= SYMBOL_CHECK; sym2 = sym_check_expr_deps(sym->rev_dep.expr); if (sym2) goto out; @@ -756,8 +756,15 @@ goto out; } out: - if (sym2) + if (sym2) { printf(" %s", sym->name); + if (sym2 == sym) { + printf("\n"); + sym2 = NULL; + } + } else { + sym->flags |= SYMBOL_CHECKED; + } sym->flags &= ~SYMBOL_CHECK; return sym2; } Index: working-2.6/scripts/kconfig/zconf.y =================================================================== --- working-2.6.orig/scripts/kconfig/zconf.y 2005-10-21 11:29:33.000000000 +1000 +++ working-2.6/scripts/kconfig/zconf.y 2005-10-21 11:31:01.000000000 +1000 @@ -497,8 +497,6 @@ for_all_symbols(i, sym) { if (!(sym->flags & SYMBOL_CHECKED) && sym_check_deps(sym)) printf("\n"); - else - sym->flags |= SYMBOL_CHECK_DONE; } sym_change_count = 1; Index: working-2.6/scripts/kconfig/zconf.tab.c_shipped =================================================================== --- working-2.6.orig/scripts/kconfig/zconf.tab.c_shipped 2005-10-21 11:29:33.000000000 +1000 +++
working-2.6/scripts/kconfig/zconf.tab.c_shipped 2005-10-21 11:29:37.000000000 +1000 @@ -1935,8 +1935,6 @@ for_all_symbols(i, sym) { if (!(sym->flags & SYMBOL_CHECKED) && sym_check_deps(sym)) printf("\n"); - else - sym->flags |= SYMBOL_CHECK_DONE; } sym_change_count = 1; Index: working-2.6/scripts/kconfig/expr.h =================================================================== --- working-2.6.orig/scripts/kconfig/expr.h 2005-05-24 14:12:26.000000000 +1000 +++ working-2.6/scripts/kconfig/expr.h 2005-10-21 11:30:13.000000000 +1000 @@ -93,7 +93,6 @@ #define SYMBOL_NEW 0x0800 #define SYMBOL_AUTO 0x1000 #define SYMBOL_CHECKED 0x2000 -#define SYMBOL_CHECK_DONE 0x4000 #define SYMBOL_WARNED 0x8000 #define SYMBOL_MAXLENGTH 256 -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From david at gibson.dropbear.id.au Fri Oct 21 13:41:19 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Fri, 21 Oct 2005 13:41:19 +1000 Subject: ppc64: Fix typo bug in iSeries hash code Message-ID: <20051021034119.GB12976@localhost.localdomain> Linus, please apply for 2.6.14 - this one-liner fixes a serious bug. This patch fixes a stupid typo bug in the iSeries hash table code. When we place a hash PTE in the secondary bucket, instead of setting the SECONDARY flag bit, as we should, we (redundantly) set the VALID flag. This was introduced with the patch abolishing bitfields from the hash table code. Mea culpa, oops. It hasn't been noticed until now because in practice we don't hit the secondary bucket terribly often. 
Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/kernel/iSeries_htab.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/iSeries_htab.c 2005-10-21 13:29:50.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/iSeries_htab.c 2005-10-21 13:30:55.000000000 +1000 @@ -66,7 +66,7 @@ } if (slot < 0) { /* MSB set means secondary group */ - vflags |= HPTE_V_VALID; + vflags |= HPTE_V_SECONDARY; secondary = 1; slot &= 0x7fffffffffffffff; } -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From benh at kernel.crashing.org Fri Oct 21 14:12:51 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Fri, 21 Oct 2005 14:12:51 +1000 Subject: [PATCH] ppc64: Fix pages marked dirty abusively Message-ID: <1129867972.6265.20.camel@gaston> While working on 64K pages, I found this little buglet in our update_mmu_cache() implementation. This code calls __hash_page() passing it an "access" parameter (the type of access that triggers the hash) containing the bits _PAGE_RW and _PAGE_USER of the linux PTE. The latter is useless in this case and the former is wrong. In fact, if we have a writeable PTE and we pass _PAGE_RW to hash_page(), it will set _PAGE_DIRTY (since we track dirty that way, by hash faulting !dirty) which is not what we want. In fact, the correct fix is to always pass 0. That means that only read-only or already dirty read write PTEs will be preloaded. The (hopefully rare) case of a non dirty read write PTE can't be preloaded this way, it will have to fault in hash_page on the actual access.
Signed-off-by: Benjamin Herrenschmidt Index: linux-work/arch/ppc64/mm/init.c =================================================================== --- linux-work.orig/arch/ppc64/mm/init.c 2005-09-23 12:43:22.000000000 +1000 +++ linux-work/arch/ppc64/mm/init.c 2005-10-21 14:07:51.000000000 +1000 @@ -799,8 +799,7 @@ if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) local = 1; - __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, - 0x300, local); + __hash_page(ea, 0, vsid, ptep, 0x300, local); local_irq_restore(flags); } From david at gibson.dropbear.id.au Fri Oct 21 15:43:42 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Fri, 21 Oct 2005 15:43:42 +1000 Subject: Fix broken initialization of conswitchp for ARCH=ppc64 Message-ID: <20051021054342.GC12976@localhost.localdomain> Paulus, please apply to the merge tree. In the merge tree, commit 0458060c1c59c5378d8fb5daabe18cf4681c35cd broke boot on some machines because the initialization of conswitchp was moved to arch/powerpc/kernel/setup_64.c, but a corresponding copy was not added to arch/ppc64/kernel/setup.c. This patch fixes it. Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/kernel/setup.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/setup.c 2005-10-21 15:03:46.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/setup.c 2005-10-21 15:34:30.000000000 +1000 @@ -1039,6 +1039,10 @@ /* initialize the syscall map in systemcfg */ setup_syscall_map(); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + ppc_md.setup_arch(); /* Use the default idle loop if the platform hasn't provided one. */ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! 
http://www.ozlabs.org/people/dgibson From david at gibson.dropbear.id.au Fri Oct 21 15:45:50 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Fri, 21 Oct 2005 15:45:50 +1000 Subject: powerpc: Merge thread_info.h Message-ID: <20051021054550.GD12976@localhost.localdomain> Merge ppc32 and ppc64 versions of thread_info.h. They were pretty similar already, the chief changes are: - Instead of inline asm to implement current_thread_info(), which needs to be different for ppc32 and ppc64, we use C with an asm("r1") register variable. gcc turns it into the same asm as we used to have for both platforms. - We replace ppc32's 'local_flags' with the ppc64 'syscall_noerror' field. The noerror flag was in fact the only thing in the local_flags field anyway, so the ppc64 approach is simpler, and means we only need a load-immediate/store instead of load/mask/store when clearing the flag. - In readiness for 64k pages, when THREAD_SIZE will be less than a page, ppc64 used kmalloc() rather than get_free_pages() to allocate the kernel stack. With this patch we do the same for ppc32, since there's no strong reason not to. - For ppc64, we no longer export THREAD_SHIFT and THREAD_SIZE via asm-offsets, thread_info.h can now be safely included in asm, as on ppc32. Built and booted on G4 Powerbook (ARCH=ppc and ARCH=powerpc) and Power5 (ARCH=ppc64 and ARCH=powerpc). 
Signed-off-by: David Gibson Index: working-2.6/include/asm-powerpc/thread_info.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ working-2.6/include/asm-powerpc/thread_info.h 2005-10-21 15:36:05.000000000 +1000 @@ -0,0 +1,135 @@ +/* thread_info.h: PowerPC low-level thread information + * adapted from the i386 version by Paul Mackerras + * + * Copyright (C) 2002 David Howells (dhowells at redhat.com) + * - Incorporating suggestions made by Linus Torvalds and Dave Miller + */ + +#ifndef _ASM_POWERPC_THREAD_INFO_H +#define _ASM_POWERPC_THREAD_INFO_H + +#ifdef __KERNEL__ + +/* We have 8k stacks on ppc32 and 16k on ppc64 */ + +#ifdef CONFIG_PPC64 +#define THREAD_SHIFT 14 +#else +#define THREAD_SHIFT 13 +#endif + +#define THREAD_SIZE (1 << THREAD_SHIFT) + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#include + +/* + * low level task data. + */ +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + int cpu; /* cpu we're on */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + struct restart_block restart_block; + /* set by force_successful_syscall_return */ + unsigned char syscall_noerror; + /* low level flags - has atomic operations done on it */ + unsigned long flags ____cacheline_aligned_in_smp; +}; + +/* + * macros/functions for gaining access to the thread information structure + * + * preempt_count needs to be 1 initially, until the scheduler is functional. 
+ */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .cpu = 0, \ + .preempt_count = 1, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ + .flags = 0, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* thread information allocation */ + +#ifdef CONFIG_DEBUG_STACK_USAGE +#define alloc_thread_info(tsk) \ + ({ \ + struct thread_info *ret; \ + \ + ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \ + if (ret) \ + memset(ret, 0, THREAD_SIZE); \ + ret; \ + }) +#else +#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) +#endif +#define free_thread_info(ti) kfree(ti) +#define get_thread_info(ti) get_task_struct((ti)->task) +#define put_thread_info(ti) put_task_struct((ti)->task) + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + register unsigned long sp asm("r1"); + + /* gcc4, at least, is smart enough to turn this into a single + * rlwinm for ppc32 and clrrdi for ppc64 */ + return (struct thread_info *)(sp & ~(THREAD_SIZE-1)); +} + +#endif /* __ASSEMBLY__ */ + +#define PREEMPT_ACTIVE 0x10000000 + +/* + * thread information flag bit numbers + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ +#define TIF_32BIT 5 /* 32 bit binary */ +/* #define SPARE 6 */ +#define TIF_ABI_PENDING 7 /* 32/64 bit switch needed */ +#define TIF_SYSCALL_AUDIT 8 /* syscall auditing active */ +#define TIF_SINGLESTEP 9 /* singlestepping active */ +#define TIF_MEMDIE 10 +#define TIF_SECCOMP 11 /* secure computing */ + +/* as above, but as bit values */ +#define _TIF_SYSCALL_TRACE (1< -#include -#include -#include -#include - -/* - * 
low level task data. - */ -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - int cpu; /* cpu we're on */ - int preempt_count; /* 0 => preemptable, <0 => BUG */ - struct restart_block restart_block; - /* set by force_successful_syscall_return */ - unsigned char syscall_noerror; - /* low level flags - has atomic operations done on it */ - unsigned long flags ____cacheline_aligned_in_smp; -}; - -/* - * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .cpu = 0, \ - .preempt_count = 1, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ - .flags = 0, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -/* thread information allocation */ - -#define THREAD_SHIFT 14 -#define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT) -#define THREAD_SIZE (1 << THREAD_SHIFT) -#ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ({ \ - struct thread_info *ret; \ - \ - ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \ - if (ret) \ - memset(ret, 0, THREAD_SIZE); \ - ret; \ - }) -#else -#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) -#endif -#define free_thread_info(ti) kfree(ti) -#define get_thread_info(ti) get_task_struct((ti)->task) -#define put_thread_info(ti) put_task_struct((ti)->task) - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - __asm__("clrrdi %0,1,%1" : "=r"(ti) : "i" (THREAD_SHIFT)); - return ti; -} - -#endif /* __ASSEMBLY__ */ - -#define PREEMPT_ACTIVE 0x10000000 - -/* - * thread information flag bit numbers - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_NOTIFY_RESUME 1 
/* resumption notification requested */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ -#define TIF_32BIT 5 /* 32 bit binary */ -/* #define SPARE 6 */ -#define TIF_ABI_PENDING 7 /* 32/64 bit switch needed */ -#define TIF_SYSCALL_AUDIT 8 /* syscall auditing active */ -#define TIF_SINGLESTEP 9 /* singlestepping active */ -#define TIF_MEMDIE 10 -#define TIF_SECCOMP 11 /* secure computing */ - -/* as above, but as bit values */ -#define _TIF_SYSCALL_TRACE (1< preemptable, - <0 => BUG */ - struct restart_block restart_block; -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .local_flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -/* - * macros/functions for gaining access to the thread information structure - */ - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - __asm__("rlwinm %0,1,0,0,18" : "=r"(ti)); - return ti; -} - -/* thread information allocation */ -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, 1)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) -#define get_thread_info(ti) get_task_struct((ti)->task) -#define put_thread_info(ti) put_task_struct((ti)->task) -#endif /* __ASSEMBLY__ */ - -/* - * Size of kernel stack for each process. 
- */ -#define THREAD_SIZE 8192 /* 2 pages */ - -#define PREEMPT_ACTIVE 0x10000000 - -/* - * thread information flag bit numbers - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ -#define TIF_MEMDIE 5 -#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ -#define TIF_SECCOMP 7 /* secure computing */ - -/* as above, but as bit values */ -#define _TIF_SYSCALL_TRACE (1<flags */ - rlwinm r9,r1,0,0,18 + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) bne do_work @@ -677,7 +676,7 @@ /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: /* check current_thread_info->preempt_count */ - rlwinm r9,r1,0,0,18 + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r0,TI_PREEMPT(r9) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore @@ -687,7 +686,7 @@ andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ 1: bl preempt_schedule_irq - rlwinm r9,r1,0,0,18 + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r3,TI_FLAGS(r9) andi. r0,r3,_TIF_NEED_RESCHED bne- 1b @@ -889,7 +888,7 @@ LOAD_MSR_KERNEL(r10,MSR_KERNEL) SYNC MTMSRD(r10) /* disable interrupts */ - rlwinm r9,r1,0,0,18 + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) andi. 
r0,r9,_TIF_NEED_RESCHED bne- do_resched Index: working-2.6/arch/ppc/kernel/asm-offsets.c =================================================================== --- working-2.6.orig/arch/ppc/kernel/asm-offsets.c 2005-10-21 14:23:30.000000000 +1000 +++ working-2.6/arch/ppc/kernel/asm-offsets.c 2005-10-21 15:36:05.000000000 +1000 @@ -130,10 +130,10 @@ DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); + DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); Index: working-2.6/include/asm-ppc/ptrace.h =================================================================== --- working-2.6.orig/include/asm-ppc/ptrace.h 2005-10-12 17:10:16.000000000 +1000 +++ working-2.6/include/asm-ppc/ptrace.h 2005-10-21 15:36:05.000000000 +1000 @@ -57,7 +57,7 @@ #define force_successful_syscall_return() \ do { \ - current_thread_info()->local_flags |= _TIFL_FORCE_NOERROR; \ + current_thread_info()->syscall_noerror = 1; \ } while(0) /* Index: working-2.6/arch/ppc/kernel/entry.S =================================================================== --- working-2.6.orig/arch/ppc/kernel/entry.S 2005-10-21 14:23:30.000000000 +1000 +++ working-2.6/arch/ppc/kernel/entry.S 2005-10-21 15:36:05.000000000 +1000 @@ -200,9 +200,8 @@ bl do_show_syscall #endif /* SHOW_SYSCALLS */ rlwinm r10,r1,0,0,18 /* current_thread_info() */ - lwz r11,TI_LOCAL_FLAGS(r10) - rlwinm r11,r11,0,~_TIFL_FORCE_NOERROR - stw r11,TI_LOCAL_FLAGS(r10) + li r11,0 + stb r11,TI_SC_NOERR(r10) lwz r11,TI_FLAGS(r10) andi. 
r11,r11,_TIF_SYSCALL_T_OR_A bne- syscall_dotrace @@ -227,8 +226,8 @@ cmplw 0,r3,r11 rlwinm r12,r1,0,0,18 /* current_thread_info() */ blt+ 30f - lwz r11,TI_LOCAL_FLAGS(r12) - andi. r11,r11,_TIFL_FORCE_NOERROR + lbz r11,TI_SC_NOERR(r12) + cmpwi r11,0 bne 30f neg r3,r3 lwz r10,_CCR(r1) /* Set SO bit in CR */ Index: working-2.6/arch/powerpc/kernel/head_64.S =================================================================== --- working-2.6.orig/arch/powerpc/kernel/head_64.S 2005-10-21 14:23:30.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/head_64.S 2005-10-21 15:36:05.000000000 +1000 @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_ISERIES #define DO_SOFT_DISABLE Index: working-2.6/arch/powerpc/kernel/misc_64.S =================================================================== --- working-2.6.orig/arch/powerpc/kernel/misc_64.S 2005-10-21 14:23:30.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/misc_64.S 2005-10-21 15:36:05.000000000 +1000 @@ -26,6 +26,7 @@ #include #include #include +#include .text Index: working-2.6/arch/ppc64/kernel/asm-offsets.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/asm-offsets.c 2005-10-21 14:23:30.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/asm-offsets.c 2005-10-21 15:36:28.000000000 +1000 @@ -46,8 +46,6 @@ int main(void) { /* thread struct on stack */ - DEFINE(THREAD_SHIFT, THREAD_SHIFT); - DEFINE(THREAD_SIZE, THREAD_SIZE); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror)); Index: working-2.6/arch/ppc64/kernel/head.S =================================================================== --- working-2.6.orig/arch/ppc64/kernel/head.S 2005-10-21 14:23:30.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/head.S 2005-10-21 15:36:50.000000000 +1000 @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef 
CONFIG_PPC_ISERIES #define DO_SOFT_DISABLE Index: working-2.6/arch/ppc64/kernel/misc.S =================================================================== --- working-2.6.orig/arch/ppc64/kernel/misc.S 2005-10-21 15:01:20.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/misc.S 2005-10-21 15:37:10.000000000 +1000 @@ -28,6 +28,7 @@ #include #include #include +#include .text -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From michael at ellerman.id.au Fri Oct 21 16:01:31 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Fri, 21 Oct 2005 16:01:31 +1000 (EST) Subject: [PATCH 0/2] powerpc: Make 64 bit binaries work on the merge-tree Message-ID: <1129874489.890671.910188650972.qpush@concordia> These two patches, on top of 46aab8c584ea69cdbe1060f5eae5bf092763f710 (the last commit that works for me), give me a kernel that boots and actually runs 64 bit binaries, woo. cheers From michael at ellerman.id.au Fri Oct 21 16:01:33 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Fri, 21 Oct 2005 16:01:33 +1000 (EST) Subject: [PATCH 1/2] powerpc: Don't blow away load_addr in start_thread In-Reply-To: <1129874489.890671.910188650972.qpush@concordia> Message-ID: <20051021060133.D0D4A68649@ozlabs.org> The patch to make process.c work for 32-bit and 64-bit (06d67d54741a5bfefa31945ef195dfa748c29025) broke some 64-bit binaries. We were blowing away load_addr in gpr[2], so we weren't properly relocating the entry point. 
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 6 +++++- 1 files changed, 5 insertions(+), 1 deletion(-) Index: kexec/arch/powerpc/kernel/process.c =================================================================== --- kexec.orig/arch/powerpc/kernel/process.c +++ kexec/arch/powerpc/kernel/process.c @@ -595,6 +595,10 @@ int copy_thread(int nr, unsigned long cl */ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) { +#ifdef CONFIG_PPC64 + unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ +#endif + set_fs(USER_DS); /* @@ -621,7 +625,7 @@ void start_thread(struct pt_regs *regs, regs->msr = MSR_USER; #else if (!test_thread_flag(TIF_32BIT)) { - unsigned long entry, toc, load_addr = regs->gpr[2]; + unsigned long entry, toc; /* start is a relocated pointer to the function descriptor for * the elf _start routine. The first entry in the function From michael at ellerman.id.au Fri Oct 21 16:01:34 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Fri, 21 Oct 2005 16:01:34 +1000 (EST) Subject: [PATCH 2/2] powerpc: Fix mmap returning 64 bit addresses In-Reply-To: <1129874489.890671.910188650972.qpush@concordia> Message-ID: <20051021060134.C2F906864A@ozlabs.org> The merge of syscalls.c & sys_ppc32.c (30286ef6e044bc3d9019c3d8b900572e3fa05e65) broke mmap, if the mmap returned a 64 bit address. do_mmap2 was taking the return value from do_mmap_pgoff (an unsigned long), and storing it in an int, before returning it to sys_mmap as an unsigned long. So we were losing the high bits of the address. You would have thought the compiler could catch this for us ... 
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/syscalls.c | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: kexec/arch/powerpc/kernel/syscalls.c =================================================================== --- kexec.orig/arch/powerpc/kernel/syscalls.c +++ kexec/arch/powerpc/kernel/syscalls.c @@ -162,7 +162,7 @@ static inline unsigned long do_mmap2(uns unsigned long fd, unsigned long off, int shift) { struct file * file = NULL; - int ret = -EINVAL; + unsigned long ret = -EINVAL; if (shift) { if (off & ((1 << shift) - 1)) From hch at lst.de Fri Oct 21 17:53:46 2005 From: hch at lst.de (Christoph Hellwig) Date: Fri, 21 Oct 2005 09:53:46 +0200 Subject: powerpc: Merge thread_info.h In-Reply-To: <20051021054550.GD12976@localhost.localdomain> References: <20051021054550.GD12976@localhost.localdomain> Message-ID: <20051021075346.GA29897@lst.de> On Fri, Oct 21, 2005 at 03:45:50PM +1000, David Gibson wrote: > - In readiness for 64k pages, when THREAD_SIZE will be less > than a page, ppc64 used kmalloc() rather than get_free_pages() to > allocate the kernel stack. With this patch we do the same for ppc32, > since there's no strong reason not to. This adds quite a bit of overhead and wasted memory. Please don't do it for ppc32 or ppc64 with THREAD_SIZE >= PAGE_SIZE either. From dhowells at redhat.com Fri Oct 21 19:36:11 2005 From: dhowells at redhat.com (David Howells) Date: Fri, 21 Oct 2005 10:36:11 +0100 Subject: powerpc: Merge thread_info.h In-Reply-To: <20051021054550.GD12976@localhost.localdomain> References: <20051021054550.GD12976@localhost.localdomain> Message-ID: <5603.1129887371@warthog.cambridge.redhat.com> David Gibson wrote: > - Instead of inline asm to implement current_thread_info(), > which needs to be different for ppc32 and ppc64, we use C with an > asm("r1") register variable. gcc turns it into the same asm as we > used to have for both platforms. Doesn't the compiler complain at that as R1 is the stack pointer? 
There's another trick you may wish to consider. It's one I've used in the FRV arch to great effect. That's similar to the PPC arch in that instructions are fixed size, and so displacements are limited. What I did is to use a general register to hold current whilst inside the kernel: [include/asm-frv/current.h] register struct task_struct *current asm("gr29"); Whilst the FRV ABI specifies that GR28-GR31 are reserved for O/S use, I suspect the standard PPC ABI doesn't do similar. What you can do is pass gcc a flag to tell it not to use that register for its own purposes ("-ffixed-r29" for example). This means rather than calculating current each time from the stack pointer and the thread_info struct, you can just refer to members of the current task struct directly. You could also place the thread_info struct at a known displacement (perhaps negative) from current and thus access that directly too. What you would need to do, though, is load the current register on entry to an exception. It would be slightly more complicated for you because PPC can be SMP. I found that this improved performance and shrank code size in the kernel. David From paulus at samba.org Fri Oct 21 21:26:52 2005 From: paulus at samba.org (Paul Mackerras) Date: Fri, 21 Oct 2005 21:26:52 +1000 Subject: powerpc: Merge thread_info.h In-Reply-To: <5603.1129887371@warthog.cambridge.redhat.com> References: <20051021054550.GD12976@localhost.localdomain> <5603.1129887371@warthog.cambridge.redhat.com> Message-ID: <17240.53372.519465.529667@cargo.ozlabs.ibm.com> David Howells writes: > What I did is to use a general register to hold current whilst inside the > kernel: PPC32 already stores current in r2. PPC64 stores a pointer to the paca (basically a per-cpu data area) in r13 and stores current in there. I'm not convinced that having current in a register, rather than being able to get it with 1 instruction, would make a significant difference, but it may be worth the experiment. Paul.
From paulus at samba.org Fri Oct 21 22:39:36 2005 From: paulus at samba.org (Paul Mackerras) Date: Fri, 21 Oct 2005 22:39:36 +1000 Subject: [PATCH] ppc64: Fix typo in time calculations Message-ID: <17240.57736.363524.91613@cargo.ozlabs.ibm.com> This fixes a typo in the div128_by_32 function used in the timekeeping calculations on ppc64. If you look at the code it's quite obvious that we need (rb + c) rather than (rb + b). The "b" is clearly just a typo. Signed-off-by: Paul Mackerras --- Linus, I think this should go in for 2.6.14, since it's very simple and obvious. However, the bug has been there forever and presumably only has a very minor impact, so if you want to defer it, that's OK. diff -urN linux-2.6/arch/ppc64/kernel/time.c test/arch/ppc64/kernel/time.c --- linux-2.6/arch/ppc64/kernel/time.c 2005-09-08 15:22:59.000000000 +1000 +++ test/arch/ppc64/kernel/time.c 2005-10-21 22:00:44.000000000 +1000 @@ -870,7 +870,7 @@ rb = ((ra + b) - (x * divisor)) << 32; y = (rb + c)/divisor; - rc = ((rb + b) - (y * divisor)) << 32; + rc = ((rb + c) - (y * divisor)) << 32; z = (rc + d)/divisor; From pochini at shiny.it Fri Oct 21 17:19:08 2005 From: pochini at shiny.it (Giuliano Pochini) Date: Fri, 21 Oct 2005 09:19:08 +0200 (CEST) Subject: Starting the arch/powerpc merge In-Reply-To: <17221.45591.341270.888791@cargo.ozlabs.ibm.com> Message-ID: On 06-Oct-2005 Paul Mackerras wrote: >> Out of curiosity, is there any advantage in using a 32 bits >> kernel on ppc64 over a 64 bits kernel ? Speed ? Complexity ? >> Compatibility ? Memory ? > > Not really. The main thing in the past has been that DRI with 32-bit > X server and clients would work with a 32-bit kernel but not a 64-bit > kernel, but that's fixed now. A 64-bit kernel is faster on most > lmbench tests. I guess a 32-bit kernel might end up a little smaller, > but that's the only possible advantage I can think of. And 32<->64 bits compatibility layer for 32bits apps is not needed, so maybe they run a bit faster. 
Well, at this point IMHO 32-on-64 support may be dropped without regrets. Spending time for an useless thing is - uhm - useless. > Oops, sorry, mail system breakage here... My mail wasn't important anyway :)) -- Giuliano. From bgill at freescale.com Sat Oct 22 06:57:31 2005 From: bgill at freescale.com (Becky Bruce) Date: Fri, 21 Oct 2005 15:57:31 -0500 (CDT) Subject: [PATCH] (revised again) merge types.h Message-ID: Hopefully, this is the last revision of this patch :) powerpc: Merge types.h This patch merges types.h into include/asm-powerpc. At this point, there are no significant changes. On 64-bit, we've picked up a #include of linux/config.h. This patch has been built on several different 32 and 64-bit platforms, and booted on mpc8540_ads. Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala --- commit 995fc5087d7f02d7e570c9a904a16453e1d8d307 tree 8326faf3664a5c2912ab28f822d9ab7e84fc8a1b parent 4746820969c846263fca3f0b54e0c1af883b67ed author Becky Bruce Wed, 19 Oct 2005 11:12:13 -0500 committer Becky Bruce Wed, 19 Oct 2005 11:12:13 -0500 include/asm-powerpc/types.h | 106 +++++++++++++++++++++++++++++++++++++++++++ include/asm-ppc/types.h | 69 ------------------------------ include/asm-ppc64/types.h | 79 ---------------------------------- 3 files changed, 106 insertions(+), 148 deletions(-) diff --git a/include/asm-powerpc/types.h b/include/asm-powerpc/types.h new file mode 100644 --- /dev/null +++ b/include/asm-powerpc/types.h @@ -0,0 +1,106 @@ +#ifndef _ASM_POWERPC_TYPES_H +#define _ASM_POWERPC_TYPES_H + +#ifndef __ASSEMBLY__ + +/* + * This file is never included by application software unless + * explicitly requested (e.g., via linux/types.h) in which case the + * application is Linux specific so (user-) name space pollution is + * not a major issue. However, for interoperability, libraries still + * need to be careful to avoid a name clashes. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __powerpc64__ +typedef unsigned int umode_t; +#else +typedef unsigned short umode_t; +#endif + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#ifdef __powerpc64__ +typedef __signed__ long __s64; +typedef unsigned long __u64; +#else +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif +#endif /* __powerpc64__ */ + +typedef struct { + __u32 u[4]; +} __attribute((aligned(16))) __vector128; + +#endif /* __ASSEMBLY__ */ + +#ifdef __KERNEL__ +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __powerpc64__ +#define BITS_PER_LONG 64 +#else +#define BITS_PER_LONG 32 +#endif + +#ifndef __ASSEMBLY__ + +#include + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +#ifdef __powerpc64__ +typedef signed long s64; +typedef unsigned long u64; +#else +typedef signed long long s64; +typedef unsigned long long u64; +#endif + +typedef __vector128 vector128; + +typedef u32 dma_addr_t; +typedef u64 dma64_addr_t; + +typedef struct { + unsigned long entry; + unsigned long toc; + unsigned long env; +} func_descr_t; + +#ifdef CONFIG_LBD +typedef u64 sector_t; +#define HAVE_SECTOR_T +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_POWERPC_TYPES_H */ diff --git a/include/asm-ppc/types.h 
b/include/asm-ppc/types.h deleted file mode 100644 --- a/include/asm-ppc/types.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _PPC_TYPES_H -#define _PPC_TYPES_H - -#ifndef __ASSEMBLY__ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) -typedef __signed__ long long __s64; -typedef unsigned long long __u64; -#endif - -typedef struct { - __u32 u[4]; -} __vector128; - -/* - * XXX allowed outside of __KERNEL__ for now, until glibc gets - * a proper set of asm headers of its own. -- paulus - */ -typedef unsigned short umode_t; - -#endif /* __ASSEMBLY__ */ - -#ifdef __KERNEL__ -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#define BITS_PER_LONG 32 - -#ifndef __ASSEMBLY__ - -#include - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long long s64; -typedef unsigned long long u64; - -typedef __vector128 vector128; - -/* DMA addresses are 32-bits wide */ -typedef u32 dma_addr_t; -typedef u64 dma64_addr_t; - -#ifdef CONFIG_LBD -typedef u64 sector_t; -#define HAVE_SECTOR_T -#endif - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif diff --git a/include/asm-ppc64/types.h b/include/asm-ppc64/types.h deleted file mode 100644 --- a/include/asm-ppc64/types.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef _PPC64_TYPES_H -#define _PPC64_TYPES_H - -#ifndef __ASSEMBLY__ - -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -typedef unsigned int umode_t; - -/* - * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the - * header files exported to user space - */ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -typedef __signed__ long __s64; -typedef unsigned long __u64; - -typedef struct { - __u32 u[4]; -} __attribute((aligned(16))) __vector128; - -#endif /* __ASSEMBLY__ */ - -#ifdef __KERNEL__ -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#define BITS_PER_LONG 64 - -#ifndef __ASSEMBLY__ - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long s64; -typedef unsigned long u64; - -typedef __vector128 vector128; - -typedef u32 dma_addr_t; -typedef u64 dma64_addr_t; - -typedef struct { - unsigned long entry; - unsigned long toc; - unsigned long env; -} func_descr_t; - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif /* _PPC64_TYPES_H */ From becky.bruce at freescale.com Sat Oct 22 06:59:49 2005 From: becky.bruce at freescale.com (Becky Bruce) Date: Fri, 21 Oct 2005 15:59:49 -0500 Subject: [PATCH] powerpc: merge types.h In-Reply-To: <27a3dac7c5942564457d59267f2766cc@bga.com> References: <27a3dac7c5942564457d59267f2766cc@bga.com> Message-ID: <3f468678db0a9d271324839686de65b9@freescale.com> Well, I totally zoned out on that one. Too much staring at this file, I guess. Patch resubmitted, again. And I'll spend this weekend looking for my misplaced brain. Thanks, Milton! 
-B On Oct 20, 2005, at 9:48 AM, Milton Miller wrote: >> which appears to match what most other platforms have done. Also, the >> 32-bit version was #including linux/config.h - I have removed this as >> it >> does not appear necessary. > > But it is necessary: > >> +#ifdef CONFIG_LBD >> +typedef u64 sector_t; >> +#define HAVE_SECTOR_T >> +#endif > > milton From bgill at freescale.com Sat Oct 22 07:16:58 2005 From: bgill at freescale.com (Becky Bruce) Date: Fri, 21 Oct 2005 16:16:58 -0500 (CDT) Subject: [PATCH] powerpc: Fix types.h Message-ID: Powerpc: Fix types.h I noticed that Paul had already pulled the version of types.h that is missing the config.h include into the merge tree - this patch adds it back in. Signed-off-by: Becky Bruce --- commit ffaa354bbf9c75b1101167137662ba37bb6cdb02 tree 4b9df364146754813e7425efab1d6ac322ee2abc parent b30219b583bf29e7f6a1236f07e5727626979d6c author Becky Bruce Fri, 21 Oct 2005 16:11:26 -0500 committer Becky Bruce Fri, 21 Oct 2005 16:11:26 -0500 include/asm-powerpc/types.h | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/include/asm-powerpc/types.h b/include/asm-powerpc/types.h --- a/include/asm-powerpc/types.h +++ b/include/asm-powerpc/types.h @@ -64,6 +64,8 @@ typedef struct { #ifndef __ASSEMBLY__ +#include + typedef signed char s8; typedef unsigned char u8; From olh at suse.de Sun Oct 23 02:32:22 2005 From: olh at suse.de (Olaf Hering) Date: Sat, 22 Oct 2005 18:32:22 +0200 Subject: [PATCH] reenable make install with ppc64 defconfig Message-ID: <20051022163222.GA30823@suse.de> 'make ARCH=ppc64 O=../O install' does not work with the defconfig. CONFIG_PPC_BPA is part of it, but the BPA bootimage variable is wrong: make[2]: *** No rule to make target `zImage', needed by `install'. Stop. 
Signed-off-by: Olaf Hering arch/ppc64/Makefile | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: linux-2.6.14-rc5/arch/ppc64/Makefile =================================================================== --- linux-2.6.14-rc5.orig/arch/ppc64/Makefile +++ linux-2.6.14-rc5/arch/ppc64/Makefile @@ -100,7 +100,7 @@ $(boottargets-y): vmlinux bootimage-$(CONFIG_PPC_PSERIES) := $(boot)/zImage bootimage-$(CONFIG_PPC_PMAC) := vmlinux bootimage-$(CONFIG_PPC_MAPLE) := $(boot)/zImage -bootimage-$(CONFIG_PPC_BPA) := zImage +bootimage-$(CONFIG_PPC_BPA) := $(boot)/zImage bootimage-$(CONFIG_PPC_ISERIES) := vmlinux BOOTIMAGE := $(bootimage-y) install: vmlinux -- short story of a lazy sysadmin: alias appserv=wotan From olh at suse.de Sun Oct 23 04:13:49 2005 From: olh at suse.de (Olaf Hering) Date: Sat, 22 Oct 2005 20:13:49 +0200 Subject: [PATCH] change name of target file during make install In-Reply-To: <43559F2A.6070206@am.sony.com> References: <20051018235553.GA9315@suse.de> <43559F2A.6070206@am.sony.com> Message-ID: <20051022181349.GB30823@suse.de> 'make install' creates a /boot/zImage[.vmode] file when the defconfig is used. It uses the second arg as file content, which is the vmlinux, and the 5th arg as file name, which is the BOOTIMAGE name. A comment in an earlier patch to install.sh states that yaboot can not load a zImage+initrd combo. This was true in kernel 2.6.5 because it did use bi_recs to pass the initrd info. But this concept was always broken. Register r3 holds the initrd address and r4 holds the initrd size. This works with all kernel versions. The current code in main.c leaves r3 and r4 alone, so the kernel should be able to see and use the memory range with the initrd content. If one wants to rerun mkinitrd, it is currently hard to get the uname -r value for the installed zImage. Without this info, mkinitrd can not know what modules to use. This would be fixable by including the /proc/version output of the new kernel. 
But it is simpler to just use the plain vmlinux. So all this patch does is to write to /boot/vmlinux instead of /boot/zImage Signed-off-by: Olaf Hering arch/ppc64/boot/install.sh | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: linux-2.6.14-rc5/arch/ppc64/boot/install.sh =================================================================== --- linux-2.6.14-rc5.orig/arch/ppc64/boot/install.sh +++ linux-2.6.14-rc5/arch/ppc64/boot/install.sh @@ -28,7 +28,7 @@ if [ -x /sbin/${CROSS_COMPILE}installker # Default install # this should work for both the pSeries zImage and the iSeries vmlinux.sm -image_name=`basename $5` +image_name=`basename $2` if [ -f $4/$image_name ]; then mv $4/$image_name $4/$image_name.old -- short story of a lazy sysadmin: alias appserv=wotan From olh at suse.de Sun Oct 23 23:30:34 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 23 Oct 2005 15:30:34 +0200 Subject: [PATCH] remove duplicate local variable in set_preferred_console Message-ID: <20051023133034.GA30628@suse.de> remove duplicate local variable, saves 2 asm instructions.
Signed-off-by: Olaf Hering arch/ppc64/kernel/setup.c | 1 - 1 files changed, 1 deletion(-) Index: linux-2.6.14-rc5/arch/ppc64/kernel/setup.c =================================================================== --- linux-2.6.14-rc5.orig/arch/ppc64/kernel/setup.c +++ linux-2.6.14-rc5/arch/ppc64/kernel/setup.c @@ -881,7 +881,6 @@ static int __init set_preferred_console( if (reg && compat && (strcmp(compat, "hvterm-protocol") == 0)) { /* Host Virtual Serial Interface */ - int offset; switch (reg[0]) { case 0x30000000: offset = 0; -- short story of a lazy sysadmin: alias appserv=wotan From david at gibson.dropbear.id.au Mon Oct 24 11:41:33 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Mon, 24 Oct 2005 11:41:33 +1000 Subject: powerpc: Purge bootinfo.h Message-ID: <20051024014133.GA30183@localhost.localdomain> Paulus, another one for the merge tree: With ARCH=powerpc we assume the presence of a device tree, so we don't require any support for the old bi_recs method of passing boot parameters. Likewise, we've never needed it for ppc64, but we still had an include/asm-ppc64/bootinfo.h from which nothing was used. This patch removes that file, and all references to it in arch/ppc64 and arch/powerpc. A related, unused variable 'boot_mem_size' is also removed from setup_32.c. The bootinfo stuff remains in ARCH=ppc for the time being. Built and booted on Power5 (ARCH=ppc64 and ARCH=powerpc), built for 32-bit powermac (ARCH=powerpc and ARCH=ppc). 
Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/kernel/prom.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/prom.c 2005-10-24 10:36:11.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/prom.c 2005-10-24 11:27:48.000000000 +1000 @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include Index: working-2.6/arch/ppc64/kernel/prom_init.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/prom_init.c 2005-10-14 16:43:39.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/prom_init.c 2005-10-24 11:27:48.000000000 +1000 @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include Index: working-2.6/arch/ppc64/kernel/setup.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/setup.c 2005-10-24 10:36:28.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/setup.c 2005-10-24 11:27:48.000000000 +1000 @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include Index: working-2.6/include/asm-ppc64/bootinfo.h =================================================================== --- working-2.6.orig/include/asm-ppc64/bootinfo.h 2005-05-24 14:12:25.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,70 +0,0 @@ -/* - * Non-machine dependent bootinfo structure. Basic idea - * borrowed from the m68k. - * - * Copyright (C) 1999 Cort Dougan - * Copyright (c) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - - -#ifndef _PPC64_BOOTINFO_H -#define _PPC64_BOOTINFO_H - -#include - -/* We use a u32 for the type of the fields since they're written by - * the bootloader which is a 32-bit process and read by the kernel - * which is a 64-bit process. This way they can both agree on the - * size of the type. - */ -typedef u32 bi_rec_field; - -struct bi_record { - bi_rec_field tag; /* tag ID */ - bi_rec_field size; /* size of record (in bytes) */ - bi_rec_field data[0]; /* data */ -}; - -#define BI_FIRST 0x1010 /* first record - marker */ -#define BI_LAST 0x1011 /* last record - marker */ -#define BI_CMD_LINE 0x1012 -#define BI_BOOTLOADER_ID 0x1013 -#define BI_INITRD 0x1014 -#define BI_SYSMAP 0x1015 -#define BI_MACHTYPE 0x1016 - -static __inline__ struct bi_record * bi_rec_init(unsigned long addr) -{ - struct bi_record *bi_recs; - bi_recs = (struct bi_record *)_ALIGN(addr, PAGE_SIZE); - bi_recs->size = 0; - return bi_recs; -} - -static __inline__ struct bi_record * bi_rec_alloc(struct bi_record *rec, - unsigned long args) -{ - rec = (struct bi_record *)((unsigned long)rec + rec->size); - rec->size = sizeof(struct bi_record) + args*sizeof(bi_rec_field); - return rec; -} - -static __inline__ struct bi_record * bi_rec_alloc_bytes(struct bi_record *rec, - unsigned long bytes) -{ - rec = (struct bi_record *)((unsigned long)rec + rec->size); - rec->size = sizeof(struct bi_record) + bytes; - return rec; -} - -static __inline__ struct bi_record * bi_rec_next(struct bi_record *rec) -{ - return (struct bi_record *)((unsigned long)rec + rec->size); -} - -#endif /* _PPC64_BOOTINFO_H */ Index: working-2.6/arch/powerpc/mm/mem.c =================================================================== --- working-2.6.orig/arch/powerpc/mm/mem.c 2005-10-24 10:36:28.000000000 +1000 +++ working-2.6/arch/powerpc/mm/mem.c 2005-10-24 11:27:48.000000000 +1000 @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include Index: working-2.6/arch/powerpc/kernel/prom_init.c 
=================================================================== --- working-2.6.orig/arch/powerpc/kernel/prom_init.c 2005-10-24 10:36:28.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/prom_init.c 2005-10-24 11:27:48.000000000 +1000 @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include Index: working-2.6/arch/powerpc/kernel/setup_64.c =================================================================== --- working-2.6.orig/arch/powerpc/kernel/setup_64.c 2005-10-24 10:36:09.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/setup_64.c 2005-10-24 11:27:48.000000000 +1000 @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include Index: working-2.6/arch/powerpc/kernel/setup_32.c =================================================================== --- working-2.6.orig/arch/powerpc/kernel/setup_32.c 2005-10-24 10:36:28.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/setup_32.c 2005-10-24 11:27:48.000000000 +1000 @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -62,10 +61,6 @@ boot_infos_t *boot_infos; struct ide_machdep_calls ppc_ide_md; -/* Used with the BI_MEMSIZE bootinfo parameter to store the memory - size value reported by the boot loader. */ -unsigned long boot_mem_size; - unsigned long ISA_DMA_THRESHOLD; unsigned int DMA_MODE_READ; unsigned int DMA_MODE_WRITE; Index: working-2.6/arch/powerpc/mm/init_32.c =================================================================== --- working-2.6.orig/arch/powerpc/mm/init_32.c 2005-10-24 10:36:28.000000000 +1000 +++ working-2.6/arch/powerpc/mm/init_32.c 2005-10-24 11:27:48.000000000 +1000 @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! 
http://www.ozlabs.org/people/dgibson From david at gibson.dropbear.id.au Mon Oct 24 13:07:23 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Mon, 24 Oct 2005 13:07:23 +1000 Subject: powerpc: Merge parport.h Message-ID: <20051024030723.GB30183@localhost.localdomain> Save for the include-guard #define, the ppc32 and ppc64 versions of parport.h are identical. This patch merges them. Signed-off-by: David Gibson Index: working-2.6/include/asm-powerpc/parport.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ working-2.6/include/asm-powerpc/parport.h 2005-10-24 11:58:10.000000000 +1000 @@ -0,0 +1,18 @@ +/* + * parport.h: platform-specific PC-style parport initialisation + * + * Copyright (C) 1999, 2000 Tim Waugh + * + * This file should only be included by drivers/parport/parport_pc.c. + */ + +#ifndef _ASM_POWERPC_PARPORT_H +#define _ASM_POWERPC_PARPORT_H + +static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma); +static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) +{ + return parport_pc_find_isa_ports (autoirq, autodma); +} + +#endif /* !(_ASM_POWERPC_PARPORT_H) */ Index: working-2.6/include/asm-ppc/parport.h =================================================================== --- working-2.6.orig/include/asm-ppc/parport.h 2005-05-24 14:12:25.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -/* - * parport.h: platform-specific PC-style parport initialisation - * - * Copyright (C) 1999, 2000 Tim Waugh - * - * This file should only be included by drivers/parport/parport_pc.c.
- */ - -#ifndef _ASM_PPC_PARPORT_H -#define _ASM_PPC_PARPORT_H - -static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma); -static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) -{ - return parport_pc_find_isa_ports (autoirq, autodma); -} - -#endif /* !(_ASM_PPC_PARPORT_H) */ Index: working-2.6/include/asm-ppc64/parport.h =================================================================== --- working-2.6.orig/include/asm-ppc64/parport.h 2005-05-24 14:12:25.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -/* - * parport.h: platform-specific PC-style parport initialisation - * - * Copyright (C) 1999, 2000 Tim Waugh - * - * This file should only be included by drivers/parport/parport_pc.c. - */ - -#ifndef _ASM_PPC64_PARPORT_H -#define _ASM_PPC64_PARPORT_H - -static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma); -static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) -{ - return parport_pc_find_isa_ports (autoirq, autodma); -} - -#endif /* !(_ASM_PPC_PARPORT_H) */ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From david at gibson.dropbear.id.au Mon Oct 24 14:05:38 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Mon, 24 Oct 2005 14:05:38 +1000 Subject: powerpc: Don't use kmalloc() for kernel stacks In-Reply-To: <20051021075346.GA29897@lst.de> References: <20051021054550.GD12976@localhost.localdomain> <20051021075346.GA29897@lst.de> Message-ID: <20051024040538.GD30183@localhost.localdomain> On Fri, Oct 21, 2005 at 09:53:46AM +0200, Christoph Hellwig wrote: > On Fri, Oct 21, 2005 at 03:45:50PM +1000, David Gibson wrote: > > - In readiness for 64k pages, when THREAD_SIZE will be less > > than a page, ppc64 used kmalloc() rather than get_free_pages() to > > allocate the kernel stack. 
With this patch we do the same for ppc32, > > since there's no strong reason not to. > > This adds quite a bit of overhead and wasted memory. Please don't do it > for ppc32 or ppc64 with THREAD_SIZE >= PAGE_SIZE either. Very well, the patch below addresses this. In readiness for 64k pages, when THREAD_SIZE will be less than PAGE_SIZE, ppc64 uses kmalloc() rather than __get_free_pages() to allocate kernel stacks, and since thread_info.h was merged, so does ppc32. However that adds some overhead which we don't really want when PAGE_SIZE <= THREAD_SIZE (including all ppc32 machines), so this patch avoids it. Signed-off-by: David Gibson Index: working-2.6/include/asm-powerpc/thread_info.h =================================================================== --- working-2.6.orig/include/asm-powerpc/thread_info.h 2005-10-24 10:36:28.000000000 +1000 +++ working-2.6/include/asm-powerpc/thread_info.h 2005-10-24 13:57:10.000000000 +1000 @@ -66,19 +66,26 @@ /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ({ \ - struct thread_info *ret; \ - \ - ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \ - if (ret) \ - memset(ret, 0, THREAD_SIZE); \ - ret; \ - }) +#define THREAD_INFO_GFP GFP_KERNEL | __GFP_ZERO #else -#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) +#define THREAD_INFO_GFP GFP_KERNEL #endif + +#if THREAD_SHIFT >= PAGE_SHIFT + +#define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT) + +#define alloc_thread_info(tsk) \ + ((struct thread_info *)__get_free_pages(THREAD_INFO_GFP, THREAD_ORDER)) +#define free_thread_info(ti) free_pages((unsigned long)ti, THREAD_ORDER) + +#else /* THREAD_SHIFT < PAGE_SHIFT */ + +#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, THREAD_INFO_GFP) #define free_thread_info(ti) kfree(ti) + +#endif /* THREAD_SHIFT < PAGE_SHIFT */ + #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) -- David Gibson | I'll have my music baroque, and my 
code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From michael at ellerman.id.au Mon Oct 24 15:07:26 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 15:07:26 +1000 (EST) Subject: [PATCH 1/5] powerpc: Remove duplicate definition of set_tb() Message-ID: <1130130446.113659.789254640331.qpush@concordia> Somewhere along the line we got two definitions of set_tb(). They appear to be functionally identical, although they are not textually identical. So remove the #ifdef CONFIG_PPC64 version, leaving the common version in time.h. Signed-off-by: Michael Ellerman --- include/asm-powerpc/reg.h | 7 ------- 1 files changed, 7 deletions(-) Index: kexec/include/asm-powerpc/reg.h =================================================================== --- kexec.orig/include/asm-powerpc/reg.h +++ kexec/include/asm-powerpc/reg.h @@ -585,13 +585,6 @@ static inline void ppc64_runlatch_off(vo mtspr(SPRN_CTRLT, ctrl); } } - -static inline void set_tb(unsigned int upper, unsigned int lower) -{ - mttbl(0); - mttbu(upper); - mttbl(lower); -} #endif #define __get_SP() ({unsigned long sp; \ From michael at ellerman.id.au Mon Oct 24 15:07:27 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 15:07:27 +1000 (EST) Subject: [PATCH 2/5] powerpc: Remove trailing \n" in HMT macros In-Reply-To: <1130130446.113659.789254640331.qpush@concordia> Message-ID: <20051024050727.584AB685CC@ozlabs.org> GCC 3.3.3 barfs on the trailing \n" in the HMT macros.
Signed-off-by: Michael Ellerman --- include/asm-powerpc/ppc_asm.h | 14 +++++++------- 1 files changed, 7 insertions(+), 7 deletions(-) Index: kexec/include/asm-powerpc/ppc_asm.h =================================================================== --- kexec.orig/include/asm-powerpc/ppc_asm.h +++ kexec/include/asm-powerpc/ppc_asm.h @@ -76,13 +76,13 @@ #define REST_16EVRS(n,s,base) REST_8EVRS(n,s,base); REST_8EVRS(n+8,s,base) #define REST_32EVRS(n,s,base) REST_16EVRS(n,s,base); REST_16EVRS(n+16,s,base) -/* Macros to adjust thread priority for Iseries hardware multithreading */ -#define HMT_VERY_LOW or 31,31,31 # very low priority\n" -#define HMT_LOW or 1,1,1 -#define HMT_MEDIUM_LOW or 6,6,6 # medium low priority\n" -#define HMT_MEDIUM or 2,2,2 -#define HMT_MEDIUM_HIGH or 5,5,5 # medium high priority\n" -#define HMT_HIGH or 3,3,3 +/* Macros to adjust thread priority for hardware multithreading */ +#define HMT_VERY_LOW or 31,31,31 # very low priority +#define HMT_LOW or 1,1,1 +#define HMT_MEDIUM_LOW or 6,6,6 # medium low priority +#define HMT_MEDIUM or 2,2,2 +#define HMT_MEDIUM_HIGH or 5,5,5 # medium high priority +#define HMT_HIGH or 3,3,3 /* handle instructions that older assemblers may not know */ #define RFCI .long 0x4c000066 /* rfci instruction */ From michael at ellerman.id.au Mon Oct 24 15:07:28 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 15:07:28 +1000 (EST) Subject: [PATCH 3/5] powerpc: Move plpar_wrappers.h into include/asm-powerpc In-Reply-To: <1130130446.113659.789254640331.qpush@concordia> Message-ID: <20051024050728.3C554685C8@ozlabs.org> Move plpar_wrappers.h into include/asm-powerpc, fix up a bit of whitespace while we're there. 
Signed-off-by: Michael Ellerman --- include/asm-powerpc/plpar_wrappers.h | 107 +++++++++++++++++++++++++++++++ include/asm-ppc64/plpar_wrappers.h | 120 ----------------------------------- 2 files changed, 107 insertions(+), 120 deletions(-) Index: kexec/include/asm-powerpc/plpar_wrappers.h =================================================================== --- /dev/null +++ kexec/include/asm-powerpc/plpar_wrappers.h @@ -0,0 +1,107 @@ +#ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H +#define _ASM_POWERPC_PLPAR_WRAPPERS_H + +#include + +static inline long poll_pending(void) +{ + unsigned long dummy; + return plpar_hcall(H_POLL_PENDING, 0, 0, 0, 0, &dummy, &dummy, &dummy); +} + +static inline long prod_processor(void) +{ + plpar_hcall_norets(H_PROD); + return 0; +} + +static inline long cede_processor(void) +{ + plpar_hcall_norets(H_CEDE); + return 0; +} + +static inline long register_vpa(unsigned long flags, unsigned long proc, + unsigned long vpa) +{ + return plpar_hcall_norets(H_REGISTER_VPA, flags, proc, vpa); +} + +void vpa_init(int cpu); + +static inline long plpar_pte_remove(unsigned long flags, unsigned long ptex, + unsigned long avpn, unsigned long *old_pteh_ret, + unsigned long *old_ptel_ret) +{ + unsigned long dummy; + return plpar_hcall(H_REMOVE, flags, ptex, avpn, 0, old_pteh_ret, + old_ptel_ret, &dummy); +} + +static inline long plpar_pte_read(unsigned long flags, unsigned long ptex, + unsigned long *old_pteh_ret, unsigned long *old_ptel_ret) +{ + unsigned long dummy; + return plpar_hcall(H_READ, flags, ptex, 0, 0, old_pteh_ret, + old_ptel_ret, &dummy); +} + +static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex, + unsigned long avpn) +{ + return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn); +} + +static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba, + unsigned long *tce_ret) +{ + unsigned long dummy; + return plpar_hcall(H_GET_TCE, liobn, ioba, 0, 0, tce_ret, &dummy, + &dummy); +} + +static inline long 
plpar_tce_put(unsigned long liobn, unsigned long ioba, + unsigned long tceval) +{ + return plpar_hcall_norets(H_PUT_TCE, liobn, ioba, tceval); +} + +static inline long plpar_tce_put_indirect(unsigned long liobn, + unsigned long ioba, unsigned long page, unsigned long count) +{ + return plpar_hcall_norets(H_PUT_TCE_INDIRECT, liobn, ioba, page, count); +} + +static inline long plpar_tce_stuff(unsigned long liobn, unsigned long ioba, + unsigned long tceval, unsigned long count) +{ + return plpar_hcall_norets(H_STUFF_TCE, liobn, ioba, tceval, count); +} + +static inline long plpar_get_term_char(unsigned long termno, + unsigned long *len_ret, char *buf_ret) +{ + unsigned long *lbuf = (unsigned long *)buf_ret; /* TODO: alignment? */ + return plpar_hcall(H_GET_TERM_CHAR, termno, 0, 0, 0, len_ret, + lbuf + 0, lbuf + 1); +} + +static inline long plpar_put_term_char(unsigned long termno, unsigned long len, + const char *buffer) +{ + unsigned long *lbuf = (unsigned long *)buffer; /* TODO: alignment? 
*/ + return plpar_hcall_norets(H_PUT_TERM_CHAR, termno, len, lbuf[0], + lbuf[1]); +} + +static inline long plpar_set_xdabr(unsigned long address, unsigned long flags) +{ + return plpar_hcall_norets(H_SET_XDABR, address, flags); +} + +static inline long plpar_set_dabr(unsigned long val) +{ + return plpar_hcall_norets(H_SET_DABR, val); +} + +#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ Index: kexec/include/asm-ppc64/plpar_wrappers.h =================================================================== --- kexec.orig/include/asm-ppc64/plpar_wrappers.h +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef _PPC64_PLPAR_WRAPPERS_H -#define _PPC64_PLPAR_WRAPPERS_H - -#include - -static inline long poll_pending(void) -{ - unsigned long dummy; - return plpar_hcall(H_POLL_PENDING, 0, 0, 0, 0, - &dummy, &dummy, &dummy); -} - -static inline long prod_processor(void) -{ - plpar_hcall_norets(H_PROD); - return(0); -} - -static inline long cede_processor(void) -{ - plpar_hcall_norets(H_CEDE); - return(0); -} - -static inline long register_vpa(unsigned long flags, unsigned long proc, - unsigned long vpa) -{ - return plpar_hcall_norets(H_REGISTER_VPA, flags, proc, vpa); -} - -void vpa_init(int cpu); - -static inline long plpar_pte_remove(unsigned long flags, - unsigned long ptex, - unsigned long avpn, - unsigned long *old_pteh_ret, - unsigned long *old_ptel_ret) -{ - unsigned long dummy; - return plpar_hcall(H_REMOVE, flags, ptex, avpn, 0, - old_pteh_ret, old_ptel_ret, &dummy); -} - -static inline long plpar_pte_read(unsigned long flags, - unsigned long ptex, - unsigned long *old_pteh_ret, unsigned long *old_ptel_ret) -{ - unsigned long dummy; - return plpar_hcall(H_READ, flags, ptex, 0, 0, - old_pteh_ret, old_ptel_ret, &dummy); -} - -static inline long plpar_pte_protect(unsigned long flags, - unsigned long ptex, - unsigned long avpn) -{ - return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn); -} - -static inline long plpar_tce_get(unsigned long liobn, - unsigned long ioba, - unsigned long 
*tce_ret) -{ - unsigned long dummy; - return plpar_hcall(H_GET_TCE, liobn, ioba, 0, 0, - tce_ret, &dummy, &dummy); -} - -static inline long plpar_tce_put(unsigned long liobn, - unsigned long ioba, - unsigned long tceval) -{ - return plpar_hcall_norets(H_PUT_TCE, liobn, ioba, tceval); -} - -static inline long plpar_tce_put_indirect(unsigned long liobn, - unsigned long ioba, - unsigned long page, - unsigned long count) -{ - return plpar_hcall_norets(H_PUT_TCE_INDIRECT, liobn, ioba, page, count); -} - -static inline long plpar_tce_stuff(unsigned long liobn, - unsigned long ioba, - unsigned long tceval, - unsigned long count) -{ - return plpar_hcall_norets(H_STUFF_TCE, liobn, ioba, tceval, count); -} - -static inline long plpar_get_term_char(unsigned long termno, - unsigned long *len_ret, - char *buf_ret) -{ - unsigned long *lbuf = (unsigned long *)buf_ret; /* ToDo: alignment? */ - return plpar_hcall(H_GET_TERM_CHAR, termno, 0, 0, 0, - len_ret, lbuf+0, lbuf+1); -} - -static inline long plpar_put_term_char(unsigned long termno, - unsigned long len, - const char *buffer) -{ - unsigned long *lbuf = (unsigned long *)buffer; /* ToDo: alignment? */ - return plpar_hcall_norets(H_PUT_TERM_CHAR, termno, len, lbuf[0], - lbuf[1]); -} - -static inline long plpar_set_xdabr(unsigned long address, unsigned long flags) -{ - return plpar_hcall_norets(H_SET_XDABR, address, flags); -} - -static inline long plpar_set_dabr(unsigned long val) -{ - return plpar_hcall_norets(H_SET_DABR, val); -} - -#endif /* _PPC64_PLPAR_WRAPPERS_H */ From michael at ellerman.id.au Mon Oct 24 15:07:29 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 15:07:29 +1000 (EST) Subject: [PATCH 4/5] powerpc: Move firmware.h into include/asm-powerpc In-Reply-To: <1130130446.113659.789254640331.qpush@concordia> Message-ID: <20051024050729.38FCA685D0@ozlabs.org> Move firmware.h into include/asm-powerpc. 
Signed-off-by: Michael Ellerman --- include/asm-powerpc/firmware.h | 97 +++++++++++++++++++++++++++++++++++++++ include/asm-ppc64/firmware.h | 101 ----------------------------------------- 2 files changed, 97 insertions(+), 101 deletions(-) Index: kexec/include/asm-powerpc/firmware.h =================================================================== --- /dev/null +++ kexec/include/asm-powerpc/firmware.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh at kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __ASM_POWERPC_FIRMWARE_H +#define __ASM_POWERPC_FIRMWARE_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +/* firmware feature bitmask values */ +#define FIRMWARE_MAX_FEATURES 63 + +#define FW_FEATURE_PFT (1UL<<0) +#define FW_FEATURE_TCE (1UL<<1) +#define FW_FEATURE_SPRG0 (1UL<<2) +#define FW_FEATURE_DABR (1UL<<3) +#define FW_FEATURE_COPY (1UL<<4) +#define FW_FEATURE_ASR (1UL<<5) +#define FW_FEATURE_DEBUG (1UL<<6) +#define FW_FEATURE_TERM (1UL<<7) +#define FW_FEATURE_PERF (1UL<<8) +#define FW_FEATURE_DUMP (1UL<<9) +#define FW_FEATURE_INTERRUPT (1UL<<10) +#define FW_FEATURE_MIGRATE (1UL<<11) +#define FW_FEATURE_PERFMON (1UL<<12) +#define FW_FEATURE_CRQ (1UL<<13) +#define FW_FEATURE_VIO (1UL<<14) +#define FW_FEATURE_RDMA (1UL<<15) +#define FW_FEATURE_LLAN (1UL<<16) +#define FW_FEATURE_BULK (1UL<<17) +#define FW_FEATURE_XDABR (1UL<<18) +#define FW_FEATURE_MULTITCE (1UL<<19) +#define FW_FEATURE_SPLPAR (1UL<<20) +#define FW_FEATURE_ISERIES (1UL<<21) + +enum { + FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE | + FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | + FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | + 
FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | + FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | + FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | + FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | + FW_FEATURE_SPLPAR, + FW_FEATURE_PSERIES_ALWAYS = 0, + FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES, + FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES, + FW_FEATURE_POSSIBLE = +#ifdef CONFIG_PPC_PSERIES + FW_FEATURE_PSERIES_POSSIBLE | +#endif +#ifdef CONFIG_PPC_ISERIES + FW_FEATURE_ISERIES_POSSIBLE | +#endif + 0, + FW_FEATURE_ALWAYS = +#ifdef CONFIG_PPC_PSERIES + FW_FEATURE_PSERIES_ALWAYS & +#endif +#ifdef CONFIG_PPC_ISERIES + FW_FEATURE_ISERIES_ALWAYS & +#endif + FW_FEATURE_POSSIBLE, +}; + +/* This is used to identify firmware features which are available + * to the kernel. + */ +extern unsigned long ppc64_firmware_features; + +static inline unsigned long firmware_has_feature(unsigned long feature) +{ + return (FW_FEATURE_ALWAYS & feature) || + (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature); +} + +#ifdef CONFIG_PPC_PSERIES +typedef struct { + unsigned long val; + char * name; +} firmware_feature_t; + +extern firmware_feature_t firmware_features_table[]; +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* __ASM_POWERPC_FIRMWARE_H */ Index: kexec/include/asm-ppc64/firmware.h =================================================================== --- kexec.orig/include/asm-ppc64/firmware.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * include/asm-ppc64/firmware.h - * - * Extracted from include/asm-ppc64/cputable.h - * - * Copyright (C) 2001 Ben. 
Herrenschmidt (benh at kernel.crashing.org) - * - * Modifications for ppc64: - * Copyright (C) 2003 Dave Engebretsen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef __ASM_PPC_FIRMWARE_H -#define __ASM_PPC_FIRMWARE_H - -#ifdef __KERNEL__ - -#ifndef __ASSEMBLY__ - -/* firmware feature bitmask values */ -#define FIRMWARE_MAX_FEATURES 63 - -#define FW_FEATURE_PFT (1UL<<0) -#define FW_FEATURE_TCE (1UL<<1) -#define FW_FEATURE_SPRG0 (1UL<<2) -#define FW_FEATURE_DABR (1UL<<3) -#define FW_FEATURE_COPY (1UL<<4) -#define FW_FEATURE_ASR (1UL<<5) -#define FW_FEATURE_DEBUG (1UL<<6) -#define FW_FEATURE_TERM (1UL<<7) -#define FW_FEATURE_PERF (1UL<<8) -#define FW_FEATURE_DUMP (1UL<<9) -#define FW_FEATURE_INTERRUPT (1UL<<10) -#define FW_FEATURE_MIGRATE (1UL<<11) -#define FW_FEATURE_PERFMON (1UL<<12) -#define FW_FEATURE_CRQ (1UL<<13) -#define FW_FEATURE_VIO (1UL<<14) -#define FW_FEATURE_RDMA (1UL<<15) -#define FW_FEATURE_LLAN (1UL<<16) -#define FW_FEATURE_BULK (1UL<<17) -#define FW_FEATURE_XDABR (1UL<<18) -#define FW_FEATURE_MULTITCE (1UL<<19) -#define FW_FEATURE_SPLPAR (1UL<<20) -#define FW_FEATURE_ISERIES (1UL<<21) - -enum { - FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE | - FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | - FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | - FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | - FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | - FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | - FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | - FW_FEATURE_SPLPAR, - FW_FEATURE_PSERIES_ALWAYS = 0, - FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES, - FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES, - FW_FEATURE_POSSIBLE = -#ifdef CONFIG_PPC_PSERIES - 
FW_FEATURE_PSERIES_POSSIBLE | -#endif -#ifdef CONFIG_PPC_ISERIES - FW_FEATURE_ISERIES_POSSIBLE | -#endif - 0, - FW_FEATURE_ALWAYS = -#ifdef CONFIG_PPC_PSERIES - FW_FEATURE_PSERIES_ALWAYS & -#endif -#ifdef CONFIG_PPC_ISERIES - FW_FEATURE_ISERIES_ALWAYS & -#endif - FW_FEATURE_POSSIBLE, -}; - -/* This is used to identify firmware features which are available - * to the kernel. - */ -extern unsigned long ppc64_firmware_features; - -static inline unsigned long firmware_has_feature(unsigned long feature) -{ - return (FW_FEATURE_ALWAYS & feature) || - (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature); -} - -#ifdef CONFIG_PPC_PSERIES -typedef struct { - unsigned long val; - char * name; -} firmware_feature_t; - -extern firmware_feature_t firmware_features_table[]; -#endif - -#endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ -#endif /* __ASM_PPC_FIRMWARE_H */ From michael at ellerman.id.au Mon Oct 24 15:07:30 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 15:07:30 +1000 (EST) Subject: [PATCH 5/5] powerpc: Move ras.c into arch/powerpc/platforms/pseries In-Reply-To: <1130130446.113659.789254640331.qpush@concordia> Message-ID: <20051024050730.E746D685D0@ozlabs.org> ras.o is only built for CONFIG_PPC_PSERIES, so move it into arch/powerpc/platforms/pseries. Update Makefiles to suit. 
Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/Makefile | 2 arch/powerpc/platforms/pseries/ras.c | 352 +++++++++++++++++++++++++++++++ arch/ppc64/kernel/Makefile | 2 arch/ppc64/kernel/ras.c | 353 -------------------------------- 4 files changed, 354 insertions(+), 355 deletions(-) Index: kexec/arch/powerpc/platforms/pseries/Makefile =================================================================== --- kexec.orig/arch/powerpc/platforms/pseries/Makefile +++ kexec/arch/powerpc/platforms/pseries/Makefile @@ -1,4 +1,4 @@ obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \ - setup.o iommu.o + setup.o iommu.o ras.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_IBMVIO) += vio.o Index: kexec/arch/powerpc/platforms/pseries/ras.c =================================================================== --- /dev/null +++ kexec/arch/powerpc/platforms/pseries/ras.c @@ -0,0 +1,352 @@ +/* + * Copyright (C) 2001 Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Change Activity: + * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support. 
+ * End Change Activity + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; +static DEFINE_SPINLOCK(ras_log_buf_lock); + +char mce_data_buf[RTAS_ERROR_LOG_MAX] +; +/* This is true if we are using the firmware NMI handler (typically LPAR) */ +extern int fwnmi_active; + +static int ras_get_sensor_state_token; +static int ras_check_exception_token; + +#define EPOW_SENSOR_TOKEN 9 +#define EPOW_SENSOR_INDEX 0 +#define RAS_VECTOR_OFFSET 0x500 + +static irqreturn_t ras_epow_interrupt(int irq, void *dev_id, + struct pt_regs * regs); +static irqreturn_t ras_error_interrupt(int irq, void *dev_id, + struct pt_regs * regs); + +/* #define DEBUG */ + +static void request_ras_irqs(struct device_node *np, char *propname, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + const char *name) +{ + unsigned int *ireg, len, i; + int virq, n_intr; + + ireg = (unsigned int *)get_property(np, propname, &len); + if (ireg == NULL) + return; + n_intr = prom_n_intr_cells(np); + len /= n_intr * sizeof(*ireg); + + for (i = 0; i < len; i++) { + virq = virt_irq_create_mapping(*ireg); + if (virq == NO_IRQ) { + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", np->full_name); + return; + } + if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) { + printk(KERN_ERR "Unable to request interrupt %d for " + "%s\n", irq_offset_up(virq), np->full_name); + return; + } + ireg += n_intr; + } +} + +/* + * Initialize handlers for the set of interrupts caused by hardware errors + * and power system events. 
+ */ +static int __init init_ras_IRQ(void) +{ + struct device_node *np; + + ras_get_sensor_state_token = rtas_token("get-sensor-state"); + ras_check_exception_token = rtas_token("check-exception"); + + /* Internal Errors */ + np = of_find_node_by_path("/event-sources/internal-errors"); + if (np != NULL) { + request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt, + "RAS_ERROR"); + request_ras_irqs(np, "interrupts", ras_error_interrupt, + "RAS_ERROR"); + of_node_put(np); + } + + /* EPOW Events */ + np = of_find_node_by_path("/event-sources/epow-events"); + if (np != NULL) { + request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt, + "RAS_EPOW"); + request_ras_irqs(np, "interrupts", ras_epow_interrupt, + "RAS_EPOW"); + of_node_put(np); + } + + return 1; +} +__initcall(init_ras_IRQ); + +/* + * Handle power subsystem events (EPOW). + * + * Presently we just log the event has occurred. This should be fixed + * to examine the type of power failure and take appropriate action where + * the time horizon permits something useful to be done. 
+ */ +static irqreturn_t +ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + int status = 0xdeadbeef; + int state = 0; + int critical; + + status = rtas_call(ras_get_sensor_state_token, 2, 2, &state, + EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX); + + if (state > 3) + critical = 1; /* Time Critical */ + else + critical = 0; + + spin_lock(&ras_log_buf_lock); + + status = rtas_call(ras_check_exception_token, 6, 1, NULL, + RAS_VECTOR_OFFSET, + virt_irq_to_real(irq_offset_down(irq)), + RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, + critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); + + udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n", + *((unsigned long *)&ras_log_buf), status, state); + printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n", + *((unsigned long *)&ras_log_buf), status, state); + + /* format and print the extended information */ + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* + * Handle hardware error interrupts. + * + * RTAS check-exception is called to collect data on the exception. If + * the error is deemed recoverable, we log a warning and return. + * For nonrecoverable errors, an error is logged and we stop all processing + * as quickly as possible in order to prevent propagation of the failure. 
+ */ +static irqreturn_t +ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct rtas_error_log *rtas_elog; + int status = 0xdeadbeef; + int fatal; + + spin_lock(&ras_log_buf_lock); + + status = rtas_call(ras_check_exception_token, 6, 1, NULL, + RAS_VECTOR_OFFSET, + virt_irq_to_real(irq_offset_down(irq)), + RTAS_INTERNAL_ERROR, 1 /*Time Critical */, + __pa(&ras_log_buf), + rtas_get_error_log_max()); + + rtas_elog = (struct rtas_error_log *)ras_log_buf; + + if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC)) + fatal = 1; + else + fatal = 0; + + /* format and print the extended information */ + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); + + if (fatal) { + udbg_printf("Fatal HW Error <0x%lx 0x%x>\n", + *((unsigned long *)&ras_log_buf), status); + printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", + *((unsigned long *)&ras_log_buf), status); + +#ifndef DEBUG + /* Don't actually power off when debugging so we can test + * without actually failing while injecting errors. + * Error data will not be logged to syslog. + */ + ppc_md.power_off(); +#endif + } else { + udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n", + *((unsigned long *)&ras_log_buf), status); + printk(KERN_WARNING + "Warning: Recoverable hardware error <0x%lx 0x%x>\n", + *((unsigned long *)&ras_log_buf), status); + } + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* Get the error information for errors coming through the + * FWNMI vectors. The pt_regs' r3 will be updated to reflect + * the actual r3 if possible, and a ptr to the error log entry + * will be returned if found. + * + * The mce_data_buf does not have any locks or protection around it, + * if a second machine check comes in, or a system reset is done + * before we have logged the error, then we will get corruption in the + * error log. 
This is preferable over holding off on calling + * ibm,nmi-interlock which would result in us checkstopping if a + * second machine check did come in. + */ +static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) +{ + unsigned long errdata = regs->gpr[3]; + struct rtas_error_log *errhdr = NULL; + unsigned long *savep; + + if ((errdata >= 0x7000 && errdata < 0x7fff0) || + (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) { + savep = __va(errdata); + regs->gpr[3] = savep[0]; /* restore original r3 */ + memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX); + memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX); + errhdr = (struct rtas_error_log *)mce_data_buf; + } else { + printk("FWNMI: corrupt r3\n"); + } + return errhdr; +} + +/* Call this when done with the data returned by FWNMI_get_errinfo. + * It will release the saved data area for other CPUs in the + * partition to receive FWNMI errors. + */ +static void fwnmi_release_errinfo(void) +{ + int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); + if (ret != 0) + printk("FWNMI: nmi-interlock failed: %d\n", ret); +} + +void pSeries_system_reset_exception(struct pt_regs *regs) +{ + if (fwnmi_active) { + struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); + if (errhdr) { + /* XXX Should look at FWNMI information */ + } + fwnmi_release_errinfo(); + } +} + +/* + * See if we can recover from a machine check exception. + * This is only called on power4 (or above) and only via + * the Firmware Non-Maskable Interrupts (fwnmi) handler + * which provides the error analysis for us. + * + * Return 1 if corrected (or delivered a signal). + * Return 0 if there is nothing we can do. 
+ */ +static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) +{ + int nonfatal = 0; + + if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { + /* Platform corrected itself */ + nonfatal = 1; + } else if ((regs->msr & MSR_RI) && + user_mode(regs) && + err->severity == RTAS_SEVERITY_ERROR_SYNC && + err->disposition == RTAS_DISP_NOT_RECOVERED && + err->target == RTAS_TARGET_MEMORY && + err->type == RTAS_TYPE_ECC_UNCORR && + !(current->pid == 0 || current->pid == 1)) { + /* Kill off a user process with an ECC error */ + printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", + current->pid); + /* XXX something better for ECC error? */ + _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); + nonfatal = 1; + } + + log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal); + + return nonfatal; +} + +/* + * Handle a machine check. + * + * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) + * should be present. If so the handler which called us tells us if the + * error was recovered (never true if RI=0). + * + * On hardware prior to Power 4 these exceptions were asynchronous which + * means we can't tell exactly where it occurred and so we can't recover. 
+ */ +int pSeries_machine_check_exception(struct pt_regs *regs) +{ + struct rtas_error_log *errp; + + if (fwnmi_active) { + errp = fwnmi_get_errinfo(regs); + fwnmi_release_errinfo(); + if (errp && recover_mce(regs, errp)) + return 1; + } + + return 0; +} Index: kexec/arch/ppc64/kernel/Makefile =================================================================== --- kexec.orig/arch/ppc64/kernel/Makefile +++ kexec/arch/ppc64/kernel/Makefile @@ -29,7 +29,7 @@ ifneq ($(CONFIG_PPC_MERGE),y) obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o endif -obj-$(CONFIG_PPC_PSERIES) += rtasd.o ras.o udbg_16550.o +obj-$(CONFIG_PPC_PSERIES) += rtasd.o udbg_16550.o obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \ bpa_iic.o spider-pic.o Index: kexec/arch/ppc64/kernel/ras.c =================================================================== --- kexec.orig/arch/ppc64/kernel/ras.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * ras.c - * Copyright (C) 2001 Dave Engebretsen IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* Change Activity: - * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support. 
- * End Change Activity - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; -static DEFINE_SPINLOCK(ras_log_buf_lock); - -char mce_data_buf[RTAS_ERROR_LOG_MAX] -; -/* This is true if we are using the firmware NMI handler (typically LPAR) */ -extern int fwnmi_active; - -static int ras_get_sensor_state_token; -static int ras_check_exception_token; - -#define EPOW_SENSOR_TOKEN 9 -#define EPOW_SENSOR_INDEX 0 -#define RAS_VECTOR_OFFSET 0x500 - -static irqreturn_t ras_epow_interrupt(int irq, void *dev_id, - struct pt_regs * regs); -static irqreturn_t ras_error_interrupt(int irq, void *dev_id, - struct pt_regs * regs); - -/* #define DEBUG */ - -static void request_ras_irqs(struct device_node *np, char *propname, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - const char *name) -{ - unsigned int *ireg, len, i; - int virq, n_intr; - - ireg = (unsigned int *)get_property(np, propname, &len); - if (ireg == NULL) - return; - n_intr = prom_n_intr_cells(np); - len /= n_intr * sizeof(*ireg); - - for (i = 0; i < len; i++) { - virq = virt_irq_create_mapping(*ireg); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", np->full_name); - return; - } - if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) { - printk(KERN_ERR "Unable to request interrupt %d for " - "%s\n", irq_offset_up(virq), np->full_name); - return; - } - ireg += n_intr; - } -} - -/* - * Initialize handlers for the set of interrupts caused by hardware errors - * and power system events. 
- */ -static int __init init_ras_IRQ(void) -{ - struct device_node *np; - - ras_get_sensor_state_token = rtas_token("get-sensor-state"); - ras_check_exception_token = rtas_token("check-exception"); - - /* Internal Errors */ - np = of_find_node_by_path("/event-sources/internal-errors"); - if (np != NULL) { - request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt, - "RAS_ERROR"); - request_ras_irqs(np, "interrupts", ras_error_interrupt, - "RAS_ERROR"); - of_node_put(np); - } - - /* EPOW Events */ - np = of_find_node_by_path("/event-sources/epow-events"); - if (np != NULL) { - request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt, - "RAS_EPOW"); - request_ras_irqs(np, "interrupts", ras_epow_interrupt, - "RAS_EPOW"); - of_node_put(np); - } - - return 1; -} -__initcall(init_ras_IRQ); - -/* - * Handle power subsystem events (EPOW). - * - * Presently we just log the event has occurred. This should be fixed - * to examine the type of power failure and take appropriate action where - * the time horizon permits something useful to be done. 
- */ -static irqreturn_t -ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs) -{ - int status = 0xdeadbeef; - int state = 0; - int critical; - - status = rtas_call(ras_get_sensor_state_token, 2, 2, &state, - EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX); - - if (state > 3) - critical = 1; /* Time Critical */ - else - critical = 0; - - spin_lock(&ras_log_buf_lock); - - status = rtas_call(ras_check_exception_token, 6, 1, NULL, - RAS_VECTOR_OFFSET, - virt_irq_to_real(irq_offset_down(irq)), - RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, - critical, __pa(&ras_log_buf), - rtas_get_error_log_max()); - - udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n", - *((unsigned long *)&ras_log_buf), status, state); - printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n", - *((unsigned long *)&ras_log_buf), status, state); - - /* format and print the extended information */ - log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); - - spin_unlock(&ras_log_buf_lock); - return IRQ_HANDLED; -} - -/* - * Handle hardware error interrupts. - * - * RTAS check-exception is called to collect data on the exception. If - * the error is deemed recoverable, we log a warning and return. - * For nonrecoverable errors, an error is logged and we stop all processing - * as quickly as possible in order to prevent propagation of the failure. 
- */ -static irqreturn_t -ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs) -{ - struct rtas_error_log *rtas_elog; - int status = 0xdeadbeef; - int fatal; - - spin_lock(&ras_log_buf_lock); - - status = rtas_call(ras_check_exception_token, 6, 1, NULL, - RAS_VECTOR_OFFSET, - virt_irq_to_real(irq_offset_down(irq)), - RTAS_INTERNAL_ERROR, 1 /*Time Critical */, - __pa(&ras_log_buf), - rtas_get_error_log_max()); - - rtas_elog = (struct rtas_error_log *)ras_log_buf; - - if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC)) - fatal = 1; - else - fatal = 0; - - /* format and print the extended information */ - log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); - - if (fatal) { - udbg_printf("Fatal HW Error <0x%lx 0x%x>\n", - *((unsigned long *)&ras_log_buf), status); - printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", - *((unsigned long *)&ras_log_buf), status); - -#ifndef DEBUG - /* Don't actually power off when debugging so we can test - * without actually failing while injecting errors. - * Error data will not be logged to syslog. - */ - ppc_md.power_off(); -#endif - } else { - udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n", - *((unsigned long *)&ras_log_buf), status); - printk(KERN_WARNING - "Warning: Recoverable hardware error <0x%lx 0x%x>\n", - *((unsigned long *)&ras_log_buf), status); - } - - spin_unlock(&ras_log_buf_lock); - return IRQ_HANDLED; -} - -/* Get the error information for errors coming through the - * FWNMI vectors. The pt_regs' r3 will be updated to reflect - * the actual r3 if possible, and a ptr to the error log entry - * will be returned if found. - * - * The mce_data_buf does not have any locks or protection around it, - * if a second machine check comes in, or a system reset is done - * before we have logged the error, then we will get corruption in the - * error log. 
This is preferable over holding off on calling - * ibm,nmi-interlock which would result in us checkstopping if a - * second machine check did come in. - */ -static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) -{ - unsigned long errdata = regs->gpr[3]; - struct rtas_error_log *errhdr = NULL; - unsigned long *savep; - - if ((errdata >= 0x7000 && errdata < 0x7fff0) || - (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) { - savep = __va(errdata); - regs->gpr[3] = savep[0]; /* restore original r3 */ - memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX); - memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX); - errhdr = (struct rtas_error_log *)mce_data_buf; - } else { - printk("FWNMI: corrupt r3\n"); - } - return errhdr; -} - -/* Call this when done with the data returned by FWNMI_get_errinfo. - * It will release the saved data area for other CPUs in the - * partition to receive FWNMI errors. - */ -static void fwnmi_release_errinfo(void) -{ - int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); - if (ret != 0) - printk("FWNMI: nmi-interlock failed: %d\n", ret); -} - -void pSeries_system_reset_exception(struct pt_regs *regs) -{ - if (fwnmi_active) { - struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); - if (errhdr) { - /* XXX Should look at FWNMI information */ - } - fwnmi_release_errinfo(); - } -} - -/* - * See if we can recover from a machine check exception. - * This is only called on power4 (or above) and only via - * the Firmware Non-Maskable Interrupts (fwnmi) handler - * which provides the error analysis for us. - * - * Return 1 if corrected (or delivered a signal). - * Return 0 if there is nothing we can do. 
- */ -static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) -{ - int nonfatal = 0; - - if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { - /* Platform corrected itself */ - nonfatal = 1; - } else if ((regs->msr & MSR_RI) && - user_mode(regs) && - err->severity == RTAS_SEVERITY_ERROR_SYNC && - err->disposition == RTAS_DISP_NOT_RECOVERED && - err->target == RTAS_TARGET_MEMORY && - err->type == RTAS_TYPE_ECC_UNCORR && - !(current->pid == 0 || current->pid == 1)) { - /* Kill off a user process with an ECC error */ - printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", - current->pid); - /* XXX something better for ECC error? */ - _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); - nonfatal = 1; - } - - log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal); - - return nonfatal; -} - -/* - * Handle a machine check. - * - * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) - * should be present. If so the handler which called us tells us if the - * error was recovered (never true if RI=0). - * - * On hardware prior to Power 4 these exceptions were asynchronous which - * means we can't tell exactly where it occurred and so we can't recover. - */ -int pSeries_machine_check_exception(struct pt_regs *regs) -{ - struct rtas_error_log *errp; - - if (fwnmi_active) { - errp = fwnmi_get_errinfo(regs); - fwnmi_release_errinfo(); - if (errp && recover_mce(regs, errp)) - return 1; - } - - return 0; -} From michael at ellerman.id.au Mon Oct 24 15:13:18 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 15:13:18 +1000 Subject: [PATCH] Merge configs Message-ID: <200510241513.21783.michael@ellerman.id.au> We currently don't have any configs for ARCH=powerpc, which means make defconfig doesn't work. 
This is too big for the list (I think), so it's here: http://michael.ellerman.id.au/files/merge_configs.patch I've turned of XMON because currently it doesn't build, and it strikes me as nasty to have a defconfig that doesn't build. cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051024/eda7c063/attachment.pgp From david at gibson.dropbear.id.au Mon Oct 24 15:16:38 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Mon, 24 Oct 2005 15:16:38 +1000 Subject: [PATCH 3/5] powerpc: Move plpar_wrappers.h into include/asm-powerpc In-Reply-To: <20051024050728.3C554685C8@ozlabs.org> References: <1130130446.113659.789254640331.qpush@concordia> <20051024050728.3C554685C8@ozlabs.org> Message-ID: <20051024051638.GG30183@localhost.localdomain> On Mon, Oct 24, 2005 at 03:07:28PM +1000, Michael Ellerman wrote: > Move plpar_wrappers.h into include/asm-powerpc, fix up a bit of whitespace > while we're there. Doesn't this belong in arch/powerpc/platforms/pseries instead? -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! 
http://www.ozlabs.org/people/dgibson From michael at ellerman.id.au Mon Oct 24 15:40:34 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 15:40:34 +1000 Subject: [PATCH 3/5] powerpc: Move plpar_wrappers.h into include/asm-powerpc In-Reply-To: <20051024051638.GG30183@localhost.localdomain> References: <1130130446.113659.789254640331.qpush@concordia> <20051024050728.3C554685C8@ozlabs.org> <20051024051638.GG30183@localhost.localdomain> Message-ID: <200510241540.48460.michael@ellerman.id.au> On Mon, 24 Oct 2005 15:16, David Gibson wrote: > On Mon, Oct 24, 2005 at 03:07:28PM +1000, Michael Ellerman wrote: > > Move plpar_wrappers.h into include/asm-powerpc, fix up a bit of > > whitespace while we're there. > > Doesn't this belong in arch/powerpc/platforms/pseries instead? Not quite, arch/powerpc/kernel/process.c needs plpar_set_xdabr() and plpar_set_dabr(). I also have a patch for machine_kexec.c that will need something from here. cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051024/eb04ba33/attachment.pgp From sfr at canb.auug.org.au Mon Oct 24 16:05:03 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Mon, 24 Oct 2005 16:05:03 +1000 Subject: [PATCH] powerpc: don't duplicate name between vio_driver and device_driver Message-ID: <20051024160503.6851dc80.sfr@canb.auug.org.au> There has been a discussion on LKML about the duplication of the name and owner fields in struct pci_driver which concluded that we should just set the embedded device_driver fields instead. 
This does the same for the name field of struct vio_driver. Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/vio.c | 3 +-- drivers/block/viodasd.c | 6 ++++-- drivers/cdrom/viocd.c | 6 ++++-- drivers/char/hvc_vio.c | 2 +- drivers/char/hvcs.c | 4 +++- drivers/char/viotape.c | 6 ++++-- drivers/net/ibmveth.c | 10 ++++++---- drivers/net/iseries_veth.c | 6 ++++-- drivers/scsi/ibmvscsi/ibmvscsi.c | 6 ++++-- include/asm-powerpc/vio.h | 1 - 10 files changed, 31 insertions(+), 19 deletions(-) This patch depends on Michael Ellerman's patches merging vio.[ch] int the merge tree. -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ e787d14a6d87d4963c91a176139d1e83d0c845fa diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -76,10 +76,9 @@ static int vio_bus_remove(struct device int vio_register_driver(struct vio_driver *viodrv) { printk(KERN_DEBUG "%s: driver %s registering\n", __FUNCTION__, - viodrv->name); + viodrv->driver.name); /* fill in 'struct driver' fields */ - viodrv->driver.name = viodrv->name; viodrv->driver.bus = &vio_bus_type; viodrv->driver.probe = vio_bus_probe; viodrv->driver.remove = vio_bus_remove; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -781,10 +781,12 @@ static struct vio_device_id viodasd_devi MODULE_DEVICE_TABLE(vio, viodasd_device_table); static struct vio_driver viodasd_driver = { - .name = "viodasd", .id_table = viodasd_device_table, .probe = viodasd_probe, - .remove = viodasd_remove + .remove = viodasd_remove, + .driver = { + .name = "viodasd", + } }; /* diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -739,10 +739,12 @@ static struct vio_device_id viocd_device MODULE_DEVICE_TABLE(vio, viocd_device_table); static struct vio_driver viocd_driver = { - .name = "viocd", .id_table = 
viocd_device_table, .probe = viocd_probe, - .remove = viocd_remove + .remove = viocd_remove, + .driver = { + .name = "viocd", + } }; static int __init viocd_init(void) diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -95,11 +95,11 @@ static int __devexit hvc_vio_remove(stru } static struct vio_driver hvc_vio_driver = { - .name = hvc_driver_name, .id_table = hvc_driver_table, .probe = hvc_vio_probe, .remove = hvc_vio_remove, .driver = { + .name = hvc_driver_name, .owner = THIS_MODULE, } }; diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -720,10 +720,12 @@ static int __devexit hvcs_remove(struct }; static struct vio_driver hvcs_vio_driver = { - .name = hvcs_driver_name, .id_table = hvcs_driver_table, .probe = hvcs_probe, .remove = hvcs_remove, + .driver = { + .name = hvcs_driver_name, + } }; /* Only called from hvcs_get_pi please */ diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -996,10 +996,12 @@ static struct vio_device_id viotape_devi MODULE_DEVICE_TABLE(vio, viotape_device_table); static struct vio_driver viotape_driver = { - .name = "viotape", .id_table = viotape_device_table, .probe = viotape_probe, - .remove = viotape_remove + .remove = viotape_remove, + .driver = { + .name = "viotape", + } }; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1150,10 +1150,12 @@ static struct vio_device_id ibmveth_devi MODULE_DEVICE_TABLE(vio, ibmveth_device_table); static struct vio_driver ibmveth_driver = { - .name = (char *)ibmveth_driver_name, - .id_table = ibmveth_device_table, - .probe = ibmveth_probe, - .remove = ibmveth_remove + .id_table = ibmveth_device_table, + .probe = ibmveth_probe, + .remove = ibmveth_remove, + .driver = { + .name = ibmveth_driver_name, + } }; static int __init 
ibmveth_module_init(void) diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1648,10 +1648,12 @@ static struct vio_device_id veth_device_ MODULE_DEVICE_TABLE(vio, veth_device_table); static struct vio_driver veth_driver = { - .name = DRV_NAME, .id_table = veth_device_table, .probe = veth_probe, - .remove = veth_remove + .remove = veth_remove, + .driver = { + .name = DRV_NAME, + } }; /* diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1546,10 +1546,12 @@ static struct vio_device_id ibmvscsi_dev MODULE_DEVICE_TABLE(vio, ibmvscsi_device_table); static struct vio_driver ibmvscsi_driver = { - .name = "ibmvscsi", .id_table = ibmvscsi_device_table, .probe = ibmvscsi_probe, - .remove = ibmvscsi_remove + .remove = ibmvscsi_remove, + .driver = { + .name = "ibmvscsi", + } }; int __init ibmvscsi_module_init(void) diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h --- a/include/asm-powerpc/vio.h +++ b/include/asm-powerpc/vio.h @@ -55,7 +55,6 @@ struct vio_dev { struct vio_driver { struct list_head node; - char *name; const struct vio_device_id *id_table; int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); int (*remove)(struct vio_dev *dev); From sfr at canb.auug.org.au Mon Oct 24 17:01:00 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Mon, 24 Oct 2005 17:01:00 +1000 Subject: [PATCH] powerpc: set the driver.owner field for all vio drivers Message-ID: <20051024170100.36607dbb.sfr@canb.auug.org.au> This will create a link from the driver to the module in sysfs. 
Signed-off-by: Stephen Rothwell --- drivers/block/viodasd.c | 3 ++- drivers/cdrom/viocd.c | 3 ++- drivers/char/hvcs.c | 1 + drivers/char/viotape.c | 3 ++- drivers/net/ibmveth.c | 2 +- drivers/net/iseries_veth.c | 1 + drivers/scsi/ibmvscsi/ibmvscsi.c | 3 ++- 7 files changed, 11 insertions(+), 5 deletions(-) -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ a6cfb870abff38ce9aa574d56cb71524fc4660e1 diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -778,14 +778,15 @@ static struct vio_device_id viodasd_devi { "viodasd", "" }, { "", "" } }; - MODULE_DEVICE_TABLE(vio, viodasd_device_table); + static struct vio_driver viodasd_driver = { .id_table = viodasd_device_table, .probe = viodasd_probe, .remove = viodasd_remove, .driver = { .name = "viodasd", + .owner = THIS_MODULE, } }; diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -736,14 +736,15 @@ static struct vio_device_id viocd_device { "viocd", "" }, { "", "" } }; - MODULE_DEVICE_TABLE(vio, viocd_device_table); + static struct vio_driver viocd_driver = { .id_table = viocd_device_table, .probe = viocd_probe, .remove = viocd_remove, .driver = { .name = "viocd", + .owner = THIS_MODULE, } }; diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -725,6 +725,7 @@ static struct vio_driver hvcs_vio_driver .remove = hvcs_remove, .driver = { .name = hvcs_driver_name, + .owner = THIS_MODULE, } }; diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -993,14 +993,15 @@ static struct vio_device_id viotape_devi { "viotape", "" }, { "", "" } }; - MODULE_DEVICE_TABLE(vio, viotape_device_table); + static struct vio_driver viotape_driver = { .id_table = viotape_device_table, .probe = viotape_probe, .remove = viotape_remove, .driver = { 
.name = "viotape", + .owner = THIS_MODULE, } }; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1146,7 +1146,6 @@ static struct vio_device_id ibmveth_devi { "network", "IBM,l-lan"}, { "", "" } }; - MODULE_DEVICE_TABLE(vio, ibmveth_device_table); static struct vio_driver ibmveth_driver = { @@ -1155,6 +1154,7 @@ static struct vio_driver ibmveth_driver .remove = ibmveth_remove, .driver = { .name = ibmveth_driver_name, + .owner = THIS_MODULE, } }; diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1653,6 +1653,7 @@ static struct vio_driver veth_driver = { .remove = veth_remove, .driver = { .name = DRV_NAME, + .owner = THIS_MODULE, } }; diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1543,14 +1543,15 @@ static struct vio_device_id ibmvscsi_dev {"vscsi", "IBM,v-scsi"}, { "", "" } }; - MODULE_DEVICE_TABLE(vio, ibmvscsi_device_table); + static struct vio_driver ibmvscsi_driver = { .id_table = ibmvscsi_device_table, .probe = ibmvscsi_probe, .remove = ibmvscsi_remove, .driver = { .name = "ibmvscsi", + .owner = THIS_MODULE, } }; From arnd at arndb.de Mon Oct 24 20:38:53 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Mon, 24 Oct 2005 12:38:53 +0200 Subject: [PATCH 3/5] powerpc: Move plpar_wrappers.h into include/asm-powerpc In-Reply-To: <200510241540.48460.michael@ellerman.id.au> References: <1130130446.113659.789254640331.qpush@concordia> <20051024051638.GG30183@localhost.localdomain> <200510241540.48460.michael@ellerman.id.au> Message-ID: <200510241238.53655.arnd@arndb.de> On Maandag 24 Oktober 2005 07:40, Michael Ellerman wrote: > > Doesn't this belong in arch/powerpc/platforms/pseries instead? > > Not quite, arch/powerpc/kernel/process.c needs plpar_set_xdabr() and > plpar_set_dabr(). 
I also have a patch for machine_kexec.c that will need > something from here. For process.c, might it be better to have a ppc_md->set_dabr() function call instead? Maybe there is also a similar solution for the kexec problem. Arnd <>< From arnd at arndb.de Mon Oct 24 20:42:12 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Mon, 24 Oct 2005 12:42:12 +0200 Subject: [PATCH] Merge configs In-Reply-To: <200510241513.21783.michael@ellerman.id.au> References: <200510241513.21783.michael@ellerman.id.au> Message-ID: <200510241242.12913.arnd@arndb.de> On Maandag 24 Oktober 2005 07:13, Michael Ellerman wrote: > We currently don't have any configs for ARCH=powerpc, which means make > defconfig doesn't work. > > This is too big for the list (I think), so it's here: > http://michael.ellerman.id.au/files/merge_configs.patch Please rename bpa_defconfig to cell_defconfig while you are moving it. I want to change the naming as I move the cell code over to arch/powerpc. Arnd <>< From michael at ellerman.id.au Mon Oct 24 20:58:05 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 20:58:05 +1000 Subject: [PATCH] Merge configs In-Reply-To: <200510241242.12913.arnd@arndb.de> References: <200510241513.21783.michael@ellerman.id.au> <200510241242.12913.arnd@arndb.de> Message-ID: <200510242058.09773.michael@ellerman.id.au> On Mon, 24 Oct 2005 20:42, Arnd Bergmann wrote: > On Maandag 24 Oktober 2005 07:13, Michael Ellerman wrote: > > We currently don't have any configs for ARCH=powerpc, which means make > > defconfig doesn't work. > > > > This is too big for the list (I think), so it's here: > > http://michael.ellerman.id.au/files/merge_configs.patch > > Please rename bpa_defconfig to cell_defconfig while you are moving it. > I want to change the naming as I move the cell code over to arch/powerpc. Done. I haven't tested it though, I assume there's no fancy logic that guesses bpa_defconfig based on CONFIG_BPA or anything? 
cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051024/dff6d91a/attachment.pgp From arnd at arndb.de Mon Oct 24 21:07:45 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Mon, 24 Oct 2005 13:07:45 +0200 Subject: [PATCH] Merge configs In-Reply-To: <200510242058.09773.michael@ellerman.id.au> References: <200510241513.21783.michael@ellerman.id.au> <200510241242.12913.arnd@arndb.de> <200510242058.09773.michael@ellerman.id.au> Message-ID: <200510241307.46471.arnd@arndb.de> On Maandag 24 Oktober 2005 12:58, Michael Ellerman wrote: > Done. I haven't tested it though, I assume there's no fancy logic that guesses > bpa_defconfig based on CONFIG_BPA or anything? No, there isn't. I'll rename the CONFIG_* options when I move the other files. Thanks, Arnd <>< From michael at ellerman.id.au Mon Oct 24 21:09:00 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 21:09:00 +1000 Subject: [PATCH 3/5] powerpc: Move plpar_wrappers.h into include/asm-powerpc In-Reply-To: <200510241238.53655.arnd@arndb.de> References: <1130130446.113659.789254640331.qpush@concordia> <200510241540.48460.michael@ellerman.id.au> <200510241238.53655.arnd@arndb.de> Message-ID: <200510242109.07750.michael@ellerman.id.au> On Mon, 24 Oct 2005 20:38, Arnd Bergmann wrote: > On Maandag 24 Oktober 2005 07:40, Michael Ellerman wrote: > > > Doesn't this belong in arch/powerpc/platforms/pseries instead? > > > > Not quite, arch/powerpc/kernel/process.c needs plpar_set_xdabr() and > > plpar_set_dabr(). 
I also have a patch for machine_kexec.c that will need > > something from here. > > For process.c, might it be better to have a ppc_md->set_dabr() function > call instead? > Maybe there is also a similar solution for the kexec problem. I new some clever person would say that :D We have: int set_dabr(unsigned long dabr) { int ret = 0; #ifdef CONFIG_PPC64 if (firmware_has_feature(FW_FEATURE_XDABR)) { /* We want to catch accesses from kernel and userspace */ unsigned long flags = H_DABRX_KERNEL|H_DABRX_USER; ret = plpar_set_xdabr(dabr, flags); } else if (firmware_has_feature(FW_FEATURE_DABR)) { ret = plpar_set_dabr(dabr); } else #endif set_dabr_spr(dabr); return ret; } It looks like those firmware features will only ever be true on pSeries, so we could make it a ppc_md, with everyone except pSeries having a null callback. I'm not sure how crazy we want to get with ppc_md though? It already has 52 members for PPC64, 74 all up. cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051024/9a743a70/attachment.pgp From arnd at arndb.de Mon Oct 24 21:42:20 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Mon, 24 Oct 2005 13:42:20 +0200 Subject: [PATCH 3/5] powerpc: Move plpar_wrappers.h into include/asm-powerpc In-Reply-To: <200510242109.07750.michael@ellerman.id.au> References: <1130130446.113659.789254640331.qpush@concordia> <200510241238.53655.arnd@arndb.de> <200510242109.07750.michael@ellerman.id.au> Message-ID: <200510241342.21179.arnd@arndb.de> On Maandag 24 Oktober 2005 13:09, Michael Ellerman wrote: > It looks like those firmware features will only ever be true on pSeries, so we > could make it a ppc_md, with everyone except pSeries having a null callback. I rather thought of making the callback point to set_dabr_spr for everything except pSeries and get rid of set_dabr(), but that's just a detail. > I'm not sure how crazy we want to get with ppc_md though? It already has 52 > members for PPC64, 74 all up. Yes, that certainly is a problem. It has the potential to become a horrible mess if it isn't already. Maybe that should be revisited later, but IMHO it seems to be the right abstraction here because it helps get the code layering clearer. There is always a tradeoff between using ppc_md, smp_ops, systemcfg->platform, ppc64_firmware_features, cur_cpu_spec->cpu_features and compile-time decisions. Maybe we already have too many mechanisms to do the same thing (or not enough), and probably we're using the wrong abstraction in some places, but figuring that out can become a very long discussion... 
Arnd <>< From michael at ellerman.id.au Mon Oct 24 22:28:53 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Mon, 24 Oct 2005 22:28:53 +1000 Subject: [PATCH 3/5] powerpc: Move plpar_wrappers.h into include/asm-powerpc In-Reply-To: <200510241342.21179.arnd@arndb.de> References: <1130130446.113659.789254640331.qpush@concordia> <200510242109.07750.michael@ellerman.id.au> <200510241342.21179.arnd@arndb.de> Message-ID: <200510242228.57888.michael@ellerman.id.au> On Mon, 24 Oct 2005 21:42, Arnd Bergmann wrote: > On Maandag 24 Oktober 2005 13:09, Michael Ellerman wrote: > > It looks like those firmware features will only ever be true on pSeries, > > so we could make it a ppc_md, with everyone except pSeries having a null > > callback. > > I rather thought of making the callback point to set_dabr_spr for > everything except pSeries and get rid of set_dabr(), but that's just a > detail. Mmmm, except then you lose (or obscure) the fact that most platforms just want set_dabr_spr(), and that pSeries is the exception. Plus it means you have to patch every platform, but you do lose set_dabr() so it's a tradeoff. > > I'm not sure how crazy we want to get with ppc_md though? It already has > > 52 members for PPC64, 74 all up. > > Yes, that certainly is a problem. It has the potential to become a horrible > mess if it isn't already. Maybe that should be revisited later, but IMHO > it seems to be the right abstraction here because it helps get the code > layering clearer. > > There is always a tradeoff between using ppc_md, smp_ops, > systemcfg->platform, ppc64_firmware_features, cur_cpu_spec->cpu_features > and compile-time decisions. Maybe we already have too many mechanisms to > do the same thing (or not enough), and probably we're using the wrong > abstraction in some places, but figuring that out can become a very long > discussion... I agree, we could spend a lot of time talking about it for not much gain. Remember Mozilla! 
I think we especially don't want to get stuck for too long having these sort of discussions while we're trying to do the merge. In this case I think I will add a ppc_md.set_dabr(), as it means we can make an entire header (plpar_wrappers) private to a platform, and that's a Good Thing. But at some point we might need to sit down and try and sort it all out a bit more thoroughly. cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051024/ec36d2fd/attachment.pgp From jschopp at austin.ibm.com Tue Oct 25 01:21:26 2005 From: jschopp at austin.ibm.com (Joel Schopp) Date: Mon, 24 Oct 2005 10:21:26 -0500 Subject: [Fwd: [PATCH *-mm] Serial disable jsm in ppc64 defconfig] Message-ID: <435CFBF6.4000907@austin.ibm.com> I had run across this build problem in the past on some -mm trees with serial on ppc64. Does anybody 1. know what machines use this serial and 2. who knows enough about this code to fix it instead of turning it off? -Joel -------- Original Message -------- Subject: [PATCH *-mm] Serial disable jsm in ppc64 defconfig Date: Sat, 22 Oct 2005 17:25:28 -0700 (PDT) From: Paul Jackson To: Andrew Morton CC: linux-kernel at vger.kernel.org, Joel Schopp , Alan Cox , serue at us.ibm.com, Paul Jackson , Damir Perisa Changes to the serial driver to remove flip buffers have broken the serial jsm driver. It doesn't even compile anymore. The jsm driver was enabled in only one defconfig - ppc64. In order to keep defconfigs building, disable CONFIG_SERIAL_JSM for the time being. 
Signed-off-by: Paul Jackson --- arch/ppc64/defconfig | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) --- 2.6.14-rc4-mm1.orig/arch/ppc64/defconfig 2005-10-22 16:52:34.000000000 -0700 +++ 2.6.14-rc4-mm1/arch/ppc64/defconfig 2005-10-22 16:53:46.000000000 -0700 @@ -824,7 +824,7 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_PMACZILOG is not set CONFIG_SERIAL_ICOM=m -CONFIG_SERIAL_JSM=m +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 -- I won't rest till it's the best ... Programmer, Linux Scalability Paul Jackson 1.650.933.1373, 1.925.600.0401 From dwm at maxeymade.com Tue Oct 25 04:38:53 2005 From: dwm at maxeymade.com (Doug Maxey) Date: Mon, 24 Oct 2005 13:38:53 -0500 Subject: [Fwd: [PATCH *-mm] Serial disable jsm in ppc64 defconfig] In-Reply-To: <435CFBF6.4000907@austin.ibm.com> Message-ID: <200510241838.j9OIcrTt000797@falcon30.maxeymade.com> On Mon, 24 Oct 2005 10:21:26 CDT, Joel Schopp wrote: >I had run across this build problem in the past on some -mm trees with serial on >ppc64. Does anybody 1. know what machines use this serial and 2. who knows >enough about this code to fix it instead of turning it off? > It will be in a new, as yet un-announced system. ++doug From sleddog at us.ibm.com Tue Oct 25 07:37:27 2005 From: sleddog at us.ibm.com (Dave C Boutcher) Date: Mon, 24 Oct 2005 16:37:27 -0500 Subject: [Fwd: [PATCH *-mm] Serial disable jsm in ppc64 defconfig] In-Reply-To: <435CFBF6.4000907@austin.ibm.com> References: <435CFBF6.4000907@austin.ibm.com> Message-ID: <20051024213727.GA28073@cs.umn.edu> On Mon, Oct 24, 2005 at 10:21:26AM -0500, Joel Schopp wrote: > I had run across this build problem in the past on some -mm trees with > serial on ppc64. Does anybody 1. know what machines use this serial and 2. > who knows enough about this code to fix it instead of turning it off? Alan Cox was wandering around making fixes to various serial drivers (he did the flip changes.) 
You might consider pinging him. -- Dave Boutcher From michael at ellerman.id.au Tue Oct 25 13:33:04 2005 From: michael at ellerman.id.au (Michael Ellerman) Date: Tue, 25 Oct 2005 13:33:04 +1000 Subject: [PATCH 1/5] powerpc: Remove duplicate definition of set_tb() In-Reply-To: <1130130446.113659.789254640331.qpush@concordia> References: <1130130446.113659.789254640331.qpush@concordia> Message-ID: <200510251333.14822.michael@ellerman.id.au> On Mon, 24 Oct 2005 15:07, Michael Ellerman wrote: > Somewhere along the line we got two definitions of set_tb(). They look to > be identical although they're not textually identical. So remove the #ifdef > CONFIG_PPC64 version, leaving the common version in time.h. GCC 4.0 doesn't seem to mind that there's two definitions, but GCC 3.4.2 does. cheers -- Michael Ellerman IBM OzLabs email: michael:ellerman.id.au inmsg: mpe:jabber.org wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051025/62676a3f/attachment.pgp From sfr at canb.auug.org.au Tue Oct 25 16:56:43 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Tue, 25 Oct 2005 16:56:43 +1000 Subject: [PATCH 1/2] ppc64: make dma_addr_t 64 bits Message-ID: <20051025165643.39cac31d.sfr@canb.auug.org.au> There has been a need expressed for dma_addr_t to be 64 bits on PPC64. This patch does that. 
Signed-off-by: Stephen Rothwell --- drivers/net/ibmveth.c | 2 +- include/asm-powerpc/types.h | 4 ++++ include/asm-ppc64/scatterlist.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ 3be60638aece21cb4ca9b98ffb0ed349ec3e192a diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -532,7 +532,7 @@ static int ibmveth_open(struct net_devic if(lpar_rc != H_Success) { ibmveth_error_printk("h_register_logical_lan failed with %ld\n", lpar_rc); - ibmveth_error_printk("buffer TCE:0x%x filter TCE:0x%x rxq desc:0x%lx MAC:0x%lx\n", + ibmveth_error_printk("buffer TCE:0x%lx filter TCE:0x%lx rxq desc:0x%lx MAC:0x%lx\n", adapter->buffer_list_dma, adapter->filter_list_dma, rxq_desc.desc, diff --git a/include/asm-powerpc/types.h b/include/asm-powerpc/types.h --- a/include/asm-powerpc/types.h +++ b/include/asm-powerpc/types.h @@ -83,7 +83,11 @@ typedef unsigned long long u64; typedef __vector128 vector128; +#ifdef __powerpc64__ +typedef u64 dma_addr_t; +#else typedef u32 dma_addr_t; +#endif typedef u64 dma64_addr_t; typedef struct { diff --git a/include/asm-ppc64/scatterlist.h b/include/asm-ppc64/scatterlist.h --- a/include/asm-ppc64/scatterlist.h +++ b/include/asm-ppc64/scatterlist.h @@ -19,7 +19,7 @@ struct scatterlist { unsigned int length; /* For TCE support */ - u32 dma_address; + dma_addr_t dma_address; u32 dma_length; }; From sfr at canb.auug.org.au Tue Oct 25 16:59:03 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Tue, 25 Oct 2005 16:59:03 +1000 Subject: [PATCH 2/2] powerpc: merge scatterlist.h In-Reply-To: <20051025165643.39cac31d.sfr@canb.auug.org.au> References: <20051025165643.39cac31d.sfr@canb.auug.org.au> Message-ID: <20051025165903.3d4cf14c.sfr@canb.auug.org.au> This depends on the 64bit dma_addr_t patch. 
Signed-off-by: Stephen Rothwell --- include/asm-powerpc/scatterlist.h | 45 +++++++++++++++++++++++++++++++++++++ include/asm-ppc/scatterlist.h | 25 --------------------- include/asm-ppc64/scatterlist.h | 31 ------------------------- 3 files changed, 45 insertions(+), 56 deletions(-) create mode 100644 include/asm-powerpc/scatterlist.h delete mode 100644 include/asm-ppc/scatterlist.h delete mode 100644 include/asm-ppc64/scatterlist.h -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ d b7d026746092f2b10eb5c2931aac88cdbec3eb3 diff --git a/include/asm-powerpc/scatterlist.h b/include/asm-powerpc/scatterlist.h new file mode 100644 --- /dev/null +++ b/include/asm-powerpc/scatterlist.h @@ -0,0 +1,45 @@ +#ifndef _ASM_POWERPC_SCATTERLIST_H +#define _ASM_POWERPC_SCATTERLIST_H +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __KERNEL__ +#include +#include + +struct scatterlist { + struct page *page; + unsigned int offset; + unsigned int length; + + /* For TCE support */ + dma_addr_t dma_address; + u32 dma_length; +}; + +/* + * These macros should be used after a dma_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. 
+ */ +#define sg_dma_address(sg) ((sg)->dma_address) +#ifdef __powerpc64__ +#define sg_dma_len(sg) ((sg)->dma_length) +#else +#define sg_dma_len(sg) ((sg)->length) +#endif + +#ifdef __powerpc64__ +#define ISA_DMA_THRESHOLD (~0UL) +#endif + +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_SCATTERLIST_H */ diff --git a/include/asm-ppc/scatterlist.h b/include/asm-ppc/scatterlist.h deleted file mode 100644 --- a/include/asm-ppc/scatterlist.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifdef __KERNEL__ -#ifndef _PPC_SCATTERLIST_H -#define _PPC_SCATTERLIST_H - -#include - -struct scatterlist { - struct page *page; - unsigned int offset; - dma_addr_t dma_address; - unsigned int length; -}; - -/* - * These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->length) - -#endif /* !(_PPC_SCATTERLIST_H) */ -#endif /* __KERNEL__ */ diff --git a/include/asm-ppc64/scatterlist.h b/include/asm-ppc64/scatterlist.h deleted file mode 100644 --- a/include/asm-ppc64/scatterlist.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef _PPC64_SCATTERLIST_H -#define _PPC64_SCATTERLIST_H - -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include - -struct scatterlist { - struct page *page; - unsigned int offset; - unsigned int length; - - /* For TCE support */ - dma_addr_t dma_address; - u32 dma_length; -}; - -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->dma_length) - -#define ISA_DMA_THRESHOLD (~0UL) - -#endif /* !(_PPC64_SCATTERLIST_H) */ From sfr at canb.auug.org.au Tue Oct 25 18:06:13 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Tue, 25 Oct 2005 18:06:13 +1000 Subject: [PATCH] ppc64: remove arch/ppc64/kernel/setup.c Message-ID: <20051025180613.576b6983.sfr@canb.auug.org.au> and use setup_64.c from the merged tree instead. The only difference between them was the code to set up the syscall maps. Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/Makefile | 5 arch/powerpc/kernel/setup_64.c | 18 - arch/ppc64/kernel/Makefile | 2 arch/ppc64/kernel/setup.c | 1307 ---------------------------------------- 4 files changed, 17 insertions(+), 1315 deletions(-) delete mode 100644 arch/ppc64/kernel/setup.c Built and booted on iSeries for ARCH=ppc64 and ARCH=powerpc. Interestingly, we have one more 32 bit syscall in the merged tree. 
-- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ a4baa5aad9cf31adb559c79b1888221a30215cc1 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,7 +12,8 @@ endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ signal_32.o pmc.o -obj-$(CONFIG_PPC64) += binfmt_elf32.o sys_ppc32.o ptrace32.o +obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ + ptrace32.o obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o obj-$(CONFIG_PPC_OF) += of_device.o @@ -33,7 +34,7 @@ extra-y += vmlinux.lds obj-y += process.o init_task.o time.o \ prom.o systbl.o traps.o obj-$(CONFIG_PPC32) += entry_32.o idle_6xx.o setup_32.o misc_32.o -obj-$(CONFIG_PPC64) += setup_64.o misc_64.o +obj-$(CONFIG_PPC64) += misc_64.o obj-$(CONFIG_PPC_OF) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -972,6 +972,17 @@ static void __init emergency_stack_init( limit)) + PAGE_SIZE; } +extern unsigned long *sys_call_table; +extern unsigned long sys_ni_syscall; +#ifdef CONFIG_PPC_MERGE +#define SYS_CALL_ENTRY64(i) sys_call_table[(i) * 2] +#define SYS_CALL_ENTRY32(i) sys_call_table[(i) * 2 + 1] +#else +extern unsigned long *sys_call_table32; +#define SYS_CALL_ENTRY64(i) sys_call_table[(i)] +#define SYS_CALL_ENTRY32(i) sys_call_table32[(i)] +#endif + /* * Called from setup_arch to initialize the bitmap of available * syscalls in the systemcfg page @@ -979,17 +990,14 @@ static void __init emergency_stack_init( void __init setup_syscall_map(void) { unsigned int i, count64 = 0, count32 = 0; - extern unsigned long *sys_call_table; - extern unsigned long sys_ni_syscall; - for (i = 0; i < __NR_syscalls; i++) { - if (sys_call_table[i*2] != sys_ni_syscall) { + if 
(SYS_CALL_ENTRY64(i) != sys_ni_syscall) { count64++; systemcfg->syscall_map_64[i >> 5] |= 0x80000000UL >> (i & 0x1f); } - if (sys_call_table[i*2+1] != sys_ni_syscall) { + if (SYS_CALL_ENTRY32(i) != sys_ni_syscall) { count32++; systemcfg->syscall_map_32[i >> 5] |= 0x80000000UL >> (i & 0x1f); diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -7,7 +7,7 @@ ifneq ($(CONFIG_PPC_MERGE),y) EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds -obj-y := setup.o entry.o misc.o prom.o +obj-y := entry.o misc.o prom.o endif diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c deleted file mode 100644 --- a/arch/ppc64/kernel/setup.c +++ /dev/null @@ -1,1307 +0,0 @@ -/* - * - * Common boot and setup code. - * - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* - * Here are some early debugging facilities. 
You can enable one - * but your kernel will not boot on anything else if you do so - */ - -/* This one is for use on LPAR machines that support an HVC console - * on vterm 0 - */ -extern void udbg_init_debug_lpar(void); -/* This one is for use on Apple G5 machines - */ -extern void udbg_init_pmac_realmode(void); -/* That's RTAS panel debug */ -extern void call_rtas_display_status_delay(unsigned char c); -/* Here's maple real mode debug */ -extern void udbg_init_maple_realmode(void); - -#define EARLY_DEBUG_INIT() do {} while(0) - -#if 0 -#define EARLY_DEBUG_INIT() udbg_init_debug_lpar() -#define EARLY_DEBUG_INIT() udbg_init_maple_realmode() -#define EARLY_DEBUG_INIT() udbg_init_pmac_realmode() -#define EARLY_DEBUG_INIT() \ - do { udbg_putc = call_rtas_display_status_delay; } while(0) -#endif - -/* extern void *stab; */ -extern unsigned long klimit; - -extern void mm_init_ppc64(void); -extern void stab_initialize(unsigned long stab); -extern void htab_initialize(void); -extern void early_init_devtree(void *flat_dt); -extern void unflatten_device_tree(void); - -extern void smp_release_cpus(void); - -int have_of = 1; -int boot_cpuid = 0; -int boot_cpuid_phys = 0; -dev_t boot_dev; -u64 ppc64_pft_size; - -struct ppc64_caches ppc64_caches; -EXPORT_SYMBOL_GPL(ppc64_caches); - -/* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. 
- */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - -/* The main machine-dep calls structure - */ -struct machdep_calls ppc_md; -EXPORT_SYMBOL(ppc_md); - -#ifdef CONFIG_MAGIC_SYSRQ -unsigned long SYSRQ_KEY; -#endif /* CONFIG_MAGIC_SYSRQ */ - - -static int ppc64_panic_event(struct notifier_block *, unsigned long, void *); -static struct notifier_block ppc64_panic_block = { - .notifier_call = ppc64_panic_event, - .priority = INT_MIN /* may not return; must be done last */ -}; - -/* - * Perhaps we can put the pmac screen_info[] here - * on pmac as well so we don't need the ifdef's. - * Until we get multiple-console support in here - * that is. -- Cort - * Maybe tie it to serial consoles, since this is really what - * these processors use on existing boards. -- Dan - */ -struct screen_info screen_info = { - .orig_x = 0, - .orig_y = 25, - .orig_video_cols = 80, - .orig_video_lines = 25, - .orig_video_isVGA = 1, - .orig_video_points = 16 -}; - -#ifdef CONFIG_SMP - -static int smt_enabled_cmdline; - -/* Look for ibm,smt-enabled OF option */ -static void check_smt_enabled(void) -{ - struct device_node *dn; - char *smt_option; - - /* Allow the command line to overrule the OF option */ - if (smt_enabled_cmdline) - return; - - dn = of_find_node_by_path("/options"); - - if (dn) { - smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL); - - if (smt_option) { - if (!strcmp(smt_option, "on")) - smt_enabled_at_boot = 1; - else if (!strcmp(smt_option, "off")) - smt_enabled_at_boot = 0; - } - } -} - -/* Look for smt-enabled= cmdline option */ -static int __init early_smt_enabled(char *p) -{ - smt_enabled_cmdline = 1; - - if (!p) - return 0; - - if (!strcmp(p, "on") || !strcmp(p, "1")) - smt_enabled_at_boot = 1; - else if (!strcmp(p, "off") || !strcmp(p, "0")) - smt_enabled_at_boot = 0; - - return 0; -} -early_param("smt-enabled", early_smt_enabled); - -/** - * setup_cpu_maps - initialize the following cpu maps: - * cpu_possible_map - * cpu_present_map - * 
cpu_sibling_map - * - * Having the possible map set up early allows us to restrict allocations - * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. - * - * We do not initialize the online map here; cpus set their own bits in - * cpu_online_map as they come up. - * - * This function is valid only for Open Firmware systems. finish_device_tree - * must be called before using this. - * - * While we're here, we may as well set the "physical" cpu ids in the paca. - */ -static void __init setup_cpu_maps(void) -{ - struct device_node *dn = NULL; - int cpu = 0; - int swap_cpuid = 0; - - check_smt_enabled(); - - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { - u32 *intserv; - int j, len = sizeof(u32), nthreads; - - intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s", - &len); - if (!intserv) - intserv = (u32 *)get_property(dn, "reg", NULL); - - nthreads = len / sizeof(u32); - - for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { - cpu_set(cpu, cpu_present_map); - set_hard_smp_processor_id(cpu, intserv[j]); - - if (intserv[j] == boot_cpuid_phys) - swap_cpuid = cpu; - cpu_set(cpu, cpu_possible_map); - cpu++; - } - } - - /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that - * boot cpu is logical 0. - */ - if (boot_cpuid_phys != get_hard_smp_processor_id(0)) { - u32 tmp; - tmp = get_hard_smp_processor_id(0); - set_hard_smp_processor_id(0, boot_cpuid_phys); - set_hard_smp_processor_id(swap_cpuid, tmp); - } - - /* - * On pSeries LPAR, we need to know how many cpus - * could possibly be added to this partition. 
- */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && - (dn = of_find_node_by_path("/rtas"))) { - int num_addr_cell, num_size_cell, maxcpus; - unsigned int *ireg; - - num_addr_cell = prom_n_addr_cells(dn); - num_size_cell = prom_n_size_cells(dn); - - ireg = (unsigned int *) - get_property(dn, "ibm,lrdr-capacity", NULL); - - if (!ireg) - goto out; - - maxcpus = ireg[num_addr_cell + num_size_cell]; - - /* Double maxcpus for processors which have SMT capability */ - if (cpu_has_feature(CPU_FTR_SMT)) - maxcpus *= 2; - - if (maxcpus > NR_CPUS) { - printk(KERN_WARNING - "Partition configured for %d cpus, " - "operating system maximum is %d.\n", - maxcpus, NR_CPUS); - maxcpus = NR_CPUS; - } else - printk(KERN_INFO "Partition configured for %d cpus.\n", - maxcpus); - - for (cpu = 0; cpu < maxcpus; cpu++) - cpu_set(cpu, cpu_possible_map); - out: - of_node_put(dn); - } - - /* - * Do the sibling map; assume only two threads per processor. - */ - for_each_cpu(cpu) { - cpu_set(cpu, cpu_sibling_map[cpu]); - if (cpu_has_feature(CPU_FTR_SMT)) - cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); - } - - systemcfg->processorCount = num_present_cpus(); -} -#endif /* CONFIG_SMP */ - -extern struct machdep_calls pSeries_md; -extern struct machdep_calls pmac_md; -extern struct machdep_calls maple_md; -extern struct machdep_calls bpa_md; -extern struct machdep_calls iseries_md; - -/* Ultimately, stuff them in an elf section like initcalls... */ -static struct machdep_calls __initdata *machines[] = { -#ifdef CONFIG_PPC_PSERIES - &pSeries_md, -#endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_PMAC - &pmac_md, -#endif /* CONFIG_PPC_PMAC */ -#ifdef CONFIG_PPC_MAPLE - &maple_md, -#endif /* CONFIG_PPC_MAPLE */ -#ifdef CONFIG_PPC_BPA - &bpa_md, -#endif -#ifdef CONFIG_PPC_ISERIES - &iseries_md, -#endif - NULL -}; - -/* - * Early initialization entry point. This is called by head.S - * with MMU translation disabled. 
We rely on the "feature" of - * the CPU that ignores the top 2 bits of the address in real - * mode so we can access kernel globals normally provided we - * only toy with things in the RMO region. From here, we do - * some early parsing of the device-tree to setup out LMB - * data structures, and allocate & initialize the hash table - * and segment tables so we can start running with translation - * enabled. - * - * It is this function which will call the probe() callback of - * the various platform types and copy the matching one to the - * global ppc_md structure. Your platform can eventually do - * some very early initializations from the probe() routine, but - * this is not recommended, be very careful as, for example, the - * device-tree is not accessible via normal means at this point. - */ - -void __init early_setup(unsigned long dt_ptr) -{ - struct paca_struct *lpaca = get_paca(); - static struct machdep_calls **mach; - - /* - * Enable early debugging if any specified (see top of - * this file) - */ - EARLY_DEBUG_INIT(); - - DBG(" -> early_setup()\n"); - - /* - * Fill the default DBG level (do we want to keep - * that old mecanism around forever ?) - */ - ppcdbg_initialize(); - - /* - * Do early initializations using the flattened device - * tree, like retreiving the physical memory map or - * calculating/retreiving the hash table size - */ - early_init_devtree(__va(dt_ptr)); - - /* - * Iterate all ppc_md structures until we find the proper - * one for the current machine type - */ - DBG("Probing machine type for platform %x...\n", - systemcfg->platform); - - for (mach = machines; *mach; mach++) { - if ((*mach)->probe(systemcfg->platform)) - break; - } - /* What can we do if we didn't find ? 
*/ - if (*mach == NULL) { - DBG("No suitable machine found !\n"); - for (;;); - } - ppc_md = **mach; - - DBG("Found, Initializing memory management...\n"); - - /* - * Initialize stab / SLB management - */ - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - stab_initialize(lpaca->stab_real); - - /* - * Initialize the MMU Hash table and create the linear mapping - * of memory - */ - htab_initialize(); - - DBG(" <- early_setup()\n"); -} - - -/* - * Initialize some remaining members of the ppc64_caches and systemcfg structures - * (at least until we get rid of them completely). This is mostly some - * cache informations about the CPU that will be used by cache flush - * routines and/or provided to userland - */ -static void __init initialize_cache_info(void) -{ - struct device_node *np; - unsigned long num_cpus = 0; - - DBG(" -> initialize_cache_info()\n"); - - for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) { - num_cpus += 1; - - /* We're assuming *all* of the CPUs have the same - * d-cache and i-cache sizes... -Peter - */ - - if ( num_cpus == 1 ) { - u32 *sizep, *lsizep; - u32 size, lsize; - const char *dc, *ic; - - /* Then read cache informations */ - if (systemcfg->platform == PLATFORM_POWERMAC) { - dc = "d-cache-block-size"; - ic = "i-cache-block-size"; - } else { - dc = "d-cache-line-size"; - ic = "i-cache-line-size"; - } - - size = 0; - lsize = cur_cpu_spec->dcache_bsize; - sizep = (u32 *)get_property(np, "d-cache-size", NULL); - if (sizep != NULL) - size = *sizep; - lsizep = (u32 *) get_property(np, dc, NULL); - if (lsizep != NULL) - lsize = *lsizep; - if (sizep == 0 || lsizep == 0) - DBG("Argh, can't find dcache properties ! 
" - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - systemcfg->dcache_size = ppc64_caches.dsize = size; - systemcfg->dcache_line_size = - ppc64_caches.dline_size = lsize; - ppc64_caches.log_dline_size = __ilog2(lsize); - ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; - - size = 0; - lsize = cur_cpu_spec->icache_bsize; - sizep = (u32 *)get_property(np, "i-cache-size", NULL); - if (sizep != NULL) - size = *sizep; - lsizep = (u32 *)get_property(np, ic, NULL); - if (lsizep != NULL) - lsize = *lsizep; - if (sizep == 0 || lsizep == 0) - DBG("Argh, can't find icache properties ! " - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - systemcfg->icache_size = ppc64_caches.isize = size; - systemcfg->icache_line_size = - ppc64_caches.iline_size = lsize; - ppc64_caches.log_iline_size = __ilog2(lsize); - ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; - } - } - - /* Add an eye catcher and the systemcfg layout version number */ - strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); - systemcfg->version.major = SYSTEMCFG_MAJOR; - systemcfg->version.minor = SYSTEMCFG_MINOR; - systemcfg->processor = mfspr(SPRN_PVR); - - DBG(" <- initialize_cache_info()\n"); -} - -static void __init check_for_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - u64 *prop; - - DBG(" -> check_for_initrd()\n"); - - if (of_chosen) { - prop = (u64 *)get_property(of_chosen, - "linux,initrd-start", NULL); - if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); - prop = (u64 *)get_property(of_chosen, - "linux,initrd-end", NULL); - if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); - initrd_below_start_ok = 1; - } else - initrd_start = 0; - } - } - - /* If we were passed an initrd, set the ROOT_DEV properly if the values - * look sensible. If not, clear initrd reference. 
- */ - if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && - initrd_end > initrd_start) - ROOT_DEV = Root_RAM0; - else - initrd_start = initrd_end = 0; - - if (initrd_start) - printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); - - DBG(" <- check_for_initrd()\n"); -#endif /* CONFIG_BLK_DEV_INITRD */ -} - -/* - * Do some initial setup of the system. The parameters are those which - * were passed in from the bootloader. - */ -void __init setup_system(void) -{ - DBG(" -> setup_system()\n"); - - /* - * Unflatten the device-tree passed by prom_init or kexec - */ - unflatten_device_tree(); - - /* - * Fill the ppc64_caches & systemcfg structures with informations - * retreived from the device-tree. Need to be called before - * finish_device_tree() since the later requires some of the - * informations filled up here to properly parse the interrupt - * tree. - * It also sets up the cache line sizes which allows to call - * routines like flush_icache_range (used by the hash init - * later on). - */ - initialize_cache_info(); - -#ifdef CONFIG_PPC_RTAS - /* - * Initialize RTAS if available - */ - rtas_initialize(); -#endif /* CONFIG_PPC_RTAS */ - - /* - * Check if we have an initrd provided via the device-tree - */ - check_for_initrd(); - - /* - * Do some platform specific early initializations, that includes - * setting up the hash table pointers. 
It also sets up some interrupt-mapping - * related options that will be used by finish_device_tree() - */ - ppc_md.init_early(); - - /* - * "Finish" the device-tree, that is do the actual parsing of - * some of the properties like the interrupt map - */ - finish_device_tree(); - -#ifdef CONFIG_BOOTX_TEXT - init_boot_display(); -#endif - - /* - * Initialize xmon - */ -#ifdef CONFIG_XMON_DEFAULT - xmon_init(1); -#endif - /* - * Register early console - */ - register_early_udbg_console(); - - /* Save unparsed command line copy for /proc/cmdline */ - strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); - - parse_early_param(); - -#ifdef CONFIG_SMP - /* - * iSeries has already initialized the cpu maps at this point. - */ - setup_cpu_maps(); - - /* Release secondary cpus out of their spinloops at 0x60 now that - * we can map physical -> logical CPU ids - */ - smp_release_cpus(); -#endif - - printk("Starting Linux PPC64 %s\n", system_utsname.version); - - printk("-----------------------------------------------------\n"); - printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); - printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch); - printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); - printk("systemcfg = 0x%p\n", systemcfg); - printk("systemcfg->platform = 0x%x\n", systemcfg->platform); - printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount); - printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize); - printk("ppc64_caches.dcache_line_size = 0x%x\n", - ppc64_caches.dline_size); - printk("ppc64_caches.icache_line_size = 0x%x\n", - ppc64_caches.iline_size); - printk("htab_address = 0x%p\n", htab_address); - printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); - printk("-----------------------------------------------------\n"); - - mm_init_ppc64(); - - DBG(" <- setup_system()\n"); -} - -/* also used by kexec */ -void machine_shutdown(void) -{ - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); -} - 
-void machine_restart(char *cmd) -{ - machine_shutdown(); - ppc_md.restart(cmd); -#ifdef CONFIG_SMP - smp_send_stop(); -#endif - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; -} - -void machine_power_off(void) -{ - machine_shutdown(); - ppc_md.power_off(); -#ifdef CONFIG_SMP - smp_send_stop(); -#endif - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; -} -/* Used by the G5 thermal driver */ -EXPORT_SYMBOL_GPL(machine_power_off); - -void machine_halt(void) -{ - machine_shutdown(); - ppc_md.halt(); -#ifdef CONFIG_SMP - smp_send_stop(); -#endif - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; -} - -static int ppc64_panic_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - ppc_md.panic((char *)ptr); /* May not return */ - return NOTIFY_DONE; -} - - -#ifdef CONFIG_SMP -DEFINE_PER_CPU(unsigned int, pvr); -#endif - -static int show_cpuinfo(struct seq_file *m, void *v) -{ - unsigned long cpu_id = (unsigned long)v - 1; - unsigned int pvr; - unsigned short maj; - unsigned short min; - - if (cpu_id == NR_CPUS) { - seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); - - if (ppc_md.show_cpuinfo != NULL) - ppc_md.show_cpuinfo(m); - - return 0; - } - - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. 
*/ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - -#ifdef CONFIG_SMP - pvr = per_cpu(pvr, cpu_id); -#else - pvr = mfspr(SPRN_PVR); -#endif - maj = (pvr >> 8) & 0xFF; - min = pvr & 0xFF; - - seq_printf(m, "processor\t: %lu\n", cpu_id); - seq_printf(m, "cpu\t\t: "); - - if (cur_cpu_spec->pvr_mask) - seq_printf(m, "%s", cur_cpu_spec->cpu_name); - else - seq_printf(m, "unknown (%08x)", pvr); - -#ifdef CONFIG_ALTIVEC - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - seq_printf(m, ", altivec supported"); -#endif /* CONFIG_ALTIVEC */ - - seq_printf(m, "\n"); - - /* - * Assume here that all clock rates are the same in a - * smp system. -- Cort - */ - seq_printf(m, "clock\t\t: %lu.%06luMHz\n", ppc_proc_freq / 1000000, - ppc_proc_freq % 1000000); - - seq_printf(m, "revision\t: %hd.%hd\n\n", maj, min); - - preempt_enable(); - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos <= NR_CPUS ? (void *)((*pos)+1) : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - -/* - * These three variables are used to save values passed to us by prom_init() - * via the device tree. The TCE variables are needed because with a memory_limit - * in force we may need to explicitly map the TCE are at the top of RAM. - */ -unsigned long memory_limit; -unsigned long tce_alloc_start; -unsigned long tce_alloc_end; - -#ifdef CONFIG_PPC_ISERIES -/* - * On iSeries we just parse the mem=X option from the command line. 
- * On pSeries it's a bit more complicated, see prom_init_mem() - */ -static int __init early_parsemem(char *p) -{ - if (!p) - return 0; - - memory_limit = ALIGN(memparse(p, &p), PAGE_SIZE); - - return 0; -} -early_param("mem", early_parsemem); -#endif /* CONFIG_PPC_ISERIES */ - -#ifdef CONFIG_PPC_MULTIPLATFORM -static int __init set_preferred_console(void) -{ - struct device_node *prom_stdout = NULL; - char *name; - u32 *spd; - int offset = 0; - - DBG(" -> set_preferred_console()\n"); - - /* The user has requested a console so this is already set up. */ - if (strstr(saved_command_line, "console=")) { - DBG(" console was specified !\n"); - return -EBUSY; - } - - if (!of_chosen) { - DBG(" of_chosen is NULL !\n"); - return -ENODEV; - } - /* We are getting a weird phandle from OF ... */ - /* ... So use the full path instead */ - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); - if (name == NULL) { - DBG(" no linux,stdout-path !\n"); - return -ENODEV; - } - prom_stdout = of_find_node_by_path(name); - if (!prom_stdout) { - DBG(" can't find stdout package %s !\n", name); - return -ENODEV; - } - DBG("stdout is %s\n", prom_stdout->full_name); - - name = (char *)get_property(prom_stdout, "name", NULL); - if (!name) { - DBG(" stdout package has no name !\n"); - goto not_found; - } - spd = (u32 *)get_property(prom_stdout, "current-speed", NULL); - - if (0) - ; -#ifdef CONFIG_SERIAL_8250_CONSOLE - else if (strcmp(name, "serial") == 0) { - int i; - u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i); - if (i > 8) { - switch (reg[1]) { - case 0x3f8: - offset = 0; - break; - case 0x2f8: - offset = 1; - break; - case 0x898: - offset = 2; - break; - case 0x890: - offset = 3; - break; - default: - /* We dont recognise the serial port */ - goto not_found; - } - } - } -#endif /* CONFIG_SERIAL_8250_CONSOLE */ -#ifdef CONFIG_PPC_PSERIES - else if (strcmp(name, "vty") == 0) { - u32 *reg = (u32 *)get_property(prom_stdout, "reg", NULL); - char *compat = (char 
*)get_property(prom_stdout, "compatible", NULL); - - if (reg && compat && (strcmp(compat, "hvterm-protocol") == 0)) { - /* Host Virtual Serial Interface */ - int offset; - switch (reg[0]) { - case 0x30000000: - offset = 0; - break; - case 0x30000001: - offset = 1; - break; - default: - goto not_found; - } - of_node_put(prom_stdout); - DBG("Found hvsi console at offset %d\n", offset); - return add_preferred_console("hvsi", offset, NULL); - } else { - /* pSeries LPAR virtual console */ - of_node_put(prom_stdout); - DBG("Found hvc console\n"); - return add_preferred_console("hvc", 0, NULL); - } - } -#endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE - else if (strcmp(name, "ch-a") == 0) - offset = 0; - else if (strcmp(name, "ch-b") == 0) - offset = 1; -#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */ - else - goto not_found; - of_node_put(prom_stdout); - - DBG("Found serial console at ttyS%d\n", offset); - - if (spd) { - static char __initdata opt[16]; - sprintf(opt, "%d", *spd); - return add_preferred_console("ttyS", offset, opt); - } else - return add_preferred_console("ttyS", offset, NULL); - - not_found: - DBG("No preferred console found !\n"); - of_node_put(prom_stdout); - return -ENODEV; -} -console_initcall(set_preferred_console); -#endif /* CONFIG_PPC_MULTIPLATFORM */ - -#ifdef CONFIG_IRQSTACKS -static void __init irqstack_early_init(void) -{ - unsigned int i; - - /* - * interrupt stacks must be under 256MB, we cannot afford to take - * SLB misses on them. - */ - for_each_cpu(i) { - softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); - hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); - } -} -#else -#define irqstack_early_init() -#endif - -/* - * Stack space used when we detect a bad kernel stack pointer, and - * early in SMP boots before relocation is enabled. 
- */ -static void __init emergency_stack_init(void) -{ - unsigned long limit; - unsigned int i; - - /* - * Emergency stacks must be under 256MB, we cannot afford to take - * SLB misses on them. The ABI also requires them to be 128-byte - * aligned. - * - * Since we use these as temporary stacks during secondary CPU - * bringup, we need to get at them in real mode. This means they - * must also be within the RMO region. - */ - limit = min(0x10000000UL, lmb.rmo_size); - - for_each_cpu(i) - paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128, - limit)) + PAGE_SIZE; -} - -/* - * Called from setup_arch to initialize the bitmap of available - * syscalls in the systemcfg page - */ -void __init setup_syscall_map(void) -{ - unsigned int i, count64 = 0, count32 = 0; - extern unsigned long *sys_call_table; - extern unsigned long *sys_call_table32; - extern unsigned long sys_ni_syscall; - - - for (i = 0; i < __NR_syscalls; i++) { - if (sys_call_table[i] == sys_ni_syscall) - continue; - count64++; - systemcfg->syscall_map_64[i >> 5] |= 0x80000000UL >> (i & 0x1f); - } - for (i = 0; i < __NR_syscalls; i++) { - if (sys_call_table32[i] == sys_ni_syscall) - continue; - count32++; - systemcfg->syscall_map_32[i >> 5] |= 0x80000000UL >> (i & 0x1f); - } - printk(KERN_INFO "Syscall map setup, %d 32 bits and %d 64 bits syscalls\n", - count32, count64); -} - -/* - * Called into from start_kernel, after lock_kernel has been called. - * Initializes bootmem, which is unsed to manage page allocation until - * mem_init is called. - */ -void __init setup_arch(char **cmdline_p) -{ - extern void do_init_bootmem(void); - - ppc64_boot_msg(0x12, "Setup Arch"); - - *cmdline_p = cmd_line; - - /* - * Set cache line size based on type of cpu as a default. - * Systems with OF can look in the properties on the cpu node(s) - * for a possibly more accurate value. 
- */ - dcache_bsize = ppc64_caches.dline_size; - icache_bsize = ppc64_caches.iline_size; - - /* reboot on panic */ - panic_timeout = 180; - - if (ppc_md.panic) - notifier_chain_register(&panic_notifier_list, &ppc64_panic_block); - - init_mm.start_code = PAGE_OFFSET; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; - - irqstack_early_init(); - emergency_stack_init(); - - stabs_alloc(); - - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - sparse_init(); - - /* initialize the syscall map in systemcfg */ - setup_syscall_map(); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - ppc_md.setup_arch(); - - /* Use the default idle loop if the platform hasn't provided one. */ - if (NULL == ppc_md.idle_loop) { - ppc_md.idle_loop = default_idle; - printk(KERN_INFO "Using default idle loop\n"); - } - - paging_init(); - ppc64_boot_msg(0x15, "Setup Done"); -} - - -/* ToDo: do something useful if ppc_md is not yet setup. */ -#define PPC64_LINUX_FUNCTION 0x0f000000 -#define PPC64_IPL_MESSAGE 0xc0000000 -#define PPC64_TERM_MESSAGE 0xb0000000 - -static void ppc64_do_msg(unsigned int src, const char *msg) -{ - if (ppc_md.progress) { - char buf[128]; - - sprintf(buf, "%08X\n", src); - ppc_md.progress(buf, 0); - snprintf(buf, 128, "%s", msg); - ppc_md.progress(buf, 0); - } -} - -/* Print a boot progress message. */ -void ppc64_boot_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); - printk("[boot]%04x %s\n", src, msg); -} - -/* Print a termination message (print only -- does not stop the kernel) */ -void ppc64_terminate_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_TERM_MESSAGE|src, msg); - printk("[terminate]%04x %s\n", src, msg); -} - -#ifndef CONFIG_PPC_ISERIES -/* - * This function can be used by platforms to "find" legacy serial ports. 
- * It works for "serial" nodes under an "isa" node, and will try to - * respect the "ibm,aix-loc" property if any. It works with up to 8 - * ports. - */ - -#define MAX_LEGACY_SERIAL_PORTS 8 -static struct plat_serial8250_port serial_ports[MAX_LEGACY_SERIAL_PORTS+1]; -static unsigned int old_serial_count; - -void __init generic_find_legacy_serial_ports(u64 *physport, - unsigned int *default_speed) -{ - struct device_node *np; - u32 *sizeprop; - - struct isa_reg_property { - u32 space; - u32 address; - u32 size; - }; - struct pci_reg_property { - struct pci_address addr; - u32 size_hi; - u32 size_lo; - }; - - DBG(" -> generic_find_legacy_serial_port()\n"); - - *physport = 0; - if (default_speed) - *default_speed = 0; - - np = of_find_node_by_path("/"); - if (!np) - return; - - /* First fill our array */ - for (np = NULL; (np = of_find_node_by_type(np, "serial"));) { - struct device_node *isa, *pci; - struct isa_reg_property *reg; - unsigned long phys_size, addr_size, io_base; - u32 *rangesp; - u32 *interrupts, *clk, *spd; - char *typep; - int index, rlen, rentsize; - - /* Ok, first check if it's under an "isa" parent */ - isa = of_get_parent(np); - if (!isa || strcmp(isa->name, "isa")) { - DBG("%s: no isa parent found\n", np->full_name); - continue; - } - - /* Now look for an "ibm,aix-loc" property that gives us ordering - * if any... - */ - typep = (char *)get_property(np, "ibm,aix-loc", NULL); - - /* Get the ISA port number */ - reg = (struct isa_reg_property *)get_property(np, "reg", NULL); - if (reg == NULL) - goto next_port; - /* We assume the interrupt number isn't translated ... */ - interrupts = (u32 *)get_property(np, "interrupts", NULL); - /* get clock freq. 
if present */ - clk = (u32 *)get_property(np, "clock-frequency", NULL); - /* get default speed if present */ - spd = (u32 *)get_property(np, "current-speed", NULL); - /* Default to locate at end of array */ - index = old_serial_count; /* end of the array by default */ - - /* If we have a location index, then use it */ - if (typep && *typep == 'S') { - index = simple_strtol(typep+1, NULL, 0) - 1; - /* if index is out of range, use end of array instead */ - if (index >= MAX_LEGACY_SERIAL_PORTS) - index = old_serial_count; - /* if our index is still out of range, that mean that - * array is full, we could scan for a free slot but that - * make little sense to bother, just skip the port - */ - if (index >= MAX_LEGACY_SERIAL_PORTS) - goto next_port; - if (index >= old_serial_count) - old_serial_count = index + 1; - /* Check if there is a port who already claimed our slot */ - if (serial_ports[index].iobase != 0) { - /* if we still have some room, move it, else override */ - if (old_serial_count < MAX_LEGACY_SERIAL_PORTS) { - DBG("Moved legacy port %d -> %d\n", index, - old_serial_count); - serial_ports[old_serial_count++] = - serial_ports[index]; - } else { - DBG("Replacing legacy port %d\n", index); - } - } - } - if (index >= MAX_LEGACY_SERIAL_PORTS) - goto next_port; - if (index >= old_serial_count) - old_serial_count = index + 1; - - /* Now fill the entry */ - memset(&serial_ports[index], 0, sizeof(struct plat_serial8250_port)); - serial_ports[index].uartclk = clk ? *clk : BASE_BAUD * 16; - serial_ports[index].iobase = reg->address; - serial_ports[index].irq = interrupts ? 
interrupts[0] : 0; - serial_ports[index].flags = ASYNC_BOOT_AUTOCONF; - - DBG("Added legacy port, index: %d, port: %x, irq: %d, clk: %d\n", - index, - serial_ports[index].iobase, - serial_ports[index].irq, - serial_ports[index].uartclk); - - /* Get phys address of IO reg for port 1 */ - if (index != 0) - goto next_port; - - pci = of_get_parent(isa); - if (!pci) { - DBG("%s: no pci parent found\n", np->full_name); - goto next_port; - } - - rangesp = (u32 *)get_property(pci, "ranges", &rlen); - if (rangesp == NULL) { - of_node_put(pci); - goto next_port; - } - rlen /= 4; - - /* we need the #size-cells of the PCI bridge node itself */ - phys_size = 1; - sizeprop = (u32 *)get_property(pci, "#size-cells", NULL); - if (sizeprop != NULL) - phys_size = *sizeprop; - /* we need the parent #addr-cells */ - addr_size = prom_n_addr_cells(pci); - rentsize = 3 + addr_size + phys_size; - io_base = 0; - for (;rlen >= rentsize; rlen -= rentsize,rangesp += rentsize) { - if (((rangesp[0] >> 24) & 0x3) != 1) - continue; /* not IO space */ - io_base = rangesp[3]; - if (addr_size == 2) - io_base = (io_base << 32) | rangesp[4]; - } - if (io_base != 0) { - *physport = io_base + reg->address; - if (default_speed && spd) - *default_speed = *spd; - } - of_node_put(pci); - next_port: - of_node_put(isa); - } - - DBG(" <- generic_find_legacy_serial_port()\n"); -} - -static struct platform_device serial_device = { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, - .dev = { - .platform_data = serial_ports, - }, -}; - -static int __init serial_dev_init(void) -{ - return platform_device_register(&serial_device); -} -arch_initcall(serial_dev_init); - -#endif /* CONFIG_PPC_ISERIES */ - -int check_legacy_ioport(unsigned long base_port) -{ - if (ppc_md.check_legacy_ioport == NULL) - return 0; - return ppc_md.check_legacy_ioport(base_port); -} -EXPORT_SYMBOL(check_legacy_ioport); - -#ifdef CONFIG_XMON -static int __init early_xmon(char *p) -{ - /* ensure xmon is enabled */ - if (p) { - if 
(strncmp(p, "on", 2) == 0) - xmon_init(1); - if (strncmp(p, "off", 3) == 0) - xmon_init(0); - if (strncmp(p, "early", 5) != 0) - return 0; - } - xmon_init(1); - debugger(NULL); - - return 0; -} -early_param("xmon", early_xmon); -#endif - -void cpu_die(void) -{ - if (ppc_md.cpu_die) - ppc_md.cpu_die(); -} From mniyer at us.ibm.com Wed Oct 26 04:03:03 2005 From: mniyer at us.ibm.com (Murali N Iyer) Date: Tue, 25 Oct 2005 13:03:03 -0500 Subject: Murali N Iyer/Rochester/IBM is out of the office. Message-ID: I will be out of the office starting 10/25/2005 and will not return until 10/28/2005. I will respond to your message(s) when I return. If you need immediate assistance you may contact Alice Leisch at (507) 253-6355 or aleisch at us.ibm.com From geoffrey.levand at am.sony.com Wed Oct 26 08:34:49 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Tue, 25 Oct 2005 15:34:49 -0700 Subject: kgdb for ppc64 now available In-Reply-To: <1128032514.6102.74.camel@gaston> References: <1128032514.6102.74.camel@gaston> Message-ID: <435EB309.2040003@am.sony.com> Benjamin Herrenschmidt wrote: > On Thu, 2005-09-29 at 10:22 -0700, Geoff Levand wrote: > >>For those interested, kgdb now supports ppc64 SMP. On powermac >>only an Ethernet connection is supported currently, but generic >>8250 UART PCI cards will be supported soon. > > > It should be fairly trivial to support the SCC of the powermacs. You can > access it with a serial port adapter from www.geethree.com, it's handy > as it also works with OF. > I am starting to look into this. I got the geethree hardware, but I see there is some code in pmac_zilog.c and some in udbg.c. Do both these use this port, or are they using different H/W? First step is to just get some char's out. How do I get the kernel configured to use this port? 
-Geoff From benh at kernel.crashing.org Wed Oct 26 08:32:14 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 26 Oct 2005 08:32:14 +1000 Subject: kgdb for ppc64 now available In-Reply-To: <435EB309.2040003@am.sony.com> References: <1128032514.6102.74.camel@gaston> <435EB309.2040003@am.sony.com> Message-ID: <1130279534.7919.132.camel@gaston> On Tue, 2005-10-25 at 15:34 -0700, Geoff Levand wrote: > Benjamin Herrenschmidt wrote: > > On Thu, 2005-09-29 at 10:22 -0700, Geoff Levand wrote: > > > >>For those interested, kgdb now supports ppc64 SMP. On powermac > >>only an Ethernet connection is supported currently, but generic > >>8250 UART PCI cards will be supported soon. > > > > > > It should be fairly trivial to support the SCC of the powermacs. You can > > access it with a serial port adapter from www.geethree.com, it's handy > > as it also works with OF. > > > > I am starting to look into this. I got the geethree hardware, but I see > there is some code in pmac_zilog.c and some in udbg.c. Do both these use this > port, or are they using different H/W? First step is to just get some char's out. > How do I get the kernel configured to use this port? They both use the same HW. udbg is "low level" stuff, pmac_zilog is a proper driver for it. The pmac kernel automatically enables udbg to the SCC if booted with "sccdbg" on the command line. Ben. From benh at kernel.crashing.org Wed Oct 26 10:21:28 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 26 Oct 2005 10:21:28 +1000 Subject: [PATCH] ppc64: Fix wrong register mapping in mpic driver Message-ID: <1130286089.7919.158.camel@gaston> The mpic interrupt controller driver (used on G5 and early pSeries among others) has a bug where it doesn't get the right virtual address for the timer registers. It causes the driver to poke at the MMIO space of whatever has been mapped just next to it (ouch !) when initializing and causes boot failures on some IBM machines. This fixes it. 
Please apply. Signed-off-by: Benjamin Herrenschmidt Index: linux-work/arch/ppc64/kernel/mpic.c =================================================================== --- linux-work.orig/arch/ppc64/kernel/mpic.c 2005-09-23 12:43:21.000000000 +1000 +++ linux-work/arch/ppc64/kernel/mpic.c 2005-10-26 10:21:53.000000000 +1000 @@ -506,8 +506,8 @@ mpic->senses_count = senses_count; /* Map the global registers */ - mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000); - mpic->tmregs = mpic->gregs + (MPIC_TIMER_BASE >> 2); + mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x2000); + mpic->tmregs = mpic->gregs + ((MPIC_TIMER_BASE - MPIC_GREG_BASE) >> 2); BUG_ON(mpic->gregs == NULL); /* Reset */ From geoffrey.levand at am.sony.com Wed Oct 26 11:59:58 2005 From: geoffrey.levand at am.sony.com (Geoff Levand) Date: Tue, 25 Oct 2005 18:59:58 -0700 Subject: kgdb for ppc64 now available In-Reply-To: <1130279534.7919.132.camel@gaston> References: <1128032514.6102.74.camel@gaston> <435EB309.2040003@am.sony.com> <1130279534.7919.132.camel@gaston> Message-ID: <435EE31E.9000904@am.sony.com> Benjamin Herrenschmidt wrote: > On Tue, 2005-10-25 at 15:34 -0700, Geoff Levand wrote: > >>Benjamin Herrenschmidt wrote: >> >>>On Thu, 2005-09-29 at 10:22 -0700, Geoff Levand wrote: >>> >>> >>>>For those interested, kgdb now supports ppc64 SMP. On powermac >>>>only an Ethernet connection is supported currently, but generic >>>>8250 UART PCI cards will be supported soon. >>> >>> >>>It should be fairly trivial to support the SCC of the powermacs. You can >>>access it with a serial port adapter from www.geethree.com, it's handy >>>as it also works with OF. >>> >> >>I am starting to look into this. I got the geethree hardware, but I see >>there is some code in pmac_zilog.c and some in udbg.c. Do both these use this >>port, or are they using different H/W? First step is to just get some char's out. >>How do I get the kernel configured to use this port? > > > They both use the same HW. 
udbg is "low level" stuff, pmac_zilog is a > proper driver for it. The pmac kernel automatically enables udbg to the > SCC if booted with "sccdbg" on the command line. > Ok, I see. Is that thing register compatible (or at least close) with a 8250/16550 UART? It seems the udbg.c code treats it that way, but then pmac_zilog.c seems to have all its own routines for i/o. I'd like to just hook it up to the existing kgdb 8250 driver code. -Geoff From benh at kernel.crashing.org Wed Oct 26 12:33:39 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Wed, 26 Oct 2005 12:33:39 +1000 Subject: kgdb for ppc64 now available In-Reply-To: <435EE31E.9000904@am.sony.com> References: <1128032514.6102.74.camel@gaston> <435EB309.2040003@am.sony.com> <1130279534.7919.132.camel@gaston> <435EE31E.9000904@am.sony.com> Message-ID: <1130294020.1691.1.camel@gaston> > Ok, I see. Is that thing register compatible (or at least close) with a > 8250/16550 UART? It seems the udbg.c code treats it that way, but then > pmac_zilog.c seems to have all its own routines for i/o. I'd like to just > hook it up to the existing kgdb 8250 driver code. No, it's not, it's a zilog 8530. (look at udbg_scc.c in recent kernels) Ben. From mikey at neuling.org Wed Oct 26 11:55:35 2005 From: mikey at neuling.org (Michael Neuling) Date: Wed, 26 Oct 2005 11:55:35 +1000 Subject: [PATCH 0/2] kexec tools: ppc64 direct blob load Message-ID: <20051026115535.514e248c.mikey@neuling.org> Hi, Below are a couple of patches to add the ability to directly load a device tree blob, say generated with DTC (http://ozlabs.org/people/dgibson/dtc/). 
Both patches are for the kexec user space code and need Sharada's patches to be applied first: http://ozlabs.org/pipermail/linuxppc64-dev/2005-September/005517.html Mikey From mikey at neuling.org Wed Oct 26 12:48:02 2005 From: mikey at neuling.org (Michael Neuling) Date: Wed, 26 Oct 2005 12:48:02 +1000 Subject: [PATCH 2/2] kexec tools: devicetreeblob option In-Reply-To: <20051026115202.f2acc73f.mikey@neuling.org> References: <20051026115202.f2acc73f.mikey@neuling.org> Message-ID: <20051026124802.6adbe2b5.mikey@neuling.org> This adds the device tree blob option to kexec. This option is useful if you want to generate your own device tree blobs. Also, you can now do something like: dtc -I fs /proc/device-tree -O dts -f > dt.dts dtc -I dts dt.dts -O dtb -f > dt.dtb kexec -l vmlinux --devicetreeblob=dt.dtb taskset 1 kexec -e And get similar results to just running kexec normally. Also, the --devicetreeblob option won't work with the --initrd option. 1. massaging includes (from memory) - removal of htab entries - ensure CPUs are ordered from 0 onwards - adding of reserve map entries (need to create fake last entry for the blob which kexec will patch after loading) Signed-off-by: Michael Neuling --- kexec-elf-ppc64.c | 30 +++++++++++++++++++++++++++++- kexec-ppc64.c | 1 + 2 files changed, 30 insertions(+), 1 deletion(-) Signed-off-by: Michael Neuling Index: kexec-tools-1.101/kexec/arch/ppc64/kexec-elf-ppc64.c =================================================================== --- kexec-tools-1.101.orig/kexec/arch/ppc64/kexec-elf-ppc64.c 2005-10-26 12:00:33.000000000 +1000 +++ kexec-tools-1.101/kexec/arch/ppc64/kexec-elf-ppc64.c 2005-10-26 12:00:44.000000000 +1000 @@ -73,16 +73,19 @@ const char *input_options; int command_line_len; const char *ramdisk; + const char *devicetreeblob; unsigned long *lp; int result; int opt; #define OPT_APPEND (OPT_ARCH_MAX+0) #define OPT_RAMDISK (OPT_ARCH_MAX+1) +#define OPT_DEVICETREEBLOB (OPT_ARCH_MAX+2) static const struct option options[]
= { KEXEC_ARCH_OPTIONS { "append", 1, NULL, OPT_APPEND }, { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "devicetreeblob", 1, NULL, OPT_DEVICETREEBLOB }, { 0, 0, NULL, 0 }, }; @@ -94,6 +97,7 @@ command_line = 0; input_options = 0; ramdisk = 0; + devicetreeblob = 0; while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { switch (opt) { @@ -111,6 +115,9 @@ case OPT_RAMDISK: ramdisk = optarg; break; + case OPT_DEVICETREEBLOB: + devicetreeblob = optarg; + break; } } @@ -156,6 +163,10 @@ /* Add a ram-disk to the current image */ if (ramdisk) { + if (devicetreeblob) { + fprintf(stderr, "Can't use ramdisk with device tree blob input\n"); + return -1; + } unsigned char *ramdisk_buf = NULL; off_t ramdisk_size = 0; unsigned long long ramdisk_addr; @@ -182,7 +193,24 @@ } memcpy(v2wrap_buf, purgatory, purgatory_size); v2wrap_size = purgatory_size; - create_flatten_tree(info, &v2wrap_buf, &v2wrap_size); + if (devicetreeblob) { + unsigned char *blob_buf = NULL; + off_t blob_size = 0; + unsigned char *tmp_buf = NULL; + + /* Grab device tree from buffer */ + blob_buf = slurp_file(devicetreeblob, &blob_size); + + /* Append to purgatory */ + tmp_buf = (unsigned char *) realloc(v2wrap_buf, v2wrap_size + blob_size); + v2wrap_buf = tmp_buf; + memcpy(v2wrap_buf+v2wrap_size, blob_buf, blob_size); + v2wrap_size += blob_size; + + } else { + /* create from fs2dt */ + create_flatten_tree(info, &v2wrap_buf, &v2wrap_size); + } add_buffer(info, v2wrap_buf, v2wrap_size, v2wrap_size, 0, 0, 0xFFFFFFFFFFFFFFFFUL, -1); Index: kexec-tools-1.101/kexec/arch/ppc64/kexec-ppc64.c =================================================================== --- kexec-tools-1.101.orig/kexec/arch/ppc64/kexec-ppc64.c 2005-10-26 12:00:33.000000000 +1000 +++ kexec-tools-1.101/kexec/arch/ppc64/kexec-ppc64.c 2005-10-26 12:00:40.000000000 +1000 @@ -468,6 +468,7 @@ void arch_usage(void) { + fprintf(stderr, " --devicetreeblob= Specify device tree blob file.\n"); } static struct { From mikey at neuling.org Wed Oct 
26 12:51:12 2005 From: mikey at neuling.org (Michael Neuling) Date: Wed, 26 Oct 2005 12:51:12 +1000 Subject: [PATCH 1/2] kexec tools: device tree blob reserve memory map entry cleanup In-Reply-To: <20051026115202.f2acc73f.mikey@neuling.org> References: <20051026115202.f2acc73f.mikey@neuling.org> Message-ID: <20051026125112.ceddab99.mikey@neuling.org> This patch cleans up how the reserve memory map entry for the device tree is modified. Shouldn't change any functionality. Signed-off-by: Michael Neuling --- kexec-elf-ppc64.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) Index: kexec-tools-1.101/kexec/arch/ppc64/kexec-elf-ppc64.c =================================================================== --- kexec-tools-1.101.orig/kexec/arch/ppc64/kexec-elf-ppc64.c 2005-10-24 14:18:38.000000000 +1000 +++ kexec-tools-1.101/kexec/arch/ppc64/kexec-elf-ppc64.c 2005-10-26 11:18:14.000000000 +1000 @@ -171,10 +171,7 @@ /* Add v2wrap to the current image */ unsigned char *v2wrap_buf = NULL; off_t v2wrap_size = 0; - unsigned int off_len; - unsigned char *seg_buf; - unsigned int rsvmap_len; - unsigned long long *ptr; + unsigned long long *rsvmap_ptr; struct bootblock *bb_ptr; unsigned int devtree_size; @@ -189,23 +186,20 @@ add_buffer(info, v2wrap_buf, v2wrap_size, v2wrap_size, 0, 0, 0xFFFFFFFFFFFFFFFFUL, -1); - /* patch reserve map address for flattened device-tree */ - base_addr = info->segment[(info->nr_segments)-1].mem; - seg_buf = (unsigned char *)info->segment[(info->nr_segments)-1].buf; - seg_buf = seg_buf + 0x100; /* offset to end of v2wrap */ - bb_ptr = (struct bootblock *)seg_buf; - rsvmap_len = bb_ptr->off_dt_struct - bb_ptr->off_mem_rsvmap; - devtree_size = bb_ptr->totalsize; - off_len = sizeof(struct bootblock); - off_len += 7; off_len &= ~7; - seg_buf = seg_buf + off_len; - off_len = rsvmap_len / (2 * sizeof(unsigned long long)); - - ptr = (unsigned long long *)seg_buf; - ptr = ptr + 2*(off_len-2); - *ptr = base_addr + 0x100; -
ptr++; - *ptr = (unsigned long long)devtree_size; + /* patch reserve map address for flattened device-tree + find last entry (both 0) in the reserve mem list. Assume DT + entry is before this one */ + bb_ptr = (struct bootblock *)(unsigned char *)info->segment[(info->nr_segments)-1].buf + 0x100; + + rsvmap_ptr = info->segment[(info->nr_segments)-1].buf + 0x100 + + bb_ptr->off_mem_rsvmap; + while (*rsvmap_ptr || *(rsvmap_ptr+1)){ + rsvmap_ptr += 2; + } + rsvmap_ptr -= 2; + *rsvmap_ptr = info->segment[(info->nr_segments)-1].mem + 0x100; + rsvmap_ptr++; + *rsvmap_ptr = (unsigned long long)bb_ptr->totalsize; unsigned int nr_segments; nr_segments = info->nr_segments; From dougg at torque.net Wed Oct 26 12:12:44 2005 From: dougg at torque.net (Douglas Gilbert) Date: Wed, 26 Oct 2005 12:12:44 +1000 Subject: [PATCH 0/3] ibmvscsis scsi target In-Reply-To: <20051017143644.GA9992@cs.umn.edu> References: <20051017143644.GA9992@cs.umn.edu> Message-ID: <435EE61C.8020404@torque.net> Dave Boutcher wrote: > James, > > Here's the ibmvscsis SCSI target submitted for inclusion in 2.4.15. > This driver meets a couple of akpm's criteria for worthiness, in that > it's actually been shipping for a while in a distro kernel, and (given > the posts when I broke compatibility) is being used. > > This version is basically the same as the recent RFC version I sent > out, with a few bug fixes. It addresses a comment from Anton about > using gratuitously small max_sectors limits, and has a few other > miscellaneous fixes. > > The only other significant comment generated by the RFC was from > Christoph, and requested that this work be combined with the sgtg work > that Mike Christie and Tomonori Fujita are working on. I definitely > will start contributing to that work, and will convert this driver to > their framework when it becomes complete. I would rather not keep > this driver out of mainline for the amount of time that may take.
Dave, While I'm partial to things that start with "sg...", I had problems finding that project until I tried "stgt". Reference to the Linux SCSI target project: http://developer.berlios.de/projects/stgt Doug Gilbert From galak at freescale.com Wed Oct 26 14:02:59 2005 From: galak at freescale.com (Kumar Gala) Date: Tue, 25 Oct 2005 23:02:59 -0500 (CDT) Subject: [PATCH] powerpc: Add support for Book-E timer config to generic_calibrate_decr Message-ID: We need to initialize some control SPRS for timers on Book-E before we start taking decrementer interrupts. Signed-off-by: Kumar K. Gala --- commit 34feb902f76428e507b305dd511496aa8d35d57e tree 192923184852696d4bcb55ae41b02f39b68e38d2 parent 4e031e5142a4034f03f8fe223b9e2b9f47a125d4 author Kumar K. Gala Tue, 25 Oct 2005 23:02:01 -0500 committer Kumar K. Gala Tue, 25 Oct 2005 23:02:01 -0500 arch/powerpc/kernel/time.c | 11 +++++++++++ 1 files changed, 11 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -610,6 +610,17 @@ void __init generic_calibrate_decr(void) ppc_proc_freq = *fp; } } +#ifdef CONFIG_BOOKE + /* Set the time base to zero */ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, 0); + + /* Clear any pending timer interrupts */ + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); + + /* Enable decrementer interrupt */ + mtspr(SPRN_TCR, TCR_DIE); +#endif if (!node_found) printk(KERN_ERR "WARNING: Estimating processor frequency " "(not found)\n"); From galak at freescale.com Wed Oct 26 14:57:33 2005 From: galak at freescale.com (Kumar Gala) Date: Tue, 25 Oct 2005 23:57:33 -0500 (CDT) Subject: [PATCH] powerpc: Some minor cleanups to setup_32.c Message-ID: * Removed of_show_percpuinfo and just report CPU frequency in generic show_cpuinfo code. * Killed OCP and PPC_SYS related code which doesn't belong in the merge tree Signed-off-by: Kumar K. 
Gala --- commit c77ae9103af0c3f77a298a7edd626da0675eddc5 tree 2e0286b5f502f31cc204fe2bf6c8bc4052bde123 parent 34feb902f76428e507b305dd511496aa8d35d57e author Kumar K. Gala Tue, 25 Oct 2005 23:56:17 -0500 committer Kumar K. Gala Tue, 25 Oct 2005 23:56:17 -0500 arch/powerpc/kernel/setup_32.c | 45 ++++++------------------------- arch/powerpc/platforms/powermac/setup.c | 3 -- 2 files changed, 8 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -39,15 +39,7 @@ #include #include #include -#include - -#define USES_PPC_SYS (defined(CONFIG_85xx) || defined(CONFIG_83xx) || \ - defined(CONFIG_MPC10X_BRIDGE) || defined(CONFIG_8260) || \ - defined(CONFIG_PPC_MPC52xx)) - -#if USES_PPC_SYS -#include -#endif +#include #if defined CONFIG_KGDB #include @@ -234,18 +226,19 @@ int show_cpuinfo(struct seq_file *m, voi } } + /* + * Assume here that all clock rates are the same in a + * smp system. -- Cort + */ + seq_printf(m, "clock\t\t: %lu.%06luMHz\n", ppc_proc_freq / 1000000, + ppc_proc_freq % 1000000); + seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", maj, min, PVR_VER(pvr), PVR_REV(pvr)); seq_printf(m, "bogomips\t: %lu.%02lu\n", lpj / (500000/HZ), (lpj / (5000/HZ)) % 100); -#if USES_PPC_SYS - if (cur_ppc_sys_spec->ppc_sys_name) - seq_printf(m, "chipset\t\t: %s\n", - cur_ppc_sys_spec->ppc_sys_name); -#endif - #ifdef CONFIG_SMP seq_printf(m, "\n"); #endif @@ -305,28 +298,6 @@ unsigned long __init early_init(unsigned } #ifdef CONFIG_PPC_OF -/* - * Assume here that all clock rates are the same in a - * smp system. 
-- Cort - */ -int -of_show_percpuinfo(struct seq_file *m, int i) -{ - struct device_node *cpu_node; - u32 *fp; - int s; - - cpu_node = find_type_devices("cpu"); - if (!cpu_node) - return 0; - for (s = 0; s < i && cpu_node->next; s++) - cpu_node = cpu_node->next; - fp = (u32 *)get_property(cpu_node, "clock-frequency", NULL); - if (fp) - seq_printf(m, "clock\t\t: %dMHz\n", *fp / 1000000); - return 0; -} - void __init intuit_machine_type(void) { diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -202,9 +202,6 @@ static void pmac_show_percpuinfo(struct return; } #endif /* CONFIG_CPU_FREQ_PMAC */ -#ifdef CONFIG_PPC32 - of_show_percpuinfo(m, i); -#endif } #ifndef CONFIG_ADB_CUDA From sfr at canb.auug.org.au Wed Oct 26 16:12:53 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Wed, 26 Oct 2005 16:12:53 +1000 Subject: [PATCH] ppc64: use the merged syscall table Message-ID: <20051026161253.2eb437c4.sfr@canb.auug.org.au> This allows us to also use entry_64.S and setup_64.c from the merged tree. 
Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/Makefile | 8 arch/ppc64/Makefile | 1 arch/ppc64/kernel/Makefile | 2 arch/ppc64/kernel/entry.S | 845 --------------------------- arch/ppc64/kernel/misc.S | 563 ------------------ arch/ppc64/kernel/setup.c | 1307 ------------------------------------------ 6 files changed, 7 insertions(+), 2719 deletions(-) delete mode 100644 arch/ppc64/kernel/entry.S delete mode 100644 arch/ppc64/kernel/setup.c -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ 67f19b6ed6aca7158adcd696fa35bc8f755907dd diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,7 +12,8 @@ endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ signal_32.o pmc.o -obj-$(CONFIG_PPC64) += binfmt_elf32.o sys_ppc32.o ptrace32.o +obj-$(CONFIG_PPC64) += binfmt_elf32.o sys_ppc32.o ptrace32.o \ + setup_64.o systbl.o obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o obj-$(CONFIG_PPC_OF) += of_device.o @@ -31,9 +32,9 @@ extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds obj-y += process.o init_task.o time.o \ - prom.o systbl.o traps.o + prom.o traps.o obj-$(CONFIG_PPC32) += entry_32.o idle_6xx.o setup_32.o misc_32.o -obj-$(CONFIG_PPC64) += setup_64.o misc_64.o +obj-$(CONFIG_PPC64) += misc_64.o obj-$(CONFIG_PPC_OF) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o @@ -47,6 +48,7 @@ else # stuff used from here for ARCH=ppc or ARCH=ppc64 obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o +extra-$(CONFIG_PPC64) += entry_64.o fpux-$(CONFIG_PPC32) += fpu.o extra-$(CONFIG_PPC_FPU) += $(fpux-y) diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -80,6 +80,7 @@ endif CFLAGS += $(call cc-option,-funit-at-a-time) head-y := arch/ppc64/kernel/head.o +head-y += arch/powerpc/kernel/entry_64.o libs-y += 
arch/ppc64/lib/ core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -7,7 +7,7 @@ ifneq ($(CONFIG_PPC_MERGE),y) EXTRA_CFLAGS += -mno-minimal-toc extra-y := head.o vmlinux.lds -obj-y := setup.o entry.o misc.o prom.o +obj-y := misc.o prom.o endif diff --git a/arch/ppc64/kernel/entry.S b/arch/ppc64/kernel/entry.S deleted file mode 100644 --- a/arch/ppc64/kernel/entry.S +++ /dev/null @@ -1,845 +0,0 @@ -/* - * arch/ppc64/kernel/entry.S - * - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org) - * Rewritten by Cort Dougan (cort at cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek at jlc.net). - * - * This file contains the system call entry code, context switch - * code, and exception/interrupt return code for PowerPC. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_PPC_ISERIES -#define DO_SOFT_DISABLE -#endif - -/* - * System calls. - */ - .section ".toc","aw" -.SYS_CALL_TABLE: - .tc .sys_call_table[TC],.sys_call_table - -.SYS_CALL_TABLE32: - .tc .sys_call_table32[TC],.sys_call_table32 - -/* This value is used to mark exception frames on the stack. */ -exception_marker: - .tc ID_72656773_68657265[TC],0x7265677368657265 - - .section ".text" - .align 7 - -#undef SHOW_SYSCALLS - - .globl system_call_common -system_call_common: - andi. 
r10,r12,MSR_PR - mr r10,r1 - addi r1,r1,-INT_FRAME_SIZE - beq- 1f - ld r1,PACAKSAVE(r13) -1: std r10,0(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - std r0,GPR0(r1) - std r10,GPR1(r1) - std r2,GPR2(r1) - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) - li r11,0 - std r11,GPR9(r1) - std r11,GPR10(r1) - std r11,GPR11(r1) - std r11,GPR12(r1) - std r9,GPR13(r1) - crclr so - mfcr r9 - mflr r10 - li r11,0xc01 - std r9,_CCR(r1) - std r10,_LINK(r1) - std r11,_TRAP(r1) - mfxer r9 - mfctr r10 - std r9,_XER(r1) - std r10,_CTR(r1) - std r3,ORIG_GPR3(r1) - ld r2,PACATOC(r13) - addi r9,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker at toc(r2) - std r11,-16(r9) /* "regshere" marker */ -#ifdef CONFIG_PPC_ISERIES - /* Hack for handling interrupts when soft-enabling on iSeries */ - cmpdi cr1,r0,0x5555 /* syscall 0x5555 */ - andi. r10,r12,MSR_PR /* from kernel */ - crand 4*cr0+eq,4*cr1+eq,4*cr0+eq - beq hardware_interrupt_entry - lbz r10,PACAPROCENABLED(r13) - std r10,SOFTE(r1) -#endif - mfmsr r11 - ori r11,r11,MSR_EE - mtmsrd r11,1 - -#ifdef SHOW_SYSCALLS - bl .do_show_syscall - REST_GPR(0,r1) - REST_4GPRS(3,r1) - REST_2GPRS(7,r1) - addi r9,r1,STACK_FRAME_OVERHEAD -#endif - clrrdi r11,r1,THREAD_SHIFT - li r12,0 - ld r10,TI_FLAGS(r11) - stb r12,TI_SC_NOERR(r11) - andi. r11,r10,_TIF_SYSCALL_T_OR_A - bne- syscall_dotrace -syscall_dotrace_cont: - cmpldi 0,r0,NR_syscalls - bge- syscall_enosys - -system_call: /* label this so stack traces look sane */ -/* - * Need to vector to 32 Bit or default sys_call_table here, - * based on caller's run-mode / personality. - */ - ld r11,.SYS_CALL_TABLE at toc(2) - andi. 
r10,r10,_TIF_32BIT - beq 15f - ld r11,.SYS_CALL_TABLE32 at toc(2) - clrldi r3,r3,32 - clrldi r4,r4,32 - clrldi r5,r5,32 - clrldi r6,r6,32 - clrldi r7,r7,32 - clrldi r8,r8,32 -15: - slwi r0,r0,3 - ldx r10,r11,r0 /* Fetch system call handler [ptr] */ - mtctr r10 - bctrl /* Call handler */ - -syscall_exit: -#ifdef SHOW_SYSCALLS - std r3,GPR3(r1) - bl .do_show_syscall_exit - ld r3,GPR3(r1) -#endif - std r3,RESULT(r1) - ld r5,_CCR(r1) - li r10,-_LAST_ERRNO - cmpld r3,r10 - clrrdi r12,r1,THREAD_SHIFT - bge- syscall_error -syscall_error_cont: - - /* check for syscall tracing or audit */ - ld r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - bne- syscall_exit_trace -syscall_exit_trace_cont: - - /* disable interrupts so current_thread_info()->flags can't change, - and so that we don't get interrupted after loading SRR0/1. */ - ld r8,_MSR(r1) - andi. r10,r8,MSR_RI - beq- unrecov_restore - mfmsr r10 - rldicl r10,r10,48,1 - rotldi r10,r10,16 - mtmsrd r10,1 - ld r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) - bne- syscall_exit_work - ld r7,_NIP(r1) - stdcx. r0,0,r1 /* to clear the reservation */ - andi. r6,r8,MSR_PR - ld r4,_LINK(r1) - beq- 1f /* only restore r13 if */ - ld r13,GPR13(r1) /* returning to usermode */ -1: ld r2,GPR2(r1) - li r12,MSR_RI - andc r10,r10,r12 - mtmsrd r10,1 /* clear MSR.RI */ - ld r1,GPR1(r1) - mtlr r4 - mtcr r5 - mtspr SPRN_SRR0,r7 - mtspr SPRN_SRR1,r8 - rfid - b . 
/* prevent speculative execution */ - -syscall_enosys: - li r3,-ENOSYS - std r3,RESULT(r1) - clrrdi r12,r1,THREAD_SHIFT - ld r5,_CCR(r1) - -syscall_error: - lbz r11,TI_SC_NOERR(r12) - cmpwi 0,r11,0 - bne- syscall_error_cont - neg r3,r3 - oris r5,r5,0x1000 /* Set SO bit in CR */ - std r5,_CCR(r1) - b syscall_error_cont - -/* Traced system call support */ -syscall_dotrace: - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_syscall_trace_enter - ld r0,GPR0(r1) /* Restore original registers */ - ld r3,GPR3(r1) - ld r4,GPR4(r1) - ld r5,GPR5(r1) - ld r6,GPR6(r1) - ld r7,GPR7(r1) - ld r8,GPR8(r1) - addi r9,r1,STACK_FRAME_OVERHEAD - clrrdi r10,r1,THREAD_SHIFT - ld r10,TI_FLAGS(r10) - b syscall_dotrace_cont - -syscall_exit_trace: - std r3,GPR3(r1) - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_syscall_trace_leave - REST_NVGPRS(r1) - ld r3,GPR3(r1) - ld r5,_CCR(r1) - clrrdi r12,r1,THREAD_SHIFT - b syscall_exit_trace_cont - -/* Stuff to do on exit from a system call. */ -syscall_exit_work: - std r3,GPR3(r1) - std r5,_CCR(r1) - b .ret_from_except_lite - -/* Save non-volatile GPRs, if not already saved. */ -_GLOBAL(save_nvgprs) - ld r11,_TRAP(r1) - andi. r0,r11,1 - beqlr- - SAVE_NVGPRS(r1) - clrrdi r0,r11,1 - std r0,_TRAP(r1) - blr - -/* - * The sigsuspend and rt_sigsuspend system calls can call do_signal - * and thus put the process into the stopped state where we might - * want to examine its user state with ptrace. Therefore we need - * to save all the nonvolatile registers (r14 - r31) before calling - * the C code. Similarly, fork, vfork and clone need the full - * register state on the stack so that it can be copied to the child. 
- */ -_GLOBAL(ppc32_sigsuspend) - bl .save_nvgprs - bl .compat_sys_sigsuspend - b 70f - -_GLOBAL(ppc64_rt_sigsuspend) - bl .save_nvgprs - bl .sys_rt_sigsuspend - b 70f - -_GLOBAL(ppc32_rt_sigsuspend) - bl .save_nvgprs - bl .compat_sys_rt_sigsuspend -70: cmpdi 0,r3,0 - /* If it returned an error, we need to return via syscall_exit to set - the SO bit in cr0 and potentially stop for ptrace. */ - bne syscall_exit - /* If sigsuspend() returns zero, we are going into a signal handler. We - may need to call audit_syscall_exit() to mark the exit from sigsuspend() */ -#ifdef CONFIG_AUDIT - ld r3,PACACURRENT(r13) - ld r4,AUDITCONTEXT(r3) - cmpdi 0,r4,0 - beq .ret_from_except /* No audit_context: Leave immediately. */ - li r4, 2 /* AUDITSC_FAILURE */ - li r5,-4 /* It's always -EINTR */ - bl .audit_syscall_exit -#endif - b .ret_from_except - -_GLOBAL(ppc_fork) - bl .save_nvgprs - bl .sys_fork - b syscall_exit - -_GLOBAL(ppc_vfork) - bl .save_nvgprs - bl .sys_vfork - b syscall_exit - -_GLOBAL(ppc_clone) - bl .save_nvgprs - bl .sys_clone - b syscall_exit - -_GLOBAL(ppc32_swapcontext) - bl .save_nvgprs - bl .compat_sys_swapcontext - b 80f - -_GLOBAL(ppc64_swapcontext) - bl .save_nvgprs - bl .sys_swapcontext - b 80f - -_GLOBAL(ppc32_sigreturn) - bl .compat_sys_sigreturn - b 80f - -_GLOBAL(ppc32_rt_sigreturn) - bl .compat_sys_rt_sigreturn - b 80f - -_GLOBAL(ppc64_rt_sigreturn) - bl .sys_rt_sigreturn - -80: cmpdi 0,r3,0 - blt syscall_exit - clrrdi r4,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r4) - andi. r4,r4,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq+ 81f - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_syscall_trace_leave -81: b .ret_from_except - -_GLOBAL(ret_from_fork) - bl .schedule_tail - REST_NVGPRS(r1) - li r3,0 - b syscall_exit - -/* - * This routine switches between two different tasks. The process - * state of one is saved on its kernel stack. Then the state - * of the other is restored from its kernel stack. The memory - * management hardware is updated to the second process's state. 
- * Finally, we can return to the second process, via ret_from_except. - * On entry, r3 points to the THREAD for the current task, r4 - * points to the THREAD for the new task. - * - * Note: there are two ways to get to the "going out" portion - * of this code; either by coming in via the entry (_switch) - * or via "fork" which must set up an environment equivalent - * to the "_switch" path. If you change this you'll have to change - * the fork code also. - * - * The code which creates the new task context is in 'copy_thread' - * in arch/ppc64/kernel/process.c - */ - .align 7 -_GLOBAL(_switch) - mflr r0 - std r0,16(r1) - stdu r1,-SWITCH_FRAME_SIZE(r1) - /* r3-r13 are caller saved -- Cort */ - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) - mflr r20 /* Return to switch caller */ - mfmsr r22 - li r0, MSR_FP -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - oris r0,r0,MSR_VEC at h /* Disable altivec */ - mfspr r24,SPRN_VRSAVE /* save vrsave register value */ - std r24,THREAD_VRSAVE(r3) -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ - and. r0,r0,r22 - beq+ 1f - andc r22,r22,r0 - mtmsrd r22 - isync -1: std r20,_NIP(r1) - mfcr r23 - std r23,_CCR(r1) - std r1,KSP(r3) /* Set old stack pointer */ - -#ifdef CONFIG_SMP - /* We need a sync somewhere here to make sure that if the - * previous task gets rescheduled on another CPU, it sees all - * stores it has performed on this one. - */ - sync -#endif /* CONFIG_SMP */ - - addi r6,r4,-THREAD /* Convert THREAD to 'current' */ - std r6,PACACURRENT(r13) /* Set new 'current' */ - - ld r8,KSP(r4) /* new stack pointer */ -BEGIN_FTR_SECTION - clrrdi r6,r8,28 /* get its ESID */ - clrrdi r9,r1,28 /* get current sp ESID */ - clrldi. r0,r6,2 /* is new ESID c00000000? */ - cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? 
*/ - cror eq,4*cr1+eq,eq - beq 2f /* if yes, don't slbie it */ - - /* Bolt in the new stack SLB entry */ - ld r7,KSP_VSID(r4) /* Get new stack's VSID */ - oris r0,r6,(SLB_ESID_V)@h - ori r0,r0,(SLB_NUM_BOLTED-1)@l - slbie r6 - slbie r6 /* Workaround POWER5 < DD2.1 issue */ - slbmte r7,r0 - isync - -2: -END_FTR_SECTION_IFSET(CPU_FTR_SLB) - clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ - /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE - because we don't need to leave the 288-byte ABI gap at the - top of the kernel stack. */ - addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE - - mr r1,r8 /* start using new stack pointer */ - std r7,PACAKSAVE(r13) - - ld r6,_CCR(r1) - mtcrf 0xFF,r6 - -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - ld r0,THREAD_VRSAVE(r4) - mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ - - /* r3-r13 are destroyed -- Cort */ - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) - - /* convert old thread to its task_struct for return value */ - addi r3,r3,-THREAD - ld r7,_NIP(r1) /* Return to _switch caller in new task */ - mtlr r7 - addi r1,r1,SWITCH_FRAME_SIZE - blr - - .align 7 -_GLOBAL(ret_from_except) - ld r11,_TRAP(r1) - andi. r0,r11,1 - bne .ret_from_except_lite - REST_NVGPRS(r1) - -_GLOBAL(ret_from_except_lite) - /* - * Disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. - */ - mfmsr r10 /* Get current interrupt state */ - rldicl r9,r10,48,1 /* clear MSR_EE */ - rotldi r9,r9,16 - mtmsrd r9,1 /* Update machine state */ - -#ifdef CONFIG_PREEMPT - clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ - li r0,_TIF_NEED_RESCHED /* bits to check */ - ld r3,_MSR(r1) - ld r4,TI_FLAGS(r9) - /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */ - rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING - and. 
r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */ - bne do_work - -#else /* !CONFIG_PREEMPT */ - ld r3,_MSR(r1) /* Returning to user mode? */ - andi. r3,r3,MSR_PR - beq restore /* if not, just restore regs and return */ - - /* Check current_thread_info()->flags */ - clrrdi r9,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_USER_WORK_MASK - bne do_work -#endif - -restore: -#ifdef CONFIG_PPC_ISERIES - ld r5,SOFTE(r1) - cmpdi 0,r5,0 - beq 4f - /* Check for pending interrupts (iSeries) */ - ld r3,PACALPPACA+LPPACAANYINT(r13) - cmpdi r3,0 - beq+ 4f /* skip do_IRQ if no interrupts */ - - li r3,0 - stb r3,PACAPROCENABLED(r13) /* ensure we are soft-disabled */ - ori r10,r10,MSR_EE - mtmsrd r10 /* hard-enable again */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_IRQ - b .ret_from_except_lite /* loop back and handle more */ - -4: stb r5,PACAPROCENABLED(r13) -#endif - - ld r3,_MSR(r1) - andi. r0,r3,MSR_RI - beq- unrecov_restore - - andi. r0,r3,MSR_PR - - /* - * r13 is our per cpu area, only restore it if we are returning to - * userspace - */ - beq 1f - REST_GPR(13, r1) -1: - ld r3,_CTR(r1) - ld r0,_LINK(r1) - mtctr r3 - mtlr r0 - ld r3,_XER(r1) - mtspr SPRN_XER,r3 - - REST_8GPRS(5, r1) - - stdcx. r0,0,r1 /* to clear the reservation */ - - mfmsr r0 - li r2, MSR_RI - andc r0,r0,r2 - mtmsrd r0,1 - - ld r0,_MSR(r1) - mtspr SPRN_SRR1,r0 - - ld r2,_CCR(r1) - mtcrf 0xFF,r2 - ld r2,_NIP(r1) - mtspr SPRN_SRR0,r2 - - ld r0,GPR0(r1) - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r4,GPR4(r1) - ld r1,GPR1(r1) - - rfid - b . /* prevent speculative execution */ - -/* Note: this must change if we start using the TIF_NOTIFY_RESUME bit */ -do_work: -#ifdef CONFIG_PREEMPT - andi. r0,r3,MSR_PR /* Returning to user mode? */ - bne user_work - /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) - cmpwi cr1,r8,0 -#ifdef CONFIG_PPC_ISERIES - ld r0,SOFTE(r1) - cmpdi r0,0 -#else - andi. 
r0,r3,MSR_EE -#endif - crandc eq,cr1*4+eq,eq - bne restore - /* here we are preempting the current task */ -1: -#ifdef CONFIG_PPC_ISERIES - li r0,1 - stb r0,PACAPROCENABLED(r13) -#endif - ori r10,r10,MSR_EE - mtmsrd r10,1 /* reenable interrupts */ - bl .preempt_schedule - mfmsr r10 - clrrdi r9,r1,THREAD_SHIFT - rldicl r10,r10,48,1 /* disable interrupts again */ - rotldi r10,r10,16 - mtmsrd r10,1 - ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED - bne 1b - b restore - -user_work: -#endif - /* Enable interrupts */ - ori r10,r10,MSR_EE - mtmsrd r10,1 - - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - bl .schedule - b .ret_from_except_lite - -1: bl .save_nvgprs - li r3,0 - addi r4,r1,STACK_FRAME_OVERHEAD - bl .do_signal - b .ret_from_except - -unrecov_restore: - addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b unrecov_restore - -#ifdef CONFIG_PPC_RTAS -/* - * On CHRP, the Run-Time Abstraction Services (RTAS) have to be - * called with the MMU off. - * - * In addition, we need to be in 32b mode, at least for now. - * - * Note: r3 is an input parameter to rtas, so don't trash it... - */ -_GLOBAL(enter_rtas) - mflr r0 - std r0,16(r1) - stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */ - - /* Because RTAS is running in 32b mode, it clobbers the high order half - * of all registers that it saves. We therefore save those registers - * RTAS might touch to the stack. (r0, r3-r13 are caller saved) - */ - SAVE_GPR(2, r1) /* Save the TOC */ - SAVE_GPR(13, r1) /* Save paca */ - SAVE_8GPRS(14, r1) /* Save the non-volatiles */ - SAVE_10GPRS(22, r1) /* ditto */ - - mfcr r4 - std r4,_CCR(r1) - mfctr r5 - std r5,_CTR(r1) - mfspr r6,SPRN_XER - std r6,_XER(r1) - mfdar r7 - std r7,_DAR(r1) - mfdsisr r8 - std r8,_DSISR(r1) - mfsrr0 r9 - std r9,_SRR0(r1) - mfsrr1 r10 - std r10,_SRR1(r1) - - /* There is no way it is acceptable to get here with interrupts enabled, - * check it with the asm equivalent of WARN_ON - */ - mfmsr r6 - andi. 
r0,r6,MSR_EE -1: tdnei r0,0 -.section __bug_table,"a" - .llong 1b,__LINE__ + 0x1000000, 1f, 2f -.previous -.section .rodata,"a" -1: .asciz __FILE__ -2: .asciz "enter_rtas" -.previous - - /* Unfortunately, the stack pointer and the MSR are also clobbered, - * so they are saved in the PACA which allows us to restore - * our original state after RTAS returns. - */ - std r1,PACAR1(r13) - std r6,PACASAVEDMSR(r13) - - /* Setup our real return addr */ - SET_REG_TO_LABEL(r4,.rtas_return_loc) - SET_REG_TO_CONST(r9,KERNELBASE) - sub r4,r4,r9 - mtlr r4 - - li r0,0 - ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI - andc r0,r6,r0 - - li r9,1 - rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) - ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP - andc r6,r0,r9 - ori r6,r6,MSR_RI - sync /* disable interrupts so SRR0/1 */ - mtmsrd r0 /* don't get trashed */ - - SET_REG_TO_LABEL(r4,rtas) - ld r5,RTASENTRY(r4) /* get the rtas->entry value */ - ld r4,RTASBASE(r4) /* get the rtas->base value */ - - mtspr SPRN_SRR0,r5 - mtspr SPRN_SRR1,r6 - rfid - b . /* prevent speculative execution */ - -_STATIC(rtas_return_loc) - /* relocation is off at this point */ - mfspr r4,SPRN_SPRG3 /* Get PACA */ - SET_REG_TO_CONST(r5, KERNELBASE) - sub r4,r4,r5 /* RELOC the PACA base pointer */ - - mfmsr r6 - li r0,MSR_RI - andc r6,r6,r0 - sync - mtmsrd r6 - - ld r1,PACAR1(r4) /* Restore our SP */ - LOADADDR(r3,.rtas_restore_regs) - ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ - - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - rfid - b . 
/* prevent speculative execution */ - -_STATIC(rtas_restore_regs) - /* relocation is on at this point */ - REST_GPR(2, r1) /* Restore the TOC */ - REST_GPR(13, r1) /* Restore paca */ - REST_8GPRS(14, r1) /* Restore the non-volatiles */ - REST_10GPRS(22, r1) /* ditto */ - - mfspr r13,SPRN_SPRG3 - - ld r4,_CCR(r1) - mtcr r4 - ld r5,_CTR(r1) - mtctr r5 - ld r6,_XER(r1) - mtspr SPRN_XER,r6 - ld r7,_DAR(r1) - mtdar r7 - ld r8,_DSISR(r1) - mtdsisr r8 - ld r9,_SRR0(r1) - mtsrr0 r9 - ld r10,_SRR1(r1) - mtsrr1 r10 - - addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */ - ld r0,16(r1) /* get return address */ - - mtlr r0 - blr /* return to caller */ - -#endif /* CONFIG_PPC_RTAS */ - -#ifdef CONFIG_PPC_MULTIPLATFORM - -_GLOBAL(enter_prom) - mflr r0 - std r0,16(r1) - stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ - - /* Because PROM is running in 32b mode, it clobbers the high order half - * of all registers that it saves. We therefore save those registers - * PROM might touch to the stack. (r0, r3-r13 are caller saved) - */ - SAVE_8GPRS(2, r1) - SAVE_GPR(13, r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) - mfcr r4 - std r4,_CCR(r1) - mfctr r5 - std r5,_CTR(r1) - mfspr r6,SPRN_XER - std r6,_XER(r1) - mfdar r7 - std r7,_DAR(r1) - mfdsisr r8 - std r8,_DSISR(r1) - mfsrr0 r9 - std r9,_SRR0(r1) - mfsrr1 r10 - std r10,_SRR1(r1) - mfmsr r11 - std r11,_MSR(r1) - - /* Get the PROM entrypoint */ - ld r0,GPR4(r1) - mtlr r0 - - /* Switch MSR to 32 bits mode - */ - mfmsr r11 - li r12,1 - rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) - andc r11,r11,r12 - li r12,1 - rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) - andc r11,r11,r12 - mtmsrd r11 - isync - - /* Restore arguments & enter PROM here... 
*/ - ld r3,GPR3(r1) - blrl - - /* Just make sure that r1 top 32 bits didn't get - * corrupt by OF - */ - rldicl r1,r1,0,32 - - /* Restore the MSR (back to 64 bits) */ - ld r0,_MSR(r1) - mtmsrd r0 - isync - - /* Restore other registers */ - REST_GPR(2, r1) - REST_GPR(13, r1) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) - ld r4,_CCR(r1) - mtcr r4 - ld r5,_CTR(r1) - mtctr r5 - ld r6,_XER(r1) - mtspr SPRN_XER,r6 - ld r7,_DAR(r1) - mtdar r7 - ld r8,_DSISR(r1) - mtdsisr r8 - ld r9,_SRR0(r1) - mtsrr0 r9 - ld r10,_SRR1(r1) - mtsrr1 r10 - - addi r1,r1,PROM_FRAME_SIZE - ld r0,16(r1) - mtlr r0 - blr - -#endif /* CONFIG_PPC_MULTIPLATFORM */ diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -918,566 +918,3 @@ _GLOBAL(kexec_sequence) li r5,0 blr /* image->start(physid, image->start, 0); */ #endif /* CONFIG_KEXEC */ - -/* Why isn't this a) automatic, b) written in 'C'? */ - .balign 8 -_GLOBAL(sys_call_table32) - .llong .sys_restart_syscall /* 0 */ - .llong .sys_exit - .llong .ppc_fork - .llong .sys_read - .llong .sys_write - .llong .compat_sys_open /* 5 */ - .llong .sys_close - .llong .compat_sys_waitpid - .llong .compat_sys_creat - .llong .sys_link - .llong .sys_unlink /* 10 */ - .llong .compat_sys_execve - .llong .sys_chdir - .llong .compat_sys_time - .llong .sys_mknod - .llong .sys_chmod /* 15 */ - .llong .sys_lchown - .llong .sys_ni_syscall /* old break syscall */ - .llong .sys_ni_syscall /* old stat syscall */ - .llong .ppc32_lseek - .llong .sys_getpid /* 20 */ - .llong .compat_sys_mount - .llong .sys_oldumount - .llong .sys_setuid - .llong .sys_getuid - .llong .compat_sys_stime /* 25 */ - .llong .compat_sys_ptrace - .llong .sys_alarm - .llong .sys_ni_syscall /* old fstat syscall */ - .llong .compat_sys_pause - .llong .compat_sys_utime /* 30 */ - .llong .sys_ni_syscall /* old stty syscall */ - .llong .sys_ni_syscall /* old gtty syscall */ - .llong .compat_sys_access - .llong .compat_sys_nice - 
.llong .sys_ni_syscall /* 35 - old ftime syscall */ - .llong .sys_sync - .llong .compat_sys_kill - .llong .sys_rename - .llong .compat_sys_mkdir - .llong .sys_rmdir /* 40 */ - .llong .sys_dup - .llong .sys_pipe - .llong .compat_sys_times - .llong .sys_ni_syscall /* old prof syscall */ - .llong .sys_brk /* 45 */ - .llong .sys_setgid - .llong .sys_getgid - .llong .sys_signal - .llong .sys_geteuid - .llong .sys_getegid /* 50 */ - .llong .sys_acct - .llong .sys_umount - .llong .sys_ni_syscall /* old lock syscall */ - .llong .compat_sys_ioctl - .llong .compat_sys_fcntl /* 55 */ - .llong .sys_ni_syscall /* old mpx syscall */ - .llong .compat_sys_setpgid - .llong .sys_ni_syscall /* old ulimit syscall */ - .llong .sys_olduname - .llong .compat_sys_umask /* 60 */ - .llong .sys_chroot - .llong .sys_ustat - .llong .sys_dup2 - .llong .sys_getppid - .llong .sys_getpgrp /* 65 */ - .llong .sys_setsid - .llong .compat_sys_sigaction - .llong .sys_sgetmask - .llong .compat_sys_ssetmask - .llong .sys_setreuid /* 70 */ - .llong .sys_setregid - .llong .ppc32_sigsuspend - .llong .compat_sys_sigpending - .llong .compat_sys_sethostname - .llong .compat_sys_setrlimit /* 75 */ - .llong .compat_sys_old_getrlimit - .llong .compat_sys_getrusage - .llong .compat_sys_gettimeofday - .llong .compat_sys_settimeofday - .llong .compat_sys_getgroups /* 80 */ - .llong .compat_sys_setgroups - .llong .sys_ni_syscall /* old select syscall */ - .llong .sys_symlink - .llong .sys_ni_syscall /* old lstat syscall */ - .llong .compat_sys_readlink /* 85 */ - .llong .sys_uselib - .llong .sys_swapon - .llong .sys_reboot - .llong .old32_readdir - .llong .sys_mmap /* 90 */ - .llong .sys_munmap - .llong .sys_truncate - .llong .sys_ftruncate - .llong .sys_fchmod - .llong .sys_fchown /* 95 */ - .llong .compat_sys_getpriority - .llong .compat_sys_setpriority - .llong .sys_ni_syscall /* old profil syscall */ - .llong .compat_sys_statfs - .llong .compat_sys_fstatfs /* 100 */ - .llong .sys_ni_syscall /* old ioperm syscall 
*/ - .llong .compat_sys_socketcall - .llong .compat_sys_syslog - .llong .compat_sys_setitimer - .llong .compat_sys_getitimer /* 105 */ - .llong .compat_sys_newstat - .llong .compat_sys_newlstat - .llong .compat_sys_newfstat - .llong .sys_uname - .llong .sys_ni_syscall /* 110 old iopl syscall */ - .llong .sys_vhangup - .llong .sys_ni_syscall /* old idle syscall */ - .llong .sys_ni_syscall /* old vm86 syscall */ - .llong .compat_sys_wait4 - .llong .sys_swapoff /* 115 */ - .llong .compat_sys_sysinfo - .llong .sys32_ipc - .llong .sys_fsync - .llong .ppc32_sigreturn - .llong .ppc_clone /* 120 */ - .llong .compat_sys_setdomainname - .llong .ppc_newuname - .llong .sys_ni_syscall /* old modify_ldt syscall */ - .llong .compat_sys_adjtimex - .llong .sys_mprotect /* 125 */ - .llong .compat_sys_sigprocmask - .llong .sys_ni_syscall /* old create_module syscall */ - .llong .sys_init_module - .llong .sys_delete_module - .llong .sys_ni_syscall /* 130 old get_kernel_syms syscall */ - .llong .sys_quotactl - .llong .compat_sys_getpgid - .llong .sys_fchdir - .llong .sys_bdflush - .llong .compat_sys_sysfs /* 135 */ - .llong .ppc64_personality - .llong .sys_ni_syscall /* for afs_syscall */ - .llong .sys_setfsuid - .llong .sys_setfsgid - .llong .sys_llseek /* 140 */ - .llong .compat_sys_getdents - .llong .ppc32_select - .llong .sys_flock - .llong .sys_msync - .llong .compat_sys_readv /* 145 */ - .llong .compat_sys_writev - .llong .compat_sys_getsid - .llong .sys_fdatasync - .llong .compat_sys_sysctl - .llong .sys_mlock /* 150 */ - .llong .sys_munlock - .llong .sys_mlockall - .llong .sys_munlockall - .llong .compat_sys_sched_setparam - .llong .compat_sys_sched_getparam /* 155 */ - .llong .compat_sys_sched_setscheduler - .llong .compat_sys_sched_getscheduler - .llong .sys_sched_yield - .llong .compat_sys_sched_get_priority_max - .llong .compat_sys_sched_get_priority_min /* 160 */ - .llong .compat_sys_sched_rr_get_interval - .llong .compat_sys_nanosleep - .llong .sys_mremap - .llong 
.sys_setresuid - .llong .sys_getresuid /* 165 */ - .llong .sys_ni_syscall /* old query_module syscall */ - .llong .sys_poll - .llong .compat_sys_nfsservctl - .llong .sys_setresgid - .llong .sys_getresgid /* 170 */ - .llong .compat_sys_prctl - .llong .ppc32_rt_sigreturn - .llong .compat_sys_rt_sigaction - .llong .compat_sys_rt_sigprocmask - .llong .compat_sys_rt_sigpending /* 175 */ - .llong .compat_sys_rt_sigtimedwait - .llong .compat_sys_rt_sigqueueinfo - .llong .ppc32_rt_sigsuspend - .llong .compat_sys_pread64 - .llong .compat_sys_pwrite64 /* 180 */ - .llong .sys_chown - .llong .sys_getcwd - .llong .sys_capget - .llong .sys_capset - .llong .compat_sys_sigaltstack /* 185 */ - .llong .compat_sys_sendfile - .llong .sys_ni_syscall /* reserved for streams1 */ - .llong .sys_ni_syscall /* reserved for streams2 */ - .llong .ppc_vfork - .llong .compat_sys_getrlimit /* 190 */ - .llong .compat_sys_readahead - .llong .compat_sys_mmap2 - .llong .compat_sys_truncate64 - .llong .compat_sys_ftruncate64 - .llong .sys_stat64 /* 195 */ - .llong .sys_lstat64 - .llong .sys_fstat64 - .llong .compat_sys_pciconfig_read - .llong .compat_sys_pciconfig_write - .llong .compat_sys_pciconfig_iobase /* 200 - pciconfig_iobase */ - .llong .sys_ni_syscall /* reserved for MacOnLinux */ - .llong .sys_getdents64 - .llong .sys_pivot_root - .llong .compat_sys_fcntl64 - .llong .sys_madvise /* 205 */ - .llong .sys_mincore - .llong .sys_gettid - .llong .sys_tkill - .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr - .llong .sys_getxattr - .llong .sys_lgetxattr - .llong .sys_fgetxattr - .llong .sys_listxattr /* 215 */ - .llong .sys_llistxattr - .llong .sys_flistxattr - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ - .llong .compat_sys_futex - .llong .compat_sys_sched_setaffinity - .llong .compat_sys_sched_getaffinity - .llong .sys_ni_syscall - .llong .sys_ni_syscall /* 225 - reserved for tux */ - .llong .compat_sys_sendfile64 - .llong 
.compat_sys_io_setup - .llong .sys_io_destroy - .llong .compat_sys_io_getevents - .llong .compat_sys_io_submit - .llong .sys_io_cancel - .llong .sys_set_tid_address - .llong .ppc32_fadvise64 - .llong .sys_exit_group - .llong .ppc32_lookup_dcookie /* 235 */ - .llong .sys_epoll_create - .llong .sys_epoll_ctl - .llong .sys_epoll_wait - .llong .sys_remap_file_pages - .llong .ppc32_timer_create /* 240 */ - .llong .compat_sys_timer_settime - .llong .compat_sys_timer_gettime - .llong .sys_timer_getoverrun - .llong .sys_timer_delete - .llong .compat_sys_clock_settime/* 245 */ - .llong .compat_sys_clock_gettime - .llong .compat_sys_clock_getres - .llong .compat_sys_clock_nanosleep - .llong .ppc32_swapcontext - .llong .compat_sys_tgkill /* 250 */ - .llong .compat_sys_utimes - .llong .compat_sys_statfs64 - .llong .compat_sys_fstatfs64 - .llong .ppc_fadvise64_64 /* 32bit only fadvise64_64 */ - .llong .ppc_rtas /* 255 */ - .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong .sys_ni_syscall /* 257 reserved for vserver */ - .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ - .llong .compat_sys_mbind - .llong .compat_sys_get_mempolicy /* 260 */ - .llong .compat_sys_set_mempolicy - .llong .compat_sys_mq_open - .llong .sys_mq_unlink - .llong .compat_sys_mq_timedsend - .llong .compat_sys_mq_timedreceive /* 265 */ - .llong .compat_sys_mq_notify - .llong .compat_sys_mq_getsetattr - .llong .compat_sys_kexec_load - .llong .compat_sys_add_key - .llong .compat_sys_request_key /* 270 */ - .llong .compat_sys_keyctl - .llong .compat_sys_waitid - .llong .compat_sys_ioprio_set - .llong .compat_sys_ioprio_get - .llong .sys_inotify_init /* 275 */ - .llong .sys_inotify_add_watch - .llong .sys_inotify_rm_watch - - .balign 8 -_GLOBAL(sys_call_table) - .llong .sys_restart_syscall /* 0 */ - .llong .sys_exit - .llong .ppc_fork - .llong .sys_read - .llong .sys_write - .llong .sys_open /* 5 */ - .llong .sys_close - .llong .sys_waitpid - .llong .sys_creat - 
.llong .sys_link - .llong .sys_unlink /* 10 */ - .llong .sys_execve - .llong .sys_chdir - .llong .sys64_time - .llong .sys_mknod - .llong .sys_chmod /* 15 */ - .llong .sys_lchown - .llong .sys_ni_syscall /* old break syscall */ - .llong .sys_ni_syscall /* old stat syscall */ - .llong .sys_lseek - .llong .sys_getpid /* 20 */ - .llong .sys_mount - .llong .sys_ni_syscall /* old umount syscall */ - .llong .sys_setuid - .llong .sys_getuid - .llong .sys_stime /* 25 */ - .llong .sys_ptrace - .llong .sys_alarm - .llong .sys_ni_syscall /* old fstat syscall */ - .llong .sys_pause - .llong .sys_utime /* 30 */ - .llong .sys_ni_syscall /* old stty syscall */ - .llong .sys_ni_syscall /* old gtty syscall */ - .llong .sys_access - .llong .sys_nice - .llong .sys_ni_syscall /* 35 - old ftime syscall */ - .llong .sys_sync - .llong .sys_kill - .llong .sys_rename - .llong .sys_mkdir - .llong .sys_rmdir /* 40 */ - .llong .sys_dup - .llong .sys_pipe - .llong .sys_times - .llong .sys_ni_syscall /* old prof syscall */ - .llong .sys_brk /* 45 */ - .llong .sys_setgid - .llong .sys_getgid - .llong .sys_signal - .llong .sys_geteuid - .llong .sys_getegid /* 50 */ - .llong .sys_acct - .llong .sys_umount - .llong .sys_ni_syscall /* old lock syscall */ - .llong .sys_ioctl - .llong .sys_fcntl /* 55 */ - .llong .sys_ni_syscall /* old mpx syscall */ - .llong .sys_setpgid - .llong .sys_ni_syscall /* old ulimit syscall */ - .llong .sys_ni_syscall /* old uname syscall */ - .llong .sys_umask /* 60 */ - .llong .sys_chroot - .llong .sys_ustat - .llong .sys_dup2 - .llong .sys_getppid - .llong .sys_getpgrp /* 65 */ - .llong .sys_setsid - .llong .sys_ni_syscall - .llong .sys_sgetmask - .llong .sys_ssetmask - .llong .sys_setreuid /* 70 */ - .llong .sys_setregid - .llong .sys_ni_syscall - .llong .sys_ni_syscall - .llong .sys_sethostname - .llong .sys_setrlimit /* 75 */ - .llong .sys_ni_syscall /* old getrlimit syscall */ - .llong .sys_getrusage - .llong .sys_gettimeofday - .llong .sys_settimeofday - .llong 
.sys_getgroups /* 80 */ - .llong .sys_setgroups - .llong .sys_ni_syscall /* old select syscall */ - .llong .sys_symlink - .llong .sys_ni_syscall /* old lstat syscall */ - .llong .sys_readlink /* 85 */ - .llong .sys_uselib - .llong .sys_swapon - .llong .sys_reboot - .llong .sys_ni_syscall /* old readdir syscall */ - .llong .sys_mmap /* 90 */ - .llong .sys_munmap - .llong .sys_truncate - .llong .sys_ftruncate - .llong .sys_fchmod - .llong .sys_fchown /* 95 */ - .llong .sys_getpriority - .llong .sys_setpriority - .llong .sys_ni_syscall /* old profil syscall holder */ - .llong .sys_statfs - .llong .sys_fstatfs /* 100 */ - .llong .sys_ni_syscall /* old ioperm syscall */ - .llong .sys_socketcall - .llong .sys_syslog - .llong .sys_setitimer - .llong .sys_getitimer /* 105 */ - .llong .sys_newstat - .llong .sys_newlstat - .llong .sys_newfstat - .llong .sys_ni_syscall /* old uname syscall */ - .llong .sys_ni_syscall /* 110 old iopl syscall */ - .llong .sys_vhangup - .llong .sys_ni_syscall /* old idle syscall */ - .llong .sys_ni_syscall /* old vm86 syscall */ - .llong .sys_wait4 - .llong .sys_swapoff /* 115 */ - .llong .sys_sysinfo - .llong .sys_ipc - .llong .sys_fsync - .llong .sys_ni_syscall - .llong .ppc_clone /* 120 */ - .llong .sys_setdomainname - .llong .ppc_newuname - .llong .sys_ni_syscall /* old modify_ldt syscall */ - .llong .sys_adjtimex - .llong .sys_mprotect /* 125 */ - .llong .sys_ni_syscall - .llong .sys_ni_syscall /* old create_module syscall */ - .llong .sys_init_module - .llong .sys_delete_module - .llong .sys_ni_syscall /* 130 old get_kernel_syms syscall */ - .llong .sys_quotactl - .llong .sys_getpgid - .llong .sys_fchdir - .llong .sys_bdflush - .llong .sys_sysfs /* 135 */ - .llong .ppc64_personality - .llong .sys_ni_syscall /* for afs_syscall */ - .llong .sys_setfsuid - .llong .sys_setfsgid - .llong .sys_llseek /* 140 */ - .llong .sys_getdents - .llong .sys_select - .llong .sys_flock - .llong .sys_msync - .llong .sys_readv /* 145 */ - .llong .sys_writev - 
.llong .sys_getsid - .llong .sys_fdatasync - .llong .sys_sysctl - .llong .sys_mlock /* 150 */ - .llong .sys_munlock - .llong .sys_mlockall - .llong .sys_munlockall - .llong .sys_sched_setparam - .llong .sys_sched_getparam /* 155 */ - .llong .sys_sched_setscheduler - .llong .sys_sched_getscheduler - .llong .sys_sched_yield - .llong .sys_sched_get_priority_max - .llong .sys_sched_get_priority_min /* 160 */ - .llong .sys_sched_rr_get_interval - .llong .sys_nanosleep - .llong .sys_mremap - .llong .sys_setresuid - .llong .sys_getresuid /* 165 */ - .llong .sys_ni_syscall /* old query_module syscall */ - .llong .sys_poll - .llong .sys_nfsservctl - .llong .sys_setresgid - .llong .sys_getresgid /* 170 */ - .llong .sys_prctl - .llong .ppc64_rt_sigreturn - .llong .sys_rt_sigaction - .llong .sys_rt_sigprocmask - .llong .sys_rt_sigpending /* 175 */ - .llong .sys_rt_sigtimedwait - .llong .sys_rt_sigqueueinfo - .llong .ppc64_rt_sigsuspend - .llong .sys_pread64 - .llong .sys_pwrite64 /* 180 */ - .llong .sys_chown - .llong .sys_getcwd - .llong .sys_capget - .llong .sys_capset - .llong .sys_sigaltstack /* 185 */ - .llong .sys_sendfile64 - .llong .sys_ni_syscall /* reserved for streams1 */ - .llong .sys_ni_syscall /* reserved for streams2 */ - .llong .ppc_vfork - .llong .sys_getrlimit /* 190 */ - .llong .sys_readahead - .llong .sys_ni_syscall /* 32bit only mmap2 */ - .llong .sys_ni_syscall /* 32bit only truncate64 */ - .llong .sys_ni_syscall /* 32bit only ftruncate64 */ - .llong .sys_ni_syscall /* 195 - 32bit only stat64 */ - .llong .sys_ni_syscall /* 32bit only lstat64 */ - .llong .sys_ni_syscall /* 32bit only fstat64 */ - .llong .sys_pciconfig_read - .llong .sys_pciconfig_write - .llong .sys_pciconfig_iobase /* 200 - pciconfig_iobase */ - .llong .sys_ni_syscall /* reserved for MacOnLinux */ - .llong .sys_getdents64 - .llong .sys_pivot_root - .llong .sys_ni_syscall /* 32bit only fcntl64 */ - .llong .sys_madvise /* 205 */ - .llong .sys_mincore - .llong .sys_gettid - .llong .sys_tkill 
- .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr - .llong .sys_getxattr - .llong .sys_lgetxattr - .llong .sys_fgetxattr - .llong .sys_listxattr /* 215 */ - .llong .sys_llistxattr - .llong .sys_flistxattr - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ - .llong .sys_futex - .llong .sys_sched_setaffinity - .llong .sys_sched_getaffinity - .llong .sys_ni_syscall - .llong .sys_ni_syscall /* 225 - reserved for tux */ - .llong .sys_ni_syscall /* 32bit only sendfile64 */ - .llong .sys_io_setup - .llong .sys_io_destroy - .llong .sys_io_getevents - .llong .sys_io_submit /* 230 */ - .llong .sys_io_cancel - .llong .sys_set_tid_address - .llong .sys_fadvise64 - .llong .sys_exit_group - .llong .sys_lookup_dcookie /* 235 */ - .llong .sys_epoll_create - .llong .sys_epoll_ctl - .llong .sys_epoll_wait - .llong .sys_remap_file_pages - .llong .sys_timer_create /* 240 */ - .llong .sys_timer_settime - .llong .sys_timer_gettime - .llong .sys_timer_getoverrun - .llong .sys_timer_delete - .llong .sys_clock_settime /* 245 */ - .llong .sys_clock_gettime - .llong .sys_clock_getres - .llong .sys_clock_nanosleep - .llong .ppc64_swapcontext - .llong .sys_tgkill /* 250 */ - .llong .sys_utimes - .llong .sys_statfs64 - .llong .sys_fstatfs64 - .llong .sys_ni_syscall /* 32bit only fadvise64_64 */ - .llong .ppc_rtas /* 255 */ - .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong .sys_ni_syscall /* 257 reserved for vserver */ - .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ - .llong .sys_mbind - .llong .sys_get_mempolicy /* 260 */ - .llong .sys_set_mempolicy - .llong .sys_mq_open - .llong .sys_mq_unlink - .llong .sys_mq_timedsend - .llong .sys_mq_timedreceive /* 265 */ - .llong .sys_mq_notify - .llong .sys_mq_getsetattr - .llong .sys_kexec_load - .llong .sys_add_key - .llong .sys_request_key /* 270 */ - .llong .sys_keyctl - .llong .sys_waitid - .llong .sys_ioprio_set - .llong 
.sys_ioprio_get - .llong .sys_inotify_init /* 275 */ - .llong .sys_inotify_add_watch - .llong .sys_inotify_rm_watch diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c deleted file mode 100644 --- a/arch/ppc64/kernel/setup.c +++ /dev/null @@ -1,1307 +0,0 @@ -/* - * - * Common boot and setup code. - * - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* - * Here are some early debugging facilities. 
You can enable one - * but your kernel will not boot on anything else if you do so - */ - -/* This one is for use on LPAR machines that support an HVC console - * on vterm 0 - */ -extern void udbg_init_debug_lpar(void); -/* This one is for use on Apple G5 machines - */ -extern void udbg_init_pmac_realmode(void); -/* That's RTAS panel debug */ -extern void call_rtas_display_status_delay(unsigned char c); -/* Here's maple real mode debug */ -extern void udbg_init_maple_realmode(void); - -#define EARLY_DEBUG_INIT() do {} while(0) - -#if 0 -#define EARLY_DEBUG_INIT() udbg_init_debug_lpar() -#define EARLY_DEBUG_INIT() udbg_init_maple_realmode() -#define EARLY_DEBUG_INIT() udbg_init_pmac_realmode() -#define EARLY_DEBUG_INIT() \ - do { udbg_putc = call_rtas_display_status_delay; } while(0) -#endif - -/* extern void *stab; */ -extern unsigned long klimit; - -extern void mm_init_ppc64(void); -extern void stab_initialize(unsigned long stab); -extern void htab_initialize(void); -extern void early_init_devtree(void *flat_dt); -extern void unflatten_device_tree(void); - -extern void smp_release_cpus(void); - -int have_of = 1; -int boot_cpuid = 0; -int boot_cpuid_phys = 0; -dev_t boot_dev; -u64 ppc64_pft_size; - -struct ppc64_caches ppc64_caches; -EXPORT_SYMBOL_GPL(ppc64_caches); - -/* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. 
- */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - -/* The main machine-dep calls structure - */ -struct machdep_calls ppc_md; -EXPORT_SYMBOL(ppc_md); - -#ifdef CONFIG_MAGIC_SYSRQ -unsigned long SYSRQ_KEY; -#endif /* CONFIG_MAGIC_SYSRQ */ - - -static int ppc64_panic_event(struct notifier_block *, unsigned long, void *); -static struct notifier_block ppc64_panic_block = { - .notifier_call = ppc64_panic_event, - .priority = INT_MIN /* may not return; must be done last */ -}; - -/* - * Perhaps we can put the pmac screen_info[] here - * on pmac as well so we don't need the ifdef's. - * Until we get multiple-console support in here - * that is. -- Cort - * Maybe tie it to serial consoles, since this is really what - * these processors use on existing boards. -- Dan - */ -struct screen_info screen_info = { - .orig_x = 0, - .orig_y = 25, - .orig_video_cols = 80, - .orig_video_lines = 25, - .orig_video_isVGA = 1, - .orig_video_points = 16 -}; - -#ifdef CONFIG_SMP - -static int smt_enabled_cmdline; - -/* Look for ibm,smt-enabled OF option */ -static void check_smt_enabled(void) -{ - struct device_node *dn; - char *smt_option; - - /* Allow the command line to overrule the OF option */ - if (smt_enabled_cmdline) - return; - - dn = of_find_node_by_path("/options"); - - if (dn) { - smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL); - - if (smt_option) { - if (!strcmp(smt_option, "on")) - smt_enabled_at_boot = 1; - else if (!strcmp(smt_option, "off")) - smt_enabled_at_boot = 0; - } - } -} - -/* Look for smt-enabled= cmdline option */ -static int __init early_smt_enabled(char *p) -{ - smt_enabled_cmdline = 1; - - if (!p) - return 0; - - if (!strcmp(p, "on") || !strcmp(p, "1")) - smt_enabled_at_boot = 1; - else if (!strcmp(p, "off") || !strcmp(p, "0")) - smt_enabled_at_boot = 0; - - return 0; -} -early_param("smt-enabled", early_smt_enabled); - -/** - * setup_cpu_maps - initialize the following cpu maps: - * cpu_possible_map - * cpu_present_map - * 
cpu_sibling_map - * - * Having the possible map set up early allows us to restrict allocations - * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. - * - * We do not initialize the online map here; cpus set their own bits in - * cpu_online_map as they come up. - * - * This function is valid only for Open Firmware systems. finish_device_tree - * must be called before using this. - * - * While we're here, we may as well set the "physical" cpu ids in the paca. - */ -static void __init setup_cpu_maps(void) -{ - struct device_node *dn = NULL; - int cpu = 0; - int swap_cpuid = 0; - - check_smt_enabled(); - - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { - u32 *intserv; - int j, len = sizeof(u32), nthreads; - - intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s", - &len); - if (!intserv) - intserv = (u32 *)get_property(dn, "reg", NULL); - - nthreads = len / sizeof(u32); - - for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { - cpu_set(cpu, cpu_present_map); - set_hard_smp_processor_id(cpu, intserv[j]); - - if (intserv[j] == boot_cpuid_phys) - swap_cpuid = cpu; - cpu_set(cpu, cpu_possible_map); - cpu++; - } - } - - /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that - * boot cpu is logical 0. - */ - if (boot_cpuid_phys != get_hard_smp_processor_id(0)) { - u32 tmp; - tmp = get_hard_smp_processor_id(0); - set_hard_smp_processor_id(0, boot_cpuid_phys); - set_hard_smp_processor_id(swap_cpuid, tmp); - } - - /* - * On pSeries LPAR, we need to know how many cpus - * could possibly be added to this partition. 
- */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && - (dn = of_find_node_by_path("/rtas"))) { - int num_addr_cell, num_size_cell, maxcpus; - unsigned int *ireg; - - num_addr_cell = prom_n_addr_cells(dn); - num_size_cell = prom_n_size_cells(dn); - - ireg = (unsigned int *) - get_property(dn, "ibm,lrdr-capacity", NULL); - - if (!ireg) - goto out; - - maxcpus = ireg[num_addr_cell + num_size_cell]; - - /* Double maxcpus for processors which have SMT capability */ - if (cpu_has_feature(CPU_FTR_SMT)) - maxcpus *= 2; - - if (maxcpus > NR_CPUS) { - printk(KERN_WARNING - "Partition configured for %d cpus, " - "operating system maximum is %d.\n", - maxcpus, NR_CPUS); - maxcpus = NR_CPUS; - } else - printk(KERN_INFO "Partition configured for %d cpus.\n", - maxcpus); - - for (cpu = 0; cpu < maxcpus; cpu++) - cpu_set(cpu, cpu_possible_map); - out: - of_node_put(dn); - } - - /* - * Do the sibling map; assume only two threads per processor. - */ - for_each_cpu(cpu) { - cpu_set(cpu, cpu_sibling_map[cpu]); - if (cpu_has_feature(CPU_FTR_SMT)) - cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); - } - - systemcfg->processorCount = num_present_cpus(); -} -#endif /* CONFIG_SMP */ - -extern struct machdep_calls pSeries_md; -extern struct machdep_calls pmac_md; -extern struct machdep_calls maple_md; -extern struct machdep_calls bpa_md; -extern struct machdep_calls iseries_md; - -/* Ultimately, stuff them in an elf section like initcalls... */ -static struct machdep_calls __initdata *machines[] = { -#ifdef CONFIG_PPC_PSERIES - &pSeries_md, -#endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_PMAC - &pmac_md, -#endif /* CONFIG_PPC_PMAC */ -#ifdef CONFIG_PPC_MAPLE - &maple_md, -#endif /* CONFIG_PPC_MAPLE */ -#ifdef CONFIG_PPC_BPA - &bpa_md, -#endif -#ifdef CONFIG_PPC_ISERIES - &iseries_md, -#endif - NULL -}; - -/* - * Early initialization entry point. This is called by head.S - * with MMU translation disabled. 
We rely on the "feature" of - * the CPU that ignores the top 2 bits of the address in real - * mode so we can access kernel globals normally provided we - * only toy with things in the RMO region. From here, we do - * some early parsing of the device-tree to setup out LMB - * data structures, and allocate & initialize the hash table - * and segment tables so we can start running with translation - * enabled. - * - * It is this function which will call the probe() callback of - * the various platform types and copy the matching one to the - * global ppc_md structure. Your platform can eventually do - * some very early initializations from the probe() routine, but - * this is not recommended, be very careful as, for example, the - * device-tree is not accessible via normal means at this point. - */ - -void __init early_setup(unsigned long dt_ptr) -{ - struct paca_struct *lpaca = get_paca(); - static struct machdep_calls **mach; - - /* - * Enable early debugging if any specified (see top of - * this file) - */ - EARLY_DEBUG_INIT(); - - DBG(" -> early_setup()\n"); - - /* - * Fill the default DBG level (do we want to keep - * that old mecanism around forever ?) - */ - ppcdbg_initialize(); - - /* - * Do early initializations using the flattened device - * tree, like retreiving the physical memory map or - * calculating/retreiving the hash table size - */ - early_init_devtree(__va(dt_ptr)); - - /* - * Iterate all ppc_md structures until we find the proper - * one for the current machine type - */ - DBG("Probing machine type for platform %x...\n", - systemcfg->platform); - - for (mach = machines; *mach; mach++) { - if ((*mach)->probe(systemcfg->platform)) - break; - } - /* What can we do if we didn't find ? 
*/ - if (*mach == NULL) { - DBG("No suitable machine found !\n"); - for (;;); - } - ppc_md = **mach; - - DBG("Found, Initializing memory management...\n"); - - /* - * Initialize stab / SLB management - */ - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - stab_initialize(lpaca->stab_real); - - /* - * Initialize the MMU Hash table and create the linear mapping - * of memory - */ - htab_initialize(); - - DBG(" <- early_setup()\n"); -} - - -/* - * Initialize some remaining members of the ppc64_caches and systemcfg structures - * (at least until we get rid of them completely). This is mostly some - * cache informations about the CPU that will be used by cache flush - * routines and/or provided to userland - */ -static void __init initialize_cache_info(void) -{ - struct device_node *np; - unsigned long num_cpus = 0; - - DBG(" -> initialize_cache_info()\n"); - - for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) { - num_cpus += 1; - - /* We're assuming *all* of the CPUs have the same - * d-cache and i-cache sizes... -Peter - */ - - if ( num_cpus == 1 ) { - u32 *sizep, *lsizep; - u32 size, lsize; - const char *dc, *ic; - - /* Then read cache informations */ - if (systemcfg->platform == PLATFORM_POWERMAC) { - dc = "d-cache-block-size"; - ic = "i-cache-block-size"; - } else { - dc = "d-cache-line-size"; - ic = "i-cache-line-size"; - } - - size = 0; - lsize = cur_cpu_spec->dcache_bsize; - sizep = (u32 *)get_property(np, "d-cache-size", NULL); - if (sizep != NULL) - size = *sizep; - lsizep = (u32 *) get_property(np, dc, NULL); - if (lsizep != NULL) - lsize = *lsizep; - if (sizep == 0 || lsizep == 0) - DBG("Argh, can't find dcache properties ! 
" - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - systemcfg->dcache_size = ppc64_caches.dsize = size; - systemcfg->dcache_line_size = - ppc64_caches.dline_size = lsize; - ppc64_caches.log_dline_size = __ilog2(lsize); - ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; - - size = 0; - lsize = cur_cpu_spec->icache_bsize; - sizep = (u32 *)get_property(np, "i-cache-size", NULL); - if (sizep != NULL) - size = *sizep; - lsizep = (u32 *)get_property(np, ic, NULL); - if (lsizep != NULL) - lsize = *lsizep; - if (sizep == 0 || lsizep == 0) - DBG("Argh, can't find icache properties ! " - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - systemcfg->icache_size = ppc64_caches.isize = size; - systemcfg->icache_line_size = - ppc64_caches.iline_size = lsize; - ppc64_caches.log_iline_size = __ilog2(lsize); - ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; - } - } - - /* Add an eye catcher and the systemcfg layout version number */ - strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); - systemcfg->version.major = SYSTEMCFG_MAJOR; - systemcfg->version.minor = SYSTEMCFG_MINOR; - systemcfg->processor = mfspr(SPRN_PVR); - - DBG(" <- initialize_cache_info()\n"); -} - -static void __init check_for_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - u64 *prop; - - DBG(" -> check_for_initrd()\n"); - - if (of_chosen) { - prop = (u64 *)get_property(of_chosen, - "linux,initrd-start", NULL); - if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); - prop = (u64 *)get_property(of_chosen, - "linux,initrd-end", NULL); - if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); - initrd_below_start_ok = 1; - } else - initrd_start = 0; - } - } - - /* If we were passed an initrd, set the ROOT_DEV properly if the values - * look sensible. If not, clear initrd reference. 
- */ - if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && - initrd_end > initrd_start) - ROOT_DEV = Root_RAM0; - else - initrd_start = initrd_end = 0; - - if (initrd_start) - printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); - - DBG(" <- check_for_initrd()\n"); -#endif /* CONFIG_BLK_DEV_INITRD */ -} - -/* - * Do some initial setup of the system. The parameters are those which - * were passed in from the bootloader. - */ -void __init setup_system(void) -{ - DBG(" -> setup_system()\n"); - - /* - * Unflatten the device-tree passed by prom_init or kexec - */ - unflatten_device_tree(); - - /* - * Fill the ppc64_caches & systemcfg structures with informations - * retreived from the device-tree. Need to be called before - * finish_device_tree() since the later requires some of the - * informations filled up here to properly parse the interrupt - * tree. - * It also sets up the cache line sizes which allows to call - * routines like flush_icache_range (used by the hash init - * later on). - */ - initialize_cache_info(); - -#ifdef CONFIG_PPC_RTAS - /* - * Initialize RTAS if available - */ - rtas_initialize(); -#endif /* CONFIG_PPC_RTAS */ - - /* - * Check if we have an initrd provided via the device-tree - */ - check_for_initrd(); - - /* - * Do some platform specific early initializations, that includes - * setting up the hash table pointers. 
It also sets up some interrupt-mapping - * related options that will be used by finish_device_tree() - */ - ppc_md.init_early(); - - /* - * "Finish" the device-tree, that is do the actual parsing of - * some of the properties like the interrupt map - */ - finish_device_tree(); - -#ifdef CONFIG_BOOTX_TEXT - init_boot_display(); -#endif - - /* - * Initialize xmon - */ -#ifdef CONFIG_XMON_DEFAULT - xmon_init(1); -#endif - /* - * Register early console - */ - register_early_udbg_console(); - - /* Save unparsed command line copy for /proc/cmdline */ - strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); - - parse_early_param(); - -#ifdef CONFIG_SMP - /* - * iSeries has already initialized the cpu maps at this point. - */ - setup_cpu_maps(); - - /* Release secondary cpus out of their spinloops at 0x60 now that - * we can map physical -> logical CPU ids - */ - smp_release_cpus(); -#endif - - printk("Starting Linux PPC64 %s\n", system_utsname.version); - - printk("-----------------------------------------------------\n"); - printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); - printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch); - printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); - printk("systemcfg = 0x%p\n", systemcfg); - printk("systemcfg->platform = 0x%x\n", systemcfg->platform); - printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount); - printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize); - printk("ppc64_caches.dcache_line_size = 0x%x\n", - ppc64_caches.dline_size); - printk("ppc64_caches.icache_line_size = 0x%x\n", - ppc64_caches.iline_size); - printk("htab_address = 0x%p\n", htab_address); - printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); - printk("-----------------------------------------------------\n"); - - mm_init_ppc64(); - - DBG(" <- setup_system()\n"); -} - -/* also used by kexec */ -void machine_shutdown(void) -{ - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); -} - 
-void machine_restart(char *cmd) -{ - machine_shutdown(); - ppc_md.restart(cmd); -#ifdef CONFIG_SMP - smp_send_stop(); -#endif - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; -} - -void machine_power_off(void) -{ - machine_shutdown(); - ppc_md.power_off(); -#ifdef CONFIG_SMP - smp_send_stop(); -#endif - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; -} -/* Used by the G5 thermal driver */ -EXPORT_SYMBOL_GPL(machine_power_off); - -void machine_halt(void) -{ - machine_shutdown(); - ppc_md.halt(); -#ifdef CONFIG_SMP - smp_send_stop(); -#endif - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; -} - -static int ppc64_panic_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - ppc_md.panic((char *)ptr); /* May not return */ - return NOTIFY_DONE; -} - - -#ifdef CONFIG_SMP -DEFINE_PER_CPU(unsigned int, pvr); -#endif - -static int show_cpuinfo(struct seq_file *m, void *v) -{ - unsigned long cpu_id = (unsigned long)v - 1; - unsigned int pvr; - unsigned short maj; - unsigned short min; - - if (cpu_id == NR_CPUS) { - seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); - - if (ppc_md.show_cpuinfo != NULL) - ppc_md.show_cpuinfo(m); - - return 0; - } - - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. 
*/ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - -#ifdef CONFIG_SMP - pvr = per_cpu(pvr, cpu_id); -#else - pvr = mfspr(SPRN_PVR); -#endif - maj = (pvr >> 8) & 0xFF; - min = pvr & 0xFF; - - seq_printf(m, "processor\t: %lu\n", cpu_id); - seq_printf(m, "cpu\t\t: "); - - if (cur_cpu_spec->pvr_mask) - seq_printf(m, "%s", cur_cpu_spec->cpu_name); - else - seq_printf(m, "unknown (%08x)", pvr); - -#ifdef CONFIG_ALTIVEC - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - seq_printf(m, ", altivec supported"); -#endif /* CONFIG_ALTIVEC */ - - seq_printf(m, "\n"); - - /* - * Assume here that all clock rates are the same in a - * smp system. -- Cort - */ - seq_printf(m, "clock\t\t: %lu.%06luMHz\n", ppc_proc_freq / 1000000, - ppc_proc_freq % 1000000); - - seq_printf(m, "revision\t: %hd.%hd\n\n", maj, min); - - preempt_enable(); - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos <= NR_CPUS ? (void *)((*pos)+1) : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - -/* - * These three variables are used to save values passed to us by prom_init() - * via the device tree. The TCE variables are needed because with a memory_limit - * in force we may need to explicitly map the TCE are at the top of RAM. - */ -unsigned long memory_limit; -unsigned long tce_alloc_start; -unsigned long tce_alloc_end; - -#ifdef CONFIG_PPC_ISERIES -/* - * On iSeries we just parse the mem=X option from the command line. 
- * On pSeries it's a bit more complicated, see prom_init_mem() - */ -static int __init early_parsemem(char *p) -{ - if (!p) - return 0; - - memory_limit = ALIGN(memparse(p, &p), PAGE_SIZE); - - return 0; -} -early_param("mem", early_parsemem); -#endif /* CONFIG_PPC_ISERIES */ - -#ifdef CONFIG_PPC_MULTIPLATFORM -static int __init set_preferred_console(void) -{ - struct device_node *prom_stdout = NULL; - char *name; - u32 *spd; - int offset = 0; - - DBG(" -> set_preferred_console()\n"); - - /* The user has requested a console so this is already set up. */ - if (strstr(saved_command_line, "console=")) { - DBG(" console was specified !\n"); - return -EBUSY; - } - - if (!of_chosen) { - DBG(" of_chosen is NULL !\n"); - return -ENODEV; - } - /* We are getting a weird phandle from OF ... */ - /* ... So use the full path instead */ - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); - if (name == NULL) { - DBG(" no linux,stdout-path !\n"); - return -ENODEV; - } - prom_stdout = of_find_node_by_path(name); - if (!prom_stdout) { - DBG(" can't find stdout package %s !\n", name); - return -ENODEV; - } - DBG("stdout is %s\n", prom_stdout->full_name); - - name = (char *)get_property(prom_stdout, "name", NULL); - if (!name) { - DBG(" stdout package has no name !\n"); - goto not_found; - } - spd = (u32 *)get_property(prom_stdout, "current-speed", NULL); - - if (0) - ; -#ifdef CONFIG_SERIAL_8250_CONSOLE - else if (strcmp(name, "serial") == 0) { - int i; - u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i); - if (i > 8) { - switch (reg[1]) { - case 0x3f8: - offset = 0; - break; - case 0x2f8: - offset = 1; - break; - case 0x898: - offset = 2; - break; - case 0x890: - offset = 3; - break; - default: - /* We dont recognise the serial port */ - goto not_found; - } - } - } -#endif /* CONFIG_SERIAL_8250_CONSOLE */ -#ifdef CONFIG_PPC_PSERIES - else if (strcmp(name, "vty") == 0) { - u32 *reg = (u32 *)get_property(prom_stdout, "reg", NULL); - char *compat = (char 
*)get_property(prom_stdout, "compatible", NULL); - - if (reg && compat && (strcmp(compat, "hvterm-protocol") == 0)) { - /* Host Virtual Serial Interface */ - int offset; - switch (reg[0]) { - case 0x30000000: - offset = 0; - break; - case 0x30000001: - offset = 1; - break; - default: - goto not_found; - } - of_node_put(prom_stdout); - DBG("Found hvsi console at offset %d\n", offset); - return add_preferred_console("hvsi", offset, NULL); - } else { - /* pSeries LPAR virtual console */ - of_node_put(prom_stdout); - DBG("Found hvc console\n"); - return add_preferred_console("hvc", 0, NULL); - } - } -#endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE - else if (strcmp(name, "ch-a") == 0) - offset = 0; - else if (strcmp(name, "ch-b") == 0) - offset = 1; -#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */ - else - goto not_found; - of_node_put(prom_stdout); - - DBG("Found serial console at ttyS%d\n", offset); - - if (spd) { - static char __initdata opt[16]; - sprintf(opt, "%d", *spd); - return add_preferred_console("ttyS", offset, opt); - } else - return add_preferred_console("ttyS", offset, NULL); - - not_found: - DBG("No preferred console found !\n"); - of_node_put(prom_stdout); - return -ENODEV; -} -console_initcall(set_preferred_console); -#endif /* CONFIG_PPC_MULTIPLATFORM */ - -#ifdef CONFIG_IRQSTACKS -static void __init irqstack_early_init(void) -{ - unsigned int i; - - /* - * interrupt stacks must be under 256MB, we cannot afford to take - * SLB misses on them. - */ - for_each_cpu(i) { - softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); - hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); - } -} -#else -#define irqstack_early_init() -#endif - -/* - * Stack space used when we detect a bad kernel stack pointer, and - * early in SMP boots before relocation is enabled. 
- */ -static void __init emergency_stack_init(void) -{ - unsigned long limit; - unsigned int i; - - /* - * Emergency stacks must be under 256MB, we cannot afford to take - * SLB misses on them. The ABI also requires them to be 128-byte - * aligned. - * - * Since we use these as temporary stacks during secondary CPU - * bringup, we need to get at them in real mode. This means they - * must also be within the RMO region. - */ - limit = min(0x10000000UL, lmb.rmo_size); - - for_each_cpu(i) - paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128, - limit)) + PAGE_SIZE; -} - -/* - * Called from setup_arch to initialize the bitmap of available - * syscalls in the systemcfg page - */ -void __init setup_syscall_map(void) -{ - unsigned int i, count64 = 0, count32 = 0; - extern unsigned long *sys_call_table; - extern unsigned long *sys_call_table32; - extern unsigned long sys_ni_syscall; - - - for (i = 0; i < __NR_syscalls; i++) { - if (sys_call_table[i] == sys_ni_syscall) - continue; - count64++; - systemcfg->syscall_map_64[i >> 5] |= 0x80000000UL >> (i & 0x1f); - } - for (i = 0; i < __NR_syscalls; i++) { - if (sys_call_table32[i] == sys_ni_syscall) - continue; - count32++; - systemcfg->syscall_map_32[i >> 5] |= 0x80000000UL >> (i & 0x1f); - } - printk(KERN_INFO "Syscall map setup, %d 32 bits and %d 64 bits syscalls\n", - count32, count64); -} - -/* - * Called into from start_kernel, after lock_kernel has been called. - * Initializes bootmem, which is unsed to manage page allocation until - * mem_init is called. - */ -void __init setup_arch(char **cmdline_p) -{ - extern void do_init_bootmem(void); - - ppc64_boot_msg(0x12, "Setup Arch"); - - *cmdline_p = cmd_line; - - /* - * Set cache line size based on type of cpu as a default. - * Systems with OF can look in the properties on the cpu node(s) - * for a possibly more accurate value. 
- */ - dcache_bsize = ppc64_caches.dline_size; - icache_bsize = ppc64_caches.iline_size; - - /* reboot on panic */ - panic_timeout = 180; - - if (ppc_md.panic) - notifier_chain_register(&panic_notifier_list, &ppc64_panic_block); - - init_mm.start_code = PAGE_OFFSET; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; - - irqstack_early_init(); - emergency_stack_init(); - - stabs_alloc(); - - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - sparse_init(); - - /* initialize the syscall map in systemcfg */ - setup_syscall_map(); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - ppc_md.setup_arch(); - - /* Use the default idle loop if the platform hasn't provided one. */ - if (NULL == ppc_md.idle_loop) { - ppc_md.idle_loop = default_idle; - printk(KERN_INFO "Using default idle loop\n"); - } - - paging_init(); - ppc64_boot_msg(0x15, "Setup Done"); -} - - -/* ToDo: do something useful if ppc_md is not yet setup. */ -#define PPC64_LINUX_FUNCTION 0x0f000000 -#define PPC64_IPL_MESSAGE 0xc0000000 -#define PPC64_TERM_MESSAGE 0xb0000000 - -static void ppc64_do_msg(unsigned int src, const char *msg) -{ - if (ppc_md.progress) { - char buf[128]; - - sprintf(buf, "%08X\n", src); - ppc_md.progress(buf, 0); - snprintf(buf, 128, "%s", msg); - ppc_md.progress(buf, 0); - } -} - -/* Print a boot progress message. */ -void ppc64_boot_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); - printk("[boot]%04x %s\n", src, msg); -} - -/* Print a termination message (print only -- does not stop the kernel) */ -void ppc64_terminate_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_TERM_MESSAGE|src, msg); - printk("[terminate]%04x %s\n", src, msg); -} - -#ifndef CONFIG_PPC_ISERIES -/* - * This function can be used by platforms to "find" legacy serial ports. 
- * It works for "serial" nodes under an "isa" node, and will try to - * respect the "ibm,aix-loc" property if any. It works with up to 8 - * ports. - */ - -#define MAX_LEGACY_SERIAL_PORTS 8 -static struct plat_serial8250_port serial_ports[MAX_LEGACY_SERIAL_PORTS+1]; -static unsigned int old_serial_count; - -void __init generic_find_legacy_serial_ports(u64 *physport, - unsigned int *default_speed) -{ - struct device_node *np; - u32 *sizeprop; - - struct isa_reg_property { - u32 space; - u32 address; - u32 size; - }; - struct pci_reg_property { - struct pci_address addr; - u32 size_hi; - u32 size_lo; - }; - - DBG(" -> generic_find_legacy_serial_port()\n"); - - *physport = 0; - if (default_speed) - *default_speed = 0; - - np = of_find_node_by_path("/"); - if (!np) - return; - - /* First fill our array */ - for (np = NULL; (np = of_find_node_by_type(np, "serial"));) { - struct device_node *isa, *pci; - struct isa_reg_property *reg; - unsigned long phys_size, addr_size, io_base; - u32 *rangesp; - u32 *interrupts, *clk, *spd; - char *typep; - int index, rlen, rentsize; - - /* Ok, first check if it's under an "isa" parent */ - isa = of_get_parent(np); - if (!isa || strcmp(isa->name, "isa")) { - DBG("%s: no isa parent found\n", np->full_name); - continue; - } - - /* Now look for an "ibm,aix-loc" property that gives us ordering - * if any... - */ - typep = (char *)get_property(np, "ibm,aix-loc", NULL); - - /* Get the ISA port number */ - reg = (struct isa_reg_property *)get_property(np, "reg", NULL); - if (reg == NULL) - goto next_port; - /* We assume the interrupt number isn't translated ... */ - interrupts = (u32 *)get_property(np, "interrupts", NULL); - /* get clock freq. 
if present */ - clk = (u32 *)get_property(np, "clock-frequency", NULL); - /* get default speed if present */ - spd = (u32 *)get_property(np, "current-speed", NULL); - /* Default to locate at end of array */ - index = old_serial_count; /* end of the array by default */ - - /* If we have a location index, then use it */ - if (typep && *typep == 'S') { - index = simple_strtol(typep+1, NULL, 0) - 1; - /* if index is out of range, use end of array instead */ - if (index >= MAX_LEGACY_SERIAL_PORTS) - index = old_serial_count; - /* if our index is still out of range, that mean that - * array is full, we could scan for a free slot but that - * make little sense to bother, just skip the port - */ - if (index >= MAX_LEGACY_SERIAL_PORTS) - goto next_port; - if (index >= old_serial_count) - old_serial_count = index + 1; - /* Check if there is a port who already claimed our slot */ - if (serial_ports[index].iobase != 0) { - /* if we still have some room, move it, else override */ - if (old_serial_count < MAX_LEGACY_SERIAL_PORTS) { - DBG("Moved legacy port %d -> %d\n", index, - old_serial_count); - serial_ports[old_serial_count++] = - serial_ports[index]; - } else { - DBG("Replacing legacy port %d\n", index); - } - } - } - if (index >= MAX_LEGACY_SERIAL_PORTS) - goto next_port; - if (index >= old_serial_count) - old_serial_count = index + 1; - - /* Now fill the entry */ - memset(&serial_ports[index], 0, sizeof(struct plat_serial8250_port)); - serial_ports[index].uartclk = clk ? *clk : BASE_BAUD * 16; - serial_ports[index].iobase = reg->address; - serial_ports[index].irq = interrupts ? 
interrupts[0] : 0; - serial_ports[index].flags = ASYNC_BOOT_AUTOCONF; - - DBG("Added legacy port, index: %d, port: %x, irq: %d, clk: %d\n", - index, - serial_ports[index].iobase, - serial_ports[index].irq, - serial_ports[index].uartclk); - - /* Get phys address of IO reg for port 1 */ - if (index != 0) - goto next_port; - - pci = of_get_parent(isa); - if (!pci) { - DBG("%s: no pci parent found\n", np->full_name); - goto next_port; - } - - rangesp = (u32 *)get_property(pci, "ranges", &rlen); - if (rangesp == NULL) { - of_node_put(pci); - goto next_port; - } - rlen /= 4; - - /* we need the #size-cells of the PCI bridge node itself */ - phys_size = 1; - sizeprop = (u32 *)get_property(pci, "#size-cells", NULL); - if (sizeprop != NULL) - phys_size = *sizeprop; - /* we need the parent #addr-cells */ - addr_size = prom_n_addr_cells(pci); - rentsize = 3 + addr_size + phys_size; - io_base = 0; - for (;rlen >= rentsize; rlen -= rentsize,rangesp += rentsize) { - if (((rangesp[0] >> 24) & 0x3) != 1) - continue; /* not IO space */ - io_base = rangesp[3]; - if (addr_size == 2) - io_base = (io_base << 32) | rangesp[4]; - } - if (io_base != 0) { - *physport = io_base + reg->address; - if (default_speed && spd) - *default_speed = *spd; - } - of_node_put(pci); - next_port: - of_node_put(isa); - } - - DBG(" <- generic_find_legacy_serial_port()\n"); -} - -static struct platform_device serial_device = { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, - .dev = { - .platform_data = serial_ports, - }, -}; - -static int __init serial_dev_init(void) -{ - return platform_device_register(&serial_device); -} -arch_initcall(serial_dev_init); - -#endif /* CONFIG_PPC_ISERIES */ - -int check_legacy_ioport(unsigned long base_port) -{ - if (ppc_md.check_legacy_ioport == NULL) - return 0; - return ppc_md.check_legacy_ioport(base_port); -} -EXPORT_SYMBOL(check_legacy_ioport); - -#ifdef CONFIG_XMON -static int __init early_xmon(char *p) -{ - /* ensure xmon is enabled */ - if (p) { - if 
(strncmp(p, "on", 2) == 0) - xmon_init(1); - if (strncmp(p, "off", 3) == 0) - xmon_init(0); - if (strncmp(p, "early", 5) != 0) - return 0; - } - xmon_init(1); - debugger(NULL); - - return 0; -} -early_param("xmon", early_xmon); -#endif - -void cpu_die(void) -{ - if (ppc_md.cpu_die) - ppc_md.cpu_die(); -} From sfr at canb.auug.org.au Wed Oct 26 16:18:54 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Wed, 26 Oct 2005 16:18:54 +1000 Subject: [PATCH] ppc64: use the merged syscall table In-Reply-To: <20051026161253.2eb437c4.sfr@canb.auug.org.au> References: <20051026161253.2eb437c4.sfr@canb.auug.org.au> Message-ID: <20051026161854.1a300ae3.sfr@canb.auug.org.au> On Wed, 26 Oct 2005 16:12:53 +1000 Stephen Rothwell wrote: > > This allows us to also use entry_64.S and setup_64.c from the merged > tree. > > Signed-off-by: Stephen Rothwell This patch also *replaces* my previous patch "remove arch/ppc64/kernel/setup.c". -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051026/0fc593dc/attachment.pgp From apw at shadowen.org Wed Oct 26 22:37:45 2005 From: apw at shadowen.org (Andy Whitcroft) Date: Wed, 26 Oct 2005 13:37:45 +0100 Subject: [PATCH] ppc64 memory model depends on NUMA Message-ID: <20051026123745.GA5952@shadowen.org> Currently we first select memory model (FLAT, DISCONTIG, SPARSE) then select whether the machine is NUMA. However NUMA systems may not be FLAT. This constraint is not honoured and we may configure a NUMA/FLAT system. Reorder the configuration such that we choose NUMA first which allows us to only list the memory models which are valid. We now default NUMA for known NUMA systems. Note that this new order also matches that used in x86.
Signed-off-by: Andy Whitcroft Signed-off-by: Joel Schopp --- diff -upN reference/arch/ppc64/Kconfig current/arch/ppc64/Kconfig --- reference/arch/ppc64/Kconfig +++ current/arch/ppc64/Kconfig @@ -234,6 +234,10 @@ config HMT This option enables hardware multithreading on RS64 cpus. pSeries systems p620 and p660 have such a cpu type. +config NUMA + bool "NUMA support" + default y if SMP && PPC_PSERIES + config ARCH_SELECT_MEMORY_MODEL def_bool y @@ -249,9 +253,6 @@ config ARCH_DISCONTIGMEM_DEFAULT def_bool y depends on ARCH_DISCONTIGMEM_ENABLE -config ARCH_FLATMEM_ENABLE - def_bool y - config ARCH_SPARSEMEM_ENABLE def_bool y depends on ARCH_DISCONTIGMEM_ENABLE @@ -274,10 +275,6 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES -config NUMA - bool "NUMA support" - default y if DISCONTIGMEM || SPARSEMEM - config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SMP From galak at freescale.com Thu Oct 27 00:55:41 2005 From: galak at freescale.com (Kumar Gala) Date: Wed, 26 Oct 2005 09:55:41 -0500 (CDT) Subject: [PATCH] powerpc: Fix warning related to do_dabr Message-ID: do_dabr() is not relevant on 40x or Book-E processors so dont build it Signed-off-by: Kumar K. Gala --- commit 76b3884004db7ea66b3ef801dd0fa1e2337025a8 tree e60f8a2af4bc06303ca3a81d28831fb6ae0a0097 parent e19eaf379ab1a633eea2d5ff0e0d99ca834324f1 author Kumar K. Gala Wed, 26 Oct 2005 09:39:58 -0500 committer Kumar K. 
Gala Wed, 26 Oct 2005 09:39:58 -0500 arch/powerpc/mm/fault.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -80,6 +80,7 @@ static int store_updates_sp(struct pt_re return 0; } +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) static void do_dabr(struct pt_regs *regs, unsigned long error_code) { siginfo_t info; @@ -101,6 +102,7 @@ static void do_dabr(struct pt_regs *regs info.si_addr = (void __user *)regs->nip; force_sig_info(SIGTRAP, &info, current); } +#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ /* * For 600- and 800-family processors, the error_code parameter is DSISR From galak at freescale.com Thu Oct 27 00:56:10 2005 From: galak at freescale.com (Kumar Gala) Date: Wed, 26 Oct 2005 09:56:10 -0500 (CDT) Subject: [PATCH] powerpc: Moved dcr support to arch/powerpc Message-ID: Signed-off-by: Kumar K. Gala --- commit ef5ace072939eda2e37acf1204363c8402727368 tree d5faa9bbae927b302e36cda55b8a41fd7b54eaf2 parent 76b3884004db7ea66b3ef801dd0fa1e2337025a8 author Kumar K. Gala Wed, 26 Oct 2005 09:48:25 -0500 committer Kumar K. 
Gala Wed, 26 Oct 2005 09:48:25 -0500 arch/powerpc/sysdev/Makefile | 2 ++ arch/powerpc/sysdev/dcr.S | 41 +++++++++++++++++++++++++++++++++++++++++ arch/ppc/syslib/Makefile | 2 -- arch/ppc/syslib/dcr.S | 41 ----------------------------------------- 4 files changed, 43 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -2,3 +2,5 @@ obj-$(CONFIG_MPIC) += mpic.o obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o obj-$(CONFIG_PPC_I8259) += i8259.o obj-$(CONFIG_PPC_MPC106) += grackle.o +obj-$(CONFIG_BOOKE) += dcr.o +obj-$(CONFIG_40x) += dcr.o diff --git a/arch/powerpc/sysdev/dcr.S b/arch/powerpc/sysdev/dcr.S new file mode 100644 --- /dev/null +++ b/arch/powerpc/sysdev/dcr.S @@ -0,0 +1,41 @@ +/* + * arch/ppc/syslib/dcr.S + * + * "Indirect" DCR access + * + * Copyright (c) 2004 Eugene Surovegin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include +#include + +#define DCR_ACCESS_PROLOG(table) \ + rlwinm r3,r3,4,18,27; \ + lis r5,table at h; \ + ori r5,r5,table at l; \ + add r3,r3,r5; \ + mtctr r3; \ + bctr + +_GLOBAL(__mfdcr) + DCR_ACCESS_PROLOG(__mfdcr_table) + +_GLOBAL(__mtdcr) + DCR_ACCESS_PROLOG(__mtdcr_table) + +__mfdcr_table: + mfdcr r3,0; blr +__mtdcr_table: + mtdcr 0,r4; blr + +dcr = 1 + .rept 1023 + mfdcr r3,dcr; blr + mtdcr dcr,r4; blr + dcr = dcr + 1 + .endr diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile --- a/arch/ppc/syslib/Makefile +++ b/arch/ppc/syslib/Makefile @@ -86,8 +86,6 @@ endif obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_MPC10X_BRIDGE) += mpc10x_common.o ppc_sys.o obj-$(CONFIG_MPC10X_OPENPIC) += open_pic.o -obj-$(CONFIG_40x) += dcr.o -obj-$(CONFIG_BOOKE) += dcr.o obj-$(CONFIG_85xx) += open_pic.o ppc85xx_common.o ppc85xx_setup.o \ ppc_sys.o mpc85xx_sys.o \ mpc85xx_devices.o diff --git a/arch/ppc/syslib/dcr.S b/arch/ppc/syslib/dcr.S deleted file mode 100644 --- a/arch/ppc/syslib/dcr.S +++ /dev/null @@ -1,41 +0,0 @@ -/* - * arch/ppc/syslib/dcr.S - * - * "Indirect" DCR access - * - * Copyright (c) 2004 Eugene Surovegin - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ - -#include -#include - -#define DCR_ACCESS_PROLOG(table) \ - rlwinm r3,r3,4,18,27; \ - lis r5,table at h; \ - ori r5,r5,table at l; \ - add r3,r3,r5; \ - mtctr r3; \ - bctr - -_GLOBAL(__mfdcr) - DCR_ACCESS_PROLOG(__mfdcr_table) - -_GLOBAL(__mtdcr) - DCR_ACCESS_PROLOG(__mtdcr_table) - -__mfdcr_table: - mfdcr r3,0; blr -__mtdcr_table: - mtdcr 0,r4; blr - -dcr = 1 - .rept 1023 - mfdcr r3,dcr; blr - mtdcr dcr,r4; blr - dcr = dcr + 1 - .endr From galak at freescale.com Thu Oct 27 00:56:39 2005 From: galak at freescale.com (Kumar Gala) Date: Wed, 26 Oct 2005 09:56:39 -0500 (CDT) Subject: [PATCH] powerpc: only build idle_6xx for 6xx Message-ID: For the current time idle_6xx only applies to 6xx ppc32 CPUs Signed-off-by: Kumar K. Gala --- commit e19eaf379ab1a633eea2d5ff0e0d99ca834324f1 tree cf91db25a28f8f1d9d2426bc8b70c29d94aa249f parent b6a4ce526a87bd8a99a396ac993240d6d3239852 author Kumar K. Gala Wed, 26 Oct 2005 09:37:54 -0500 committer Kumar K. Gala Wed, 26 Oct 2005 09:37:54 -0500 arch/powerpc/kernel/Makefile | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -27,13 +27,14 @@ extra-$(CONFIG_40x) := head_4xx.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o +extra-$(CONFIG_6xx) += idle_6xx.o extra-$(CONFIG_PPC64) += entry_64.o extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds obj-y += process.o init_task.o time.o \ prom.o systbl.o traps.o setup-common.o -obj-$(CONFIG_PPC32) += entry_32.o idle_6xx.o setup_32.o misc_32.o +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o obj-$(CONFIG_PPC64) += setup_64.o misc_64.o obj-$(CONFIG_PPC_OF) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o From jschopp at austin.ibm.com Thu Oct 27 03:25:55 2005 From: jschopp at austin.ibm.com (Joel Schopp) Date: Wed, 26 Oct 2005 12:25:55 -0500 Subject: [PATCH] 
ppc64 memory model depends on NUMA In-Reply-To: <20051026123745.GA5952@shadowen.org> References: <20051026123745.GA5952@shadowen.org> Message-ID: <435FBC23.9080403@austin.ibm.com> > Currently when we first select memory model (FLAT, DISCONTIG, SPARSE) > then select whether the machine is NUMA. However NUMA systems may not > be FLAT. This constraint it not honoured and we may configure a NUMA/FLAT > system. The result of which is that without this patch people were configuring kernels that didn't compile. Because this change is straightforward and fixes a real problem, I believe it should go either into 2.6.14 or 2.6.14.x . From david at gibson.dropbear.id.au Thu Oct 27 16:27:25 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Thu, 27 Oct 2005 16:27:25 +1000 Subject: powerpc: Fix handling of fpscr on 64-bit Message-ID: <20051027062725.GA7176@localhost.localdomain> Paulus, please apply to the merge tree. This goes on top of sfr's patch which uses the merged entry_*.S for ARCH=ppc64. The recent merge of fpu.S broke the handling of fpscr for ARCH=powerpc and CONFIG_PPC64=y. FP registers could be corrupted, leading to strange random application crashes. The confusion arises, because the thread_struct has (and requires) a 64-bit area to save the fpscr, because we use load/store double instructions to get it in to/out of the FPU. However, only the low 32-bits are actually used, so we want to treat it as a 32-bit quantity when manipulating its bits to avoid extra load/stores on 32-bit. This patch replaces the current definition with a structure of two 32-bit quantities (pad and val), to clarify things as much as is possible. The 'val' field is used when manipulating bits, the structure itself is used when obtaining the address for loading/unloading the value from the FPU. While we're at it, consolidate the 4 (!)
almost identical versions of cvt_fd() and cvt_df() (arch/ppc/kernel/misc.S, arch/ppc64/kernel/misc.S, arch/powerpc/kernel/misc_32.S, arch/powerpc/kernel/misc_64.S) into a single version in fpu.S. The new version takes a pointer to thread_struct and applies the correct offset itself, rather than a pointer to the fpscr field itself, again to avoid confusion as to which is the correct field to use. Finally, this patch makes ARCH=ppc64 also use the consolidated fpu.S code, which it previously did not. Built for G5 (ARCH=ppc64 and ARCH=powerpc), 32-bit powermac (ARCH=ppc and ARCH=powerpc) and Walnut (ARCH=ppc, CONFIG_MATH_EMULATION=y). Booted on G5 (ARCH=powerpc) and things which previously fell over no longer do. Signed-off-by: David Gibson Index: working-2.6/arch/powerpc/kernel/fpu.S =================================================================== --- working-2.6.orig/arch/powerpc/kernel/fpu.S 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/fpu.S 2005-10-27 16:01:41.000000000 +1000 @@ -48,7 +48,7 @@ addi r4,r4,THREAD /* want last_task_used_math->thread */ SAVE_32FPRS(0, r4) mffs fr0 - stfd fr0,THREAD_FPSCR-4(r4) + stfd fr0,THREAD_FPSCR(r4) LDL r5,PT_REGS(r4) tophys(r5,r5) LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5) @@ -71,7 +71,7 @@ or r12,r12,r4 std r12,_MSR(r1) #endif - lfd fr0,THREAD_FPSCR-4(r5) + lfd fr0,THREAD_FPSCR(r5) mtfsf 0xff,fr0 REST_32FPRS(0, r5) #ifndef CONFIG_SMP @@ -104,7 +104,7 @@ CMPI 0,r5,0 SAVE_32FPRS(0, r3) mffs fr0 - stfd fr0,THREAD_FPSCR-4(r3) + stfd fr0,THREAD_FPSCR(r3) beq 1f LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5) li r3,MSR_FP|MSR_FE0|MSR_FE1 @@ -117,3 +117,28 @@ STL r5,OFF(last_task_used_math)(r4) #endif /* CONFIG_SMP */ blr + +/* + * These are used in the alignment trap handler when emulating + * single-precision loads and stores. + * We restore and save the fpscr so the task gets the same result + * and exceptions as if the cpu had performed the load or store. 
+ */ + +_GLOBAL(cvt_fd) + lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfs 0,0(r3) + stfd 0,0(r4) + mffs 0 + stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ + blr + +_GLOBAL(cvt_df) + lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ + mtfsf 0xff,0 + lfd 0,0(r3) + stfs 0,0(r4) + mffs 0 + stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ + blr Index: working-2.6/include/asm-powerpc/processor.h =================================================================== --- working-2.6.orig/include/asm-powerpc/processor.h 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/include/asm-powerpc/processor.h 2005-10-27 16:01:41.000000000 +1000 @@ -162,10 +162,11 @@ unsigned long dbcr1; #endif double fpr[32]; /* Complete floating point set */ -#ifdef CONFIG_PPC32 - unsigned long fpscr_pad; /* fpr ... fpscr must be contiguous */ -#endif - unsigned long fpscr; /* Floating point status */ + struct { /* fpr ... fpscr must be contiguous */ + + unsigned int pad; + unsigned int val; /* Floating point status */ + } fpscr; int fpexc_mode; /* floating-point exception mode */ #ifdef CONFIG_PPC64 unsigned long start_tb; /* Start purr when proc switched in */ @@ -207,7 +208,7 @@ .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .fs = KERNEL_DS, \ .fpr = {0}, \ - .fpscr = 0, \ + .fpscr = { .val = 0, }, \ .fpexc_mode = MSR_FE0|MSR_FE1, \ } #endif Index: working-2.6/arch/powerpc/kernel/Makefile =================================================================== --- working-2.6.orig/arch/powerpc/kernel/Makefile 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/Makefile 2005-10-27 16:01:41.000000000 +1000 @@ -29,7 +29,6 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-$(CONFIG_PPC64) += entry_64.o -extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds obj-y += process.o init_task.o time.o \ @@ -51,7 +50,7 @@ obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o extra-$(CONFIG_PPC64) += 
entry_64.o -fpux-$(CONFIG_PPC32) += fpu.o -extra-$(CONFIG_PPC_FPU) += $(fpux-y) endif + +extra-$(CONFIG_PPC_FPU) += fpu.o Index: working-2.6/arch/ppc64/Kconfig =================================================================== --- working-2.6.orig/arch/ppc64/Kconfig 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc64/Kconfig 2005-10-27 16:01:41.000000000 +1000 @@ -197,6 +197,9 @@ config POWER4 def_bool y +config PPC_FPU + def_bool y + config POWER4_ONLY bool "Optimize for POWER4" default n Index: working-2.6/arch/ppc64/Makefile =================================================================== --- working-2.6.orig/arch/ppc64/Makefile 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc64/Makefile 2005-10-27 16:01:41.000000000 +1000 @@ -81,6 +81,7 @@ head-y := arch/ppc64/kernel/head.o head-y += arch/powerpc/kernel/entry_64.o +head-y += arch/powerpc/kernel/fpu.o libs-y += arch/ppc64/lib/ core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ Index: working-2.6/arch/ppc64/kernel/head.S =================================================================== --- working-2.6.orig/arch/ppc64/kernel/head.S 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/head.S 2005-10-27 16:01:41.000000000 +1000 @@ -81,7 +81,7 @@ _GLOBAL(__start) /* NOP this out unconditionally */ BEGIN_FTR_SECTION - b .__start_initialization_multiplatform + b .__start_initialization_multiplatform END_FTR_SECTION(0, 1) #endif /* CONFIG_PPC_MULTIPLATFORM */ @@ -747,6 +747,7 @@ * any task or sent any task a signal, you should use * ret_from_except or ret_from_except_lite instead of this. */ + .globl fast_exception_return fast_exception_return: ld r12,_MSR(r1) ld r11,_NIP(r1) @@ -858,62 +859,6 @@ bl .kernel_fp_unavailable_exception BUG_OPCODE -/* - * load_up_fpu(unused, unused, tsk) - * Disable FP for the task which had the FPU previously, - * and save its floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. 
- * On SMP we know the fpu is free, since we give it up every - * switch (ie, no lazy save of the FP registers). - * On entry: r13 == 'current' && last_task_used_math != 'current' - */ -_STATIC(load_up_fpu) - mfmsr r5 /* grab the current MSR */ - ori r5,r5,MSR_FP - mtmsrd r5 /* enable use of fpu now */ - isync -/* - * For SMP, we don't do lazy FPU switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_fpu in switch_to. - * - */ -#ifndef CONFIG_SMP - ld r3,last_task_used_math at got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Save FP state to last_task_used_math's THREAD struct */ - addi r4,r4,THREAD - SAVE_32FPRS(0, r4) - mffs fr0 - stfd fr0,THREAD_FPSCR(r4) - /* Disable FP for last_task_used_math */ - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r6,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r6 - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* enable use of FP after return */ - ld r4,PACACURRENT(r13) - addi r5,r4,THREAD /* Get THREAD */ - ld r4,THREAD_FPEXC_MODE(r5) - ori r12,r12,MSR_FP - or r12,r12,r4 - std r12,_MSR(r1) - lfd fr0,THREAD_FPSCR(r5) - mtfsf 0xff,fr0 - REST_32FPRS(0, r5) -#ifndef CONFIG_SMP - /* Update last_task_used_math to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - std r4,0(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - b fast_exception_return - .align 7 .globl altivec_unavailable_common altivec_unavailable_common: Index: working-2.6/arch/ppc64/kernel/misc.S =================================================================== --- working-2.6.orig/arch/ppc64/kernel/misc.S 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/misc.S 2005-10-27 16:01:41.000000000 +1000 @@ -451,25 +451,6 @@ sync blr - -_GLOBAL(cvt_fd) - lfd 0,0(r5) /* load up fpscr value */ - mtfsf 0xff,0 - lfs 0,0(r3) - stfd 0,0(r4) - mffs 0 /* save new fpscr value */ - stfd 0,0(r5) - blr - -_GLOBAL(cvt_df) - 
lfd 0,0(r5) /* load up fpscr value */ - mtfsf 0xff,0 - lfd 0,0(r3) - stfs 0,0(r4) - mffs 0 /* save new fpscr value */ - stfd 0,0(r5) - blr - /* * identify_cpu and calls setup_cpu * In: r3 = base of the cpu_specs array @@ -655,38 +636,6 @@ isync blr -/* - * giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. - */ -_GLOBAL(giveup_fpu) - mfmsr r5 - ori r5,r5,MSR_FP - mtmsrd r5 /* enable use of fpu now */ - isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - SAVE_32FPRS(0, r3) - mffs fr0 - stfd fr0,THREAD_FPSCR(r3) - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r3 /* disable FP for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_math@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ - blr - #ifdef CONFIG_ALTIVEC #if 0 /* this has no callers for now */ Index: working-2.6/arch/powerpc/kernel/process.c =================================================================== --- working-2.6.orig/arch/powerpc/kernel/process.c 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/process.c 2005-10-27 16:01:41.000000000 +1000 @@ -665,7 +665,7 @@ #endif #endif /* CONFIG_SMP */ memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); - current->thread.fpscr = 0; + current->thread.fpscr.val = 0; #ifdef CONFIG_ALTIVEC memset(current->thread.vr, 0, sizeof(current->thread.vr)); memset(&current->thread.vscr, 0, sizeof(current->thread.vscr)); Index: working-2.6/arch/powerpc/kernel/signal_32.c =================================================================== --- working-2.6.orig/arch/powerpc/kernel/signal_32.c 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/signal_32.c 2005-10-27 16:01:41.000000000 +1000 @@ -403,7
+403,7 @@ ELF_NFPREG * sizeof(double))) return 1; - current->thread.fpscr = 0; /* turn off all fp exceptions */ + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ #ifdef CONFIG_ALTIVEC /* save altivec registers */ Index: working-2.6/arch/ppc/math-emu/sfp-machine.h =================================================================== --- working-2.6.orig/arch/ppc/math-emu/sfp-machine.h 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc/math-emu/sfp-machine.h 2005-10-27 16:01:41.000000000 +1000 @@ -166,7 +166,7 @@ #include #include -#define __FPU_FPSCR (current->thread.fpscr) +#define __FPU_FPSCR (current->thread.fpscr.val) /* We only actually write to the destination register * if exceptions signalled (if any) will not trap. Index: working-2.6/arch/ppc64/kernel/signal.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/signal.c 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/signal.c 2005-10-27 16:01:41.000000000 +1000 @@ -133,7 +133,7 @@ flush_fp_to_thread(current); /* Make sure signal doesn't get spurrious FP exceptions */ - current->thread.fpscr = 0; + current->thread.fpscr.val = 0; #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); Index: working-2.6/arch/ppc/kernel/process.c =================================================================== --- working-2.6.orig/arch/ppc/kernel/process.c 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc/kernel/process.c 2005-10-27 16:01:41.000000000 +1000 @@ -542,7 +542,7 @@ last_task_used_spe = NULL; #endif memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); - current->thread.fpscr = 0; + current->thread.fpscr.val = 0; #ifdef CONFIG_ALTIVEC memset(current->thread.vr, 0, sizeof(current->thread.vr)); memset(&current->thread.vscr, 0, sizeof(current->thread.vscr)); Index: working-2.6/arch/ppc/kernel/traps.c =================================================================== ---
working-2.6.orig/arch/ppc/kernel/traps.c 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc/kernel/traps.c 2005-10-27 16:01:41.000000000 +1000 @@ -659,7 +659,7 @@ giveup_fpu(current); preempt_enable(); - fpscr = current->thread.fpscr; + fpscr = current->thread.fpscr.val; fpscr &= fpscr << 22; /* mask summary bits with enables */ if (fpscr & FPSCR_VX) code = FPE_FLTINV; Index: working-2.6/arch/powerpc/kernel/traps.c =================================================================== --- working-2.6.orig/arch/powerpc/kernel/traps.c 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/traps.c 2005-10-27 16:01:41.000000000 +1000 @@ -549,7 +549,7 @@ flush_fp_to_thread(current); - fpscr = current->thread.fpscr; + fpscr = current->thread.fpscr.val; /* Invalid operation */ if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) Index: working-2.6/arch/powerpc/kernel/misc_32.S =================================================================== --- working-2.6.orig/arch/powerpc/kernel/misc_32.S 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/misc_32.S 2005-10-27 16:01:41.000000000 +1000 @@ -993,33 +993,6 @@ blr /* - * These are used in the alignment trap handler when emulating - * single-precision loads and stores. - * We restore and save the fpscr so the task gets the same result - * and exceptions as if the cpu had performed the load or store. 
- */ - -#ifdef CONFIG_PPC_FPU -_GLOBAL(cvt_fd) - lfd 0,-4(r5) /* load up fpscr value */ - mtfsf 0xff,0 - lfs 0,0(r3) - stfd 0,0(r4) - mffs 0 /* save new fpscr value */ - stfd 0,-4(r5) - blr - -_GLOBAL(cvt_df) - lfd 0,-4(r5) /* load up fpscr value */ - mtfsf 0xff,0 - lfd 0,0(r3) - stfs 0,0(r4) - mffs 0 /* save new fpscr value */ - stfd 0,-4(r5) - blr -#endif - -/* * Create a kernel thread * kernel_thread(fn, arg, flags) */ Index: working-2.6/arch/powerpc/kernel/misc_64.S =================================================================== --- working-2.6.orig/arch/powerpc/kernel/misc_64.S 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/powerpc/kernel/misc_64.S 2005-10-27 16:01:41.000000000 +1000 @@ -462,25 +462,6 @@ sync blr - -_GLOBAL(cvt_fd) - lfd 0,0(r5) /* load up fpscr value */ - mtfsf 0xff,0 - lfs 0,0(r3) - stfd 0,0(r4) - mffs 0 /* save new fpscr value */ - stfd 0,0(r5) - blr - -_GLOBAL(cvt_df) - lfd 0,0(r5) /* load up fpscr value */ - mtfsf 0xff,0 - lfd 0,0(r3) - stfs 0,0(r4) - mffs 0 /* save new fpscr value */ - stfd 0,0(r5) - blr - /* * identify_cpu and calls setup_cpu * In: r3 = base of the cpu_specs array Index: working-2.6/arch/ppc/kernel/align.c =================================================================== --- working-2.6.orig/arch/ppc/kernel/align.c 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc/kernel/align.c 2005-10-27 16:01:41.000000000 +1000 @@ -375,7 +375,7 @@ #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_fd(&data.f, &data.d, &current->thread.fpscr); + cvt_fd(&data.f, &data.d, &current->thread); preempt_enable(); #else return 0; @@ -385,7 +385,7 @@ #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_df(&data.d, &data.f, &current->thread.fpscr); + cvt_df(&data.d, &data.f, &current->thread); preempt_enable(); #else return 0; Index: working-2.6/arch/ppc/kernel/misc.S =================================================================== --- working-2.6.orig/arch/ppc/kernel/misc.S 2005-10-27 16:01:06.000000000
+1000 +++ working-2.6/arch/ppc/kernel/misc.S 2005-10-27 16:01:41.000000000 +1000 @@ -968,33 +968,6 @@ blr /* - * These are used in the alignment trap handler when emulating - * single-precision loads and stores. - * We restore and save the fpscr so the task gets the same result - * and exceptions as if the cpu had performed the load or store. - */ - -#ifdef CONFIG_PPC_FPU -_GLOBAL(cvt_fd) - lfd 0,-4(r5) /* load up fpscr value */ - mtfsf 0xff,0 - lfs 0,0(r3) - stfd 0,0(r4) - mffs 0 /* save new fpscr value */ - stfd 0,-4(r5) - blr - -_GLOBAL(cvt_df) - lfd 0,-4(r5) /* load up fpscr value */ - mtfsf 0xff,0 - lfd 0,0(r3) - stfs 0,0(r4) - mffs 0 /* save new fpscr value */ - stfd 0,-4(r5) - blr -#endif - -/* * Create a kernel thread * kernel_thread(fn, arg, flags) */ Index: working-2.6/arch/ppc64/kernel/align.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/align.c 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/align.c 2005-10-27 16:01:41.000000000 +1000 @@ -313,7 +313,7 @@ /* Doing stfs, have to convert to single */ preempt_disable(); enable_kernel_fp(); - cvt_df(&current->thread.fpr[reg], (float *)&data.v[4], &current->thread.fpscr); + cvt_df(&current->thread.fpr[reg], (float *)&data.v[4], &current->thread); disable_kernel_fp(); preempt_enable(); } @@ -349,7 +349,7 @@ /* Doing lfs, have to convert to double */ preempt_disable(); enable_kernel_fp(); - cvt_fd((float *)&data.v[4], &current->thread.fpr[reg], &current->thread.fpscr); + cvt_fd((float *)&data.v[4], &current->thread.fpr[reg], &current->thread); disable_kernel_fp(); preempt_enable(); } Index: working-2.6/include/asm-ppc/system.h =================================================================== --- working-2.6.orig/include/asm-ppc/system.h 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/include/asm-ppc/system.h 2005-10-27 16:01:41.000000000 +1000 @@ -82,8 +82,8 @@ extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); extern int
fix_alignment(struct pt_regs *); -extern void cvt_fd(float *from, double *to, unsigned long *fpscr); -extern void cvt_df(double *from, float *to, unsigned long *fpscr); +extern void cvt_fd(float *from, double *to, struct thread_struct *thread); +extern void cvt_df(double *from, float *to, struct thread_struct *thread); #ifdef CONFIG_ALTIVEC extern void flush_altivec_to_thread(struct task_struct *); Index: working-2.6/include/asm-ppc64/system.h =================================================================== --- working-2.6.orig/include/asm-ppc64/system.h 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/include/asm-ppc64/system.h 2005-10-27 16:01:41.000000000 +1000 @@ -120,8 +120,8 @@ extern void disable_kernel_altivec(void); extern void enable_kernel_altivec(void); extern int emulate_altivec(struct pt_regs *); -extern void cvt_fd(float *from, double *to, unsigned long *fpscr); -extern void cvt_df(double *from, float *to, unsigned long *fpscr); +extern void cvt_fd(float *from, double *to, struct thread_struct *thread); +extern void cvt_df(double *from, float *to, struct thread_struct *thread); #ifdef CONFIG_ALTIVEC extern void flush_altivec_to_thread(struct task_struct *); Index: working-2.6/include/asm-powerpc/system.h =================================================================== --- working-2.6.orig/include/asm-powerpc/system.h 2005-10-27 16:01:06.000000000 +1000 +++ working-2.6/include/asm-powerpc/system.h 2005-10-27 16:01:41.000000000 +1000 @@ -132,8 +132,8 @@ extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); extern int fix_alignment(struct pt_regs *); -extern void cvt_fd(float *from, double *to, unsigned long *fpscr); -extern void cvt_df(double *from, float *to, unsigned long *fpscr); +extern void cvt_fd(float *from, double *to, struct thread_struct *thread); +extern void cvt_df(double *from, float *to, struct thread_struct *thread); #ifdef CONFIG_ALTIVEC extern void flush_altivec_to_thread(struct 
task_struct *); -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From paulus at samba.org Thu Oct 27 20:53:15 2005 From: paulus at samba.org (Paul Mackerras) Date: Thu, 27 Oct 2005 20:53:15 +1000 Subject: [PATCH] powerpc: only build idle_6xx for 6xx In-Reply-To: References: Message-ID: <17248.45467.698310.551177@cargo.ozlabs.ibm.com> Kumar Gala writes: > For the current time idle_6xx only applies to 6xx ppc32 CPUs > +extra-$(CONFIG_6xx) += idle_6xx.o > -obj-$(CONFIG_PPC32) += entry_32.o idle_6xx.o setup_32.o misc_32.o > +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o Why does idle_6xx.o become an extra- rather than an obj- ? Paul. From kumar.gala at freescale.com Thu Oct 27 23:45:04 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Thu, 27 Oct 2005 08:45:04 -0500 Subject: [PATCH] powerpc: only build idle_6xx for 6xx In-Reply-To: <17248.45467.698310.551177@cargo.ozlabs.ibm.com> References: <17248.45467.698310.551177@cargo.ozlabs.ibm.com> Message-ID: <1C38E8D7-C93F-4524-B66A-55193B468B08@freescale.com> On Oct 27, 2005, at 5:53 AM, Paul Mackerras wrote: > Kumar Gala writes: > > >> For the current time idle_6xx only applies to 6xx ppc32 CPUs >> > > >> +extra-$(CONFIG_6xx) += idle_6xx.o >> > > >> -obj-$(CONFIG_PPC32) += entry_32.o idle_6xx.o setup_32.o >> misc_32.o >> +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o >> > > Why does idle_6xx.o become an extra- rather than an obj- ? Because I copied and pasted the line from arch/ppc/kernel/Makefile w/ o paying any attention to it. 
I'll send a proper patch :) (I'm really trying to see if you actually look at our patches or not :) - kumar From galak at freescale.com Thu Oct 27 23:54:36 2005 From: galak at freescale.com (Kumar Gala) Date: Thu, 27 Oct 2005 08:54:36 -0500 (CDT) Subject: [PATCH][UPDATED] powerpc: only build idle_6xx for 6xx Message-ID: For the current time idle_6xx only applies to 6xx ppc32 CPUs Signed-off-by: Kumar K. Gala --- commit e3b3bc6d075bd1d07caa64bcaa3add128a0cb6f3 tree e41cfdec6b3565258cb5bee14078a23f8be3b6ff parent 0f72dedad02fc2869ddc13ff2f6998ec7dfdcbc1 author Kumar K. Gala Thu, 27 Oct 2005 08:53:36 -0500 committer Kumar K. Gala Thu, 27 Oct 2005 08:53:36 -0500 arch/powerpc/kernel/Makefile | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -33,11 +33,12 @@ extra-y += vmlinux.lds obj-y += process.o init_task.o time.o \ prom.o systbl.o traps.o setup-common.o -obj-$(CONFIG_PPC32) += entry_32.o idle_6xx.o setup_32.o misc_32.o +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o obj-$(CONFIG_PPC64) += setup_64.o misc_64.o obj-$(CONFIG_PPC_OF) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o +obj-$(CONFIG_6xx) += idle_6xx.o ifeq ($(CONFIG_PPC_ISERIES),y) $(obj)/head_64.o: $(obj)/lparmap.s From arnd at arndb.de Fri Oct 28 12:39:43 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:43 -0400 Subject: [patch 00/13] spufs snapshot against 2.6.14-rc5 Message-ID: <20051028023943.505038000@localhost> It's time for another development snapshot of spufs. We have a lot of bug fixes against the previous development release from September 16th. 
I'm planning to do another version of the first six patches against the arch/powerpc merge tree after I've moved over arch/ppc64/kernel/bpa_* to arch/powerpc/platforms/cell (yes, I haven't forgotten about that) and the merge tree makes its way into -mm. Aside from this, this whole set of patches will become the base for another kernel package on http://www.bsc.es/projects/deepcomputing/linuxoncell/ together with a matching libspe. The libspe interface has now been stabilized for version 1.0 and will be maintained in a compatible way, unlike the previous libspe releases. The interface between libspe and the kernel will become stable as soon as it's merged into the mainline kernel (before that it can't be stable for obvious reasons), but at least I'm not planning to do any incompatible interface changes to it at this point. Please tell me ASAP if there are still concerns about the kernel interface. Arnd <>< From arnd at arndb.de Fri Oct 28 12:39:46 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:46 -0400 Subject: [patch 03/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025531.927338000@localhost> An embedded and charset-unspecified text was scrubbed... Name: spufs-context-part2-4.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/06171100/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:50 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:50 -0400 Subject: [patch 07/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025532.601578000@localhost> An embedded and charset-unspecified text was scrubbed...
Name: bpa-random-hacks-3.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/4a40c7ca/attachment.txt From arnd at arndb.de Fri Oct 28 07:12:56 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Thu, 27 Oct 2005 23:12:56 +0200 Subject: libspe for 2.6.14-rc5 spufs snapshot In-Reply-To: <20051028023943.505038000@localhost> References: <20051028023943.505038000@localhost> Message-ID: <200510272312.57007.arnd@arndb.de> This is the current snapshot of Dirk Herrendoerfers libspe, using the spufs interfaces from the patch set. After a series of incompatible versions, we will now maintain compatibility with the user interfaces in this version, and only serious bug fixes are included before the 1.0 release. The most significant features in this version are the spe_get_event call that allows using the interrupt mailbox through official interfaces and the addition of the c99 standard library call interface that lets SPU programs do callbacks to the host user space for many functions that could not be implemented before due to missing system call support on the SPU side itself. Arnd <>< -------------- next part -------------- A non-text attachment was scrubbed... Name: libspe-1.0pre1.tar.gz Type: application/x-tgz Size: 34116 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/00441421/attachment.bin From arnd at arndb.de Fri Oct 28 12:39:44 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:44 -0400 Subject: [patch 01/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025531.577537000@localhost> An embedded and charset-unspecified text was scrubbed... 
Name: spufs-12.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/37658209/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:52 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:52 -0400 Subject: [patch 09/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025532.930221000@localhost> An embedded and charset-unspecified text was scrubbed... Name: bpa_be_dd1.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/90d0c421/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:45 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:45 -0400 Subject: [patch 02/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025531.756322000@localhost> An embedded and charset-unspecified text was scrubbed... Name: spufs-context-4.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/e4163683/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:48 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:48 -0400 Subject: [patch 05/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025532.271422000@localhost> An embedded and charset-unspecified text was scrubbed... Name: spufs-scheduler-2.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/f80f93fb/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:53 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:53 -0400 Subject: [patch 10/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025533.088841000@localhost> An embedded and charset-unspecified text was scrubbed... 
Name: rtascons-3.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/959608d5/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:55 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:55 -0400 Subject: [patch 12/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025533.415802000@localhost> An embedded and charset-unspecified text was scrubbed... Name: cell-defconfigs-2.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/0ed651c1/attachment.txt From arnd at arndb.de Fri Oct 28 07:27:44 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 17:27:44 -0400 Subject: [patch 04/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <200510272327.45154.arnd@arndb.de> An embedded and charset-unspecified text was scrubbed... Name: not available Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/264b1e33/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:51 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:51 -0400 Subject: [patch 08/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025532.769080000@localhost> An embedded and charset-unspecified text was scrubbed... Name: spufs-sparsemem-extreme-2.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/a85375a4/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:54 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:54 -0400 Subject: [patch 11/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025533.252450000@localhost> An embedded and charset-unspecified text was scrubbed... 
Name: ma-mambo-support-2.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/9982ca94/attachment.txt From arnd at arndb.de Fri Oct 28 07:17:01 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Thu, 27 Oct 2005 23:17:01 +0200 Subject: man pages for 2.6.14-rc5 spufs In-Reply-To: <20051028023943.505038000@localhost> References: <20051028023943.505038000@localhost> Message-ID: <200510272317.01902.arnd@arndb.de> These are three man pages describing the current kernel interface used in the spufs release. The spufs(7) man page tells about the specific files in the spu file system and how to interact with them. The spu_run(2) and spu_create(2) man pages describe the two newly introduced system calls. Arnd <>< -------------- next part -------------- A non-text attachment was scrubbed... Name: spu_create.2 Type: application/x-troff Size: 3748 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/74cbb092/attachment.tr -------------- next part -------------- A non-text attachment was scrubbed... Name: spu_run.2 Type: application/x-troff Size: 3775 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/74cbb092/attachment-0001.tr -------------- next part -------------- A non-text attachment was scrubbed... Name: spufs.7 Type: application/x-troff Size: 10498 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/74cbb092/attachment-0002.tr From arnd at arndb.de Fri Oct 28 12:39:49 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:49 -0400 Subject: [patch 06/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025532.442891000@localhost> An embedded and charset-unspecified text was scrubbed... 
Name: bpa-pmd-add-2.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/60a025e2/attachment.txt From arnd at arndb.de Fri Oct 28 12:39:56 2005 From: arnd at arndb.de (arnd at arndb.de) Date: Thu, 27 Oct 2005 22:39:56 -0400 Subject: [patch 13/13] spufs snapshot against 2.6.14-rc5 References: <20051028023943.505038000@localhost> Message-ID: <20051028025533.577051000@localhost> An embedded and charset-unspecified text was scrubbed... Name: simulator-bogus-console.diff Url: http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051027/539ede47/attachment.txt From david at gibson.dropbear.id.au Fri Oct 28 15:35:50 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Fri, 28 Oct 2005 15:35:50 +1000 Subject: powerpc: Move xics.[ch] into platforms/pseries Message-ID: <20051028053550.GA21589@localhost.localdomain> Paulus, please apply to the merge tree. This patch moves the XICS interrupt controller code into the platforms/pseries directory, since it only appears on pSeries machines. If it ever appears on some other machine we can move it to sysdev, although xics.c itself will need a bunch of changes in that case to remove pSeries specific assumptions. 
Signed-off-by: David Gibson Index: working-2.6/arch/powerpc/platforms/pseries/Makefile =================================================================== --- working-2.6.orig/arch/powerpc/platforms/pseries/Makefile 2005-10-27 16:44:55.000000000 +1000 +++ working-2.6/arch/powerpc/platforms/pseries/Makefile 2005-10-28 15:03:53.000000000 +1000 @@ -2,3 +2,4 @@ setup.o iommu.o rtas-fw.o ras.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_XICS) += xics.o Index: working-2.6/arch/powerpc/platforms/pseries/setup.c =================================================================== --- working-2.6.orig/arch/powerpc/platforms/pseries/setup.c 2005-10-27 16:44:55.000000000 +1000 +++ working-2.6/arch/powerpc/platforms/pseries/setup.c 2005-10-28 15:03:53.000000000 +1000 @@ -59,7 +59,7 @@ #include #include #include -#include +#include "xics.h" #include #include #include Index: working-2.6/arch/powerpc/platforms/pseries/smp.c =================================================================== --- working-2.6.orig/arch/powerpc/platforms/pseries/smp.c 2005-10-27 16:44:55.000000000 +1000 +++ working-2.6/arch/powerpc/platforms/pseries/smp.c 2005-10-28 15:03:53.000000000 +1000 @@ -39,7 +39,7 @@ #include #include #include -#include +#include "xics.h" #include #include #include Index: working-2.6/arch/ppc64/kernel/Makefile =================================================================== --- working-2.6.orig/arch/ppc64/kernel/Makefile 2005-10-28 14:50:40.000000000 +1000 +++ working-2.6/arch/ppc64/kernel/Makefile 2005-10-28 15:03:53.000000000 +1000 @@ -53,8 +53,6 @@ endif obj-$(CONFIG_HVCS) += hvcserver.o -obj-$(CONFIG_XICS) += xics.o - obj-$(CONFIG_PPC_PMAC) += udbg_scc.o obj-$(CONFIG_PPC_MAPLE) += maple_setup.o maple_pci.o maple_time.o \ Index: working-2.6/arch/powerpc/platforms/pseries/xics.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 
working-2.6/arch/powerpc/platforms/pseries/xics.c 2005-10-28 15:05:23.000000000 +1000 @@ -0,0 +1,747 @@ +/* + * arch/powerpc/platforms/pseries/xics.c + * + * Copyright 2000 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xics.h" + +static unsigned int xics_startup(unsigned int irq); +static void xics_enable_irq(unsigned int irq); +static void xics_disable_irq(unsigned int irq); +static void xics_mask_and_ack_irq(unsigned int irq); +static void xics_end_irq(unsigned int irq); +static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask); + +static struct hw_interrupt_type xics_pic = { + .typename = " XICS ", + .startup = xics_startup, + .enable = xics_enable_irq, + .disable = xics_disable_irq, + .ack = xics_mask_and_ack_irq, + .end = xics_end_irq, + .set_affinity = xics_set_affinity +}; + +static struct hw_interrupt_type xics_8259_pic = { + .typename = " XICS/8259", + .ack = xics_mask_and_ack_irq, +}; + +/* This is used to map real irq numbers to virtual */ +static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC); + +#define XICS_IPI 2 +#define XICS_IRQ_SPURIOUS 0 + +/* Want a priority other than 0. Various HW issues require this. 
*/ +#define DEFAULT_PRIORITY 5 + +/* + * Mark IPIs as higher priority so we can take them inside interrupts that + * arent marked SA_INTERRUPT + */ +#define IPI_PRIORITY 4 + +struct xics_ipl { + union { + u32 word; + u8 bytes[4]; + } xirr_poll; + union { + u32 word; + u8 bytes[4]; + } xirr; + u32 dummy; + union { + u32 word; + u8 bytes[4]; + } qirr; +}; + +static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; + +static int xics_irq_8259_cascade = 0; +static int xics_irq_8259_cascade_real = 0; +static unsigned int default_server = 0xFF; +static unsigned int default_distrib_server = 0; +static unsigned int interrupt_server_size = 8; + +/* + * XICS only has a single IPI, so encode the messages per CPU + */ +struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; + +/* RTAS service tokens */ +static int ibm_get_xive; +static int ibm_set_xive; +static int ibm_int_on; +static int ibm_int_off; + +typedef struct { + int (*xirr_info_get)(int cpu); + void (*xirr_info_set)(int cpu, int val); + void (*cppr_info)(int cpu, u8 val); + void (*qirr_info)(int cpu, u8 val); +} xics_ops; + + +/* SMP */ + +static int pSeries_xirr_info_get(int n_cpu) +{ + return in_be32(&xics_per_cpu[n_cpu]->xirr.word); +} + +static void pSeries_xirr_info_set(int n_cpu, int value) +{ + out_be32(&xics_per_cpu[n_cpu]->xirr.word, value); +} + +static void pSeries_cppr_info(int n_cpu, u8 value) +{ + out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value); +} + +static void pSeries_qirr_info(int n_cpu, u8 value) +{ + out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); +} + +static xics_ops pSeries_ops = { + pSeries_xirr_info_get, + pSeries_xirr_info_set, + pSeries_cppr_info, + pSeries_qirr_info +}; + +static xics_ops *ops = &pSeries_ops; + + +/* LPAR */ + +static inline long plpar_eoi(unsigned long xirr) +{ + return plpar_hcall_norets(H_EOI, xirr); +} + +static inline long plpar_cppr(unsigned long cppr) +{ + return plpar_hcall_norets(H_CPPR, cppr); +} + +static inline long plpar_ipi(unsigned long 
servernum, unsigned long mfrr) +{ + return plpar_hcall_norets(H_IPI, servernum, mfrr); +} + +static inline long plpar_xirr(unsigned long *xirr_ret) +{ + unsigned long dummy; + return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy); +} + +static int pSeriesLP_xirr_info_get(int n_cpu) +{ + unsigned long lpar_rc; + unsigned long return_value; + + lpar_rc = plpar_xirr(&return_value); + if (lpar_rc != H_Success) + panic(" bad return code xirr - rc = %lx \n", lpar_rc); + return (int)return_value; +} + +static void pSeriesLP_xirr_info_set(int n_cpu, int value) +{ + unsigned long lpar_rc; + unsigned long val64 = value & 0xffffffff; + + lpar_rc = plpar_eoi(val64); + if (lpar_rc != H_Success) + panic("bad return code EOI - rc = %ld, value=%lx\n", lpar_rc, + val64); +} + +void pSeriesLP_cppr_info(int n_cpu, u8 value) +{ + unsigned long lpar_rc; + + lpar_rc = plpar_cppr(value); + if (lpar_rc != H_Success) + panic("bad return code cppr - rc = %lx\n", lpar_rc); +} + +static void pSeriesLP_qirr_info(int n_cpu , u8 value) +{ + unsigned long lpar_rc; + + lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value); + if (lpar_rc != H_Success) + panic("bad return code qirr - rc = %lx\n", lpar_rc); +} + +xics_ops pSeriesLP_ops = { + pSeriesLP_xirr_info_get, + pSeriesLP_xirr_info_set, + pSeriesLP_cppr_info, + pSeriesLP_qirr_info +}; + +static unsigned int xics_startup(unsigned int virq) +{ + unsigned int irq; + + irq = irq_offset_down(virq); + if (radix_tree_insert(&irq_map, virt_irq_to_real(irq), + &virt_irq_to_real_map[irq]) == -ENOMEM) + printk(KERN_CRIT "Out of memory creating real -> virtual" + " IRQ mapping for irq %u (real 0x%x)\n", + virq, virt_irq_to_real(irq)); + xics_enable_irq(virq); + return 0; /* return value is ignored */ +} + +static unsigned int real_irq_to_virt(unsigned int real_irq) +{ + unsigned int *ptr; + + ptr = radix_tree_lookup(&irq_map, real_irq); + if (ptr == NULL) + return NO_IRQ; + return ptr - virt_irq_to_real_map; +} + +#ifdef CONFIG_SMP 
+static int get_irq_server(unsigned int irq) +{ + unsigned int server; + /* For the moment only implement delivery to all cpus or one cpu */ + cpumask_t cpumask = irq_affinity[irq]; + cpumask_t tmp = CPU_MASK_NONE; + + if (!distribute_irqs) + return default_server; + + if (cpus_equal(cpumask, CPU_MASK_ALL)) { + server = default_distrib_server; + } else { + cpus_and(tmp, cpu_online_map, cpumask); + + if (cpus_empty(tmp)) + server = default_distrib_server; + else + server = get_hard_smp_processor_id(first_cpu(tmp)); + } + + return server; + +} +#else +static int get_irq_server(unsigned int irq) +{ + return default_server; +} +#endif + +static void xics_enable_irq(unsigned int virq) +{ + unsigned int irq; + int call_status; + unsigned int server; + + irq = virt_irq_to_real(irq_offset_down(virq)); + if (irq == XICS_IPI) + return; + + server = get_irq_server(virq); + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, + DEFAULT_PRIORITY); + if (call_status != 0) { + printk(KERN_ERR "xics_enable_irq: irq=%u: ibm_set_xive " + "returned %d\n", irq, call_status); + printk("set_xive %x, server %x\n", ibm_set_xive, server); + return; + } + + /* Now unmask the interrupt (often a no-op) */ + call_status = rtas_call(ibm_int_on, 1, 1, NULL, irq); + if (call_status != 0) { + printk(KERN_ERR "xics_enable_irq: irq=%u: ibm_int_on " + "returned %d\n", irq, call_status); + return; + } +} + +static void xics_disable_real_irq(unsigned int irq) +{ + int call_status; + unsigned int server; + + if (irq == XICS_IPI) + return; + + call_status = rtas_call(ibm_int_off, 1, 1, NULL, irq); + if (call_status != 0) { + printk(KERN_ERR "xics_disable_real_irq: irq=%u: " + "ibm_int_off returned %d\n", irq, call_status); + return; + } + + server = get_irq_server(irq); + /* Have to set XIVE to 0xff to be able to remove a slot */ + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 0xff); + if (call_status != 0) { + printk(KERN_ERR "xics_disable_irq: irq=%u: ibm_set_xive(0xff)" + " 
returned %d\n", irq, call_status); + return; + } +} + +static void xics_disable_irq(unsigned int virq) +{ + unsigned int irq; + + irq = virt_irq_to_real(irq_offset_down(virq)); + xics_disable_real_irq(irq); +} + +static void xics_end_irq(unsigned int irq) +{ + int cpu = smp_processor_id(); + + iosync(); + ops->xirr_info_set(cpu, ((0xff << 24) | + (virt_irq_to_real(irq_offset_down(irq))))); + +} + +static void xics_mask_and_ack_irq(unsigned int irq) +{ + int cpu = smp_processor_id(); + + if (irq < irq_offset_value()) { + i8259_pic.ack(irq); + iosync(); + ops->xirr_info_set(cpu, ((0xff<<24) | + xics_irq_8259_cascade_real)); + iosync(); + } +} + +int xics_get_irq(struct pt_regs *regs) +{ + unsigned int cpu = smp_processor_id(); + unsigned int vec; + int irq; + + vec = ops->xirr_info_get(cpu); + /* (vec >> 24) == old priority */ + vec &= 0x00ffffff; + + /* for sanity, this had better be < NR_IRQS - 16 */ + if (vec == xics_irq_8259_cascade_real) { + irq = i8259_irq(regs); + if (irq == -1) { + /* Spurious cascaded interrupt. 
Still must ack xics */ + xics_end_irq(irq_offset_up(xics_irq_8259_cascade)); + + irq = -1; + } + } else if (vec == XICS_IRQ_SPURIOUS) { + irq = -1; + } else { + irq = real_irq_to_virt(vec); + if (irq == NO_IRQ) + irq = real_irq_to_virt_slowpath(vec); + if (irq == NO_IRQ) { + printk(KERN_ERR "Interrupt %u (real) is invalid," + " disabling it.\n", vec); + xics_disable_real_irq(vec); + } else + irq = irq_offset_up(irq); + } + return irq; +} + +#ifdef CONFIG_SMP + +irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + ops->qirr_info(cpu, 0xff); + + WARN_ON(cpu_is_offline(cpu)); + + while (xics_ipi_message[cpu].value) { + if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, + &xics_ipi_message[cpu].value)) { + mb(); + smp_message_recv(PPC_MSG_CALL_FUNCTION, regs); + } + if (test_and_clear_bit(PPC_MSG_RESCHEDULE, + &xics_ipi_message[cpu].value)) { + mb(); + smp_message_recv(PPC_MSG_RESCHEDULE, regs); + } +#if 0 + if (test_and_clear_bit(PPC_MSG_MIGRATE_TASK, + &xics_ipi_message[cpu].value)) { + mb(); + smp_message_recv(PPC_MSG_MIGRATE_TASK, regs); + } +#endif +#ifdef CONFIG_DEBUGGER + if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, + &xics_ipi_message[cpu].value)) { + mb(); + smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs); + } +#endif + } + return IRQ_HANDLED; +} + +void xics_cause_IPI(int cpu) +{ + ops->qirr_info(cpu, IPI_PRIORITY); +} +#endif /* CONFIG_SMP */ + +void xics_setup_cpu(void) +{ + int cpu = smp_processor_id(); + + ops->cppr_info(cpu, 0xff); + iosync(); + + /* + * Put the calling processor into the GIQ. This is really only + * necessary from a secondary thread as the OF start-cpu interface + * performs this function for us on primary threads. 
+ * + * XXX: undo of teardown on kexec needs this too, as may hotplug + */ + rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); +} + +void xics_init_IRQ(void) +{ + int i; + unsigned long intr_size = 0; + struct device_node *np; + uint *ireg, ilen, indx = 0; + unsigned long intr_base = 0; + struct xics_interrupt_node { + unsigned long addr; + unsigned long size; + } intnodes[NR_CPUS]; + + ppc64_boot_msg(0x20, "XICS Init"); + + ibm_get_xive = rtas_token("ibm,get-xive"); + ibm_set_xive = rtas_token("ibm,set-xive"); + ibm_int_on = rtas_token("ibm,int-on"); + ibm_int_off = rtas_token("ibm,int-off"); + + np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation"); + if (!np) + panic("xics_init_IRQ: can't find interrupt presentation"); + +nextnode: + ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL); + if (ireg) { + /* + * set node starting index for this node + */ + indx = *ireg; + } + + ireg = (uint *)get_property(np, "reg", &ilen); + if (!ireg) + panic("xics_init_IRQ: can't find interrupt reg property"); + + while (ilen) { + intnodes[indx].addr = (unsigned long)*ireg++ << 32; + ilen -= sizeof(uint); + intnodes[indx].addr |= *ireg++; + ilen -= sizeof(uint); + intnodes[indx].size = (unsigned long)*ireg++ << 32; + ilen -= sizeof(uint); + intnodes[indx].size |= *ireg++; + ilen -= sizeof(uint); + indx++; + if (indx >= NR_CPUS) break; + } + + np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation"); + if ((indx < NR_CPUS) && np) goto nextnode; + + /* Find the server numbers for the boot cpu. 
*/ + for (np = of_find_node_by_type(NULL, "cpu"); + np; + np = of_find_node_by_type(np, "cpu")) { + ireg = (uint *)get_property(np, "reg", &ilen); + if (ireg && ireg[0] == boot_cpuid_phys) { + ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", + &ilen); + i = ilen / sizeof(int); + if (ireg && i > 0) { + default_server = ireg[0]; + default_distrib_server = ireg[i-1]; /* take last element */ + } + ireg = (uint *)get_property(np, + "ibm,interrupt-server#-size", NULL); + if (ireg) + interrupt_server_size = *ireg; + break; + } + } + of_node_put(np); + + intr_base = intnodes[0].addr; + intr_size = intnodes[0].size; + + np = of_find_node_by_type(NULL, "interrupt-controller"); + if (!np) { + printk(KERN_WARNING "xics: no ISA interrupt controller\n"); + xics_irq_8259_cascade_real = -1; + xics_irq_8259_cascade = -1; + } else { + ireg = (uint *) get_property(np, "interrupts", NULL); + if (!ireg) + panic("xics_init_IRQ: can't find ISA interrupts property"); + + xics_irq_8259_cascade_real = *ireg; + xics_irq_8259_cascade + = virt_irq_create_mapping(xics_irq_8259_cascade_real); + of_node_put(np); + } + + if (systemcfg->platform == PLATFORM_PSERIES) { +#ifdef CONFIG_SMP + for_each_cpu(i) { + int hard_id; + + /* FIXME: Do this dynamically! 
--RR */ + if (!cpu_present(i)) + continue; + + hard_id = get_hard_smp_processor_id(i); + xics_per_cpu[i] = ioremap(intnodes[hard_id].addr, + intnodes[hard_id].size); + } +#else + xics_per_cpu[0] = ioremap(intr_base, intr_size); +#endif /* CONFIG_SMP */ + } else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) { + ops = &pSeriesLP_ops; + } + + xics_8259_pic.enable = i8259_pic.enable; + xics_8259_pic.disable = i8259_pic.disable; + for (i = 0; i < 16; ++i) + get_irq_desc(i)->handler = &xics_8259_pic; + for (; i < NR_IRQS; ++i) + get_irq_desc(i)->handler = &xics_pic; + + xics_setup_cpu(); + + ppc64_boot_msg(0x21, "XICS Done"); +} + +/* + * We cant do this in init_IRQ because we need the memory subsystem up for + * request_irq() + */ +static int __init xics_setup_i8259(void) +{ + if (ppc64_interrupt_controller == IC_PPC_XIC && + xics_irq_8259_cascade != -1) { + if (request_irq(irq_offset_up(xics_irq_8259_cascade), + no_action, 0, "8259 cascade", NULL)) + printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 " + "cascade\n"); + i8259_init(0, 0); + } + return 0; +} +arch_initcall(xics_setup_i8259); + +#ifdef CONFIG_SMP +void xics_request_IPIs(void) +{ + virt_irq_to_real_map[XICS_IPI] = XICS_IPI; + + /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */ + request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT, + "IPI", NULL); + get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU; +} +#endif + +static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) +{ + unsigned int irq; + int status; + int xics_status[2]; + unsigned long newmask; + cpumask_t tmp = CPU_MASK_NONE; + + irq = virt_irq_to_real(irq_offset_down(virq)); + if (irq == XICS_IPI || irq == NO_IRQ) + return; + + status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); + + if (status) { + printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive " + "returns %d\n", irq, status); + return; + } + + /* For the moment only implement delivery to all cpus or one cpu */ + if 
(cpus_equal(cpumask, CPU_MASK_ALL)) { + newmask = default_distrib_server; + } else { + cpus_and(tmp, cpu_online_map, cpumask); + if (cpus_empty(tmp)) + return; + newmask = get_hard_smp_processor_id(first_cpu(tmp)); + } + + status = rtas_call(ibm_set_xive, 3, 1, NULL, + irq, newmask, xics_status[1]); + + if (status) { + printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive " + "returns %d\n", irq, status); + return; + } +} + +void xics_teardown_cpu(int secondary) +{ + int cpu = smp_processor_id(); + + ops->cppr_info(cpu, 0x00); + iosync(); + + /* + * Some machines need to have at least one cpu in the GIQ, + * so leave the master cpu in the group. + */ + if (secondary) { + /* + * we need to EOI the IPI if we got here from kexec down IPI + * + * probably need to check all the other interrupts too + * should we be flagging idle loop instead? + * or creating some task to be scheduled? + */ + ops->xirr_info_set(cpu, XICS_IPI); + rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - + default_distrib_server, 0); + } +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* Interrupts are disabled. */ +void xics_migrate_irqs_away(void) +{ + int status; + unsigned int irq, virq, cpu = smp_processor_id(); + + /* Reject any interrupt that was queued to us... */ + ops->cppr_info(cpu, 0); + iosync(); + + /* remove ourselves from the global interrupt queue */ + status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 0); + WARN_ON(status < 0); + + /* Allow IPIs again... */ + ops->cppr_info(cpu, DEFAULT_PRIORITY); + iosync(); + + for_each_irq(virq) { + irq_desc_t *desc; + int xics_status[2]; + unsigned long flags; + + /* We cant set affinity on ISA interrupts */ + if (virq < irq_offset_value()) + continue; + + desc = get_irq_desc(virq); + irq = virt_irq_to_real(irq_offset_down(virq)); + + /* We need to get IPIs still. 
*/ + if (irq == XICS_IPI || irq == NO_IRQ) + continue; + + /* We only need to migrate enabled IRQS */ + if (desc == NULL || desc->handler == NULL + || desc->action == NULL + || desc->handler->set_affinity == NULL) + continue; + + spin_lock_irqsave(&desc->lock, flags); + + status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); + if (status) { + printk(KERN_ERR "migrate_irqs_away: irq=%u " + "ibm,get-xive returns %d\n", + virq, status); + goto unlock; + } + + /* + * We only support delivery to all cpus or to one cpu. + * The irq has to be migrated only in the single cpu + * case. + */ + if (xics_status[0] != get_hard_smp_processor_id(cpu)) + goto unlock; + + printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n", + virq, cpu); + + /* Reset affinity to all cpus */ + desc->handler->set_affinity(virq, CPU_MASK_ALL); + irq_affinity[virq] = CPU_MASK_ALL; +unlock: + spin_unlock_irqrestore(&desc->lock, flags); + } +} +#endif Index: working-2.6/arch/powerpc/platforms/pseries/xics.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ working-2.6/arch/powerpc/platforms/pseries/xics.h 2005-10-28 15:03:53.000000000 +1000 @@ -0,0 +1,34 @@ +/* + * arch/powerpc/platforms/pseries/xics.h + * + * Copyright 2000 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _POWERPC_KERNEL_XICS_H +#define _POWERPC_KERNEL_XICS_H + +#include + +void xics_init_IRQ(void); +int xics_get_irq(struct pt_regs *); +void xics_setup_cpu(void); +void xics_teardown_cpu(int secondary); +void xics_cause_IPI(int cpu); +void xics_request_IPIs(void); +void xics_migrate_irqs_away(void); + +/* first argument is ignored for now*/ +void pSeriesLP_cppr_info(int n_cpu, u8 value); + +struct xics_ipi_struct { + volatile unsigned long value; +} ____cacheline_aligned; + +extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; + +#endif /* _POWERPC_KERNEL_XICS_H */ Index: working-2.6/arch/ppc64/kernel/xics.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/xics.c 2005-10-28 14:50:40.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,746 +0,0 @@ -/* - * arch/ppc64/kernel/xics.c - * - * Copyright 2000 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int xics_startup(unsigned int irq); -static void xics_enable_irq(unsigned int irq); -static void xics_disable_irq(unsigned int irq); -static void xics_mask_and_ack_irq(unsigned int irq); -static void xics_end_irq(unsigned int irq); -static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask); - -static struct hw_interrupt_type xics_pic = { - .typename = " XICS ", - .startup = xics_startup, - .enable = xics_enable_irq, - .disable = xics_disable_irq, - .ack = xics_mask_and_ack_irq, - .end = xics_end_irq, - .set_affinity = xics_set_affinity -}; - -static struct hw_interrupt_type xics_8259_pic = { - .typename = " XICS/8259", - .ack = xics_mask_and_ack_irq, -}; - -/* This is used to map real irq numbers to virtual */ -static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC); - -#define XICS_IPI 2 -#define XICS_IRQ_SPURIOUS 0 - -/* Want a priority other than 0. Various HW issues require this. 
*/ -#define DEFAULT_PRIORITY 5 - -/* - * Mark IPIs as higher priority so we can take them inside interrupts that - * arent marked SA_INTERRUPT - */ -#define IPI_PRIORITY 4 - -struct xics_ipl { - union { - u32 word; - u8 bytes[4]; - } xirr_poll; - union { - u32 word; - u8 bytes[4]; - } xirr; - u32 dummy; - union { - u32 word; - u8 bytes[4]; - } qirr; -}; - -static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; - -static int xics_irq_8259_cascade = 0; -static int xics_irq_8259_cascade_real = 0; -static unsigned int default_server = 0xFF; -static unsigned int default_distrib_server = 0; -static unsigned int interrupt_server_size = 8; - -/* - * XICS only has a single IPI, so encode the messages per CPU - */ -struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; - -/* RTAS service tokens */ -static int ibm_get_xive; -static int ibm_set_xive; -static int ibm_int_on; -static int ibm_int_off; - -typedef struct { - int (*xirr_info_get)(int cpu); - void (*xirr_info_set)(int cpu, int val); - void (*cppr_info)(int cpu, u8 val); - void (*qirr_info)(int cpu, u8 val); -} xics_ops; - - -/* SMP */ - -static int pSeries_xirr_info_get(int n_cpu) -{ - return in_be32(&xics_per_cpu[n_cpu]->xirr.word); -} - -static void pSeries_xirr_info_set(int n_cpu, int value) -{ - out_be32(&xics_per_cpu[n_cpu]->xirr.word, value); -} - -static void pSeries_cppr_info(int n_cpu, u8 value) -{ - out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value); -} - -static void pSeries_qirr_info(int n_cpu, u8 value) -{ - out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); -} - -static xics_ops pSeries_ops = { - pSeries_xirr_info_get, - pSeries_xirr_info_set, - pSeries_cppr_info, - pSeries_qirr_info -}; - -static xics_ops *ops = &pSeries_ops; - - -/* LPAR */ - -static inline long plpar_eoi(unsigned long xirr) -{ - return plpar_hcall_norets(H_EOI, xirr); -} - -static inline long plpar_cppr(unsigned long cppr) -{ - return plpar_hcall_norets(H_CPPR, cppr); -} - -static inline long plpar_ipi(unsigned long 
servernum, unsigned long mfrr) -{ - return plpar_hcall_norets(H_IPI, servernum, mfrr); -} - -static inline long plpar_xirr(unsigned long *xirr_ret) -{ - unsigned long dummy; - return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy); -} - -static int pSeriesLP_xirr_info_get(int n_cpu) -{ - unsigned long lpar_rc; - unsigned long return_value; - - lpar_rc = plpar_xirr(&return_value); - if (lpar_rc != H_Success) - panic(" bad return code xirr - rc = %lx \n", lpar_rc); - return (int)return_value; -} - -static void pSeriesLP_xirr_info_set(int n_cpu, int value) -{ - unsigned long lpar_rc; - unsigned long val64 = value & 0xffffffff; - - lpar_rc = plpar_eoi(val64); - if (lpar_rc != H_Success) - panic("bad return code EOI - rc = %ld, value=%lx\n", lpar_rc, - val64); -} - -void pSeriesLP_cppr_info(int n_cpu, u8 value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_cppr(value); - if (lpar_rc != H_Success) - panic("bad return code cppr - rc = %lx\n", lpar_rc); -} - -static void pSeriesLP_qirr_info(int n_cpu , u8 value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value); - if (lpar_rc != H_Success) - panic("bad return code qirr - rc = %lx\n", lpar_rc); -} - -xics_ops pSeriesLP_ops = { - pSeriesLP_xirr_info_get, - pSeriesLP_xirr_info_set, - pSeriesLP_cppr_info, - pSeriesLP_qirr_info -}; - -static unsigned int xics_startup(unsigned int virq) -{ - unsigned int irq; - - irq = irq_offset_down(virq); - if (radix_tree_insert(&irq_map, virt_irq_to_real(irq), - &virt_irq_to_real_map[irq]) == -ENOMEM) - printk(KERN_CRIT "Out of memory creating real -> virtual" - " IRQ mapping for irq %u (real 0x%x)\n", - virq, virt_irq_to_real(irq)); - xics_enable_irq(virq); - return 0; /* return value is ignored */ -} - -static unsigned int real_irq_to_virt(unsigned int real_irq) -{ - unsigned int *ptr; - - ptr = radix_tree_lookup(&irq_map, real_irq); - if (ptr == NULL) - return NO_IRQ; - return ptr - virt_irq_to_real_map; -} - -#ifdef CONFIG_SMP 
-static int get_irq_server(unsigned int irq) -{ - unsigned int server; - /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t cpumask = irq_affinity[irq]; - cpumask_t tmp = CPU_MASK_NONE; - - if (!distribute_irqs) - return default_server; - - if (cpus_equal(cpumask, CPU_MASK_ALL)) { - server = default_distrib_server; - } else { - cpus_and(tmp, cpu_online_map, cpumask); - - if (cpus_empty(tmp)) - server = default_distrib_server; - else - server = get_hard_smp_processor_id(first_cpu(tmp)); - } - - return server; - -} -#else -static int get_irq_server(unsigned int irq) -{ - return default_server; -} -#endif - -static void xics_enable_irq(unsigned int virq) -{ - unsigned int irq; - int call_status; - unsigned int server; - - irq = virt_irq_to_real(irq_offset_down(virq)); - if (irq == XICS_IPI) - return; - - server = get_irq_server(virq); - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, - DEFAULT_PRIORITY); - if (call_status != 0) { - printk(KERN_ERR "xics_enable_irq: irq=%u: ibm_set_xive " - "returned %d\n", irq, call_status); - printk("set_xive %x, server %x\n", ibm_set_xive, server); - return; - } - - /* Now unmask the interrupt (often a no-op) */ - call_status = rtas_call(ibm_int_on, 1, 1, NULL, irq); - if (call_status != 0) { - printk(KERN_ERR "xics_enable_irq: irq=%u: ibm_int_on " - "returned %d\n", irq, call_status); - return; - } -} - -static void xics_disable_real_irq(unsigned int irq) -{ - int call_status; - unsigned int server; - - if (irq == XICS_IPI) - return; - - call_status = rtas_call(ibm_int_off, 1, 1, NULL, irq); - if (call_status != 0) { - printk(KERN_ERR "xics_disable_real_irq: irq=%u: " - "ibm_int_off returned %d\n", irq, call_status); - return; - } - - server = get_irq_server(irq); - /* Have to set XIVE to 0xff to be able to remove a slot */ - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 0xff); - if (call_status != 0) { - printk(KERN_ERR "xics_disable_irq: irq=%u: ibm_set_xive(0xff)" - " 
returned %d\n", irq, call_status); - return; - } -} - -static void xics_disable_irq(unsigned int virq) -{ - unsigned int irq; - - irq = virt_irq_to_real(irq_offset_down(virq)); - xics_disable_real_irq(irq); -} - -static void xics_end_irq(unsigned int irq) -{ - int cpu = smp_processor_id(); - - iosync(); - ops->xirr_info_set(cpu, ((0xff << 24) | - (virt_irq_to_real(irq_offset_down(irq))))); - -} - -static void xics_mask_and_ack_irq(unsigned int irq) -{ - int cpu = smp_processor_id(); - - if (irq < irq_offset_value()) { - i8259_pic.ack(irq); - iosync(); - ops->xirr_info_set(cpu, ((0xff<<24) | - xics_irq_8259_cascade_real)); - iosync(); - } -} - -int xics_get_irq(struct pt_regs *regs) -{ - unsigned int cpu = smp_processor_id(); - unsigned int vec; - int irq; - - vec = ops->xirr_info_get(cpu); - /* (vec >> 24) == old priority */ - vec &= 0x00ffffff; - - /* for sanity, this had better be < NR_IRQS - 16 */ - if (vec == xics_irq_8259_cascade_real) { - irq = i8259_irq(regs); - if (irq == -1) { - /* Spurious cascaded interrupt. 
Still must ack xics */ - xics_end_irq(irq_offset_up(xics_irq_8259_cascade)); - - irq = -1; - } - } else if (vec == XICS_IRQ_SPURIOUS) { - irq = -1; - } else { - irq = real_irq_to_virt(vec); - if (irq == NO_IRQ) - irq = real_irq_to_virt_slowpath(vec); - if (irq == NO_IRQ) { - printk(KERN_ERR "Interrupt %u (real) is invalid," - " disabling it.\n", vec); - xics_disable_real_irq(vec); - } else - irq = irq_offset_up(irq); - } - return irq; -} - -#ifdef CONFIG_SMP - -irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) -{ - int cpu = smp_processor_id(); - - ops->qirr_info(cpu, 0xff); - - WARN_ON(cpu_is_offline(cpu)); - - while (xics_ipi_message[cpu].value) { - if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, - &xics_ipi_message[cpu].value)) { - mb(); - smp_message_recv(PPC_MSG_CALL_FUNCTION, regs); - } - if (test_and_clear_bit(PPC_MSG_RESCHEDULE, - &xics_ipi_message[cpu].value)) { - mb(); - smp_message_recv(PPC_MSG_RESCHEDULE, regs); - } -#if 0 - if (test_and_clear_bit(PPC_MSG_MIGRATE_TASK, - &xics_ipi_message[cpu].value)) { - mb(); - smp_message_recv(PPC_MSG_MIGRATE_TASK, regs); - } -#endif -#ifdef CONFIG_DEBUGGER - if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, - &xics_ipi_message[cpu].value)) { - mb(); - smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs); - } -#endif - } - return IRQ_HANDLED; -} - -void xics_cause_IPI(int cpu) -{ - ops->qirr_info(cpu, IPI_PRIORITY); -} -#endif /* CONFIG_SMP */ - -void xics_setup_cpu(void) -{ - int cpu = smp_processor_id(); - - ops->cppr_info(cpu, 0xff); - iosync(); - - /* - * Put the calling processor into the GIQ. This is really only - * necessary from a secondary thread as the OF start-cpu interface - * performs this function for us on primary threads. 
- * - * XXX: undo of teardown on kexec needs this too, as may hotplug - */ - rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); -} - -void xics_init_IRQ(void) -{ - int i; - unsigned long intr_size = 0; - struct device_node *np; - uint *ireg, ilen, indx = 0; - unsigned long intr_base = 0; - struct xics_interrupt_node { - unsigned long addr; - unsigned long size; - } intnodes[NR_CPUS]; - - ppc64_boot_msg(0x20, "XICS Init"); - - ibm_get_xive = rtas_token("ibm,get-xive"); - ibm_set_xive = rtas_token("ibm,set-xive"); - ibm_int_on = rtas_token("ibm,int-on"); - ibm_int_off = rtas_token("ibm,int-off"); - - np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation"); - if (!np) - panic("xics_init_IRQ: can't find interrupt presentation"); - -nextnode: - ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL); - if (ireg) { - /* - * set node starting index for this node - */ - indx = *ireg; - } - - ireg = (uint *)get_property(np, "reg", &ilen); - if (!ireg) - panic("xics_init_IRQ: can't find interrupt reg property"); - - while (ilen) { - intnodes[indx].addr = (unsigned long)*ireg++ << 32; - ilen -= sizeof(uint); - intnodes[indx].addr |= *ireg++; - ilen -= sizeof(uint); - intnodes[indx].size = (unsigned long)*ireg++ << 32; - ilen -= sizeof(uint); - intnodes[indx].size |= *ireg++; - ilen -= sizeof(uint); - indx++; - if (indx >= NR_CPUS) break; - } - - np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation"); - if ((indx < NR_CPUS) && np) goto nextnode; - - /* Find the server numbers for the boot cpu. 
*/ - for (np = of_find_node_by_type(NULL, "cpu"); - np; - np = of_find_node_by_type(np, "cpu")) { - ireg = (uint *)get_property(np, "reg", &ilen); - if (ireg && ireg[0] == boot_cpuid_phys) { - ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", - &ilen); - i = ilen / sizeof(int); - if (ireg && i > 0) { - default_server = ireg[0]; - default_distrib_server = ireg[i-1]; /* take last element */ - } - ireg = (uint *)get_property(np, - "ibm,interrupt-server#-size", NULL); - if (ireg) - interrupt_server_size = *ireg; - break; - } - } - of_node_put(np); - - intr_base = intnodes[0].addr; - intr_size = intnodes[0].size; - - np = of_find_node_by_type(NULL, "interrupt-controller"); - if (!np) { - printk(KERN_WARNING "xics: no ISA interrupt controller\n"); - xics_irq_8259_cascade_real = -1; - xics_irq_8259_cascade = -1; - } else { - ireg = (uint *) get_property(np, "interrupts", NULL); - if (!ireg) - panic("xics_init_IRQ: can't find ISA interrupts property"); - - xics_irq_8259_cascade_real = *ireg; - xics_irq_8259_cascade - = virt_irq_create_mapping(xics_irq_8259_cascade_real); - of_node_put(np); - } - - if (systemcfg->platform == PLATFORM_PSERIES) { -#ifdef CONFIG_SMP - for_each_cpu(i) { - int hard_id; - - /* FIXME: Do this dynamically! 
--RR */ - if (!cpu_present(i)) - continue; - - hard_id = get_hard_smp_processor_id(i); - xics_per_cpu[i] = ioremap(intnodes[hard_id].addr, - intnodes[hard_id].size); - } -#else - xics_per_cpu[0] = ioremap(intr_base, intr_size); -#endif /* CONFIG_SMP */ - } else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) { - ops = &pSeriesLP_ops; - } - - xics_8259_pic.enable = i8259_pic.enable; - xics_8259_pic.disable = i8259_pic.disable; - for (i = 0; i < 16; ++i) - get_irq_desc(i)->handler = &xics_8259_pic; - for (; i < NR_IRQS; ++i) - get_irq_desc(i)->handler = &xics_pic; - - xics_setup_cpu(); - - ppc64_boot_msg(0x21, "XICS Done"); -} - -/* - * We cant do this in init_IRQ because we need the memory subsystem up for - * request_irq() - */ -static int __init xics_setup_i8259(void) -{ - if (ppc64_interrupt_controller == IC_PPC_XIC && - xics_irq_8259_cascade != -1) { - if (request_irq(irq_offset_up(xics_irq_8259_cascade), - no_action, 0, "8259 cascade", NULL)) - printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 " - "cascade\n"); - i8259_init(0, 0); - } - return 0; -} -arch_initcall(xics_setup_i8259); - -#ifdef CONFIG_SMP -void xics_request_IPIs(void) -{ - virt_irq_to_real_map[XICS_IPI] = XICS_IPI; - - /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */ - request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT, - "IPI", NULL); - get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU; -} -#endif - -static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) -{ - unsigned int irq; - int status; - int xics_status[2]; - unsigned long newmask; - cpumask_t tmp = CPU_MASK_NONE; - - irq = virt_irq_to_real(irq_offset_down(virq)); - if (irq == XICS_IPI || irq == NO_IRQ) - return; - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); - - if (status) { - printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive " - "returns %d\n", irq, status); - return; - } - - /* For the moment only implement delivery to all cpus or one cpu */ - if 
(cpus_equal(cpumask, CPU_MASK_ALL)) { - newmask = default_distrib_server; - } else { - cpus_and(tmp, cpu_online_map, cpumask); - if (cpus_empty(tmp)) - return; - newmask = get_hard_smp_processor_id(first_cpu(tmp)); - } - - status = rtas_call(ibm_set_xive, 3, 1, NULL, - irq, newmask, xics_status[1]); - - if (status) { - printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive " - "returns %d\n", irq, status); - return; - } -} - -void xics_teardown_cpu(int secondary) -{ - int cpu = smp_processor_id(); - - ops->cppr_info(cpu, 0x00); - iosync(); - - /* - * Some machines need to have at least one cpu in the GIQ, - * so leave the master cpu in the group. - */ - if (secondary) { - /* - * we need to EOI the IPI if we got here from kexec down IPI - * - * probably need to check all the other interrupts too - * should we be flagging idle loop instead? - * or creating some task to be scheduled? - */ - ops->xirr_info_set(cpu, XICS_IPI); - rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - - default_distrib_server, 0); - } -} - -#ifdef CONFIG_HOTPLUG_CPU - -/* Interrupts are disabled. */ -void xics_migrate_irqs_away(void) -{ - int status; - unsigned int irq, virq, cpu = smp_processor_id(); - - /* Reject any interrupt that was queued to us... */ - ops->cppr_info(cpu, 0); - iosync(); - - /* remove ourselves from the global interrupt queue */ - status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - default_distrib_server, 0); - WARN_ON(status < 0); - - /* Allow IPIs again... */ - ops->cppr_info(cpu, DEFAULT_PRIORITY); - iosync(); - - for_each_irq(virq) { - irq_desc_t *desc; - int xics_status[2]; - unsigned long flags; - - /* We cant set affinity on ISA interrupts */ - if (virq < irq_offset_value()) - continue; - - desc = get_irq_desc(virq); - irq = virt_irq_to_real(irq_offset_down(virq)); - - /* We need to get IPIs still. 
*/ - if (irq == XICS_IPI || irq == NO_IRQ) - continue; - - /* We only need to migrate enabled IRQS */ - if (desc == NULL || desc->handler == NULL - || desc->action == NULL - || desc->handler->set_affinity == NULL) - continue; - - spin_lock_irqsave(&desc->lock, flags); - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); - if (status) { - printk(KERN_ERR "migrate_irqs_away: irq=%u " - "ibm,get-xive returns %d\n", - virq, status); - goto unlock; - } - - /* - * We only support delivery to all cpus or to one cpu. - * The irq has to be migrated only in the single cpu - * case. - */ - if (xics_status[0] != get_hard_smp_processor_id(cpu)) - goto unlock; - - printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n", - virq, cpu); - - /* Reset affinity to all cpus */ - desc->handler->set_affinity(virq, CPU_MASK_ALL); - irq_affinity[virq] = CPU_MASK_ALL; -unlock: - spin_unlock_irqrestore(&desc->lock, flags); - } -} -#endif Index: working-2.6/include/asm-ppc64/xics.h =================================================================== --- working-2.6.orig/include/asm-ppc64/xics.h 2005-10-25 11:59:59.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -/* - * arch/ppc64/kernel/xics.h - * - * Copyright 2000 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _PPC64_KERNEL_XICS_H -#define _PPC64_KERNEL_XICS_H - -#include - -void xics_init_IRQ(void); -int xics_get_irq(struct pt_regs *); -void xics_setup_cpu(void); -void xics_teardown_cpu(int secondary); -void xics_cause_IPI(int cpu); -void xics_request_IPIs(void); -void xics_migrate_irqs_away(void); - -/* first argument is ignored for now*/ -void pSeriesLP_cppr_info(int n_cpu, u8 value); - -struct xics_ipi_struct { - volatile unsigned long value; -} ____cacheline_aligned; - -extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; - -#endif /* _PPC64_KERNEL_XICS_H */ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From mikey at neuling.org Fri Oct 28 17:47:33 2005 From: mikey at neuling.org (Michael Neuling) Date: Fri, 28 Oct 2005 17:47:33 +1000 Subject: [PATCH 0/2] Fix legacy drivers for remove io_page_mask patch Message-ID: <20051028174733.a9ff2b1e.mikey@neuling.org> Hi, These two patches fix the legacy parallel port and PC speaker drivers when using Anton's io_page_mask patch from: http://ozlabs.org/pipermail/linuxppc64-dev/2005-May/003922.html http://ozlabs.org/pipermail/linuxppc64-dev/2005-May/004182.html Anton's patch is necessary for running kexec with some e1000 revisions. The issue we had is that the reset is not being sent to the e1000 correctly, resulting in it still running during the second boot. 
Mikey From mikey at neuling.org Fri Oct 28 17:55:40 2005 From: mikey at neuling.org (Michael Neuling) Date: Fri, 28 Oct 2005 17:55:40 +1000 Subject: [PATCH 1/2] Parallel port init fix In-Reply-To: <20051028174733.a9ff2b1e.mikey@neuling.org> References: <20051028174733.a9ff2b1e.mikey@neuling.org> Message-ID: <20051028175540.27b1e580.mikey@neuling.org> Fixes init for the parallel port on ppc64 Signed-off-by: Michael Neuling -- drivers/input/misc/pcspkr.c | 5 +++++ include/asm-powerpc/8253pit.h | 13 +++++++++++++ 2 files changed, 18 insertions(+) Index: linux-2.6/drivers/input/misc/pcspkr.c =================================================================== --- linux-2.6.orig/drivers/input/misc/pcspkr.c 2005-10-28 16:53:53.000000000 +1000 +++ linux-2.6/drivers/input/misc/pcspkr.c 2005-10-28 17:25:41.000000000 +1000 @@ -68,6 +68,11 @@ static int __init pcspkr_init(void) { +#ifdef HAS_PCSPKR_ARCH_INIT + int rc = pcspkr_arch_init(); + if (rc) + return rc; +#endif pcspkr_dev.evbit[0] = BIT(EV_SND); pcspkr_dev.sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE); pcspkr_dev.event = pcspkr_event; Index: linux-2.6/include/asm-powerpc/8253pit.h =================================================================== --- linux-2.6.orig/include/asm-powerpc/8253pit.h 2005-10-28 16:53:53.000000000 +1000 +++ linux-2.6/include/asm-powerpc/8253pit.h 2005-10-28 16:57:37.000000000 +1000 @@ -5,6 +5,19 @@ * 8253/8254 Programmable Interval Timer */ +#include + #define PIT_TICK_RATE 1193182UL +#define HAS_PCSPKR_ARCH_INIT + +static inline int pcspkr_arch_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "pnpPNP,100"); + of_node_put(np); + return np ? 
0 : -ENODEV; +} + #endif /* _ASM_POWERPC_8253PIT_H */ From mikey at neuling.org Fri Oct 28 17:57:50 2005 From: mikey at neuling.org (Michael Neuling) Date: Fri, 28 Oct 2005 17:57:50 +1000 Subject: [PATCH 2/2] PC speaker init fix In-Reply-To: <20051028174733.a9ff2b1e.mikey@neuling.org> References: <20051028174733.a9ff2b1e.mikey@neuling.org> Message-ID: <20051028175750.bd0ac1d4.mikey@neuling.org> Fixes init for the PC speaker on ppc64 Signed-off-by: Michael Neuling --- include/asm-ppc64/parport.h | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) Index: linux-2.6/include/asm-ppc64/parport.h =================================================================== --- linux-2.6.orig/include/asm-ppc64/parport.h 2005-10-26 16:50:32.000000000 +1000 +++ linux-2.6/include/asm-ppc64/parport.h 2005-10-27 17:07:04.000000000 +1000 @@ -9,10 +9,34 @@ #ifndef _ASM_PPC64_PARPORT_H #define _ASM_PPC64_PARPORT_H -static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma); +#include + +extern struct parport *parport_pc_probe_port (unsigned long int base, + unsigned long int base_hi, + int irq, int dma, + struct pci_dev *dev); + static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) { - return parport_pc_find_isa_ports (autoirq, autodma); + struct device_node *np; + u32 *prop; + u32 io1, io2; + int propsize; + int count = 0; + for (np = NULL; (np = of_find_compatible_node(np, + "parallel", + "pnpPNP,400")) != NULL;) { + prop = (u32 *)get_property(np, "reg", &propsize); + if (!prop || propsize > 6*sizeof(u32)) + continue; + io1 = prop[1]; io2 = prop[2]; + prop = (u32 *)get_property(np, "interrupts", NULL); + if (!prop) + continue; + if (parport_pc_probe_port(io1, io2, prop[0], autodma, NULL) != NULL) + count++; + } + return count; } #endif /* !(_ASM_PPC_PARPORT_H) */ From paulus at samba.org Fri Oct 28 23:05:25 2005 From: paulus at samba.org (Paul Mackerras) Date: Fri, 28 Oct 2005 23:05:25 +1000 Subject: Patches for 2.6.15 
Message-ID: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> If anyone has patches for arch/ppc{,64} and include/asm-ppc{,64} that they would like to see go upstream now that 2.6.14 is out, other than patches that are already in the powerpc-merge tree, please let me know. I am planning to ask Linus to pull the powerpc-merge tree shortly, and that will probably break your patches. I think the merge tree is looking pretty good, although the merge is by no means complete yet. The powermac, pseries and iseries platforms seem to be working fine with ARCH=powerpc. 32-bit chrp is mostly there but needs a bit more work. If possible, I'd like to get to the point where we can remove arch/ppc64 entirely by the end of the 2-week merge window for 2.6.15. Paul. From cfriesen at nortel.com Sat Oct 29 02:00:48 2005 From: cfriesen at nortel.com (Christopher Friesen) Date: Fri, 28 Oct 2005 10:00:48 -0600 Subject: Patches for 2.6.15 In-Reply-To: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> Message-ID: <43624B30.20600@nortel.com> Paul Mackerras wrote: > If possible, I'd like to get to the point where we can remove > arch/ppc64 entirely by the end of the 2-week merge window for 2.6.15. I haven't been following this seriously, so forgive me if this has already been asked. Under the unified architecture, what is the machine type in the "uname" output?
Chris From moilanen at austin.ibm.com Sat Oct 29 02:55:09 2005 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Fri, 28 Oct 2005 11:55:09 -0500 Subject: [PATCH] VMX get_user w/ irq disabled Message-ID: <20051028115509.1bb23cb6.moilanen@austin.ibm.com> During some VMX stress runs, this was seen on a RHEL4 U2 kernel: Oct 9 04:03:11 ist-6 kernel: Debug: sleeping function called from invalid context at arch/ppc64/kernel/vecemu.c:266 Oct 9 04:03:11 ist-6 kernel: in_atomic():0[expected: 0], irqs_disabled():1 Oct 9 04:03:11 ist-6 kernel: Call Trace: Oct 9 04:03:11 ist-6 kernel: [c000000061503be0] [c00000000005a87c] .__might_sleep+0xcc/0xec (unreliable) Oct 9 04:03:11 ist-6 kernel: [c000000061503c80] [c0000000000470cc] .emulate_altivec+0x44/0x430 Oct 9 04:03:11 ist-6 kernel: [c000000061503d30] [c000000000012af8] .AltivecAssistException+0x5c/0x120 Looks like we have a get_user() call with interrupts disabled. While I haven't seen the problem, I believe we have the same hole in mainline. The patch below fixed the problem on Redhat (rebased at 2.6.14). 
Thanks, Jake Signed-off-by: Jake Moilanen Index: 2.6.14/arch/ppc64/kernel/vecemu.c =================================================================== --- 2.6.14.orig/arch/ppc64/kernel/vecemu.c 2005-03-02 01:37:30.000000000 -0600 +++ 2.6.14/arch/ppc64/kernel/vecemu.c 2005-10-28 10:18:51.203485713 -0500 @@ -263,7 +263,7 @@ unsigned int va, vb, vc, vd; vector128 *vrs; - if (get_user(instr, (unsigned int __user *) regs->nip)) + if (__copy_from_user_inatomic(&instr, (unsigned int *) regs->nip, sizeof(unsigned int *))) return -EFAULT; if ((instr >> 26) != 4) return -EINVAL; /* not an altivec instruction */ From schwab at suse.de Sat Oct 29 03:54:58 2005 From: schwab at suse.de (Andreas Schwab) Date: Fri, 28 Oct 2005 19:54:58 +0200 Subject: Patches for 2.6.15 In-Reply-To: <43624B30.20600@nortel.com> (Christopher Friesen's message of "Fri, 28 Oct 2005 10:00:48 -0600") References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <43624B30.20600@nortel.com> Message-ID: "Christopher Friesen" writes: > Under the unified architecture, what is the machine type in the "uname" > outout? I don't think that should change in any way. Andreas. -- Andreas Schwab, SuSE Labs, schwab at suse.de SuSE Linux Products GmbH, Maxfeldstra?e 5, 90409 N?rnberg, Germany Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 "And now for something completely different." 
From mporter at kernel.crashing.org Sat Oct 29 03:30:41 2005 From: mporter at kernel.crashing.org (Matt Porter) Date: Fri, 28 Oct 2005 10:30:41 -0700 Subject: Patches for 2.6.15 In-Reply-To: <17250.8725.358204.62510@cargo.ozlabs.ibm.com>; from paulus@samba.org on Fri, Oct 28, 2005 at 11:05:25PM +1000 References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> Message-ID: <20051028103041.B15268@cox.net> On Fri, Oct 28, 2005 at 11:05:25PM +1000, Paul Mackerras wrote: > If anyone has patches for arch/ppc{,64} and include/asm-ppc{,64} that > they would like to see go upstream now that 2.6.14 is out, other than > patches that are already in the powerpc-merge tree, please let me > know. I am planning to ask Linus to pull the powerpc-merge tree > shortly, and that will probably break your patches. Ok, we have a set of 4xx patches that I plan to send to Andrew. They are some existing 4xx SoC/board updates as well as a new SoC/board. They are obviously mostly confined to the 4xx code paths but there's likely conflicts in changes to Makefiles, etc. Would you prefer these going upstream before or after the powerpc-merge pull? -Matt From kumar.gala at freescale.com Sat Oct 29 04:41:11 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Fri, 28 Oct 2005 13:41:11 -0500 Subject: Patches for 2.6.15 In-Reply-To: <43624B30.20600@nortel.com> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <43624B30.20600@nortel.com> Message-ID: On Oct 28, 2005, at 11:00 AM, Christopher Friesen wrote: > Paul Mackerras wrote: > > >> If possible, I'd like to get to the point where we can remove >> arch/ppc64 entirely by the end of the 2-week merge window for 2.6.15. >> > > I haven't been following this seriously, so forgive me if this has > already been asked. > > Under the unified architecture, what is the machine type in the > "uname" outout? 
When building the merge tree for a 32-bit cpu uname spits out: Linux fred 2.6.14-rc5-g278144ed #9 Thu Oct 27 09:20:01 CDT 2005 ppc unknown - kumar From kumar.gala at freescale.com Sat Oct 29 04:44:36 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Fri, 28 Oct 2005 13:44:36 -0500 Subject: Patches for 2.6.15 In-Reply-To: <20051028103041.B15268@cox.net> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <20051028103041.B15268@cox.net> Message-ID: On Oct 28, 2005, at 12:30 PM, Matt Porter wrote: > On Fri, Oct 28, 2005 at 11:05:25PM +1000, Paul Mackerras wrote: > >> If anyone has patches for arch/ppc{,64} and include/asm-ppc{,64} that >> they would like to see go upstream now that 2.6.14 is out, other than >> patches that are already in the powerpc-merge tree, please let me >> know. I am planning to ask Linus to pull the powerpc-merge tree >> shortly, and that will probably break your patches. >> > > Ok, we have a set of 4xx patches that I plan to send to Andrew. > They are some existing 4xx SoC/board updates as well as a new > SoC/board. They are obviously mostly confined to the 4xx code paths > but there's likely conflicts in changes to Makefiles, etc. > > Would you prefer these going upstream before or after the > powerpc-merge pull? Can we ask Andrew to flush any ppc patches in -mm to linus before we have Linus pull the merge tree. 
The following are patches that should probably go to linus before the merge tree that exist in 2.6.14-rc5-mm1: (I may have missed some, but these were the obvious ones) +ppc32-85xx-phy-platform-update.patch +ppc32-ppc_sys-fixes-for-8xx-and-82xx.patch ppc32 updates +various-powerpc-32bit-ppc64-build-fixes.patch +ppc64-reenable-make-install-with-defconfig.patch +ppc64-change-name-of-target-file-during-make-install.patch +ppc64-remove-duplicate-local-variable-in-set_preferred_console.patch - kumar From kumar.gala at freescale.com Sat Oct 29 04:49:26 2005 From: kumar.gala at freescale.com (Kumar Gala) Date: Fri, 28 Oct 2005 13:49:26 -0500 Subject: Patches for 2.6.15 In-Reply-To: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> Message-ID: On Oct 28, 2005, at 8:05 AM, Paul Mackerras wrote: > If anyone has patches for arch/ppc{,64} and include/asm-ppc{,64} that > they would like to see go upstream now that 2.6.14 is out, other than > patches that are already in the powerpc-merge tree, please let me > know. I am planning to ask Linus to pull the powerpc-merge tree > shortly, and that will probably break your patches. > > I think the merge tree is looking pretty good, although the merge is > by no means complete yet. The powermac, pseries and iseries platforms > seem to be working fine with ARCH=powerpc. 32-bit chrp is mostly > there but needs a bit more work. > > If possible, I'd like to get to the point where we can remove > arch/ppc64 entirely by the end of the 2-week merge window for 2.6.15.
Can you merge this in: http://patchwork.ozlabs.org/linuxppc/patch?id=2931 - kumar From akpm at osdl.org Sat Oct 29 06:02:46 2005 From: akpm at osdl.org (Andrew Morton) Date: Fri, 28 Oct 2005 13:02:46 -0700 Subject: Patches for 2.6.15 In-Reply-To: References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <20051028103041.B15268@cox.net> Message-ID: <20051028130246.459f1e9a.akpm@osdl.org> Kumar Gala wrote: > > Can we ask Andrew to flush any ppc patches in -mm to linus before we > have Linus pull the merge tree. Well I'd prefer that ;) Paul? From moilanen at austin.ibm.com Sat Oct 29 06:00:35 2005 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Fri, 28 Oct 2005 15:00:35 -0500 Subject: [PATCH 0/2] PPC64 setup for TPM Message-ID: <20051028150035.3d1da846.moilanen@austin.ibm.com> Trusted Platform Module (TPM) is set up differently from other traditional devices. TPM is supposed to be a memory device. TPM pulls its memory location from the /tpm device-node. This memory location could be in the low 64K IO space (ISA space). Thus PPC64 needs to allow a device to register a low ISA address through a specific call to punch a hole in io_page_mask (since it's memory, it will not be picked up in the PCI probe). This should be used until the io_page_mask can be safely removed. These patches do two things: 1.) Allow a specific call to punch a hole into io_page_mask for ISA addresses. 2.) Export the physical address of the base IO address, so TPM can interpret the device-tree memory location correctly. Jake From moilanen at austin.ibm.com Sat Oct 29 06:05:03 2005 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Fri, 28 Oct 2005 15:05:03 -0500 Subject: [PATCH 1/2] Allow ISA address In-Reply-To: <20051028150035.3d1da846.moilanen@austin.ibm.com> References: <20051028150035.3d1da846.moilanen@austin.ibm.com> Message-ID: <20051028150503.398ba65e.moilanen@austin.ibm.com> This exports the functionality of punching a hole into io_page_mask for other callers.
Signed-off-by: Jake Moilanen -- Index: 2.6.14/arch/ppc64/kernel/pci.c =================================================================== --- 2.6.14.orig/arch/ppc64/kernel/pci.c 2005-10-28 09:54:55.199422205 -0500 +++ 2.6.14/arch/ppc64/kernel/pci.c 2005-10-28 14:24:04.325502096 -0500 @@ -1107,6 +1107,26 @@ return 0; } + +/* Allow IO access to pages that are in the ISA range */ +void __devinit allow_isa_address(unsigned long start, unsigned end) +{ + unsigned long mask; + + if (start < MAX_ISA_PORT) { + if (end > MAX_ISA_PORT) + end = MAX_ISA_PORT; + + start >>= PAGE_SHIFT; + end >>= PAGE_SHIFT; + + /* get the range of pages for the map */ + mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1); + io_page_mask |= mask; + } +} +EXPORT_SYMBOL(allow_isa_address); + static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); @@ -1118,19 +1138,8 @@ start = res->start += offset; end = res->end += offset; - /* Need to allow IO access to pages that are in the - ISA range */ - if (start < MAX_ISA_PORT) { - if (end > MAX_ISA_PORT) - end = MAX_ISA_PORT; - - start >>= PAGE_SHIFT; - end >>= PAGE_SHIFT; - - /* get the range of pages for the map */ - mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1); - io_page_mask |= mask; - } + allow_isa_address(start, end); + } else if (res->flags & IORESOURCE_MEM) { res->start += hose->pci_mem_offset; res->end += hose->pci_mem_offset; Index: 2.6.14/include/asm-ppc64/pci.h =================================================================== --- 2.6.14.orig/include/asm-ppc64/pci.h 2005-10-28 09:55:01.897151985 -0500 +++ 2.6.14/include/asm-ppc64/pci.h 2005-10-28 14:21:26.932903810 -0500 @@ -172,6 +172,8 @@ unsigned long size, pgprot_t prot); +extern void __devinit allow_isa_address(unsigned long start, unsigned end); + #ifdef CONFIG_PPC_MULTIPLATFORM #define HAVE_ARCH_PCI_RESOURCE_TO_USER extern void pci_resource_to_user(const struct pci_dev *dev, int bar, From moilanen at 
austin.ibm.com Sat Oct 29 06:08:04 2005 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Fri, 28 Oct 2005 15:08:04 -0500 Subject: [PATCH 2/2] Export Physical IO base address In-Reply-To: <20051028150035.3d1da846.moilanen@austin.ibm.com> References: <20051028150035.3d1da846.moilanen@austin.ibm.com> Message-ID: <20051028150804.73b5cedb.moilanen@austin.ibm.com> This patch exports the physical IO base address so drivers can pick it up when using addresses from the device-tree. Signed-off-by: Jake Moilanen -- Index: 2.6.14/arch/ppc64/kernel/pci.c =================================================================== --- 2.6.14.orig/arch/ppc64/kernel/pci.c 2005-10-28 14:21:26.931904010 -0500 +++ 2.6.14/arch/ppc64/kernel/pci.c 2005-10-28 14:21:57.883394096 -0500 @@ -71,6 +71,8 @@ EXPORT_SYMBOL(isa_io_base); unsigned long pci_io_base; EXPORT_SYMBOL(pci_io_base); +unsigned long pci_io_base_phys; +EXPORT_SYMBOL(pci_io_base_phys); void iSeries_pcibios_init(void); @@ -967,6 +969,7 @@ if (primary) { pci_io_base = (unsigned long)hose->io_base_virt; + pci_io_base_phys = (unsigned long)hose->io_base_phys; isa_dn = of_find_node_by_type(NULL, "isa"); if (isa_dn) { isa_io_base = pci_io_base; Index: 2.6.14/include/asm-ppc64/io.h =================================================================== --- 2.6.14.orig/include/asm-ppc64/io.h 2005-10-28 09:55:01.882154994 -0500 +++ 2.6.14/include/asm-ppc64/io.h 2005-10-28 14:21:57.884393895 -0500 @@ -33,6 +33,7 @@ extern unsigned long isa_io_base; extern unsigned long pci_io_base; +extern unsigned long pci_io_base_phys; extern unsigned long io_page_mask; #define MAX_ISA_PORT 0x10000 From olof at lixom.net Sat Oct 29 06:13:47 2005 From: olof at lixom.net (Olof Johansson) Date: Fri, 28 Oct 2005 15:13:47 -0500 Subject: [PATCH 2/2] Export Physical IO base address In-Reply-To: <20051028150804.73b5cedb.moilanen@austin.ibm.com> References: <20051028150035.3d1da846.moilanen@austin.ibm.com> <20051028150804.73b5cedb.moilanen@austin.ibm.com> 
Message-ID: <20051028201347.GE16568@austin.ibm.com> On Fri, Oct 28, 2005 at 03:08:04PM -0500, Jake Moilanen wrote: > This patch exports the physical IO base address so drivers can pick it > up when using addresses from the device-tree. [...] > +unsigned long pci_io_base_phys; > +EXPORT_SYMBOL(pci_io_base_phys); Perhaps EXPORT_SYMBOL_GPL() instead? -Olof From moilanen at austin.ibm.com Sat Oct 29 06:18:46 2005 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Fri, 28 Oct 2005 15:18:46 -0500 Subject: [PATCH 2/2] Export Physical IO base address In-Reply-To: <20051028201347.GE16568@austin.ibm.com> References: <20051028150035.3d1da846.moilanen@austin.ibm.com> <20051028150804.73b5cedb.moilanen@austin.ibm.com> <20051028201347.GE16568@austin.ibm.com> Message-ID: <20051028151846.24263faa.moilanen@austin.ibm.com> > On Fri, Oct 28, 2005 at 03:08:04PM -0500, Jake Moilanen wrote: > > This patch exports the physical IO base address so drivers can pick it > > up when using addresses from the device-tree. > [...] > > +unsigned long pci_io_base_phys; > > +EXPORT_SYMBOL(pci_io_base_phys); > > Perhaps EXPORT_SYMBOL_GPL() instead? Bah... 
Signed-off-by: Jake Moilanen -- Index: 2.6.14/arch/ppc64/kernel/pci.c =================================================================== --- 2.6.14.orig/arch/ppc64/kernel/pci.c 2005-10-28 14:24:04.325502096 -0500 +++ 2.6.14/arch/ppc64/kernel/pci.c 2005-10-28 15:16:15.814095514 -0500 @@ -71,6 +71,8 @@ EXPORT_SYMBOL(isa_io_base); unsigned long pci_io_base; EXPORT_SYMBOL(pci_io_base); +unsigned long pci_io_base_phys; +EXPORT_SYMBOL_GPL(pci_io_base_phys); void iSeries_pcibios_init(void); @@ -967,6 +969,7 @@ if (primary) { pci_io_base = (unsigned long)hose->io_base_virt; + pci_io_base_phys = (unsigned long)hose->io_base_phys; isa_dn = of_find_node_by_type(NULL, "isa"); if (isa_dn) { isa_io_base = pci_io_base; Index: 2.6.14/include/asm-ppc64/io.h =================================================================== --- 2.6.14.orig/include/asm-ppc64/io.h 2005-10-28 14:23:01.188499329 -0500 +++ 2.6.14/include/asm-ppc64/io.h 2005-10-28 15:15:54.558004623 -0500 @@ -33,6 +33,7 @@ extern unsigned long isa_io_base; extern unsigned long pci_io_base; +extern unsigned long pci_io_base_phys; extern unsigned long io_page_mask; #define MAX_ISA_PORT 0x10000 From moilanen at austin.ibm.com Sat Oct 29 06:20:41 2005 From: moilanen at austin.ibm.com (Jake Moilanen) Date: Fri, 28 Oct 2005 15:20:41 -0500 Subject: [PATCH 1/2] Allow ISA address In-Reply-To: <20051028150503.398ba65e.moilanen@austin.ibm.com> References: <20051028150035.3d1da846.moilanen@austin.ibm.com> <20051028150503.398ba65e.moilanen@austin.ibm.com> Message-ID: <20051028152041.79a17a00.moilanen@austin.ibm.com> > +EXPORT_SYMBOL(allow_isa_address); Let's try EXPORT_SYMBOL_GPL(). 
Signed-off-by: Jake Moilanen -- Index: 2.6.14/arch/ppc64/kernel/pci.c =================================================================== --- 2.6.14.orig/arch/ppc64/kernel/pci.c 2005-10-28 09:54:55.199422205 -0500 +++ 2.6.14/arch/ppc64/kernel/pci.c 2005-10-28 15:19:25.391050702 -0500 @@ -1107,6 +1107,26 @@ return 0; } + +/* Allow IO access to pages that are in the ISA range */ +void __devinit allow_isa_address(unsigned long start, unsigned end) +{ + unsigned long mask; + + if (start < MAX_ISA_PORT) { + if (end > MAX_ISA_PORT) + end = MAX_ISA_PORT; + + start >>= PAGE_SHIFT; + end >>= PAGE_SHIFT; + + /* get the range of pages for the map */ + mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1); + io_page_mask |= mask; + } +} +EXPORT_SYMBOL_GPL(allow_isa_address); + static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); @@ -1118,19 +1138,8 @@ start = res->start += offset; end = res->end += offset; - /* Need to allow IO access to pages that are in the - ISA range */ - if (start < MAX_ISA_PORT) { - if (end > MAX_ISA_PORT) - end = MAX_ISA_PORT; - - start >>= PAGE_SHIFT; - end >>= PAGE_SHIFT; - - /* get the range of pages for the map */ - mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1); - io_page_mask |= mask; - } + allow_isa_address(start, end); + } else if (res->flags & IORESOURCE_MEM) { res->start += hose->pci_mem_offset; res->end += hose->pci_mem_offset; Index: 2.6.14/include/asm-ppc64/pci.h =================================================================== --- 2.6.14.orig/include/asm-ppc64/pci.h 2005-10-28 09:55:01.897151985 -0500 +++ 2.6.14/include/asm-ppc64/pci.h 2005-10-28 14:21:26.932903810 -0500 @@ -172,6 +172,8 @@ unsigned long size, pgprot_t prot); +extern void __devinit allow_isa_address(unsigned long start, unsigned end); + #ifdef CONFIG_PPC_MULTIPLATFORM #define HAVE_ARCH_PCI_RESOURCE_TO_USER extern void pci_resource_to_user(const struct pci_dev *dev, int bar, From benh at 
kernel.crashing.org Sat Oct 29 08:52:01 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Sat, 29 Oct 2005 08:52:01 +1000 Subject: [PATCH 0/2] PPC64 setup for TPM In-Reply-To: <20051028150035.3d1da846.moilanen@austin.ibm.com> References: <20051028150035.3d1da846.moilanen@austin.ibm.com> Message-ID: <1130539922.29054.123.camel@gaston> On Fri, 2005-10-28 at 15:00 -0500, Jake Moilanen wrote: > Trusted Platform Module (TPM) is setup differently from other > traditional devices. TPM is supposed to be a memory device. > > TPM pulls its memory location from the /tpm device-node. This memory > location could be in the low 64K IO space (ISA space). Thus PPC64 needs > to allow a device to register a low ISA address through a specific call > to punch a hole in page_io_mask (as since it's memory, it will not be > picked in the PCI probe). This should be used until the page_io_mask > can be safely removed. The page_io_mask is going away anyway... > These patches do two things: -ENOPATCH > 1.) Allow specific call to punch a hole into page_io_mask for ISA > addresses. > 2.) Export the physical address of the base IO address. So > TPM can interpret the device-tree memory location correctly. From benh at kernel.crashing.org Sat Oct 29 08:54:01 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Sat, 29 Oct 2005 08:54:01 +1000 Subject: [PATCH 1/2] Allow ISA address In-Reply-To: <20051028150503.398ba65e.moilanen@austin.ibm.com> References: <20051028150035.3d1da846.moilanen@austin.ibm.com> <20051028150503.398ba65e.moilanen@austin.ibm.com> Message-ID: <1130540041.29054.126.camel@gaston> On Fri, 2005-10-28 at 15:05 -0500, Jake Moilanen wrote: > This exports the functionality of punching a hole into io_page_mask for > other callers. Nah, forget this one, we'll be removing the whole io mask thing.
It doesn't work properly and has "issues" (we have cases where valid PCI devices have been allocated IOs in the first page and this IO mask is not letting us access them for example). The proper solution is to have drivers not try to tap hardware they aren't supposed to. Mikey has a couple of fixes for parport and pc speaker, other legacy stuff should already be pretty much fixed except maybe legacy VGA which I still need to look at, but it makes little sense to enable it for us anyway. Ben. From benh at kernel.crashing.org Sat Oct 29 08:54:47 2005 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Sat, 29 Oct 2005 08:54:47 +1000 Subject: [PATCH 2/2] Export Physical IO base address In-Reply-To: <20051028150804.73b5cedb.moilanen@austin.ibm.com> References: <20051028150035.3d1da846.moilanen@austin.ibm.com> <20051028150804.73b5cedb.moilanen@austin.ibm.com> Message-ID: <1130540087.29054.128.camel@gaston> On Fri, 2005-10-28 at 15:08 -0500, Jake Moilanen wrote: > This patch exports the physical IO base address so drivers can pick it > up when using addresses from the device-tree. Why ? What is your driver exactly trying to do ? Ben. From arnd at arndb.de Sat Oct 29 09:11:13 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Sat, 29 Oct 2005 01:11:13 +0200 Subject: Patches for 2.6.15 In-Reply-To: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> Message-ID: <200510290111.13867.arnd@arndb.de> On Freedag 28 Oktober 2005 15:05, Paul Mackerras wrote: > If anyone has patches for arch/ppc{,64} and include/asm-ppc{,64} that > they would like to see go upstream now that 2.6.14 is out, other than > patches that are already in the powerpc-merge tree, please let me > know. I am planning to ask Linus to pull the powerpc-merge tree > shortly, and that will probably break your patches.
Out of my spufs patches, I'd like to have at least the reservation for my two system call numbers in there so we don't get any conflicts on that front. The patch follows in another mail. From my point of view, the spufs itself could go in at this point, but I have the feeling that the real concerns from other people will come up at the moment that I post them for inclusion. Andrew, are you ok with including spufs in -mm when the merge tree is upstream? > If possible, I'd like to get to the point where we can remove > arch/ppc64 entirely by the end of the 2-week merge window for 2.6.15. Ok, I'll do a new patch to move over the BPA files then to get my stuff out of the way ASAP. Arnd <>< From arnd at arndb.de Sat Oct 29 09:16:43 2005 From: arnd at arndb.de (Arnd Bergmann) Date: Sat, 29 Oct 2005 01:16:43 +0200 Subject: [PATCH] reserve syscall numbers for Cell In-Reply-To: <200510290111.13867.arnd@arndb.de> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <200510290111.13867.arnd@arndb.de> Message-ID: <200510290116.43753.arnd@arndb.de> This creates two powerpc specific dummy system calls that will later be used on the Cell platform to manage SPUs. Reserve the system call numbers now so we don't accidentally use the same numbers for different system calls. 
Signed-off-by: Arnd Bergmann --- Please apply to merge tree --- a/kernel/sys_ni.c 2005-10-29 00:46:21.000000000 +0200 +++ b/kernel/sys_ni.c 2005-10-29 00:47:37.000000000 +0200 @@ -90,3 +90,5 @@ cond_syscall(sys32_ipc); cond_syscall(sys32_sysctl); cond_syscall(ppc_rtas); +cond_syscall(sys_spu_run); +cond_syscall(sys_spu_create); --- a/arch/powerpc/kernel/systbl.S 2005-10-29 00:39:49.000000000 +0200 +++ b/arch/powerpc/kernel/systbl.S 2005-10-29 00:44:48.000000000 +0200 @@ -319,3 +319,5 @@ SYSCALL(inotify_init) SYSCALL(inotify_add_watch) SYSCALL(inotify_rm_watch) +SYSCALL(spu_run) +SYSCALL(spu_create) --- a/include/asm-powerpc/unistd.h 2005-10-29 00:34:40.000000000 +0200 +++ b/include/asm-powerpc/unistd.h 2005-10-29 00:35:52.000000000 +0200 @@ -297,7 +297,9 @@ #define __NR_inotify_add_watch 276 #define __NR_inotify_rm_watch 277 -#define __NR_syscalls 278 +#define __NR_spu_run 278 +#define __NR_spu_create 279 +#define __NR_syscalls 280 #ifdef __KERNEL__ #define __NR__exit __NR_exit From akpm at osdl.org Sat Oct 29 09:21:24 2005 From: akpm at osdl.org (Andrew Morton) Date: Fri, 28 Oct 2005 16:21:24 -0700 Subject: Patches for 2.6.15 In-Reply-To: <200510290111.13867.arnd@arndb.de> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <200510290111.13867.arnd@arndb.de> Message-ID: <20051028162124.325b37e7.akpm@osdl.org> Arnd Bergmann wrote: > > Andrew, are you ok with including spufs in -mm when the merge > tree is upstream? Probably. I haven't paid much attention to them thus far, sorry. From paulus at samba.org Sat Oct 29 10:10:22 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 29 Oct 2005 10:10:22 +1000 Subject: Patches for 2.6.15 In-Reply-To: <43624B30.20600@nortel.com> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <43624B30.20600@nortel.com> Message-ID: <17250.48622.69899.322254@cargo.ozlabs.ibm.com> Christopher Friesen writes: > Under the unified architecture, what is the machine type in the "uname" > outout? 
"ppc" on 32-bit machines, "ppc64" on 64-bit machines, same as before. The intention is that the merge doesn't change the user/kernel ABI in any way. :) Paul. From paulus at samba.org Sat Oct 29 10:20:31 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 29 Oct 2005 10:20:31 +1000 Subject: Patches for 2.6.15 In-Reply-To: <20051028130246.459f1e9a.akpm@osdl.org> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <20051028103041.B15268@cox.net> <20051028130246.459f1e9a.akpm@osdl.org> Message-ID: <17250.49231.248045.41302@cargo.ozlabs.ibm.com> Andrew Morton writes: > Kumar Gala wrote: > > > > Can we ask Andrew to flush any ppc patches in -mm to linus before we > > have Linus pull the merge tree. > > Well I'd prefer that ;) My concern is that if we do that we may end up losing some of the changes from Andrew's patches when the merge goes in and/or creating a nasty, difficult merge. Probably the best thing is if I take the patches and apply them on top of the merge tree (hacking them as necessary) and then get Linus to pull. Andrew, will I just grab the ones you listed from the latest -mm, or will you send them to me? Paul. From akpm at osdl.org Sat Oct 29 10:40:29 2005 From: akpm at osdl.org (Andrew Morton) Date: Fri, 28 Oct 2005 17:40:29 -0700 Subject: Patches for 2.6.15 In-Reply-To: <17250.49231.248045.41302@cargo.ozlabs.ibm.com> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <20051028103041.B15268@cox.net> <20051028130246.459f1e9a.akpm@osdl.org> <17250.49231.248045.41302@cargo.ozlabs.ibm.com> Message-ID: <20051028174029.24d4dbb9.akpm@osdl.org> Paul Mackerras wrote: > > Andrew, will I just grab the ones you listed from the latest -mm, or > will you send them to me? I'll mail them all over. 
From akpm at osdl.org Sat Oct 29 10:46:10 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:10 -0700 Subject: [patch 02/43] ppc32 8xx: use io accessor macros instead of direct memory reference Message-ID: <200510290046.j9T0kfRd029988@shell0.pdx.osdl.net> From: Marcelo Tosatti Convert core 8xx drivers to use in_xxxbe/in_xxx macros instead of direct memory references. Other than making IO accesses explicit (which is a plus for readability), a common set of macros provides a unified place for the volatile flag to constraint compiler code reordering. There are several unlucky places at the moment which lack the volatile flag. Signed-off-by: Marcelo Tosatti Signed-off-by: Andrew Morton --- arch/ppc/8xx_io/commproc.c | 20 +++++++++---------- arch/ppc/syslib/m8xx_setup.c | 45 +++++++++++++++++++------------------------ arch/ppc/syslib/m8xx_wdt.c | 14 ++++++------- arch/ppc/syslib/ppc8xx_pic.c | 17 ++++++---------- 4 files changed, 44 insertions(+), 52 deletions(-) diff -puN arch/ppc/8xx_io/commproc.c~ppc32-8xx-use-io-accessor-macros-instead-of-direct-memory-reference arch/ppc/8xx_io/commproc.c --- devel/arch/ppc/8xx_io/commproc.c~ppc32-8xx-use-io-accessor-macros-instead-of-direct-memory-reference 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/8xx_io/commproc.c 2005-10-28 17:44:02.000000000 -0700 @@ -73,7 +73,7 @@ cpm_mask_irq(unsigned int irq) { int cpm_vec = irq - CPM_IRQ_OFFSET; - ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr &= ~(1 << cpm_vec); + out_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr, in_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr) & ~(1 << cpm_vec)); } static void @@ -81,7 +81,7 @@ cpm_unmask_irq(unsigned int irq) { int cpm_vec = irq - CPM_IRQ_OFFSET; - ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr |= (1 << cpm_vec); + out_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr, in_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr) | (1 << cpm_vec)); } static void @@ -95,7 +95,7 @@ cpm_eoi(unsigned int irq) { int cpm_vec = irq - 
CPM_IRQ_OFFSET; - ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cisr = (1 << cpm_vec); + out_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cisr, (1 << cpm_vec)); } struct hw_interrupt_type cpm_pic = { @@ -133,7 +133,7 @@ m8xx_cpm_reset(void) * manual recommends it. * Bit 25, FAM can also be set to use FEC aggressive mode (860T). */ - imp->im_siu_conf.sc_sdcr = 1; + out_be32(&imp->im_siu_conf.sc_sdcr, 1), /* Reclaim the DP memory for our use. */ m8xx_cpm_dpinit(); @@ -178,10 +178,10 @@ cpm_interrupt_init(void) /* Initialize the CPM interrupt controller. */ - ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cicr = + out_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cicr, (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) | - ((CPM_INTERRUPT/2) << 13) | CICR_HP_MASK; - ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr = 0; + ((CPM_INTERRUPT/2) << 13) | CICR_HP_MASK); + out_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr, 0); /* install the CPM interrupt controller routines for the CPM * interrupt vectors @@ -198,7 +198,7 @@ cpm_interrupt_init(void) if (setup_irq(CPM_IRQ_OFFSET + CPMVEC_ERROR, &cpm_error_irqaction)) panic("Could not allocate CPM error IRQ!"); - ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cicr |= CICR_IEN; + out_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cicr, in_be32(&((immap_t *)IMAP_ADDR)->im_cpic.cpic_cicr) | CICR_IEN); } /* @@ -212,8 +212,8 @@ cpm_get_irq(struct pt_regs *regs) /* Get the vector by setting the ACK bit and then reading * the register. 
*/ - ((volatile immap_t *)IMAP_ADDR)->im_cpic.cpic_civr = 1; - cpm_vec = ((volatile immap_t *)IMAP_ADDR)->im_cpic.cpic_civr; + out_be16(&((volatile immap_t *)IMAP_ADDR)->im_cpic.cpic_civr, 1); + cpm_vec = in_be16(&((volatile immap_t *)IMAP_ADDR)->im_cpic.cpic_civr); cpm_vec >>= 11; return cpm_vec; diff -puN arch/ppc/syslib/m8xx_setup.c~ppc32-8xx-use-io-accessor-macros-instead-of-direct-memory-reference arch/ppc/syslib/m8xx_setup.c --- devel/arch/ppc/syslib/m8xx_setup.c~ppc32-8xx-use-io-accessor-macros-instead-of-direct-memory-reference 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/m8xx_setup.c 2005-10-28 17:45:23.000000000 -0700 @@ -144,12 +144,12 @@ void __init m8xx_calibrate_decr(void) int freq, fp, divisor; /* Unlock the SCCR. */ - ((volatile immap_t *)IMAP_ADDR)->im_clkrstk.cark_sccrk = ~KAPWR_KEY; - ((volatile immap_t *)IMAP_ADDR)->im_clkrstk.cark_sccrk = KAPWR_KEY; + out_be32(&((immap_t *)IMAP_ADDR)->im_clkrstk.cark_sccrk, ~KAPWR_KEY); + out_be32(&((immap_t *)IMAP_ADDR)->im_clkrstk.cark_sccrk, KAPWR_KEY); /* Force all 8xx processors to use divide by 16 processor clock. */ - ((volatile immap_t *)IMAP_ADDR)->im_clkrst.car_sccr |= 0x02000000; - + out_be32(&((immap_t *)IMAP_ADDR)->im_clkrst.car_sccr, + in_be32(&((immap_t *)IMAP_ADDR)->im_clkrst.car_sccr)|0x02000000); /* Processor frequency is MHz. * The value 'fp' is the number of decrementer ticks per second. */ @@ -175,28 +175,24 @@ void __init m8xx_calibrate_decr(void) * we guarantee the registers are locked, then we unlock them * for our use. 
*/ - ((volatile immap_t *)IMAP_ADDR)->im_sitk.sitk_tbscrk = ~KAPWR_KEY; - ((volatile immap_t *)IMAP_ADDR)->im_sitk.sitk_rtcsck = ~KAPWR_KEY; - ((volatile immap_t *)IMAP_ADDR)->im_sitk.sitk_tbk = ~KAPWR_KEY; - ((volatile immap_t *)IMAP_ADDR)->im_sitk.sitk_tbscrk = KAPWR_KEY; - ((volatile immap_t *)IMAP_ADDR)->im_sitk.sitk_rtcsck = KAPWR_KEY; - ((volatile immap_t *)IMAP_ADDR)->im_sitk.sitk_tbk = KAPWR_KEY; + out_be32(&((immap_t *)IMAP_ADDR)->im_sitk.sitk_tbscrk, ~KAPWR_KEY); + out_be32(&((immap_t *)IMAP_ADDR)->im_sitk.sitk_rtcsck, ~KAPWR_KEY); + out_be32(&((immap_t *)IMAP_ADDR)->im_sitk.sitk_tbk, ~KAPWR_KEY); + out_be32(&((immap_t *)IMAP_ADDR)->im_sitk.sitk_tbscrk, KAPWR_KEY); + out_be32(&((immap_t *)IMAP_ADDR)->im_sitk.sitk_rtcsck, KAPWR_KEY); + out_be32(&((immap_t *)IMAP_ADDR)->im_sitk.sitk_tbk, KAPWR_KEY); /* Disable the RTC one second and alarm interrupts. */ - ((volatile immap_t *)IMAP_ADDR)->im_sit.sit_rtcsc &= - ~(RTCSC_SIE | RTCSC_ALE); + out_be16(&((immap_t *)IMAP_ADDR)->im_sit.sit_rtcsc, in_be16(&((immap_t *)IMAP_ADDR)->im_sit.sit_rtcsc) & ~(RTCSC_SIE | RTCSC_ALE)); /* Enable the RTC */ - ((volatile immap_t *)IMAP_ADDR)->im_sit.sit_rtcsc |= - (RTCSC_RTF | RTCSC_RTE); + out_be16(&((immap_t *)IMAP_ADDR)->im_sit.sit_rtcsc, in_be16(&((immap_t *)IMAP_ADDR)->im_sit.sit_rtcsc) | (RTCSC_RTF | RTCSC_RTE)); /* Enabling the decrementer also enables the timebase interrupts * (or from the other point of view, to get decrementer interrupts * we have to enable the timebase). The decrementer interrupt * is wired into the vector table, nothing to do here for that. 
*/ - ((volatile immap_t *)IMAP_ADDR)->im_sit.sit_tbscr = - ((mk_int_int_mask(DEC_INTERRUPT) << 8) | - (TBSCR_TBF | TBSCR_TBE)); + out_be16(&((immap_t *)IMAP_ADDR)->im_sit.sit_tbscr, (mk_int_int_mask(DEC_INTERRUPT) << 8) | (TBSCR_TBF | TBSCR_TBE)); if (setup_irq(DEC_INTERRUPT, &tbint_irqaction)) panic("Could not allocate timer IRQ!"); @@ -216,9 +212,9 @@ void __init m8xx_calibrate_decr(void) static int m8xx_set_rtc_time(unsigned long time) { - ((volatile immap_t *)IMAP_ADDR)->im_sitk.sitk_rtck = KAPWR_KEY; - ((volatile immap_t *)IMAP_ADDR)->im_sit.sit_rtc = time; - ((volatile immap_t *)IMAP_ADDR)->im_sitk.sitk_rtck = ~KAPWR_KEY; + out_be32(&((immap_t *)IMAP_ADDR)->im_sitk.sitk_rtck, KAPWR_KEY); + out_be32(&((immap_t *)IMAP_ADDR)->im_sit.sit_rtc, time); + out_be32(&((immap_t *)IMAP_ADDR)->im_sitk.sitk_rtck, ~KAPWR_KEY); return(0); } @@ -226,7 +222,7 @@ static unsigned long m8xx_get_rtc_time(void) { /* Get time from the RTC. */ - return((unsigned long)(((immap_t *)IMAP_ADDR)->im_sit.sit_rtc)); + return (unsigned long) in_be32(&((immap_t *)IMAP_ADDR)->im_sit.sit_rtc); } static void @@ -235,13 +231,13 @@ m8xx_restart(char *cmd) __volatile__ unsigned char dummy; local_irq_disable(); - ((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr |= 0x00000080; + out_be32(&((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr, in_be32(&((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr) | 0x00000080); /* Clear the ME bit in MSR to cause checkstop on machine check */ mtmsr(mfmsr() & ~0x1000); - dummy = ((immap_t *)IMAP_ADDR)->im_clkrst.res[0]; + dummy = in_8(&((immap_t *)IMAP_ADDR)->im_clkrst.res[0]); printk("Restart failed\n"); while(1); } @@ -306,8 +302,7 @@ m8xx_init_IRQ(void) i8259_init(0); /* The i8259 cascade interrupt must be level sensitive. 
*/ - ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_siel &= - ~(0x80000000 >> ISA_BRIDGE_INT); + out_be32(&((immap_t *)IMAP_ADDR)->im_siu_conf.sc_siel, in_be32(&((immap_t *)IMAP_ADDR)->im_siu_conf.sc_siel & ~(0x80000000 >> ISA_BRIDGE_INT))); if (setup_irq(ISA_BRIDGE_INT, &mbx_i8259_irqaction)) enable_irq(ISA_BRIDGE_INT); diff -puN arch/ppc/syslib/m8xx_wdt.c~ppc32-8xx-use-io-accessor-macros-instead-of-direct-memory-reference arch/ppc/syslib/m8xx_wdt.c --- devel/arch/ppc/syslib/m8xx_wdt.c~ppc32-8xx-use-io-accessor-macros-instead-of-direct-memory-reference 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/m8xx_wdt.c 2005-10-28 17:44:02.000000000 -0700 @@ -29,8 +29,8 @@ void m8xx_wdt_reset(void) { volatile immap_t *imap = (volatile immap_t *)IMAP_ADDR; - imap->im_siu_conf.sc_swsr = 0x556c; /* write magic1 */ - imap->im_siu_conf.sc_swsr = 0xaa39; /* write magic2 */ + out_be16(imap->im_siu_conf.sc_swsr, 0x556c); /* write magic1 */ + out_be16(imap->im_siu_conf.sc_swsr, 0xaa39); /* write magic2 */ } static irqreturn_t m8xx_wdt_interrupt(int irq, void *dev, struct pt_regs *regs) @@ -39,7 +39,7 @@ static irqreturn_t m8xx_wdt_interrupt(in m8xx_wdt_reset(); - imap->im_sit.sit_piscr |= PISCR_PS; /* clear irq */ + out_be16(imap->im_sit.sit_piscr, in_be16(imap->im_sit.sit_piscr | PISCR_PS)); /* clear irq */ return IRQ_HANDLED; } @@ -51,7 +51,7 @@ void __init m8xx_wdt_handler_install(bd_ u32 sypcr; u32 pitrtclk; - sypcr = imap->im_siu_conf.sc_sypcr; + sypcr = in_be32(imap->im_siu_conf.sc_sypcr); if (!(sypcr & 0x04)) { printk(KERN_NOTICE "m8xx_wdt: wdt disabled (SYPCR: 0x%08X)\n", @@ -87,9 +87,9 @@ void __init m8xx_wdt_handler_install(bd_ else pitc = pitrtclk * wdt_timeout / binfo->bi_intfreq / 2; - imap->im_sit.sit_pitc = pitc << 16; - imap->im_sit.sit_piscr = - (mk_int_int_mask(PIT_INTERRUPT) << 8) | PISCR_PIE | PISCR_PTE; + out_be32(imap->im_sit.sit_pitc, pitc << 16); + + out_be16(imap->im_sit.sit_piscr, (mk_int_int_mask(PIT_INTERRUPT) << 8) | PISCR_PIE | PISCR_PTE); if 
(setup_irq(PIT_INTERRUPT, &m8xx_wdt_irqaction)) panic("m8xx_wdt: error setting up the watchdog irq!"); diff -puN arch/ppc/syslib/ppc8xx_pic.c~ppc32-8xx-use-io-accessor-macros-instead-of-direct-memory-reference arch/ppc/syslib/ppc8xx_pic.c --- devel/arch/ppc/syslib/ppc8xx_pic.c~ppc32-8xx-use-io-accessor-macros-instead-of-direct-memory-reference 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/ppc8xx_pic.c 2005-10-28 17:44:02.000000000 -0700 @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include "ppc8xx_pic.h" @@ -29,8 +30,7 @@ static void m8xx_mask_irq(unsigned int i word = irq_nr >> 5; ppc_cached_irq_mask[word] &= ~(1 << (31-bit)); - ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_simask = - ppc_cached_irq_mask[word]; + out_be32(&((immap_t *)IMAP_ADDR)->im_siu_conf.sc_simask, ppc_cached_irq_mask[word]); } static void m8xx_unmask_irq(unsigned int irq_nr) @@ -41,8 +41,7 @@ static void m8xx_unmask_irq(unsigned int word = irq_nr >> 5; ppc_cached_irq_mask[word] |= (1 << (31-bit)); - ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_simask = - ppc_cached_irq_mask[word]; + out_be32(&((immap_t *)IMAP_ADDR)->im_siu_conf.sc_simask, ppc_cached_irq_mask[word]); } static void m8xx_end_irq(unsigned int irq_nr) @@ -55,8 +54,7 @@ static void m8xx_end_irq(unsigned int ir word = irq_nr >> 5; ppc_cached_irq_mask[word] |= (1 << (31-bit)); - ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_simask = - ppc_cached_irq_mask[word]; + out_be32(&((immap_t *)IMAP_ADDR)->im_siu_conf.sc_simask, ppc_cached_irq_mask[word]); } } @@ -69,9 +67,8 @@ static void m8xx_mask_and_ack(unsigned i word = irq_nr >> 5; ppc_cached_irq_mask[word] &= ~(1 << (31-bit)); - ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_simask = - ppc_cached_irq_mask[word]; - ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_sipend = 1 << (31-bit); + out_be32(&((immap_t *)IMAP_ADDR)->im_siu_conf.sc_simask, ppc_cached_irq_mask[word]); + out_be32(&((immap_t *)IMAP_ADDR)->im_siu_conf.sc_sipend, 1 << (31-bit)); } struct hw_interrupt_type 
ppc8xx_pic = { @@ -93,7 +90,7 @@ m8xx_get_irq(struct pt_regs *regs) /* For MPC8xx, read the SIVEC register and shift the bits down * to get the irq number. */ - irq = ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_sivec >> 26; + irq = in_be32(&((immap_t *)IMAP_ADDR)->im_siu_conf.sc_sivec) >> 26; /* * When we read the sivec without an interrupt to process, we will _ From akpm at osdl.org Sat Oct 29 10:46:15 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:15 -0700 Subject: [patch 05/43] ppc32: #ifdef out ALTIVEC specific code in __switch_to Message-ID: <200510290046.j9T0kkk2030000@shell0.pdx.osdl.net> From: Marcelo Tosatti #ifdef out an ALTIVEC specific tweak in __switch_to() Signed-off-by: Marcelo Tosatti Signed-off-by: Andrew Morton --- arch/ppc/kernel/process.c | 2 ++ 1 files changed, 2 insertions(+) diff -puN arch/ppc/kernel/process.c~ppc32-ifdef-out-altivec-specific-code-in-__switch_to arch/ppc/kernel/process.c --- devel/arch/ppc/kernel/process.c~ppc32-ifdef-out-altivec-specific-code-in-__switch_to 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/kernel/process.c 2005-10-28 17:44:02.000000000 -0700 @@ -287,11 +287,13 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ #endif /* CONFIG_SMP */ +#ifdef CONFIG_ALTIVEC /* Avoid the trap. On smp this this never happens since * we don't set last_task_used_altivec -- Cort */ if (new->thread.regs && last_task_used_altivec == new) new->thread.regs->msr |= MSR_VEC; +#endif #ifdef CONFIG_SPE /* Avoid the trap. On smp this this never happens since * we don't set last_task_used_spe _ From akpm at osdl.org Sat Oct 29 10:46:16 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:16 -0700 Subject: [patch 06/43] ppc32: handle access to non-present IO ports on 8xx Message-ID: <200510290046.j9T0klC0030003@shell0.pdx.osdl.net> From: Marcelo Tosatti Accessing non present "IO ports" on 8xx generates MCE's. 
The exception is easily triggered during insertion/removal/suspension of PCMCIA cards. The following adds exception table entries for I/O instructions on 8xx (copied from the original Paul's PowerMac code), and changes MachineCheckException() slightly to cover 8xx specific's (on 8xx the MCE can be generated while executing the IO access instruction itself, which is not the case on PowerMac's, as the comment on traps.c details). Signed-off-by: Marcelo Tosatti Signed-off-by: Andrew Morton --- arch/ppc/kernel/misc.S | 160 ++++++++++++++++++++++++++++++++++++++++++------ arch/ppc/kernel/traps.c | 8 +- include/asm-ppc/io.h | 24 ++++++- 3 files changed, 168 insertions(+), 24 deletions(-) diff -puN arch/ppc/kernel/misc.S~ppc32-handle-access-to-non-present-io-ports-on-8xx arch/ppc/kernel/misc.S --- devel/arch/ppc/kernel/misc.S~ppc32-handle-access-to-non-present-io-ports-on-8xx 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/kernel/misc.S 2005-10-28 17:44:02.000000000 -0700 @@ -930,8 +930,20 @@ _GLOBAL(_insb) subi r4,r4,1 blelr- 00: lbz r5,0(r3) - eieio - stbu r5,1(r4) +01: eieio +02: stbu r5,1(r4) +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -941,8 +953,20 @@ _GLOBAL(_outsb) subi r4,r4,1 blelr- 00: lbzu r5,1(r4) - stb r5,0(r3) - eieio +01: stb r5,0(r3) +02: eieio +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -952,8 +976,20 @@ _GLOBAL(_insw) subi r4,r4,2 blelr- 00: lhbrx r5,0,r3 - eieio - sthu r5,2(r4) +01: eieio +02: sthu r5,2(r4) +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -963,8 +999,20 @@ _GLOBAL(_outsw) subi r4,r4,2 blelr- 00: lhzu 
r5,2(r4) - eieio - sthbrx r5,0,r3 +01: eieio +02: sthbrx r5,0,r3 +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -974,8 +1022,20 @@ _GLOBAL(_insl) subi r4,r4,4 blelr- 00: lwbrx r5,0,r3 - eieio - stwu r5,4(r4) +01: eieio +02: stwu r5,4(r4) +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -985,8 +1045,20 @@ _GLOBAL(_outsl) subi r4,r4,4 blelr- 00: lwzu r5,4(r4) - stwbrx r5,0,r3 - eieio +01: stwbrx r5,0,r3 +02: eieio +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -997,8 +1069,20 @@ _GLOBAL(_insw_ns) subi r4,r4,2 blelr- 00: lhz r5,0(r3) - eieio - sthu r5,2(r4) +01: eieio +02: sthu r5,2(r4) +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -1009,8 +1093,20 @@ _GLOBAL(_outsw_ns) subi r4,r4,2 blelr- 00: lhzu r5,2(r4) - sth r5,0(r3) - eieio +01: sth r5,0(r3) +02: eieio +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -1021,8 +1117,20 @@ _GLOBAL(_insl_ns) subi r4,r4,4 blelr- 00: lwz r5,0(r3) - eieio - stwu r5,4(r4) +01: eieio +02: stwu r5,4(r4) +#ifdef CONFIG_8xx + isync + .section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr @@ -1033,8 +1141,20 @@ _GLOBAL(_outsl_ns) subi r4,r4,4 blelr- 00: lwzu r5,4(r4) - stw r5,0(r3) - eieio +01: stw r5,0(r3) +02: eieio +#ifdef CONFIG_8xx + isync + 
.section .fixup,"ax" +03: blr + .text + .section __ex_table, "a" + .align 2 + .long 00b, 03b + .long 01b, 03b + .long 02b, 03b + .text +#endif bdnz 00b blr diff -puN arch/ppc/kernel/traps.c~ppc32-handle-access-to-non-present-io-ports-on-8xx arch/ppc/kernel/traps.c --- devel/arch/ppc/kernel/traps.c~ppc32-handle-access-to-non-present-io-ports-on-8xx 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/kernel/traps.c 2005-10-28 17:44:02.000000000 -0700 @@ -154,7 +154,7 @@ void _exception(int signr, struct pt_reg */ static inline int check_io_access(struct pt_regs *regs) { -#ifdef CONFIG_PPC_PMAC +#if defined CONFIG_PPC_PMAC || defined CONFIG_8xx unsigned long msr = regs->msr; const struct exception_table_entry *entry; unsigned int *nip = (unsigned int *)regs->nip; @@ -173,7 +173,11 @@ static inline int check_io_access(struct nip -= 2; else if (*nip == 0x4c00012c) /* isync */ --nip; - if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { + /* eieio from I/O string functions */ + else if ((*nip) == 0x7c0006ac || *(nip+1) == 0x7c0006ac) + nip += 2; + if (*nip == 0x7c0004ac || (*nip >> 26) == 3 || + (*(nip+1) >> 26) == 3) { /* sync or twi */ unsigned int rb; diff -puN include/asm-ppc/io.h~ppc32-handle-access-to-non-present-io-ports-on-8xx include/asm-ppc/io.h --- devel/include/asm-ppc/io.h~ppc32-handle-access-to-non-present-io-ports-on-8xx 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/include/asm-ppc/io.h 2005-10-28 17:44:02.000000000 -0700 @@ -257,7 +257,7 @@ extern __inline__ unsigned int name(unsi { \ unsigned int x; \ __asm__ __volatile__( \ - op " %0,0,%1\n" \ + "0:" op " %0,0,%1\n" \ "1: twi 0,%0,0\n" \ "2: isync\n" \ "3: nop\n" \ @@ -268,6 +268,7 @@ extern __inline__ unsigned int name(unsi ".previous\n" \ ".section __ex_table,\"a\"\n" \ " .align 2\n" \ + " .long 0b,5b\n" \ " .long 1b,5b\n" \ " .long 2b,5b\n" \ " .long 3b,5b\n" \ @@ -276,7 +277,25 @@ extern __inline__ unsigned int name(unsi : "r" (port + ___IO_BASE)); \ return x; \ } - +#ifdef CONFIG_8xx 
+#define __do_out_asm(name, op) \ +extern __inline__ void name(unsigned int val, unsigned int port) \ +{ \ + __asm__ __volatile__( \ + "0:" op " %0,0,%1\n" \ + "1: sync\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: b 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 2\n" \ + " .long 0b,2b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : : "r" (val), "r" (port + ___IO_BASE)); \ +} +#else #define __do_out_asm(name, op) \ extern __inline__ void name(unsigned int val, unsigned int port) \ { \ @@ -290,6 +309,7 @@ extern __inline__ void name(unsigned int ".previous" \ : : "r" (val), "r" (port + ___IO_BASE)); \ } +#endif __do_out_asm(outb, "stbx") #ifdef CONFIG_APUS _ From akpm at osdl.org Sat Oct 29 10:46:17 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:17 -0700 Subject: [patch 07/43] ppc32: update xmon help text Message-ID: <200510290046.j9T0kmQY030006@shell0.pdx.osdl.net> From: Olaf Hering Mention a few more commands in xmon. System.map processing was replaced with kallsyms. 
Signed-off-by: Olaf Hering Signed-off-by: Andrew Morton --- arch/ppc/xmon/xmon.c | 9 +++++++-- 1 files changed, 7 insertions(+), 2 deletions(-) diff -puN arch/ppc/xmon/xmon.c~ppc32-update-xmon-help-text arch/ppc/xmon/xmon.c --- devel/arch/ppc/xmon/xmon.c~ppc32-update-xmon-help-text 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/xmon/xmon.c 2005-10-28 17:44:02.000000000 -0700 @@ -148,9 +148,14 @@ Commands:\n\ r print registers\n\ S print special registers\n\ t print backtrace\n\ - la lookup address in system.map\n\ - ls lookup symbol in system.map\n\ + la lookup address\n\ + ls lookup symbol\n\ + C checksum\n\ + P call function with arguments\n\ + T print time\n\ x exit monitor\n\ + zr reboot\n\ + zh halt\n\ "; static int xmon_trace[NR_CPUS]; _ From akpm at osdl.org Sat Oct 29 10:46:20 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:20 -0700 Subject: [patch 10/43] Add modalias to macio sysfs attributes Message-ID: <200510290046.j9T0kpoa030015@shell0.pdx.osdl.net> From: Provide a "modalias" entry in /sys/bus/macio/devices/*/ This can be used to load drivers via the modules.alias file.
Signed-off-by: Olaf Hering Acked-by: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton --- drivers/macintosh/macio_sysfs.c | 26 ++++++++++++++++++++++++++ 1 files changed, 26 insertions(+) diff -puN drivers/macintosh/macio_sysfs.c~add-modalias-to-macio-sysfs-attributes drivers/macintosh/macio_sysfs.c --- devel/drivers/macintosh/macio_sysfs.c~add-modalias-to-macio-sysfs-attributes 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/macintosh/macio_sysfs.c 2005-10-28 17:44:03.000000000 -0700 @@ -39,6 +39,31 @@ compatible_show (struct device *dev, str return length; } +static ssize_t modalias_show (struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct of_device *of; + char *compat; + int cplen; + int length; + + of = &to_macio_device (dev)->ofdev; + compat = (char *) get_property (of->node, "compatible", &cplen); + if (!compat) compat = "", cplen = 1; + length = sprintf (buf, "of:N%sT%s", of->node->name, of->node->type); + buf += length; + while (cplen > 0) { + int l; + length += sprintf (buf, "C%s", compat); + buf += length; + l = strlen (compat) + 1; + compat += l; + cplen -= l; + } + + return length; +} + macio_config_of_attr (name, "%s\n"); macio_config_of_attr (type, "%s\n"); @@ -46,5 +71,6 @@ struct device_attribute macio_dev_attrs[ __ATTR_RO(name), __ATTR_RO(type), __ATTR_RO(compatible), + __ATTR_RO(modalias), __ATTR_NULL }; _ From akpm at osdl.org Sat Oct 29 10:46:19 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:19 -0700 Subject: [patch 09/43] ppc32: nvram driver for chrp Message-ID: <200510290046.j9T0ko9H030012@shell0.pdx.osdl.net> From: Olaf Hering This implements a nvram access method, similar to arch/ppc64/kernel/pSeries_nvram.c. Tested on CHRP B50.
Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc/platforms/Makefile | 3 + arch/ppc/platforms/chrp_nvram.c | 83 ++++++++++++++++++++++++++++++++++++++++ arch/ppc/platforms/chrp_setup.c | 3 - include/asm-ppc/system.h | 1 4 files changed, 88 insertions(+), 2 deletions(-) diff -puN /dev/null arch/ppc/platforms/chrp_nvram.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/chrp_nvram.c 2005-10-28 17:44:02.000000000 -0700 @@ -0,0 +1,83 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC + * + */ + +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nvram_size; +static unsigned char nvram_buf[4]; +static DEFINE_SPINLOCK(nvram_lock); + +static unsigned char chrp_nvram_read(int addr) +{ + unsigned long done, flags; + unsigned char ret; + + if (addr >= nvram_size) { + printk(KERN_DEBUG "%s: read addr %d > nvram_size %u\n", + current->comm, addr, nvram_size); + return 0xff; + } + spin_lock_irqsave(&nvram_lock, flags); + if ((call_rtas("nvram-fetch", 3, 2, &done, addr, __pa(nvram_buf), 1) != 0) || 1 != done) + ret = 0xff; + else + ret = nvram_buf[0]; + spin_unlock_irqrestore(&nvram_lock, flags); + + return ret; +} + +static void chrp_nvram_write(int addr, unsigned char val) +{ + unsigned long done, flags; + + if (addr >= nvram_size) { + printk(KERN_DEBUG "%s: write addr %d > nvram_size %u\n", + current->comm, addr, nvram_size); + return; + } + spin_lock_irqsave(&nvram_lock, flags); + nvram_buf[0] = val; + if ((call_rtas("nvram-store", 3, 2, &done, addr, __pa(nvram_buf), 1) != 0) || 1 != done) + printk(KERN_DEBUG "rtas IO error storing 0x%02x at %d", val, addr); 
+ spin_unlock_irqrestore(&nvram_lock, flags); +} + +void __init chrp_nvram_init(void) +{ + struct device_node *nvram; + unsigned int *nbytes_p, proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return; + + nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) + return; + + nvram_size = *nbytes_p; + + printk(KERN_INFO "CHRP nvram contains %u bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read_val = chrp_nvram_read; + ppc_md.nvram_write_val = chrp_nvram_write; + + return; +} diff -puN arch/ppc/platforms/chrp_setup.c~ppc32-nvram-driver-for-chrp arch/ppc/platforms/chrp_setup.c --- devel/arch/ppc/platforms/chrp_setup.c~ppc32-nvram-driver-for-chrp 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/chrp_setup.c 2005-10-28 17:44:02.000000000 -0700 @@ -464,8 +464,7 @@ void __init chrp_init2(void) { #ifdef CONFIG_NVRAM -// XX replace this in a more saner way -// pmac_nvram_init(); + chrp_nvram_init(); #endif request_region(0x20,0x20,"pic1"); diff -puN arch/ppc/platforms/Makefile~ppc32-nvram-driver-for-chrp arch/ppc/platforms/Makefile --- devel/arch/ppc/platforms/Makefile~ppc32-nvram-driver-for-chrp 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/Makefile 2005-10-28 17:45:21.000000000 -0700 @@ -14,6 +14,9 @@ obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pm pmac_low_i2c.o pmac_cache.o obj-$(CONFIG_PPC_CHRP) += chrp_setup.o chrp_time.o chrp_pci.o \ chrp_pegasos_eth.o +ifeq ($(CONFIG_PPC_CHRP),y) +obj-$(CONFIG_NVRAM) += chrp_nvram.o +endif obj-$(CONFIG_PPC_PREP) += prep_pci.o prep_setup.o ifeq ($(CONFIG_PPC_PMAC),y) obj-$(CONFIG_NVRAM) += pmac_nvram.o diff -puN include/asm-ppc/system.h~ppc32-nvram-driver-for-chrp include/asm-ppc/system.h --- devel/include/asm-ppc/system.h~ppc32-nvram-driver-for-chrp 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/include/asm-ppc/system.h 2005-10-28 17:44:02.000000000 -0700 @@ -70,6 +70,7 @@ 
extern void _set_L3CR(unsigned long); #endif extern void via_cuda_init(void); extern void pmac_nvram_init(void); +extern void chrp_nvram_init(void); extern void read_rtc_time(void); extern void pmac_find_display(void); extern void giveup_fpu(struct task_struct *); _ From akpm at osdl.org Sat Oct 29 10:46:24 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:24 -0700 Subject: [patch 13/43] Add MAINTAINER entry for the new PowerPC 4xx on-chip ethernet controller driver Message-ID: <200510290046.j9T0ks7c030026@shell0.pdx.osdl.net> From: Eugene Surovegin Add MAINTAINER entry for the new PPC4xx EMAC driver Signed-off-by: Eugene Surovegin Signed-off-by: Andrew Morton --- MAINTAINERS | 8 ++++++++ 1 files changed, 8 insertions(+) diff -puN MAINTAINERS~add-maintainer-entry-for-the-new-powerpc-4xx-on-chip-ethernet-controller-driver MAINTAINERS --- devel/MAINTAINERS~add-maintainer-entry-for-the-new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/MAINTAINERS 2005-10-28 17:44:03.000000000 -0700 @@ -1951,6 +1951,14 @@ M: george at mvista.com L: netdev at vger.kernel.org S: Supported +POWERPC 4xx EMAC DRIVER +P: Eugene Surovegin +M: ebs at ebshome.net +W: http://kernel.ebshome.net/emac/ +L: linuxppc-embedded at ozlabs.org +L: netdev at vger.kernel.org +S: Maintained + PNP SUPPORT P: Adam Belay M: ambx1 at neo.rr.com _ From akpm at osdl.org Sat Oct 29 10:46:14 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:14 -0700 Subject: [patch 04/43] ppc32: Cleanup AMCC PPC44x eval board U-Boot support Message-ID: <200510290046.j9T0kiJc029997@shell0.pdx.osdl.net> From: Matt Porter Cleanup PPC440 eval boards (bamboo, ebony, luan and ocotea) to better support U-Boot as bootloader. 
Signed-off-by: Stefan Roese Signed-off-by: Matt Porter Signed-off-by: Andrew Morton --- arch/ppc/platforms/4xx/bamboo.c | 14 ++------------ arch/ppc/platforms/4xx/ebony.c | 13 ++----------- arch/ppc/platforms/4xx/luan.c | 13 ++----------- arch/ppc/platforms/4xx/ocotea.c | 31 +++++++++++-------------------- arch/ppc/syslib/ibm44x_common.c | 35 ++++++++++++++++++++++++++++++++++- arch/ppc/syslib/ibm44x_common.h | 3 ++- 6 files changed, 53 insertions(+), 56 deletions(-) diff -puN arch/ppc/platforms/4xx/bamboo.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support arch/ppc/platforms/4xx/bamboo.c --- devel/arch/ppc/platforms/4xx/bamboo.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/4xx/bamboo.c 2005-10-28 17:44:02.000000000 -0700 @@ -51,7 +51,7 @@ #include #include -bd_t __res; +extern bd_t __res; static struct ibm44x_clocks clocks __initdata; @@ -425,17 +425,7 @@ bamboo_setup_arch(void) void __init platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { - parse_bootinfo(find_bootinfo()); - - /* - * If we were passed in a board information, copy it into the - * residual data area. 
- */ - if (r3) - __res = *(bd_t *)(r3 + KERNELBASE); - - - ibm44x_platform_init(); + ibm44x_platform_init(r3, r4, r5, r6, r7); ppc_md.setup_arch = bamboo_setup_arch; ppc_md.show_cpuinfo = bamboo_show_cpuinfo; diff -puN arch/ppc/platforms/4xx/ebony.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support arch/ppc/platforms/4xx/ebony.c --- devel/arch/ppc/platforms/4xx/ebony.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/4xx/ebony.c 2005-10-28 17:44:02.000000000 -0700 @@ -54,7 +54,7 @@ #include #include -bd_t __res; +extern bd_t __res; static struct ibm44x_clocks clocks __initdata; @@ -317,16 +317,7 @@ ebony_setup_arch(void) void __init platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { - parse_bootinfo(find_bootinfo()); - - /* - * If we were passed in a board information, copy it into the - * residual data area. - */ - if (r3) - __res = *(bd_t *)(r3 + KERNELBASE); - - ibm44x_platform_init(); + ibm44x_platform_init(r3, r4, r5, r6, r7); ppc_md.setup_arch = ebony_setup_arch; ppc_md.show_cpuinfo = ebony_show_cpuinfo; diff -puN arch/ppc/platforms/4xx/luan.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support arch/ppc/platforms/4xx/luan.c --- devel/arch/ppc/platforms/4xx/luan.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/4xx/luan.c 2005-10-28 17:44:02.000000000 -0700 @@ -52,7 +52,7 @@ #include #include -bd_t __res; +extern bd_t __res; static struct ibm44x_clocks clocks __initdata; @@ -355,16 +355,7 @@ luan_setup_arch(void) void __init platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { - parse_bootinfo(find_bootinfo()); - - /* - * If we were passed in a board information, copy it into the - * residual data area. 
- */ - if (r3) - __res = *(bd_t *)(r3 + KERNELBASE); - - ibm44x_platform_init(); + ibm44x_platform_init(r3, r4, r5, r6, r7); ppc_md.setup_arch = luan_setup_arch; ppc_md.show_cpuinfo = luan_show_cpuinfo; diff -puN arch/ppc/platforms/4xx/ocotea.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support arch/ppc/platforms/4xx/ocotea.c --- devel/arch/ppc/platforms/4xx/ocotea.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/4xx/ocotea.c 2005-10-28 17:44:02.000000000 -0700 @@ -52,7 +52,7 @@ #include #include -bd_t __res; +extern bd_t __res; static struct ibm44x_clocks clocks __initdata; @@ -286,6 +286,15 @@ ocotea_setup_arch(void) ibm440gx_tah_enable(); + /* + * Determine various clocks. + * To be completely correct we should get SysClk + * from FPGA, because it can be changed by on-board switches + * --ebs + */ + ibm440gx_get_clocks(&clocks, 33333333, 6 * 1843200); + ocp_sys_info.opb_bus_freq = clocks.opb; + /* Setup TODC access */ TODC_INIT(TODC_TYPE_DS1743, 0, @@ -324,25 +333,7 @@ static void __init ocotea_init(void) void __init platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { - parse_bootinfo(find_bootinfo()); - - /* - * If we were passed in a board information, copy it into the - * residual data area. - */ - if (r3) - __res = *(bd_t *)(r3 + KERNELBASE); - - /* - * Determine various clocks. 
- * To be completely correct we should get SysClk - * from FPGA, because it can be changed by on-board switches - * --ebs - */ - ibm440gx_get_clocks(&clocks, 33333333, 6 * 1843200); - ocp_sys_info.opb_bus_freq = clocks.opb; - - ibm44x_platform_init(); + ibm44x_platform_init(r3, r4, r5, r6, r7); ppc_md.setup_arch = ocotea_setup_arch; ppc_md.show_cpuinfo = ocotea_show_cpuinfo; diff -puN arch/ppc/syslib/ibm44x_common.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support arch/ppc/syslib/ibm44x_common.c --- devel/arch/ppc/syslib/ibm44x_common.c~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/ibm44x_common.c 2005-10-28 17:44:02.000000000 -0700 @@ -27,9 +27,14 @@ #include #include #include +#include +#include #include +/* Global Variables */ +bd_t __res; + phys_addr_t fixup_bigphys_addr(phys_addr_t addr, phys_addr_t size) { phys_addr_t page_4gb = 0; @@ -150,8 +155,36 @@ static unsigned long __init ibm44x_find_ return mem_size; } -void __init ibm44x_platform_init(void) +void __init ibm44x_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) { + parse_bootinfo(find_bootinfo()); + + /* + * If we were passed in a board information, copy it into the + * residual data area. + */ + if (r3) + __res = *(bd_t *)(r3 + KERNELBASE); + +#if defined(CONFIG_BLK_DEV_INITRD) + /* + * If the init RAM disk has been configured in, and there's a valid + * starting address for it, set it up. + */ + if (r4) { + initrd_start = r4 + KERNELBASE; + initrd_end = r5 + KERNELBASE; + } +#endif /* CONFIG_BLK_DEV_INITRD */ + + /* Copy the kernel command line arguments to a safe place. 
*/ + + if (r6) { + *(char *) (r7 + KERNELBASE) = 0; + strcpy(cmd_line, (char *) (r6 + KERNELBASE)); + } + ppc_md.init_IRQ = ppc4xx_pic_init; ppc_md.find_end_of_memory = ibm44x_find_end_of_memory; ppc_md.restart = ibm44x_restart; diff -puN arch/ppc/syslib/ibm44x_common.h~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support arch/ppc/syslib/ibm44x_common.h --- devel/arch/ppc/syslib/ibm44x_common.h~ppc32-cleanup-amcc-ppc44x-eval-board-u-boot-support 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/ibm44x_common.h 2005-10-28 17:44:02.000000000 -0700 @@ -36,7 +36,8 @@ struct ibm44x_clocks { }; /* common 44x platform init */ -void ibm44x_platform_init(void) __init; +void ibm44x_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) __init; /* initialize decrementer and tick-related variables */ void ibm44x_calibrate_decr(unsigned int freq) __init; _ From akpm at osdl.org Sat Oct 29 10:46:18 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:18 -0700 Subject: [patch 08/43] ppc: make phys_mem_access_prot() work with pfns instead of addresses Message-ID: <200510290046.j9T0knw4030009@shell0.pdx.osdl.net> From: Roland Dreier Change the phys_mem_access_prot() function to take a pfn instead of an address. This allows mmap64() to work on /dev/mem for addresses above 4G on 32-bit architectures. We start with a pfn in mmap_mem(), so there's no need to convert to an address; in fact, it's actively bad, since the conversion can overflow when the address is above 4G. Similarly fix the ppc32 page_is_ram() function to avoid a conversion to an address by directly comparing to max_pfn. Working with max_pfn instead of high_memory fixes page_is_ram() to give the right answer for highmem pages. 
Signed-off-by: Roland Dreier Cc: Paul Mackerras Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton --- arch/ppc/kernel/pci.c | 5 +++-- arch/ppc/mm/init.c | 10 ++++------ arch/ppc64/kernel/pci.c | 5 +++-- arch/ppc64/mm/init.c | 6 +++--- drivers/char/mem.c | 4 +--- drivers/video/fbmem.c | 2 +- include/asm-ppc/machdep.h | 2 +- include/asm-ppc/pci.h | 2 +- include/asm-ppc/pgtable.h | 2 +- include/asm-ppc64/machdep.h | 2 +- include/asm-ppc64/pci.h | 2 +- include/asm-ppc64/pgtable.h | 2 +- 12 files changed, 21 insertions(+), 23 deletions(-) diff -puN arch/ppc64/kernel/pci.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of arch/ppc64/kernel/pci.c --- devel/arch/ppc64/kernel/pci.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc64/kernel/pci.c 2005-10-28 17:44:02.000000000 -0700 @@ -727,16 +727,17 @@ static pgprot_t __pci_mmap_set_pgprot(st * above routine */ pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t protection) { struct pci_dev *pdev = NULL; struct resource *found = NULL; unsigned long prot = pgprot_val(protection); + unsigned long offset = pfn << PAGE_SHIFT; int i; - if (page_is_ram(offset >> PAGE_SHIFT)) + if (page_is_ram(pfn)) return __pgprot(prot); prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; diff -puN arch/ppc64/mm/init.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of arch/ppc64/mm/init.c --- devel/arch/ppc64/mm/init.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc64/mm/init.c 2005-10-28 17:44:02.000000000 -0700 @@ -855,13 +855,13 @@ void pgtable_cache_init(void) } } -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { if (ppc_md.phys_mem_access_prot) - return 
ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); - if (!page_is_ram(addr >> PAGE_SHIFT)) + if (!page_is_ram(pfn)) vma_prot = __pgprot(pgprot_val(vma_prot) | _PAGE_GUARDED | _PAGE_NO_CACHE); return vma_prot; diff -puN arch/ppc/kernel/pci.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of arch/ppc/kernel/pci.c --- devel/arch/ppc/kernel/pci.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/kernel/pci.c 2005-10-28 17:44:02.000000000 -0700 @@ -1586,16 +1586,17 @@ static pgprot_t __pci_mmap_set_pgprot(st * above routine */ pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t protection) { struct pci_dev *pdev = NULL; struct resource *found = NULL; unsigned long prot = pgprot_val(protection); + unsigned long offset = pfn << PAGE_SHIFT; int i; - if (page_is_ram(offset >> PAGE_SHIFT)) + if (page_is_ram(pfn)) return prot; prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; diff -puN arch/ppc/mm/init.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of arch/ppc/mm/init.c --- devel/arch/ppc/mm/init.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/mm/init.c 2005-10-28 17:44:02.000000000 -0700 @@ -648,18 +648,16 @@ void update_mmu_cache(struct vm_area_str */ int page_is_ram(unsigned long pfn) { - unsigned long paddr = (pfn << PAGE_SHIFT); - - return paddr < __pa(high_memory); + return pfn < max_pfn; } -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); - if (!page_is_ram(addr >> PAGE_SHIFT)) + if 
(!page_is_ram(pfn)) vma_prot = __pgprot(pgprot_val(vma_prot) | _PAGE_GUARDED | _PAGE_NO_CACHE); return vma_prot; diff -puN drivers/char/mem.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of drivers/char/mem.c --- devel/drivers/char/mem.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/drivers/char/mem.c 2005-10-28 17:44:02.000000000 -0700 @@ -231,9 +231,7 @@ static ssize_t write_mem(struct file * f static int mmap_mem(struct file * file, struct vm_area_struct * vma) { #if defined(__HAVE_PHYS_MEM_ACCESS_PROT) - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - - vma->vm_page_prot = phys_mem_access_prot(file, offset, + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot); #elif defined(pgprot_noncached) diff -puN drivers/video/fbmem.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of drivers/video/fbmem.c --- devel/drivers/video/fbmem.c~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/drivers/video/fbmem.c 2005-10-28 17:44:02.000000000 -0700 @@ -918,7 +918,7 @@ fb_mmap(struct file *file, struct vm_are } #endif #elif defined(__powerpc__) - vma->vm_page_prot = phys_mem_access_prot(file, off, + vma->vm_page_prot = phys_mem_access_prot(file, off >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); #elif defined(__alpha__) diff -puN include/asm-ppc64/machdep.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of include/asm-ppc64/machdep.h --- devel/include/asm-ppc64/machdep.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/include/asm-ppc64/machdep.h 2005-10-28 17:44:02.000000000 -0700 @@ -130,7 +130,7 @@ struct machdep_calls { /* Get access protection for /dev/mem */ pgprot_t (*phys_mem_access_prot)(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff 
-puN include/asm-ppc64/pci.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of include/asm-ppc64/pci.h --- devel/include/asm-ppc64/pci.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/include/asm-ppc64/pci.h 2005-10-28 17:44:02.000000000 -0700 @@ -168,7 +168,7 @@ extern void pcibios_add_platform_entries struct file; extern pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t prot); diff -puN include/asm-ppc64/pgtable.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of include/asm-ppc64/pgtable.h --- devel/include/asm-ppc64/pgtable.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/include/asm-ppc64/pgtable.h 2005-10-28 17:44:02.000000000 -0700 @@ -471,7 +471,7 @@ static inline void __ptep_set_access_fla #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED)) struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT diff -puN include/asm-ppc/machdep.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of include/asm-ppc/machdep.h --- devel/include/asm-ppc/machdep.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/include/asm-ppc/machdep.h 2005-10-28 17:44:02.000000000 -0700 @@ -98,7 +98,7 @@ struct machdep_calls { /* Get access protection for /dev/mem */ pgprot_t (*phys_mem_access_prot)(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff -puN include/asm-ppc/pci.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of include/asm-ppc/pci.h --- devel/include/asm-ppc/pci.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 
2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/include/asm-ppc/pci.h 2005-10-28 17:44:02.000000000 -0700 @@ -126,7 +126,7 @@ extern void pcibios_add_platform_entries struct file; extern pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t prot); diff -puN include/asm-ppc/pgtable.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of include/asm-ppc/pgtable.h --- devel/include/asm-ppc/pgtable.h~ppc-make-phys_mem_access_prot-work-with-pfns-instead-of 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/include/asm-ppc/pgtable.h 2005-10-28 17:44:02.000000000 -0700 @@ -705,7 +705,7 @@ static inline void __ptep_set_access_fla #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED)) struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT _ From akpm at osdl.org Sat Oct 29 10:46:21 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:21 -0700 Subject: [patch 11/43] Add modalias for pmac network drivers Message-ID: <200510290046.j9T0kp7R030020@shell0.pdx.osdl.net> From: Olaf Hering mesh, mac53c94 and airport already have an entry. Add the network drivers for pmac. 
Signed-off-by: Olaf Hering Cc: Paul Mackerras Acked-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton --- drivers/net/bmac.c | 1 + drivers/net/mace.c | 1 + 2 files changed, 2 insertions(+) diff -puN drivers/net/bmac.c~add-modalias-for-pmac-network-drivers drivers/net/bmac.c --- devel/drivers/net/bmac.c~add-modalias-for-pmac-network-drivers 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/bmac.c 2005-10-28 17:44:03.000000000 -0700 @@ -1658,6 +1658,7 @@ static struct of_device_id bmac_match[] }, {}, }; +MODULE_DEVICE_TABLE (of, bmac_match); static struct macio_driver bmac_driver = { diff -puN drivers/net/mace.c~add-modalias-for-pmac-network-drivers drivers/net/mace.c --- devel/drivers/net/mace.c~add-modalias-for-pmac-network-drivers 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/mace.c 2005-10-28 17:44:03.000000000 -0700 @@ -1016,6 +1016,7 @@ static struct of_device_id mace_match[] }, {}, }; +MODULE_DEVICE_TABLE (of, mace_match); static struct macio_driver mace_driver = { _ From akpm at osdl.org Sat Oct 29 10:46:13 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:13 -0700 Subject: [patch 03/43] MPC8xx PCMCIA driver Message-ID: <200510290046.j9T0khJo029994@shell0.pdx.osdl.net> From: Marcelo Tosatti Here is an uptodated version of the MPC8xx PCMCIA driver for v2.6, addressing comments by Jeff and Dominik: - use IO accessors instead of direct device memory referencing - avoid usage of non-standard "uint/uchar" data types - kill struct typedef's Will submit it for inclusion once v2.6.14 is out. Testing on 8xx platforms is more than welcome! Works like a charm on our custom hardware (CONFIG_PRxK). 
Cc: Dominik Brodowski Signed-off-by: Andrew Morton --- drivers/pcmcia/Kconfig | 10 drivers/pcmcia/Makefile | 1 drivers/pcmcia/m8xx_pcmcia.c | 1290 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1301 insertions(+) diff -puN drivers/pcmcia/Kconfig~mpc8xx-pcmcia-driver drivers/pcmcia/Kconfig --- devel/drivers/pcmcia/Kconfig~mpc8xx-pcmcia-driver 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/drivers/pcmcia/Kconfig 2005-10-28 17:44:02.000000000 -0700 @@ -154,6 +154,16 @@ config TCIC "Bridge" is the name used for the hardware inside your computer that PCMCIA cards are plugged into. If unsure, say N. +config PCMCIA_M8XX + tristate "MPC8xx PCMCIA support" + depends on PCMCIA && PPC + select PCCARD_NONSTATIC + help + Say Y here to include support for PowerPC 8xx series PCMCIA + controller. + + This driver is also available as a module called m8xx_pcmcia. + config HD64465_PCMCIA tristate "HD64465 host bridge support" depends on HD64465 && PCMCIA diff -puN /dev/null drivers/pcmcia/m8xx_pcmcia.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/pcmcia/m8xx_pcmcia.c 2005-10-28 17:44:02.000000000 -0700 @@ -0,0 +1,1290 @@ +/* + * m8xx_pcmcia.c - Linux PCMCIA socket driver for the mpc8xx series. + * + * (C) 1999-2000 Magnus Damm + * (C) 2001-2002 Montavista Software, Inc. + * + * + * Support for two slots by Cyclades Corporation + * + * Further fixes, v2.6 kernel port + * + * + * "The ExCA standard specifies that socket controllers should provide + * two IO and five memory windows per socket, which can be independently + * configured and positioned in the host address space and mapped to + * arbitrary segments of card address space. " - David A Hinds. 1999 + * + * This controller does _not_ meet the ExCA standard. + * + * m8xx pcmcia controller brief info: + * + 8 windows (attrib, mem, i/o) + * + up to two slots (SLOT_A and SLOT_B) + * + inputpins, outputpins, event and mask registers. + * - no offset register. sigh. 
+ * + * Because of the lacking offset register we must map the whole card. + * We assign each memory window PCMCIA_MEM_WIN_SIZE address space. + * Make sure there is (PCMCIA_MEM_WIN_SIZE * PCMCIA_MEM_WIN_NO + * * PCMCIA_SOCKETS_NO) bytes at PCMCIA_MEM_WIN_BASE. + * The i/o windows are dynamically allocated at PCMCIA_IO_WIN_BASE. + * They are maximum 64KByte each... + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#ifdef PCMCIA_DEBUG +static int pc_debug = PCMCIA_DEBUG; +module_param(pc_debug, int, 0); +#define dprintk(args...) printk(KERN_DEBUG "m8xx_pcmcia: " args); +#else +#define dprintk(args...) +#endif + +#define pcmcia_info(args...) printk(KERN_INFO "m8xx_pcmcia: "args) +#define pcmcia_error(args...) printk(KERN_ERR "m8xx_pcmcia: "args) + +static const char *version = "Version 0.06, Aug 2005"; +MODULE_LICENSE("Dual MPL/GPL"); + +#if !defined(CONFIG_PCMCIA_SLOT_A) && !defined(CONFIG_PCMCIA_SLOT_B) + +/* The RPX series use SLOT_B */ +#if defined(CONFIG_RPXCLASSIC) || defined(CONFIG_RPXLITE) +#define CONFIG_PCMCIA_SLOT_B +#define CONFIG_BD_IS_MHZ +#endif + +/* The ADS board use SLOT_A */ +#ifdef CONFIG_ADS +#define CONFIG_PCMCIA_SLOT_A +#define CONFIG_BD_IS_MHZ +#endif + +/* The FADS series are a mess */ +#ifdef CONFIG_FADS +#if defined(CONFIG_MPC860T) || defined(CONFIG_MPC860) || defined(CONFIG_MPC821) +#define CONFIG_PCMCIA_SLOT_A +#else +#define CONFIG_PCMCIA_SLOT_B +#endif +#endif + +/* Cyclades ACS uses both slots */ +#ifdef CONFIG_PRxK +#define CONFIG_PCMCIA_SLOT_A +#define CONFIG_PCMCIA_SLOT_B +#endif + +#endif /* !defined(CONFIG_PCMCIA_SLOT_A) && !defined(CONFIG_PCMCIA_SLOT_B) */ + +#if defined(CONFIG_PCMCIA_SLOT_A) && defined(CONFIG_PCMCIA_SLOT_B) + +#define PCMCIA_SOCKETS_NO 2 +/* We have only 8 windows, dualsocket support will be limited. 
*/ +#define PCMCIA_MEM_WIN_NO 2 +#define PCMCIA_IO_WIN_NO 2 +#define PCMCIA_SLOT_MSG "SLOT_A and SLOT_B" + +#elif defined(CONFIG_PCMCIA_SLOT_A) || defined(CONFIG_PCMCIA_SLOT_B) + +#define PCMCIA_SOCKETS_NO 1 +/* full support for one slot */ +#define PCMCIA_MEM_WIN_NO 5 +#define PCMCIA_IO_WIN_NO 2 + +/* define _slot_ to be able to optimize macros */ + +#ifdef CONFIG_PCMCIA_SLOT_A +#define _slot_ 0 +#define PCMCIA_SLOT_MSG "SLOT_A" +#else +#define _slot_ 1 +#define PCMCIA_SLOT_MSG "SLOT_B" +#endif + +#else +#error m8xx_pcmcia: Bad configuration! +#endif + +/* ------------------------------------------------------------------------- */ + +#define PCMCIA_MEM_WIN_BASE 0xe0000000 /* base address for memory window 0 */ +#define PCMCIA_MEM_WIN_SIZE 0x04000000 /* each memory window is 64 MByte */ +#define PCMCIA_IO_WIN_BASE _IO_BASE /* base address for io window 0 */ + +#define PCMCIA_SCHLVL PCMCIA_INTERRUPT /* Status Change Interrupt Level */ + +/* ------------------------------------------------------------------------- */ + +/* 2.4.x and newer has this always in HZ */ +#define M8XX_BUSFREQ ((((bd_t *)&(__res))->bi_busfreq)) + +static int pcmcia_schlvl = PCMCIA_SCHLVL; + +static spinlock_t events_lock = SPIN_LOCK_UNLOCKED; + + +#define PCMCIA_SOCKET_KEY_5V 1 +#define PCMCIA_SOCKET_KEY_LV 2 + +/* look up table for pgcrx registers */ +static u32 *m8xx_pgcrx[2] = { + &((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pgcra, + &((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pgcrb +}; + +/* + * This structure is used to address each window in the PCMCIA controller. + * + * Keep in mind that we assume that pcmcia_win[n+1] is mapped directly + * after pcmcia_win[n]... + */ + +struct pcmcia_win { + u32 br; + u32 or; +}; + +/* + * For some reason the hardware guys decided to make both slots share + * some registers. + * + * Could someone invent object oriented hardware ? + * + * The macros are used to get the right bit from the registers. 
+ * SLOT_A : slot = 0 + * SLOT_B : slot = 1 + */ + +#define M8XX_PCMCIA_VS1(slot) (0x80000000 >> (slot << 4)) +#define M8XX_PCMCIA_VS2(slot) (0x40000000 >> (slot << 4)) +#define M8XX_PCMCIA_VS_MASK(slot) (0xc0000000 >> (slot << 4)) +#define M8XX_PCMCIA_VS_SHIFT(slot) (30 - (slot << 4)) + +#define M8XX_PCMCIA_WP(slot) (0x20000000 >> (slot << 4)) +#define M8XX_PCMCIA_CD2(slot) (0x10000000 >> (slot << 4)) +#define M8XX_PCMCIA_CD1(slot) (0x08000000 >> (slot << 4)) +#define M8XX_PCMCIA_BVD2(slot) (0x04000000 >> (slot << 4)) +#define M8XX_PCMCIA_BVD1(slot) (0x02000000 >> (slot << 4)) +#define M8XX_PCMCIA_RDY(slot) (0x01000000 >> (slot << 4)) +#define M8XX_PCMCIA_RDY_L(slot) (0x00800000 >> (slot << 4)) +#define M8XX_PCMCIA_RDY_H(slot) (0x00400000 >> (slot << 4)) +#define M8XX_PCMCIA_RDY_R(slot) (0x00200000 >> (slot << 4)) +#define M8XX_PCMCIA_RDY_F(slot) (0x00100000 >> (slot << 4)) +#define M8XX_PCMCIA_MASK(slot) (0xFFFF0000 >> (slot << 4)) + +#define M8XX_PCMCIA_POR_VALID 0x00000001 +#define M8XX_PCMCIA_POR_WRPROT 0x00000002 +#define M8XX_PCMCIA_POR_ATTRMEM 0x00000010 +#define M8XX_PCMCIA_POR_IO 0x00000018 +#define M8XX_PCMCIA_POR_16BIT 0x00000040 + +#define M8XX_PGCRX(slot) m8xx_pgcrx[slot] + +#define M8XX_PGCRX_CXOE 0x00000080 +#define M8XX_PGCRX_CXRESET 0x00000040 + +/* we keep one lookup table per socket to check flags */ + +#define PCMCIA_EVENTS_MAX 5 /* 4 max at a time + termination */ + +struct event_table { + u32 regbit; + u32 eventbit; +}; + +struct socket_info { + void (*handler)(void *info, u32 events); + void *info; + + u32 slot; + + socket_state_t state; + struct pccard_mem_map mem_win[PCMCIA_MEM_WIN_NO]; + struct pccard_io_map io_win[PCMCIA_IO_WIN_NO]; + struct event_table events[PCMCIA_EVENTS_MAX]; + struct pcmcia_socket socket; +}; + +static struct socket_info socket[PCMCIA_SOCKETS_NO]; + +/* + * Search this table to see if the windowsize is + * supported... 
+ */ + +#define M8XX_SIZES_NO 32 + +static const u32 m8xx_size_to_gray[M8XX_SIZES_NO] = +{ + 0x00000001, 0x00000002, 0x00000008, 0x00000004, + 0x00000080, 0x00000040, 0x00000010, 0x00000020, + 0x00008000, 0x00004000, 0x00001000, 0x00002000, + 0x00000100, 0x00000200, 0x00000800, 0x00000400, + + 0x0fffffff, 0xffffffff, 0xffffffff, 0xffffffff, + 0x01000000, 0x02000000, 0xffffffff, 0x04000000, + 0x00010000, 0x00020000, 0x00080000, 0x00040000, + 0x00800000, 0x00400000, 0x00100000, 0x00200000 +}; + +/* ------------------------------------------------------------------------- */ + +static irqreturn_t m8xx_interrupt(int irq, void *dev, struct pt_regs *regs); + +#define PCMCIA_BMT_LIMIT (15*4) /* Bus Monitor Timeout value */ + +/* ------------------------------------------------------------------------- */ +/* board specific stuff: */ +/* voltage_set(), hardware_enable() and hardware_disable() */ +/* ------------------------------------------------------------------------- */ +/* RPX Boards from Embedded Planet */ + +#if defined(CONFIG_RPXCLASSIC) || defined(CONFIG_RPXLITE) + +/* The RPX boards seem to have their bus monitor timeout set to 6*8 clocks. + * SYPCR is write once only, therefore the slowest memory must be faster + * than the bus monitor or we will get a machine check due to the bus timeout. 
+ */ + +#define PCMCIA_BOARD_MSG "RPX CLASSIC or RPX LITE" + +#undef PCMCIA_BMT_LIMIT +#define PCMCIA_BMT_LIMIT (6*8) + +static int voltage_set(int slot, int vcc, int vpp) +{ + u32 reg = 0; + + switch(vcc) { + case 0: break; + case 33: + reg |= BCSR1_PCVCTL4; + break; + case 50: + reg |= BCSR1_PCVCTL5; + break; + default: + return 1; + } + + switch(vpp) { + case 0: break; + case 33: + case 50: + if(vcc == vpp) + reg |= BCSR1_PCVCTL6; + else + return 1; + break; + case 120: + reg |= BCSR1_PCVCTL7; + default: + return 1; + } + + if(!((vcc == 50) || (vcc == 0))) + return 1; + + /* first, turn off all power */ + + out_be32(((u32 *)RPX_CSR_ADDR), in_be32(((u32 *)RPX_CSR_ADDR)) & ~(BCSR1_PCVCTL4 | BCSR1_PCVCTL5 | BCSR1_PCVCTL6 | BCSR1_PCVCTL7)); + + /* enable new powersettings */ + + out_be32(((u32 *)RPX_CSR_ADDR), in_be32(((u32 *)RPX_CSR_ADDR)) | reg); + + return 0; +} + +#define socket_get(_slot_) PCMCIA_SOCKET_KEY_5V +#define hardware_enable(_slot_) /* No hardware to enable */ +#define hardware_disable(_slot_) /* No hardware to disable */ + +#endif /* CONFIG_RPXCLASSIC */ + +/* FADS Boards from Motorola */ + +#if defined(CONFIG_FADS) + +#define PCMCIA_BOARD_MSG "FADS" + +static int voltage_set(int slot, int vcc, int vpp) +{ + u32 reg = 0; + + switch(vcc) { + case 0: + break; + case 33: + reg |= BCSR1_PCCVCC0; + break; + case 50: + reg |= BCSR1_PCCVCC1; + break; + default: + return 1; + } + + switch(vpp) { + case 0: + break; + case 33: + case 50: + if(vcc == vpp) + reg |= BCSR1_PCCVPP1; + else + return 1; + break; + case 120: + if ((vcc == 33) || (vcc == 50)) + reg |= BCSR1_PCCVPP0; + else + return 1; + default: + return 1; + } + + /* first, turn off all power */ + out_be32(&((u32 *)BCSR1), in_be32(&((u32 *)BCSR1)) & ~(BCSR1_PCCVCC_MASK | BCSR1_PCCVPP_MASK)); + + /* enable new powersettings */ + out_be32(&((u32 *)BCSR1), in_be32(&((u32 *)BCSR1)) | reg); + + return 0; +} + +#define socket_get(_slot_) PCMCIA_SOCKET_KEY_5V + +static void hardware_enable(int slot) +{ + 
out_be32(&((u32 *)BCSR1), in_be32(&((u32 *)BCSR1)) & ~BCSR1_PCCEN); +} + +static void hardware_disable(int slot) +{ + out_be32(&((u32 *)BCSR1), in_be32(&((u32 *)BCSR1)) | BCSR1_PCCEN); +} + +#endif + +/* ------------------------------------------------------------------------- */ +/* Motorola MBX860 */ + +#if defined(CONFIG_MBX) + +#define PCMCIA_BOARD_MSG "MBX" + +static int voltage_set(int slot, int vcc, int vpp) +{ + u8 reg = 0; + + switch(vcc) { + case 0: + break; + case 33: + reg |= CSR2_VCC_33; + break; + case 50: + reg |= CSR2_VCC_50; + break; + default: + return 1; + } + + switch(vpp) { + case 0: + break; + case 33: + case 50: + if(vcc == vpp) + reg |= CSR2_VPP_VCC; + else + return 1; + break; + case 120: + if ((vcc == 33) || (vcc == 50)) + reg |= CSR2_VPP_12; + else + return 1; + default: + return 1; + } + + /* first, turn off all power */ + out_8(&((u8 *)MBX_CSR2_ADDR), in_8(&((u8 *)MBX_CSR2_ADDR)) & ~(CSR2_VCC_MASK | CSR2_VPP_MASK)); + + /* enable new powersettings */ + out_8(&((u8 *)MBX_CSR2_ADDR), in_8(&((u8 *)MBX_CSR2_ADDR)) | reg); + + return 0; +} + +#define socket_get(_slot_) PCMCIA_SOCKET_KEY_5V +#define hardware_enable(_slot_) /* No hardware to enable */ +#define hardware_disable(_slot_) /* No hardware to disable */ + +#endif /* CONFIG_MBX */ + +#if defined(CONFIG_PRxK) +#include +extern volatile fpga_pc_regs *fpga_pc; + +#define PCMCIA_BOARD_MSG "MPC855T" + +static int voltage_set(int slot, int vcc, int vpp) +{ + u8 reg = 0; + u8 regread; + cpld_regs *ccpld = get_cpld(); + + switch(vcc) { + case 0: + break; + case 33: + reg |= PCMCIA_VCC_33; + break; + case 50: + reg |= PCMCIA_VCC_50; + break; + default: + return 1; + } + + switch(vpp) { + case 0: + break; + case 33: + case 50: + if(vcc == vpp) + reg |= PCMCIA_VPP_VCC; + else + return 1; + break; + case 120: + if ((vcc == 33) || (vcc == 50)) + reg |= PCMCIA_VPP_12; + else + return 1; + default: + return 1; + } + + reg = reg >> (slot << 2); + regread = in_8(&ccpld->fpga_pc_ctl); + if (reg != 
(regread & ((PCMCIA_VCC_MASK | PCMCIA_VPP_MASK) >> (slot << 2)))) { + /* enable new powersettings */ + regread = regread & ~((PCMCIA_VCC_MASK | PCMCIA_VPP_MASK) >> (slot << 2)); + out_8(&ccpld->fpga_pc_ctl, reg | regread); + msleep(100); + } + + return 0; +} + +#define socket_get(_slot_) PCMCIA_SOCKET_KEY_LV +#define hardware_enable(_slot_) /* No hardware to enable */ +#define hardware_disable(_slot_) /* No hardware to disable */ + +#endif /* CONFIG_PRxK */ + +static void m8xx_shutdown(void) +{ + u32 m, i; + struct pcmcia_win *w; + + for(i = 0; i < PCMCIA_SOCKETS_NO; i++){ + w = (void *) &((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pbr0; + + out_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr, M8XX_PCMCIA_MASK(i)); + out_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per, in_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per) & ~M8XX_PCMCIA_MASK(i)); + + /* turn off interrupt and disable CxOE */ + out_be32(M8XX_PGCRX(i), M8XX_PGCRX_CXOE); + + /* turn off memory windows */ + for(m = 0; m < PCMCIA_MEM_WIN_NO; m++) { + out_be32(&w->or, 0); /* set to not valid */ + w++; + } + + /* turn off voltage */ + voltage_set(i, 0, 0); + + /* disable external hardware */ + hardware_disable(i); + } + + free_irq(pcmcia_schlvl, NULL); +} + +/* copied from tcic.c */ + +static int m8xx_drv_suspend(struct device *dev, pm_message_t state, u32 level) +{ + int ret = 0; + if (level == SUSPEND_SAVE_STATE) + ret = pcmcia_socket_dev_suspend(dev, state); + return ret; +} + +static int m8xx_drv_resume(struct device *dev, u32 level) +{ + int ret = 0; + if (level == RESUME_RESTORE_STATE) + ret = pcmcia_socket_dev_resume(dev); + return ret; +} + +static struct device_driver m8xx_driver = { + .name = "m8xx-pcmcia", + .bus = &platform_bus_type, + .suspend = m8xx_drv_suspend, + .resume = m8xx_drv_resume, +}; + +static struct platform_device m8xx_device = { + .name = "m8xx-pcmcia", + .id = 0, +}; + +static u32 pending_events[PCMCIA_SOCKETS_NO]; +static spinlock_t pending_event_lock = SPIN_LOCK_UNLOCKED; + 
+static irqreturn_t m8xx_interrupt(int irq, void *dev, struct pt_regs *regs) +{ + struct socket_info *s; + struct event_table *e; + unsigned int i, events, pscr, pipr, per; + + dprintk("Interrupt!\n"); + /* get interrupt sources */ + + pscr = in_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr); + pipr = in_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pipr); + per = in_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per); + + for(i = 0; i < PCMCIA_SOCKETS_NO; i++) { + s = &socket[i]; + e = &s->events[0]; + events = 0; + + while(e->regbit) { + if(pscr & e->regbit) + events |= e->eventbit; + + e++; + } + + /* + * report only if both card detect signals are the same + * not too nice done, + * we depend on that CD2 is the bit to the left of CD1... + */ + if(events & SS_DETECT) + if(((pipr & M8XX_PCMCIA_CD2(i)) >> 1) ^ + (pipr & M8XX_PCMCIA_CD1(i))) + { + events &= ~SS_DETECT; + } + +#ifdef PCMCIA_GLITCHY_CD + /* + * I've experienced CD problems with my ADS board. + * We make an extra check to see if there was a + * real change of Card detection. + */ + + if((events & SS_DETECT) && + ((pipr & + (M8XX_PCMCIA_CD2(i) | M8XX_PCMCIA_CD1(i))) == 0) && + (s->state.Vcc | s->state.Vpp)) { + events &= ~SS_DETECT; + /*printk( "CD glitch workaround - CD = 0x%08x!\n", + (pipr & (M8XX_PCMCIA_CD2(i) + | M8XX_PCMCIA_CD1(i))));*/ + } +#endif + + /* call the handler */ + + dprintk("slot %u: events = 0x%02x, pscr = 0x%08x, " + "pipr = 0x%08x\n", + i, events, pscr, pipr); + + if(events) { + spin_lock(&pending_event_lock); + pending_events[i] |= events; + spin_unlock(&pending_event_lock); + /* + * Turn off RDY_L bits in the PER mask on + * CD interrupt receival. + * + * They can generate bad interrupts on the + * ACS4,8,16,32. 
- marcelo + */ + per &= ~M8XX_PCMCIA_RDY_L(0); + per &= ~M8XX_PCMCIA_RDY_L(1); + + out_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per, per); + + if (events) + pcmcia_parse_events(&socket[i].socket, events); + } + } + + /* clear the interrupt sources */ + out_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr, pscr); + + dprintk("Interrupt done.\n"); + + return IRQ_HANDLED; +} + +static u32 m8xx_get_graycode(u32 size) +{ + u32 k; + + for(k = 0; k < M8XX_SIZES_NO; k++) + if(m8xx_size_to_gray[k] == size) + break; + + if((k == M8XX_SIZES_NO) || (m8xx_size_to_gray[k] == -1)) + k = -1; + + return k; +} + +static u32 m8xx_get_speed(u32 ns, u32 is_io) +{ + u32 reg, clocks, psst, psl, psht; + + if(!ns) { + + /* + * We get called with IO maps setup to 0ns + * if not specified by the user. + * They should be 255ns. + */ + + if(is_io) + ns = 255; + else + ns = 100; /* fast memory if 0 */ + } + + /* + * In PSST, PSL, PSHT fields we tell the controller + * timing parameters in CLKOUT clock cycles. + * CLKOUT is the same as GCLK2_50. + */ + +/* how we want to adjust the timing - in percent */ + +#define ADJ 180 /* 80 % longer accesstime - to be sure */ + + clocks = ((M8XX_BUSFREQ / 1000) * ns) / 1000; + clocks = (clocks * ADJ) / (100*1000); + if(clocks >= PCMCIA_BMT_LIMIT) { + printk( "Max access time limit reached\n"); + clocks = PCMCIA_BMT_LIMIT-1; + } + + psst = clocks / 7; /* setup time */ + psht = clocks / 7; /* hold time */ + psl = (clocks * 5) / 7; /* strobe length */ + + psst += clocks - (psst + psht + psl); + + reg = psst << 12; + reg |= psl << 7; + reg |= psht << 16; + + return reg; +} + +static int m8xx_get_status(struct pcmcia_socket *sock, unsigned int *value) +{ + int lsock = container_of(sock, struct socket_info, socket)->slot; + struct socket_info *s = &socket[lsock]; + unsigned int pipr, reg; + + pipr = in_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pipr); + + *value = ((pipr & (M8XX_PCMCIA_CD1(lsock) + | M8XX_PCMCIA_CD2(lsock))) == 0) ? 
SS_DETECT : 0; + *value |= (pipr & M8XX_PCMCIA_WP(lsock)) ? SS_WRPROT : 0; + + if (s->state.flags & SS_IOCARD) + *value |= (pipr & M8XX_PCMCIA_BVD1(lsock)) ? SS_STSCHG : 0; + else { + *value |= (pipr & M8XX_PCMCIA_RDY(lsock)) ? SS_READY : 0; + *value |= (pipr & M8XX_PCMCIA_BVD1(lsock)) ? SS_BATDEAD : 0; + *value |= (pipr & M8XX_PCMCIA_BVD2(lsock)) ? SS_BATWARN : 0; + } + + if (s->state.Vcc | s->state.Vpp) + *value |= SS_POWERON; + + /* + * Voltage detection: + * This driver only supports 16-Bit pc-cards. + * Cardbus is not handled here. + * + * To determine what voltage to use we must read the VS1 and VS2 pin. + * Depending on what socket type is present, + * different combinations mean different things. + * + * Card Key Socket Key VS1 VS2 Card Vcc for CIS parse + * + * 5V 5V, LV* NC NC 5V only 5V (if available) + * + * 5V 5V, LV* GND NC 5 or 3.3V as low as possible + * + * 5V 5V, LV* GND GND 5, 3.3, x.xV as low as possible + * + * LV* 5V - - shall not fit into socket + * + * LV* LV* GND NC 3.3V only 3.3V + * + * LV* LV* NC GND x.xV x.xV (if avail.) + * + * LV* LV* GND GND 3.3 or x.xV as low as possible + * + * *LV means Low Voltage + * + * + * That gives us the following table: + * + * Socket VS1 VS2 Voltage + * + * 5V NC NC 5V + * 5V NC GND none (should not be possible) + * 5V GND NC >= 3.3V + * 5V GND GND >= x.xV + * + * LV NC NC 5V (if available) + * LV NC GND x.xV (if available) + * LV GND NC 3.3V + * LV GND GND >= x.xV + * + * So, how do I determine if I have a 5V or a LV + * socket on my board? Look at the socket! + * + * + * Socket with 5V key: + * ++--------------------------------------------+ + * || | + * || || + * || || + * | | + * +---------------------------------------------+ + * + * Socket with LV key: + * ++--------------------------------------------+ + * || | + * | || + * | || + * | | + * +---------------------------------------------+ + * + * + * With other words - LV only cards does not fit + * into the 5V socket! 
+ */ + + /* read out VS1 and VS2 */ + + reg = (pipr & M8XX_PCMCIA_VS_MASK(lsock)) + >> M8XX_PCMCIA_VS_SHIFT(lsock); + + if(socket_get(lsock) == PCMCIA_SOCKET_KEY_LV) { + switch(reg) { + case 1: + *value |= SS_3VCARD; + break; /* GND, NC - 3.3V only */ + case 2: + *value |= SS_XVCARD; + break; /* NC. GND - x.xV only */ + }; + } + + dprintk("GetStatus(%d) = %#2.2x\n", lsock, *value); + return 0; +} + +static int m8xx_get_socket(struct pcmcia_socket *sock, socket_state_t *state) +{ + int lsock = container_of(sock, struct socket_info, socket)->slot; + *state = socket[lsock].state; /* copy the whole structure */ + + dprintk("GetSocket(%d) = flags %#3.3x, Vcc %d, Vpp %d, " + "io_irq %d, csc_mask %#2.2x\n", lsock, state->flags, + state->Vcc, state->Vpp, state->io_irq, state->csc_mask); + return 0; +} + +static int m8xx_set_socket(struct pcmcia_socket *sock, socket_state_t *state) +{ + int lsock = container_of(sock, struct socket_info, socket)->slot; + struct socket_info *s = &socket[lsock]; + struct event_table *e; + unsigned int reg; + unsigned long flags; + + dprintk( "SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + "io_irq %d, csc_mask %#2.2x)\n", lsock, state->flags, + state->Vcc, state->Vpp, state->io_irq, state->csc_mask); + + /* First, set voltage - bail out if invalid */ + if(voltage_set(lsock, state->Vcc, state->Vpp)) + return -EINVAL; + + /* Take care of reset... */ + if(state->flags & SS_RESET) + out_be32(M8XX_PGCRX(lsock), in_be32(M8XX_PGCRX(lsock)) | M8XX_PGCRX_CXRESET); /* active high */ + else + out_be32(M8XX_PGCRX(lsock), in_be32(M8XX_PGCRX(lsock)) & ~M8XX_PGCRX_CXRESET); + + /* ... and output enable. */ + + /* The CxOE signal is connected to a 74541 on the ADS. + I guess most other boards used the ADS as a reference. + I tried to control the CxOE signal with SS_OUTPUT_ENA, + but the reset signal seems connected via the 541. + If the CxOE is left high, some signals are tristated and + no pullups are present -> the cards act weird. 
+ So right now the buffers are enabled if the power is on. */ + + if(state->Vcc || state->Vpp) + out_be32(M8XX_PGCRX(lsock), in_be32(M8XX_PGCRX(lsock)) & ~M8XX_PGCRX_CXOE); /* active low */ + else + out_be32(M8XX_PGCRX(lsock), in_be32(M8XX_PGCRX(lsock)) | M8XX_PGCRX_CXOE); + + /* + * We'd better turn off interrupts before + * we mess with the events-table.. + */ + + spin_lock_irqsave(&events_lock, flags); + + /* + * Play around with the interrupt mask to be able to + * give the events the generic pcmcia driver wants us to. + */ + + e = &s->events[0]; + reg = 0; + + if(state->csc_mask & SS_DETECT) { + e->eventbit = SS_DETECT; + reg |= e->regbit = (M8XX_PCMCIA_CD2(lsock) + | M8XX_PCMCIA_CD1(lsock)); + e++; + } + if(state->flags & SS_IOCARD) { + /* + * I/O card + */ + if(state->csc_mask & SS_STSCHG) { + e->eventbit = SS_STSCHG; + reg |= e->regbit = M8XX_PCMCIA_BVD1(lsock); + e++; + } + /* + * If io_irq is non-zero we should enable irq. + */ + if(state->io_irq) { + out_be32(M8XX_PGCRX(lsock), in_be32(M8XX_PGCRX(lsock)) | mk_int_int_mask(state->io_irq) << 24); + /* + * Strange thing here: + * The manual does not tell us which interrupt + * the sources generate. + * Anyhow, I found out that RDY_L generates IREQLVL. + * + * We use level triggerd interrupts, and they don't + * have to be cleared in PSCR in the interrupt handler. + */ + reg |= M8XX_PCMCIA_RDY_L(lsock); + } + else + out_be32(M8XX_PGCRX(lsock), in_be32(M8XX_PGCRX(lsock)) & 0x00ffffff); + } + else { + /* + * Memory card + */ + if(state->csc_mask & SS_BATDEAD) { + e->eventbit = SS_BATDEAD; + reg |= e->regbit = M8XX_PCMCIA_BVD1(lsock); + e++; + } + if(state->csc_mask & SS_BATWARN) { + e->eventbit = SS_BATWARN; + reg |= e->regbit = M8XX_PCMCIA_BVD2(lsock); + e++; + } + /* What should I trigger on - low/high,raise,fall? */ + if(state->csc_mask & SS_READY) { + e->eventbit = SS_READY; + reg |= e->regbit = 0; //?? + e++; + } + } + + e->regbit = 0; /* terminate list */ + + /* + * Clear the status changed . 
+ * Port A and Port B share the same port. + * Writing ones will clear the bits. + */ + + out_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr, reg); + + /* + * Write the mask. + * Port A and Port B share the same port. + * Need for read-modify-write. + * Ones will enable the interrupt. + */ + + /* + reg |= ((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per + & M8XX_PCMCIA_MASK(lsock); + */ + + reg |= in_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per) & + (M8XX_PCMCIA_MASK(0) | M8XX_PCMCIA_MASK(1)); + + out_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per, reg); + + spin_unlock_irqrestore(&events_lock, flags); + + /* copy the struct and modify the copy */ + + s->state = *state; + + return 0; +} + +static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) +{ + int lsock = container_of(sock, struct socket_info, socket)->slot; + + struct socket_info *s = &socket[lsock]; + struct pcmcia_win *w; + unsigned int reg, winnr; + +#define M8XX_SIZE (io->stop - io->start + 1) +#define M8XX_BASE (PCMCIA_IO_WIN_BASE + io->start) + + dprintk( "SetIOMap(%d, %d, %#2.2x, %d ns, " + "%#4.4x-%#4.4x)\n", lsock, io->map, io->flags, + io->speed, io->start, io->stop); + + if ((io->map >= PCMCIA_IO_WIN_NO) || (io->start > 0xffff) + || (io->stop > 0xffff) || (io->stop < io->start)) + return -EINVAL; + + if((reg = m8xx_get_graycode(M8XX_SIZE)) == -1) + return -EINVAL; + + if(io->flags & MAP_ACTIVE) { + + dprintk( "io->flags & MAP_ACTIVE\n"); + + winnr = (PCMCIA_MEM_WIN_NO * PCMCIA_SOCKETS_NO) + + (lsock * PCMCIA_IO_WIN_NO) + io->map; + + /* setup registers */ + + w = (void *) &((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pbr0; + w += winnr; + + out_be32(&w->or, 0); /* turn off window first */ + out_be32(&w->br, M8XX_BASE); + + reg <<= 27; + reg |= M8XX_PCMCIA_POR_IO |(lsock << 2); + + reg |= m8xx_get_speed(io->speed, 1); + + if(io->flags & MAP_WRPROT) + reg |= M8XX_PCMCIA_POR_WRPROT; + + /*if(io->flags & (MAP_16BIT | MAP_AUTOSZ))*/ + if(io->flags & MAP_16BIT) + reg |= 
M8XX_PCMCIA_POR_16BIT; + + if(io->flags & MAP_ACTIVE) + reg |= M8XX_PCMCIA_POR_VALID; + + out_be32(&w->or, reg); + + dprintk("Socket %u: Mapped io window %u at %#8.8x, " + "OR = %#8.8x.\n", lsock, io->map, w->br, w->or); + } else { + /* shutdown IO window */ + winnr = (PCMCIA_MEM_WIN_NO * PCMCIA_SOCKETS_NO) + + (lsock * PCMCIA_IO_WIN_NO) + io->map; + + /* setup registers */ + + w = (void *) &((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pbr0; + w += winnr; + + out_be32(&w->or, 0); /* turn off window */ + out_be32(&w->br, 0); /* turn off base address */ + + dprintk("Socket %u: Unmapped io window %u at %#8.8x, " + "OR = %#8.8x.\n", lsock, io->map, w->br, w->or); + } + + /* copy the struct and modify the copy */ + s->io_win[io->map] = *io; + s->io_win[io->map].flags &= (MAP_WRPROT + | MAP_16BIT + | MAP_ACTIVE); + dprintk("SetIOMap exit\n"); + + return 0; +} + +static int m8xx_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map *mem) +{ + int lsock = container_of(sock, struct socket_info, socket)->slot; + struct socket_info *s = &socket[lsock]; + struct pcmcia_win *w; + struct pccard_mem_map *old; + unsigned int reg, winnr; + + dprintk( "SetMemMap(%d, %d, %#2.2x, %d ns, " + "%#5.5lx, %#5.5x)\n", lsock, mem->map, mem->flags, + mem->speed, mem->static_start, mem->card_start); + + if ((mem->map >= PCMCIA_MEM_WIN_NO) +// || ((mem->s) >= PCMCIA_MEM_WIN_SIZE) + || (mem->card_start >= 0x04000000) + || (mem->static_start & 0xfff) /* 4KByte resolution */ + || (mem->card_start & 0xfff)) + return -EINVAL; + + if((reg = m8xx_get_graycode(PCMCIA_MEM_WIN_SIZE)) == -1) { + printk( "Cannot set size to 0x%08x.\n", PCMCIA_MEM_WIN_SIZE); + return -EINVAL; + } + reg <<= 27; + + winnr = (lsock * PCMCIA_MEM_WIN_NO) + mem->map; + + /* Setup the window in the pcmcia controller */ + + w = (void *) &((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pbr0; + w += winnr; + + reg |= lsock << 2; + + reg |= m8xx_get_speed(mem->speed, 0); + + if(mem->flags & MAP_ATTRIB) + reg |= M8XX_PCMCIA_POR_ATTRMEM; + + 
if(mem->flags & MAP_WRPROT) + reg |= M8XX_PCMCIA_POR_WRPROT; + + if(mem->flags & MAP_16BIT) + reg |= M8XX_PCMCIA_POR_16BIT; + + if(mem->flags & MAP_ACTIVE) + reg |= M8XX_PCMCIA_POR_VALID; + + out_be32(&w->or, reg); + + dprintk("Socket %u: Mapped memory window %u at %#8.8x, " + "OR = %#8.8x.\n", lsock, mem->map, w->br, w->or); + + if(mem->flags & MAP_ACTIVE) { + /* get the new base address */ + mem->static_start = PCMCIA_MEM_WIN_BASE + + (PCMCIA_MEM_WIN_SIZE * winnr) + + mem->card_start; + } + + dprintk("SetMemMap(%d, %d, %#2.2x, %d ns, " + "%#5.5lx, %#5.5x)\n", lsock, mem->map, mem->flags, + mem->speed, mem->static_start, mem->card_start); + + /* copy the struct and modify the copy */ + + old = &s->mem_win[mem->map]; + + *old = *mem; + old->flags &= (MAP_ATTRIB + | MAP_WRPROT + | MAP_16BIT + | MAP_ACTIVE); + + return 0; +} + +static int m8xx_sock_init(struct pcmcia_socket *sock) +{ + int i; + pccard_io_map io = { 0, 0, 0, 0, 1 }; + pccard_mem_map mem = { 0, 0, 0, 0, 0, 0 }; + + dprintk( "sock_init(%d)\n", s); + + m8xx_set_socket(sock, &dead_socket); + for (i = 0; i < PCMCIA_IO_WIN_NO; i++) { + io.map = i; + m8xx_set_io_map(sock, &io); + } + for (i = 0; i < PCMCIA_MEM_WIN_NO; i++) { + mem.map = i; + m8xx_set_mem_map(sock, &mem); + } + + return 0; + +} + +static int m8xx_suspend(struct pcmcia_socket *sock) +{ + return m8xx_set_socket(sock, &dead_socket); +} + +static struct pccard_operations m8xx_services = { + .init = m8xx_sock_init, + .suspend = m8xx_suspend, + .get_status = m8xx_get_status, + .get_socket = m8xx_get_socket, + .set_socket = m8xx_set_socket, + .set_io_map = m8xx_set_io_map, + .set_mem_map = m8xx_set_mem_map, +}; + +static int __init m8xx_init(void) +{ + struct pcmcia_win *w; + unsigned int i,m; + + pcmcia_info("%s\n", version); + + if (driver_register(&m8xx_driver)) + return -1; + + pcmcia_info(PCMCIA_BOARD_MSG " using " PCMCIA_SLOT_MSG + " with IRQ %u.\n", pcmcia_schlvl); + + /* Configure Status change interrupt */ + + if(request_irq(pcmcia_schlvl, 
m8xx_interrupt, 0, + "m8xx_pcmcia", NULL)) { + pcmcia_error("Cannot allocate IRQ %u for SCHLVL!\n", + pcmcia_schlvl); + return -1; + } + + w = (void *) &((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pbr0; + + out_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr, + M8XX_PCMCIA_MASK(0)| M8XX_PCMCIA_MASK(1)); + + out_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per, + in_be32(&((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_per) & + ~(M8XX_PCMCIA_MASK(0)| M8XX_PCMCIA_MASK(1))); + +/* connect interrupt and disable CxOE */ + + out_be32(M8XX_PGCRX(0), M8XX_PGCRX_CXOE | (mk_int_int_mask(pcmcia_schlvl) << 16)); + out_be32(M8XX_PGCRX(1), M8XX_PGCRX_CXOE | (mk_int_int_mask(pcmcia_schlvl) << 16)); + +/* intialize the fixed memory windows */ + + for(i = 0; i < PCMCIA_SOCKETS_NO; i++){ + for(m = 0; m < PCMCIA_MEM_WIN_NO; m++) { + out_be32(&w->br, PCMCIA_MEM_WIN_BASE + + (PCMCIA_MEM_WIN_SIZE + * (m + i * PCMCIA_MEM_WIN_NO))); + + out_be32(&w->or, 0); /* set to not valid */ + + w++; + } + } + +/* turn off voltage */ + voltage_set(0, 0, 0); + voltage_set(1, 0, 0); + +/* Enable external hardware */ + hardware_enable(0); + hardware_enable(1); + + platform_device_register(&m8xx_device); + + for (i = 0 ; i < PCMCIA_SOCKETS_NO; i++) { + socket[i].slot = i; + socket[i].socket.owner = THIS_MODULE; + socket[i].socket.features = SS_CAP_PCCARD | SS_CAP_MEM_ALIGN | SS_CAP_STATIC_MAP; + socket[i].socket.irq_mask = 0x000; + socket[i].socket.map_size = 0x1000; + socket[i].socket.io_offset = 0; + socket[i].socket.pci_irq = i ? 
7 : 9; + socket[i].socket.ops = &m8xx_services; + socket[i].socket.resource_ops = &pccard_nonstatic_ops; + socket[i].socket.cb_dev = NULL; + socket[i].socket.dev.dev = &m8xx_device.dev; + } + + for (i = 0; i < PCMCIA_SOCKETS_NO; i++) + pcmcia_register_socket(&socket[i].socket); + + return 0; +} + +static void __exit m8xx_exit(void) +{ + int i; + + for (i = 0; i < PCMCIA_SOCKETS_NO; i++) + pcmcia_unregister_socket(&socket[i].socket); + + m8xx_shutdown(); + + platform_device_unregister(&m8xx_device); + driver_unregister(&m8xx_driver); +} + +module_init(m8xx_init); +module_exit(m8xx_exit); diff -puN drivers/pcmcia/Makefile~mpc8xx-pcmcia-driver drivers/pcmcia/Makefile --- devel/drivers/pcmcia/Makefile~mpc8xx-pcmcia-driver 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/drivers/pcmcia/Makefile 2005-10-28 17:44:02.000000000 -0700 @@ -25,6 +25,7 @@ obj-$(CONFIG_PD6729) += pd6729.o obj-$(CONFIG_I82365) += i82365.o obj-$(CONFIG_I82092) += i82092.o obj-$(CONFIG_TCIC) += tcic.o +obj-$(CONFIG_PCMCIA_M8XX) += m8xx_pcmcia.o obj-$(CONFIG_HD64465_PCMCIA) += hd64465_ss.o obj-$(CONFIG_PCMCIA_SA1100) += sa11xx_core.o sa1100_cs.o obj-$(CONFIG_PCMCIA_SA1111) += sa11xx_core.o sa1111_cs.o _ From akpm at osdl.org Sat Oct 29 10:46:09 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:09 -0700 Subject: [patch 01/43] ppc: prevent GCC 4 from generating AltiVec instructions in kernel Message-ID: <200510290046.j9T0kegV029985@shell0.pdx.osdl.net> From: Lee Nicks Depending on how GCC is built, GCC 4 may generate altivec instructions without the user explicitly requesting vector operations in the code. Although this is a performance booster for user applications, it is a problem for the kernel. This patch explicitly instructs GCC to NOT generate altivec instructions while building the kernel. Here are some test cases I ran. (1) build gcc 4.0.1 with '--with-cpu=7450 --enable-altivec --enable-cxx-flags=-mcpu=7450', and use this gcc to build kernel WITHOUT this kernel patch.
Kernel fails to boot up on a 7450 board because of altivec instructions in kernel. (2) build gcc 4.0.1 with "--with-cpu=7450 --enable-altivec --enable-cxx-flags=-mcpu=7450", and use this gcc to build kernel WITH this kernel patch. Kernel boots up on a 7450 board without any problem. (3) build gcc 4.0.1 with "--with-cpu=750 --enable-cxx-flags=-mcpu=750", and use this gcc to build kernel with or without this kernel patch. Kernel boots up on a 7450 board without any problem. This patch should also work with GCC 3 or even earlier GCC 2.95.3. Signed-off-by: Lee Nicks Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton --- arch/ppc/Makefile | 4 ++++ arch/ppc64/Makefile | 3 +++ 2 files changed, 7 insertions(+) diff -puN arch/ppc64/Makefile~ppc-prevent-gcc-4-from-generating-altivec-instructions-in-kernel arch/ppc64/Makefile --- devel/arch/ppc64/Makefile~ppc-prevent-gcc-4-from-generating-altivec-instructions-in-kernel 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc64/Makefile 2005-10-28 17:45:21.000000000 -0700 @@ -75,6 +75,9 @@ else CFLAGS += $(call cc-option,-mtune=power4) endif +# No AltiVec instruction when building kernel +CFLAGS += $(call cc-option, -mno-altivec) + # Enable unit-at-a-time mode when possible. It shrinks the # kernel considerably.
CFLAGS += $(call cc-option,-funit-at-a-time) diff -puN arch/ppc/Makefile~ppc-prevent-gcc-4-from-generating-altivec-instructions-in-kernel arch/ppc/Makefile --- devel/arch/ppc/Makefile~ppc-prevent-gcc-4-from-generating-altivec-instructions-in-kernel 2005-10-28 17:44:02.000000000 -0700 +++ devel-akpm/arch/ppc/Makefile 2005-10-28 17:44:02.000000000 -0700 @@ -26,6 +26,10 @@ CPPFLAGS += -Iarch/$(ARCH) -Iarch/$(ARCH AFLAGS += -Iarch/$(ARCH) CFLAGS += -Iarch/$(ARCH) -msoft-float -pipe \ -ffixed-r2 -mmultiple + +# No AltiVec instruction when building kernel +CFLAGS += $(call cc-option, -mno-altivec) + CPP = $(CC) -E $(CFLAGS) # Temporary hack until we have migrated to asm-powerpc LINUXINCLUDE += -Iarch/$(ARCH)/include _ From akpm at osdl.org Sat Oct 29 10:46:27 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:27 -0700 Subject: [patch 15/43] ppc32: 85xx PHY Platform Update Message-ID: <200510290046.j9T0kvrA030032@shell0.pdx.osdl.net> From: Andy Fleming This patch updates the 85xx platform code to support the new PHY Layer. 
Signed-off-by: Andy Fleming Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton --- arch/ppc/platforms/85xx/mpc8540_ads.c | 30 +++++++++++------- arch/ppc/platforms/85xx/mpc8560_ads.c | 25 ++++++++++----- arch/ppc/platforms/85xx/mpc85xx_cds_common.c | 34 +++++++++++--------- arch/ppc/platforms/85xx/sbc8560.c | 22 ++++++++----- arch/ppc/platforms/85xx/stx_gp3.c | 21 ++++++++---- arch/ppc/syslib/mpc85xx_devices.c | 17 ++++++---- arch/ppc/syslib/mpc85xx_sys.c | 44 +++++++++++++++++---------- include/asm-ppc/mpc85xx.h | 3 + include/linux/fsl_devices.h | 13 +++++-- 9 files changed, 131 insertions(+), 78 deletions(-) diff -puN arch/ppc/platforms/85xx/mpc8540_ads.c~ppc32-85xx-phy-platform-update arch/ppc/platforms/85xx/mpc8540_ads.c --- devel/arch/ppc/platforms/85xx/mpc8540_ads.c~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/85xx/mpc8540_ads.c 2005-10-28 17:44:03.000000000 -0700 @@ -52,6 +52,10 @@ #include +static const char *GFAR_PHY_0 = "phy0:0"; +static const char *GFAR_PHY_1 = "phy0:1"; +static const char *GFAR_PHY_3 = "phy0:3"; + /* ************************************************************************ * * Setup the architecture @@ -63,6 +67,7 @@ mpc8540ads_setup_arch(void) bd_t *binfo = (bd_t *) __res; unsigned int freq; struct gianfar_platform_data *pdata; + struct gianfar_mdio_data *mdata; /* get the core frequency */ freq = binfo->bi_intfreq; @@ -89,34 +94,35 @@ mpc8540ads_setup_arch(void) invalidate_tlbcam_entry(num_tlbcam_entries - 1); #endif + /* setup the board related info for the MDIO bus */ + mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC85xx_MDIO); + + mdata->irq[0] = MPC85xx_IRQ_EXT5; + mdata->irq[1] = MPC85xx_IRQ_EXT5; + mdata->irq[2] = -1; + mdata->irq[3] = MPC85xx_IRQ_EXT5; + mdata->irq[31] = -1; + mdata->paddr += binfo->bi_immr_base; + /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); if 
(pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_0; memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_1; memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_FEC); if (pdata) { pdata->board_flags = 0; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 3; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_3; memcpy(pdata->mac_addr, binfo->bi_enet2addr, 6); } diff -puN arch/ppc/platforms/85xx/mpc8560_ads.c~ppc32-85xx-phy-platform-update arch/ppc/platforms/85xx/mpc8560_ads.c --- devel/arch/ppc/platforms/85xx/mpc8560_ads.c~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/85xx/mpc8560_ads.c 2005-10-28 17:44:03.000000000 -0700 @@ -56,6 +56,10 @@ #include +static const char *GFAR_PHY_0 = "phy0:0"; +static const char *GFAR_PHY_1 = "phy0:1"; +static const char *GFAR_PHY_3 = "phy0:3"; + /* ************************************************************************ * * Setup the architecture @@ -68,6 +72,7 @@ mpc8560ads_setup_arch(void) bd_t *binfo = (bd_t *) __res; unsigned int freq; struct gianfar_platform_data *pdata; + struct gianfar_mdio_data *mdata; cpm2_reset(); @@ -86,24 +91,28 @@ mpc8560ads_setup_arch(void) mpc85xx_setup_hose(); #endif + /* setup the board related info for the MDIO bus */ + mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC85xx_MDIO); + + mdata->irq[0] = MPC85xx_IRQ_EXT5; + mdata->irq[1] = MPC85xx_IRQ_EXT5; + 
mdata->irq[2] = -1; + mdata->irq[3] = MPC85xx_IRQ_EXT5; + mdata->irq[31] = -1; + mdata->paddr += binfo->bi_immr_base; + /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_0; memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_1; memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); } diff -puN arch/ppc/platforms/85xx/mpc85xx_cds_common.c~ppc32-85xx-phy-platform-update arch/ppc/platforms/85xx/mpc85xx_cds_common.c --- devel/arch/ppc/platforms/85xx/mpc85xx_cds_common.c~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/85xx/mpc85xx_cds_common.c 2005-10-28 17:44:03.000000000 -0700 @@ -394,6 +394,9 @@ mpc85xx_cds_pcibios_fixup(void) TODC_ALLOC(); +static const char *GFAR_PHY_0 = "phy0:0"; +static const char *GFAR_PHY_1 = "phy0:1"; + /* ************************************************************************ * * Setup the architecture @@ -405,6 +408,7 @@ mpc85xx_cds_setup_arch(void) bd_t *binfo = (bd_t *) __res; unsigned int freq; struct gianfar_platform_data *pdata; + struct gianfar_mdio_data *mdata; /* get the core frequency */ freq = binfo->bi_intfreq; @@ -448,44 +452,42 @@ mpc85xx_cds_setup_arch(void) invalidate_tlbcam_entry(num_tlbcam_entries - 1); #endif + /* setup the board related info for the MDIO bus */ + mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC85xx_MDIO); + + mdata->irq[0] = MPC85xx_IRQ_EXT5; + 
mdata->irq[1] = MPC85xx_IRQ_EXT5; + mdata->irq[2] = -1; + mdata->irq[3] = -1; + mdata->irq[31] = -1; + mdata->paddr += binfo->bi_immr_base; + /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_0; memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_1; memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_eTSEC1); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_0; memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_eTSEC2); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_1; memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); } diff -puN arch/ppc/platforms/85xx/sbc8560.c~ppc32-85xx-phy-platform-update arch/ppc/platforms/85xx/sbc8560.c --- devel/arch/ppc/platforms/85xx/sbc8560.c~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/85xx/sbc8560.c 2005-10-28 17:44:03.000000000 -0700 @@ -91,6 +91,9 @@ sbc8560_early_serial_map(void) } #endif +static 
const char *GFAR_PHY_25 = "phy0:25"; +static const char *GFAR_PHY_26 = "phy0:26"; + /* ************************************************************************ * * Setup the architecture @@ -102,6 +105,7 @@ sbc8560_setup_arch(void) bd_t *binfo = (bd_t *) __res; unsigned int freq; struct gianfar_platform_data *pdata; + struct gianfar_mdio_data *mdata; /* get the core frequency */ freq = binfo->bi_intfreq; @@ -126,24 +130,26 @@ sbc8560_setup_arch(void) invalidate_tlbcam_entry(num_tlbcam_entries - 1); #endif + /* setup the board related info for the MDIO bus */ + mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC85xx_MDIO); + + mdata->irq[25] = MPC85xx_IRQ_EXT6; + mdata->irq[26] = MPC85xx_IRQ_EXT7; + mdata->irq[31] = -1; + mdata->paddr += binfo->bi_immr_base; + /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT6; - pdata->phyid = 25; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_25; memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); if (pdata) { pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT7; - pdata->phyid = 26; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_26; memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); } diff -puN arch/ppc/platforms/85xx/stx_gp3.c~ppc32-85xx-phy-platform-update arch/ppc/platforms/85xx/stx_gp3.c --- devel/arch/ppc/platforms/85xx/stx_gp3.c~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/85xx/stx_gp3.c 2005-10-28 17:44:03.000000000 -0700 @@ -91,6 +91,9 @@ static u8 gp3_openpic_initsenses[] __ini 0x0, /* External 11: */ }; +static const char *GFAR_PHY_2 = "phy0:2"; +static 
const char *GFAR_PHY_4 = "phy0:4"; + /* * Setup the architecture */ @@ -100,6 +103,7 @@ gp3_setup_arch(void) bd_t *binfo = (bd_t *) __res; unsigned int freq; struct gianfar_platform_data *pdata; + struct gianfar_mdio_data *mdata; cpm2_reset(); @@ -118,23 +122,26 @@ gp3_setup_arch(void) mpc85xx_setup_hose(); #endif + /* setup the board related info for the MDIO bus */ + mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC85xx_MDIO); + + mdata->irq[2] = MPC85xx_IRQ_EXT5; + mdata->irq[4] = MPC85xx_IRQ_EXT5; + mdata->irq[31] = -1; + mdata->paddr += binfo->bi_immr_base; + /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); if (pdata) { /* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 2; - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_2; memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); if (pdata) { /* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 4; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; + pdata->bus_id = GFAR_PHY_4; memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); } diff -puN arch/ppc/syslib/mpc85xx_devices.c~ppc32-85xx-phy-platform-update arch/ppc/syslib/mpc85xx_devices.c --- devel/arch/ppc/syslib/mpc85xx_devices.c~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/mpc85xx_devices.c 2005-10-28 17:44:03.000000000 -0700 @@ -25,19 +25,20 @@ /* We use offsets for IORESOURCE_MEM since we do not know at compile time * what CCSRBAR is, will get fixed up by mach_mpc85xx_fixup */ +struct gianfar_mdio_data mpc85xx_mdio_pdata = { + .paddr = MPC85xx_MIIM_OFFSET, +}; static struct gianfar_platform_data mpc85xx_tsec1_pdata = { .device_flags = FSL_GIANFAR_DEV_HAS_GIGABIT | 
FSL_GIANFAR_DEV_HAS_COALESCE | FSL_GIANFAR_DEV_HAS_RMON | FSL_GIANFAR_DEV_HAS_MULTI_INTR, - .phy_reg_addr = MPC85xx_ENET1_OFFSET, }; static struct gianfar_platform_data mpc85xx_tsec2_pdata = { .device_flags = FSL_GIANFAR_DEV_HAS_GIGABIT | FSL_GIANFAR_DEV_HAS_COALESCE | FSL_GIANFAR_DEV_HAS_RMON | FSL_GIANFAR_DEV_HAS_MULTI_INTR, - .phy_reg_addr = MPC85xx_ENET1_OFFSET, }; static struct gianfar_platform_data mpc85xx_etsec1_pdata = { @@ -46,7 +47,6 @@ static struct gianfar_platform_data mpc8 FSL_GIANFAR_DEV_HAS_MULTI_INTR | FSL_GIANFAR_DEV_HAS_CSUM | FSL_GIANFAR_DEV_HAS_VLAN | FSL_GIANFAR_DEV_HAS_EXTENDED_HASH, - .phy_reg_addr = MPC85xx_ENET1_OFFSET, }; static struct gianfar_platform_data mpc85xx_etsec2_pdata = { @@ -55,7 +55,6 @@ static struct gianfar_platform_data mpc8 FSL_GIANFAR_DEV_HAS_MULTI_INTR | FSL_GIANFAR_DEV_HAS_CSUM | FSL_GIANFAR_DEV_HAS_VLAN | FSL_GIANFAR_DEV_HAS_EXTENDED_HASH, - .phy_reg_addr = MPC85xx_ENET1_OFFSET, }; static struct gianfar_platform_data mpc85xx_etsec3_pdata = { @@ -64,7 +63,6 @@ static struct gianfar_platform_data mpc8 FSL_GIANFAR_DEV_HAS_MULTI_INTR | FSL_GIANFAR_DEV_HAS_CSUM | FSL_GIANFAR_DEV_HAS_VLAN | FSL_GIANFAR_DEV_HAS_EXTENDED_HASH, - .phy_reg_addr = MPC85xx_ENET1_OFFSET, }; static struct gianfar_platform_data mpc85xx_etsec4_pdata = { @@ -73,11 +71,10 @@ static struct gianfar_platform_data mpc8 FSL_GIANFAR_DEV_HAS_MULTI_INTR | FSL_GIANFAR_DEV_HAS_CSUM | FSL_GIANFAR_DEV_HAS_VLAN | FSL_GIANFAR_DEV_HAS_EXTENDED_HASH, - .phy_reg_addr = MPC85xx_ENET1_OFFSET, }; static struct gianfar_platform_data mpc85xx_fec_pdata = { - .phy_reg_addr = MPC85xx_ENET1_OFFSET, + .device_flags = 0, }; static struct fsl_i2c_platform_data mpc85xx_fsl_i2c_pdata = { @@ -719,6 +716,12 @@ struct platform_device ppc_sys_platform_ }, }, }, + [MPC85xx_MDIO] = { + .name = "fsl-gianfar_mdio", + .id = 0, + .dev.platform_data = &mpc85xx_mdio_pdata, + .num_resources = 0, + }, }; static int __init mach_mpc85xx_fixup(struct platform_device *pdev) diff -puN 
arch/ppc/syslib/mpc85xx_sys.c~ppc32-85xx-phy-platform-update arch/ppc/syslib/mpc85xx_sys.c --- devel/arch/ppc/syslib/mpc85xx_sys.c~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/mpc85xx_sys.c 2005-10-28 17:44:03.000000000 -0700 @@ -24,19 +24,19 @@ struct ppc_sys_spec ppc_sys_specs[] = { .ppc_sys_name = "8540", .mask = 0xFFFF0000, .value = 0x80300000, - .num_devices = 10, + .num_devices = 11, .device_list = (enum ppc_sys_devices[]) { MPC85xx_TSEC1, MPC85xx_TSEC2, MPC85xx_FEC, MPC85xx_IIC1, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, - MPC85xx_PERFMON, MPC85xx_DUART, + MPC85xx_PERFMON, MPC85xx_DUART, MPC85xx_MDIO, }, }, { .ppc_sys_name = "8560", .mask = 0xFFFF0000, .value = 0x80700000, - .num_devices = 19, + .num_devices = 20, .device_list = (enum ppc_sys_devices[]) { MPC85xx_TSEC1, MPC85xx_TSEC2, MPC85xx_IIC1, @@ -45,14 +45,14 @@ struct ppc_sys_spec ppc_sys_specs[] = { MPC85xx_CPM_SPI, MPC85xx_CPM_I2C, MPC85xx_CPM_SCC1, MPC85xx_CPM_SCC2, MPC85xx_CPM_SCC3, MPC85xx_CPM_SCC4, MPC85xx_CPM_FCC1, MPC85xx_CPM_FCC2, MPC85xx_CPM_FCC3, - MPC85xx_CPM_MCC1, MPC85xx_CPM_MCC2, + MPC85xx_CPM_MCC1, MPC85xx_CPM_MCC2, MPC85xx_MDIO, }, }, { .ppc_sys_name = "8541", .mask = 0xFFFF0000, .value = 0x80720000, - .num_devices = 13, + .num_devices = 14, .device_list = (enum ppc_sys_devices[]) { MPC85xx_TSEC1, MPC85xx_TSEC2, MPC85xx_IIC1, @@ -60,13 +60,14 @@ struct ppc_sys_spec ppc_sys_specs[] = { MPC85xx_PERFMON, MPC85xx_DUART, MPC85xx_CPM_SPI, MPC85xx_CPM_I2C, MPC85xx_CPM_FCC1, MPC85xx_CPM_FCC2, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8541E", .mask = 0xFFFF0000, .value = 0x807A0000, - .num_devices = 14, + .num_devices = 15, .device_list = (enum ppc_sys_devices[]) { MPC85xx_TSEC1, MPC85xx_TSEC2, MPC85xx_IIC1, @@ -74,13 +75,14 @@ struct ppc_sys_spec ppc_sys_specs[] = { MPC85xx_PERFMON, MPC85xx_DUART, MPC85xx_SEC2, MPC85xx_CPM_SPI, MPC85xx_CPM_I2C, MPC85xx_CPM_FCC1, MPC85xx_CPM_FCC2, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8555", 
.mask = 0xFFFF0000, .value = 0x80710000, - .num_devices = 19, + .num_devices = 20, .device_list = (enum ppc_sys_devices[]) { MPC85xx_TSEC1, MPC85xx_TSEC2, MPC85xx_IIC1, @@ -91,13 +93,14 @@ struct ppc_sys_spec ppc_sys_specs[] = { MPC85xx_CPM_FCC1, MPC85xx_CPM_FCC2, MPC85xx_CPM_SMC1, MPC85xx_CPM_SMC2, MPC85xx_CPM_USB, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8555E", .mask = 0xFFFF0000, .value = 0x80790000, - .num_devices = 20, + .num_devices = 21, .device_list = (enum ppc_sys_devices[]) { MPC85xx_TSEC1, MPC85xx_TSEC2, MPC85xx_IIC1, @@ -108,6 +111,7 @@ struct ppc_sys_spec ppc_sys_specs[] = { MPC85xx_CPM_FCC1, MPC85xx_CPM_FCC2, MPC85xx_CPM_SMC1, MPC85xx_CPM_SMC2, MPC85xx_CPM_USB, + MPC85xx_MDIO, }, }, /* SVRs on 8548 rev1.0 matches for 8548/8547/8545 */ @@ -115,104 +119,112 @@ struct ppc_sys_spec ppc_sys_specs[] = { .ppc_sys_name = "8548E", .mask = 0xFFFF00F0, .value = 0x80390010, - .num_devices = 13, + .num_devices = 14, .device_list = (enum ppc_sys_devices[]) { MPC85xx_eTSEC1, MPC85xx_eTSEC2, MPC85xx_eTSEC3, MPC85xx_eTSEC4, MPC85xx_IIC1, MPC85xx_IIC2, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, MPC85xx_PERFMON, MPC85xx_DUART, MPC85xx_SEC2, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8548", .mask = 0xFFFF00F0, .value = 0x80310010, - .num_devices = 12, + .num_devices = 13, .device_list = (enum ppc_sys_devices[]) { MPC85xx_eTSEC1, MPC85xx_eTSEC2, MPC85xx_eTSEC3, MPC85xx_eTSEC4, MPC85xx_IIC1, MPC85xx_IIC2, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, MPC85xx_PERFMON, MPC85xx_DUART, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8547E", .mask = 0xFFFF00F0, .value = 0x80390010, - .num_devices = 13, + .num_devices = 14, .device_list = (enum ppc_sys_devices[]) { MPC85xx_eTSEC1, MPC85xx_eTSEC2, MPC85xx_eTSEC3, MPC85xx_eTSEC4, MPC85xx_IIC1, MPC85xx_IIC2, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, MPC85xx_PERFMON, MPC85xx_DUART, MPC85xx_SEC2, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8547", .mask = 0xFFFF00F0, .value = 0x80310010, - .num_devices = 12, 
+ .num_devices = 13, .device_list = (enum ppc_sys_devices[]) { MPC85xx_eTSEC1, MPC85xx_eTSEC2, MPC85xx_eTSEC3, MPC85xx_eTSEC4, MPC85xx_IIC1, MPC85xx_IIC2, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, MPC85xx_PERFMON, MPC85xx_DUART, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8545E", .mask = 0xFFFF00F0, .value = 0x80390010, - .num_devices = 11, + .num_devices = 12, .device_list = (enum ppc_sys_devices[]) { MPC85xx_eTSEC1, MPC85xx_eTSEC2, MPC85xx_IIC1, MPC85xx_IIC2, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, MPC85xx_PERFMON, MPC85xx_DUART, MPC85xx_SEC2, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8545", .mask = 0xFFFF00F0, .value = 0x80310010, - .num_devices = 10, + .num_devices = 11, .device_list = (enum ppc_sys_devices[]) { MPC85xx_eTSEC1, MPC85xx_eTSEC2, MPC85xx_IIC1, MPC85xx_IIC2, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, MPC85xx_PERFMON, MPC85xx_DUART, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8543E", .mask = 0xFFFF00F0, .value = 0x803A0010, - .num_devices = 11, + .num_devices = 12, .device_list = (enum ppc_sys_devices[]) { MPC85xx_eTSEC1, MPC85xx_eTSEC2, MPC85xx_IIC1, MPC85xx_IIC2, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, MPC85xx_PERFMON, MPC85xx_DUART, MPC85xx_SEC2, + MPC85xx_MDIO, }, }, { .ppc_sys_name = "8543", .mask = 0xFFFF00F0, .value = 0x80320010, - .num_devices = 10, + .num_devices = 11, .device_list = (enum ppc_sys_devices[]) { MPC85xx_eTSEC1, MPC85xx_eTSEC2, MPC85xx_IIC1, MPC85xx_IIC2, MPC85xx_DMA0, MPC85xx_DMA1, MPC85xx_DMA2, MPC85xx_DMA3, MPC85xx_PERFMON, MPC85xx_DUART, + MPC85xx_MDIO, }, }, { /* default match */ diff -puN include/asm-ppc/mpc85xx.h~ppc32-85xx-phy-platform-update include/asm-ppc/mpc85xx.h --- devel/include/asm-ppc/mpc85xx.h~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc/mpc85xx.h 2005-10-28 17:44:03.000000000 -0700 @@ -67,6 +67,8 @@ extern unsigned char __res[]; #define MPC85xx_DMA3_SIZE (0x00080) #define MPC85xx_ENET1_OFFSET (0x24000) 
#define MPC85xx_ENET1_SIZE (0x01000) +#define MPC85xx_MIIM_OFFSET (0x24520) +#define MPC85xx_MIIM_SIZE (0x00018) #define MPC85xx_ENET2_OFFSET (0x25000) #define MPC85xx_ENET2_SIZE (0x01000) #define MPC85xx_ENET3_OFFSET (0x26000) @@ -132,6 +134,7 @@ enum ppc_sys_devices { MPC85xx_eTSEC3, MPC85xx_eTSEC4, MPC85xx_IIC2, + MPC85xx_MDIO, }; /* Internal interrupts are all Level Sensitive, and Positive Polarity */ diff -puN include/linux/fsl_devices.h~ppc32-85xx-phy-platform-update include/linux/fsl_devices.h --- devel/include/linux/fsl_devices.h~ppc32-85xx-phy-platform-update 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/linux/fsl_devices.h 2005-10-28 17:44:03.000000000 -0700 @@ -47,16 +47,21 @@ struct gianfar_platform_data { /* device specific information */ u32 device_flags; - u32 phy_reg_addr; /* board specific information */ u32 board_flags; - u32 phy_flags; - u32 phyid; - u32 interruptPHY; + const char *bus_id; u8 mac_addr[6]; }; +struct gianfar_mdio_data { + /* device specific information */ + u32 paddr; + + /* board specific information */ + int irq[32]; +}; + /* Flags related to gianfar device features */ #define FSL_GIANFAR_DEV_HAS_GIGABIT 0x00000001 #define FSL_GIANFAR_DEV_HAS_COALESCE 0x00000002 _ From akpm at osdl.org Sat Oct 29 10:46:31 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:31 -0700 Subject: [patch 18/43] mv643xx_eth_pcidev: implement hotplug for the marvell gige functionality by probing the northbridge pci id Message-ID: <200510290047.j9T0l1qU030041@shell0.pdx.osdl.net> From: Sven Luther Add Marvell gigabit ethernet hotplug through the northbridge pci id support for the Pegasos machines. This patch is not optimal, as the proper way to handling this is to bring full hotplug support to the platform drivers, but this would be much more invasive and far reaching a project. 
This approach matches the marvell discovery northbridge pci id to load the driver for the builtin gigabit ethernet function, and is similar to what is done already using the discover module loading technique. With more and more distributions moving to using hotplug only to probe for modules to load, this patch becomes necessary. Signed-off-by: Nicolas DET Signed-off-by: Sven Luther Cc: Jeff Garzik Signed-off-by: Andrew Morton --- drivers/net/mv643xx_eth.c | 14 ++++++++++++++ 1 files changed, 14 insertions(+) diff -puN drivers/net/mv643xx_eth.c~mv643xx_eth_pcidev-implements-hotplug-for-the-marvell-gige-functionality-by-probing-the-northbridge-pci-id drivers/net/mv643xx_eth.c --- devel/drivers/net/mv643xx_eth.c~mv643xx_eth_pcidev-implements-hotplug-for-the-marvell-gige-functionality-by-probing-the-northbridge-pci-id 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/mv643xx_eth.c 2005-10-28 17:44:03.000000000 -0700 @@ -36,6 +36,10 @@ #include #include +#ifdef CONFIG_PPC_MULTIPLATFORM +#include +#endif + #include #include #include @@ -1643,6 +1647,16 @@ MODULE_AUTHOR( "Rabeeh Khoury, Assaf Hof " and Dale Farnsworth"); MODULE_DESCRIPTION("Ethernet driver for Marvell MV643XX"); +#ifdef CONFIG_PPC_MULTIPLATFORM +static struct pci_device_id pci_marvell_mv64360[] = { + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_MV64360) }, + { } +}; + +MODULE_DEVICE_TABLE(pci, pci_marvell_mv64360); +#endif + + /* * The second part is the low level driver of the gigE ethernet ports. */ _ From akpm at osdl.org Sat Oct 29 10:46:32 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:32 -0700 Subject: [patch 19/43] ppc64: Add cpufreq support for SMU based G5 Message-ID: <200510290047.j9T0l29Y030044@shell0.pdx.osdl.net> From: Benjamin Herrenschmidt iMac G5 and latest single CPU desktop G5 (SMU based machines) have a 970FX DD3 CPU that supports frequency & voltage switching. This patch adds support for simple dual frequency switch.
It is required for the upcoming thermal control patch for these machines. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton --- arch/ppc64/Kconfig | 11 + arch/ppc64/kernel/Makefile | 1 arch/ppc64/kernel/misc.S | 72 +++++++++ arch/ppc64/kernel/pmac_cpufreq.c | 297 +++++++++++++++++++++++++++++++++++++++ drivers/macintosh/smu.c | 12 + include/asm-ppc64/processor.h | 6 include/asm-ppc64/smu.h | 61 +++++++- 7 files changed, 458 insertions(+), 2 deletions(-) diff -puN arch/ppc64/Kconfig~ppc64-add-cpufreq-support-for-smu-based-g5 arch/ppc64/Kconfig --- devel/arch/ppc64/Kconfig~ppc64-add-cpufreq-support-for-smu-based-g5 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc64/Kconfig 2005-10-28 17:45:21.000000000 -0700 @@ -159,6 +159,17 @@ config KEXEC support. As of this writing the exact hardware interface is strongly in flux, so no good recommendation can be made. +source "drivers/cpufreq/Kconfig" + +config CPU_FREQ_PMAC + bool "Support for Apple G5" + depends on CPU_FREQ && PPC_PMAC64 + select CPU_FREQ_TABLE + help + This adds support for frequency switching on some Apple G5 + machine. This is currently very experimental and works only + on some iMac G5. 
+ config IBMVIO depends on PPC_PSERIES || PPC_ISERIES bool diff -puN arch/ppc64/kernel/Makefile~ppc64-add-cpufreq-support-for-smu-based-g5 arch/ppc64/kernel/Makefile --- devel/arch/ppc64/kernel/Makefile~ppc64-add-cpufreq-support-for-smu-based-g5 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc64/kernel/Makefile 2005-10-28 17:44:03.000000000 -0700 @@ -60,6 +60,7 @@ obj-$(CONFIG_MPIC) += mpic.o obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \ pmac_time.o pmac_nvram.o pmac_low_i2c.o \ udbg_scc.o +obj-$(CONFIG_CPU_FREQ_PMAC) += pmac_cpufreq.o obj-$(CONFIG_PPC_MAPLE) += maple_setup.o maple_pci.o maple_time.o \ udbg_16550.o diff -puN arch/ppc64/kernel/misc.S~ppc64-add-cpufreq-support-for-smu-based-g5 arch/ppc64/kernel/misc.S --- devel/arch/ppc64/kernel/misc.S~ppc64-add-cpufreq-support-for-smu-based-g5 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc64/kernel/misc.S 2005-10-28 17:44:03.000000000 -0700 @@ -616,7 +616,7 @@ _GLOBAL(real_readb) isync blr - /* +/* * Do an IO access in real mode */ _GLOBAL(real_writeb) @@ -649,6 +649,76 @@ _GLOBAL(real_writeb) #endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ /* + * SCOM access functions for 970 (FX only for now) + * + * unsigned long scom970_read(unsigned int address); + * void scom970_write(unsigned int address, unsigned long value); + * + * The address passed in is the 24 bits register address. This code + * is 970 specific and will not check the status bits, so you should + * know what you are doing. + */ +_GLOBAL(scom970_read) + /* interrupts off */ + mfmsr r4 + ori r0,r4,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). 
On current CPUs they must be 0'd, + * and finally or in RW bit + */ + rlwinm r3,r3,8,0,15 + ori r3,r3,0x8000 + + /* do the actual scom read */ + sync + mtspr SPRN_SCOMC,r3 + isync + mfspr r3,SPRN_SCOMD + isync + mfspr r0,SPRN_SCOMC + isync + + /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah + * that's the best we can do). Not implemented yet as we don't use + * the scom on any of the bogus CPUs yet, but may have to be done + * ultimately + */ + + /* restore interrupts */ + mtmsrd r4,1 + blr + + +_GLOBAL(scom970_write) + /* interrupts off */ + mfmsr r5 + ori r0,r5,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). On current CPUs they must be 0'd. + */ + + rlwinm r3,r3,8,0,15 + + sync + mtspr SPRN_SCOMD,r4 /* write data */ + isync + mtspr SPRN_SCOMC,r3 /* write command */ + isync + mfspr 3,SPRN_SCOMC + isync + + /* restore interrupts */ + mtmsrd r5,1 + blr + + +/* * Create a kernel thread * kernel_thread(fn, arg, flags) */ diff -puN /dev/null arch/ppc64/kernel/pmac_cpufreq.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/arch/ppc64/kernel/pmac_cpufreq.c 2005-10-28 17:45:23.000000000 -0700 @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt + * and Markus Demleitner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs, + * that is iMac G5 and latest single CPU desktop. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) 
+#endif + +/* see 970FX user manual */ + +#define SCOM_PCR 0x0aa001 /* PCR scom addr */ + +#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */ +#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */ +#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */ +#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */ +#define PCR_SPEED_MASK 0x000e0000U /* speed mask */ +#define PCR_SPEED_SHIFT 17 +#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */ +#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */ +#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */ +#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */ +#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */ +#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */ + +#define SCOM_PSR 0x408001 /* PSR scom addr */ +/* warning: PSR is a 64 bits register */ +#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */ +#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */ +#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */ +#define PSR_CUR_SPEED_SHIFT (56) + +/* + * The G5 only supports two frequencies (Quarter speed is not supported) + */ +#define CPUFREQ_HIGH 0 +#define CPUFREQ_LOW 1 + +static struct cpufreq_frequency_table g5_cpu_freqs[] = { + {CPUFREQ_HIGH, 0}, + {CPUFREQ_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +static struct freq_attr* g5_cpu_freqs_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +/* Power mode data is an array of the 32 bits PCR values to use for + * the various frequencies, retreived from the device-tree + */ +static u32 *g5_pmode_data; +static int g5_pmode_max; +static int g5_pmode_cur; + + +static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ +static int g5_fvt_count; /* number of op. points */ +static int g5_fvt_cur; /* current op. 
point */ + +/* ----------------- real hardware interface */ + +static void g5_switch_volt(int speed_mode) +{ + struct smu_simple_cmd cmd; + + DECLARE_COMPLETION(comp); + smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete, + &comp, 'V', 'S', 'L', 'E', 'W', + 0xff, g5_fvt_cur+1, speed_mode); + wait_for_completion(&comp); +} + +static int g5_switch_freq(int speed_mode) +{ + int to; + + if (g5_pmode_cur == speed_mode) + return 0; + + /* If frequency is going up, first ramp up the voltage */ + if (speed_mode < g5_pmode_cur) + g5_switch_volt(speed_mode); + + /* Clear PCR high */ + scom970_write(SCOM_PCR, 0); + /* Clear PCR low */ + scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0); + /* Set PCR low */ + scom970_write(SCOM_PCR, PCR_HILO_SELECT | + g5_pmode_data[speed_mode]); + + /* Wait for completion */ + for (to = 0; to < 10; to++) { + unsigned long psr = scom970_read(SCOM_PSR); + + if ((psr & PSR_CMD_RECEIVED) == 0 && + (((psr >> PSR_CUR_SPEED_SHIFT) ^ + (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3) + == 0) + break; + if (psr & PSR_CMD_COMPLETED) + break; + udelay(100); + } + + /* If frequency is going down, last ramp the voltage */ + if (speed_mode > g5_pmode_cur) + g5_switch_volt(speed_mode); + + g5_pmode_cur = speed_mode; + ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; + + return 0; +} + +static int g5_query_freq(void) +{ + unsigned long psr = scom970_read(SCOM_PSR); + int i; + + for (i = 0; i <= g5_pmode_max; i++) + if ((((psr >> PSR_CUR_SPEED_SHIFT) ^ + (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0) + break; + return i; +} + +/* ----------------- cpufreq bookkeeping */ +static int __pmac g5_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); +} + +static int __pmac g5_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, g5_cpu_freqs, + 
target_freq, relation, &newstate)) + return -EINVAL; + + return g5_switch_freq(newstate); +} + +static int __pmac g5_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + if (policy->cpu != 0) + return -ENODEV; + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = g5_cpu_freqs[g5_query_freq()].frequency; + cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu); + + return cpufreq_frequency_table_cpuinfo(policy, + g5_cpu_freqs); +} + + +static struct cpufreq_driver g5_cpufreq_driver = { + .name = "powermac", + .owner = THIS_MODULE, + .flags = CPUFREQ_CONST_LOOPS, + .init = g5_cpufreq_cpu_init, + .verify = g5_cpufreq_verify, + .target = g5_cpufreq_target, + .attr = g5_cpu_freqs_attr, +}; + + +static int __init g5_cpufreq_init(void) +{ + struct device_node *cpunode; + unsigned int psize, ssize; + struct smu_sdbp_header *shdr; + unsigned long max_freq; + u32 *valp; + int rc = -ENODEV; + + /* Look for CPU and SMU nodes */ + cpunode = of_find_node_by_type(NULL, "cpu"); + if (!cpunode) { + DBG("No CPU node !\n"); + return -ENODEV; + } + + /* Check 970FX for now */ + valp = (u32 *)get_property(cpunode, "cpu-version", NULL); + if (!valp) { + DBG("No cpu-version property !\n"); + goto bail_noprops; + } + if (((*valp) >> 16) != 0x3c) { + DBG("Wrong CPU version: %08x\n", *valp); + goto bail_noprops; + } + + /* Look for the powertune data in the device-tree */ + g5_pmode_data = (u32 *)get_property(cpunode, "power-mode-data",&psize); + if (!g5_pmode_data) { + DBG("No power-mode-data !\n"); + goto bail_noprops; + } + g5_pmode_max = psize / sizeof(u32) - 1; + + /* Look for the FVT table */ + shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); + if (!shdr) + goto bail_noprops; + g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1]; + ssize = (shdr->len * sizeof(u32)) - sizeof(struct smu_sdbp_header); + g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt); + g5_fvt_cur = 0; + + /* Sanity checking */ + if (g5_fvt_count < 
1 || g5_pmode_max < 1) + goto bail_noprops; + + /* + * From what I see, clock-frequency is always the maximal frequency. + * The current driver can not slew sysclk yet, so we really only deal + * with powertune steps for now. We also only implement full freq and + * half freq in this version. So far, I haven't yet seen a machine + * supporting anything else. + */ + valp = (u32 *)get_property(cpunode, "clock-frequency", NULL); + if (!valp) + return -ENODEV; + max_freq = (*valp)/1000; + g5_cpu_freqs[0].frequency = max_freq; + g5_cpu_freqs[1].frequency = max_freq/2; + + /* Check current frequency */ + g5_pmode_cur = g5_query_freq(); + if (g5_pmode_cur > 1) { + /* We don't support anything but 1:1 and 1:2, fixup ... */ + g5_switch_freq(1); + g5_pmode_cur = 1; + } + + printk(KERN_INFO "Registering G5 CPU frequency driver\n"); + printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", + g5_cpu_freqs[1].frequency/1000, + g5_cpu_freqs[0].frequency/1000, + g5_cpu_freqs[g5_pmode_cur].frequency/1000); + + rc = cpufreq_register_driver(&g5_cpufreq_driver); + + /* We keep the CPU node on hold... hopefully, Apple G5 don't have + * hotplug CPU with a dynamic device-tree ... 
+ */ + return rc; + + bail_noprops: + of_node_put(cpunode); + + return rc; +} + +module_init(g5_cpufreq_init); + + +MODULE_LICENSE("GPL"); diff -puN drivers/macintosh/smu.c~ppc64-add-cpufreq-support-for-smu-based-g5 drivers/macintosh/smu.c --- devel/drivers/macintosh/smu.c~ppc64-add-cpufreq-support-for-smu-based-g5 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/macintosh/smu.c 2005-10-28 17:45:23.000000000 -0700 @@ -845,6 +845,18 @@ int smu_queue_i2c(struct smu_i2c_cmd *cm return 0; } +struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +{ + char pname[32]; + + if (!smu) + return NULL; + + sprintf(pname, "sdb-partition-%02x", id); + return (struct smu_sdbp_header *)get_property(smu->of_node, + pname, size); +} +EXPORT_SYMBOL(smu_get_sdb_partition); /* diff -puN include/asm-ppc64/processor.h~ppc64-add-cpufreq-support-for-smu-based-g5 include/asm-ppc64/processor.h --- devel/include/asm-ppc64/processor.h~ppc64-add-cpufreq-support-for-smu-based-g5 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc64/processor.h 2005-10-28 17:44:03.000000000 -0700 @@ -177,6 +177,9 @@ #define SPRN_CTRLT 0x098 #define CTRL_RUNLATCH 0x1 +#define SPRN_SCOMC 0x114 +#define SPRN_SCOMD 0x115 + /* Performance monitor SPRs */ #define SPRN_SIAR 780 #define SPRN_SDAR 781 @@ -536,6 +539,9 @@ static inline void ppc64_runlatch_off(vo } } +extern unsigned long scom970_read(unsigned int address); +extern void scom970_write(unsigned int address, unsigned long value); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff -puN include/asm-ppc64/smu.h~ppc64-add-cpufreq-support-for-smu-based-g5 include/asm-ppc64/smu.h --- devel/include/asm-ppc64/smu.h~ppc64-add-cpufreq-support-for-smu-based-g5 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc64/smu.h 2005-10-28 17:45:23.000000000 -0700 @@ -144,7 +144,11 @@ * - lenght 8 ("VSLEWxyz") has 3 additional bytes appended, and is * used to set the voltage slewing point. 
The SMU replies with "DONE" * I yet have to figure out their exact meaning of those 3 bytes in - * both cases. + * both cases. They seem to be: + * x = processor mask + * y = op. point index + * z = processor freq. step index + * I haven't yet decyphered result codes * */ #define SMU_CMD_POWER_COMMAND 0xaa @@ -244,6 +248,7 @@ extern int smu_queue_simple(struct smu_s */ extern void smu_done_complete(struct smu_cmd *cmd, void *misc); + /* * Synchronous helpers. Will spin-wait for completion of a command */ @@ -334,6 +339,59 @@ extern int smu_queue_i2c(struct smu_i2c_ #endif /* __KERNEL__ */ /* + * - SMU "sdb" partitions informations - + */ + + +/* + * Partition header format + */ +struct smu_sdbp_header { + __u8 id; + __u8 len; + __u8 version; + __u8 flags; +}; + +/* + * 32 bits integers are usually encoded with 2x16 bits swapped, + * this demangles them + */ +#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) + +/* This is the definition of the SMU sdb-partition-0x12 table (called + * CPU F/V/T operating points in Darwin). The definition for all those + * SMU tables should be moved to some separate file + */ +#define SMU_SDB_FVT_ID 0x12 + +struct smu_sdbp_fvt { + __u32 sysclk; /* Base SysClk frequency in Hz for + * this operating point + */ + __u8 pad; + __u8 maxtemp; /* Max temp. supported by this + * operating point + */ + + __u16 volts[3]; /* CPU core voltage for the 3 + * PowerTune modes, a mode with + * 0V = not supported. + */ +}; + +#ifdef __KERNEL__ +/* + * This returns the pointer to an SMU "sdb" partition data or NULL + * if not found. 
The data format is described below + */ +extern struct smu_sdbp_header *smu_get_sdb_partition(int id, + unsigned int *size); + +#endif /* __KERNEL__ */ + + +/* * - Userland interface - */ @@ -376,4 +434,5 @@ struct smu_user_reply_hdr __u32 reply_len; /* Lenght of data follwing */ }; + #endif /* _SMU_H */ _ From akpm at osdl.org Sat Oct 29 10:46:28 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:28 -0700 Subject: [patch 16/43] ppc32: ppc_sys fixes for 8xx and 82xx Message-ID: <200510290046.j9T0kxpS030035@shell0.pdx.osdl.net> From: Vitaly Bordug This patch fixes a number of issues arising now that both 8xx and 82xx have begun to use the ppc_sys model: - Platform is now identified by default deviceless SOC, if no BOARD_CHIP_NAME is specified in the board-specific header. For the list of supported names refer to (arch/ppc/syslib/) mpc8xx_sys.c and mpc82xx_sys.c for 8xx and 82xx respectively. - Fixed a bug in identification by name - if the name was not found, it returned -1 instead of default deviceless ppc_spec.
- fixed devices amount in the 8xx platform system descriptions Signed-off-by: Vitaly Bordug Signed-off-by: Marcelo Tosatti Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton --- arch/ppc/platforms/fads.h | 2 ++ arch/ppc/platforms/mpc885ads.h | 2 ++ arch/ppc/syslib/m8260_setup.c | 4 ++++ arch/ppc/syslib/m8xx_setup.c | 2 ++ arch/ppc/syslib/mpc8xx_sys.c | 4 ++-- arch/ppc/syslib/ppc_sys.c | 3 +++ include/asm-ppc/cpm2.h | 3 +++ include/asm-ppc/mpc8260.h | 4 ++++ include/asm-ppc/mpc8xx.h | 4 ++++ 9 files changed, 26 insertions(+), 2 deletions(-) diff -puN arch/ppc/platforms/fads.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx arch/ppc/platforms/fads.h --- devel/arch/ppc/platforms/fads.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/fads.h 2005-10-28 17:44:03.000000000 -0700 @@ -25,6 +25,8 @@ #if defined(CONFIG_MPC86XADS) +#define BOARD_CHIP_NAME "MPC86X" + /* U-Boot maps BCSR to 0xff080000 */ #define BCSR_ADDR ((uint)0xff080000) diff -puN arch/ppc/platforms/mpc885ads.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx arch/ppc/platforms/mpc885ads.h --- devel/arch/ppc/platforms/mpc885ads.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/mpc885ads.h 2005-10-28 17:44:03.000000000 -0700 @@ -88,5 +88,7 @@ #define SICR_ENET_MASK ((uint)0x00ff0000) #define SICR_ENET_CLKRT ((uint)0x002c0000) +#define BOARD_CHIP_NAME "MPC885" + #endif /* __ASM_MPC885ADS_H__ */ #endif /* __KERNEL__ */ diff -puN arch/ppc/syslib/m8260_setup.c~ppc32-ppc_sys-fixes-for-8xx-and-82xx arch/ppc/syslib/m8260_setup.c --- devel/arch/ppc/syslib/m8260_setup.c~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/m8260_setup.c 2005-10-28 17:44:03.000000000 -0700 @@ -62,6 +62,10 @@ m8260_setup_arch(void) if (initrd_start) ROOT_DEV = Root_RAM0; #endif + + identify_ppc_sys_by_name_and_id(BOARD_CHIP_NAME, + in_be32(CPM_MAP_ADDR + CPM_IMMR_OFFSET)); + 
m82xx_board_setup(); } diff -puN arch/ppc/syslib/m8xx_setup.c~ppc32-ppc_sys-fixes-for-8xx-and-82xx arch/ppc/syslib/m8xx_setup.c --- devel/arch/ppc/syslib/m8xx_setup.c~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/m8xx_setup.c 2005-10-28 17:44:03.000000000 -0700 @@ -399,6 +399,8 @@ platform_init(unsigned long r3, unsigned strcpy(cmd_line, (char *)(r6+KERNELBASE)); } + identify_ppc_sys_by_name(BOARD_CHIP_NAME); + ppc_md.setup_arch = m8xx_setup_arch; ppc_md.show_percpuinfo = m8xx_show_percpuinfo; ppc_md.irq_canonicalize = NULL; diff -puN arch/ppc/syslib/mpc8xx_sys.c~ppc32-ppc_sys-fixes-for-8xx-and-82xx arch/ppc/syslib/mpc8xx_sys.c --- devel/arch/ppc/syslib/mpc8xx_sys.c~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/mpc8xx_sys.c 2005-10-28 17:44:03.000000000 -0700 @@ -24,7 +24,7 @@ struct ppc_sys_spec ppc_sys_specs[] = { .ppc_sys_name = "MPC86X", .mask = 0xFFFFFFFF, .value = 0x00000000, - .num_devices = 2, + .num_devices = 7, .device_list = (enum ppc_sys_devices[]) { MPC8xx_CPM_FEC1, @@ -40,7 +40,7 @@ struct ppc_sys_spec ppc_sys_specs[] = { .ppc_sys_name = "MPC885", .mask = 0xFFFFFFFF, .value = 0x00000000, - .num_devices = 3, + .num_devices = 8, .device_list = (enum ppc_sys_devices[]) { MPC8xx_CPM_FEC1, diff -puN arch/ppc/syslib/ppc_sys.c~ppc32-ppc_sys-fixes-for-8xx-and-82xx arch/ppc/syslib/ppc_sys.c --- devel/arch/ppc/syslib/ppc_sys.c~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/ppc_sys.c 2005-10-28 17:44:03.000000000 -0700 @@ -69,6 +69,9 @@ static int __init find_chip_by_name_and_ matched[j++] = i; i++; } + + ret = i; + if (j != 0) { for (i = 0; i < j; i++) { if ((ppc_sys_specs[matched[i]].mask & id) == diff -puN include/asm-ppc/cpm2.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx include/asm-ppc/cpm2.h --- devel/include/asm-ppc/cpm2.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 
-0700 +++ devel-akpm/include/asm-ppc/cpm2.h 2005-10-28 17:44:03.000000000 -0700 @@ -1087,6 +1087,9 @@ typedef struct im_idma { #define SCCR_PCIDF_MSK 0x00000078 /* PCI division factor */ #define SCCR_PCIDF_SHIFT 3 +#ifndef CPM_IMMR_OFFSET +#define CPM_IMMR_OFFSET 0x101a8 +#endif #endif /* __CPM2__ */ #endif /* __KERNEL__ */ diff -puN include/asm-ppc/mpc8260.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx include/asm-ppc/mpc8260.h --- devel/include/asm-ppc/mpc8260.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc/mpc8260.h 2005-10-28 17:44:03.000000000 -0700 @@ -92,6 +92,10 @@ enum ppc_sys_devices { extern unsigned char __res[]; #endif +#ifndef BOARD_CHIP_NAME +#define BOARD_CHIP_NAME "" +#endif + #endif /* CONFIG_8260 */ #endif /* !__ASM_PPC_MPC8260_H__ */ #endif /* __KERNEL__ */ diff -puN include/asm-ppc/mpc8xx.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx include/asm-ppc/mpc8xx.h --- devel/include/asm-ppc/mpc8xx.h~ppc32-ppc_sys-fixes-for-8xx-and-82xx 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc/mpc8xx.h 2005-10-28 17:44:03.000000000 -0700 @@ -113,6 +113,10 @@ enum ppc_sys_devices { MPC8xx_CPM_USB, }; +#ifndef BOARD_CHIP_NAME +#define BOARD_CHIP_NAME "" +#endif + #endif /* !__ASSEMBLY__ */ #endif /* CONFIG_8xx */ #endif /* __CONFIG_8xx_DEFS */ _ From akpm at osdl.org Sat Oct 29 10:46:35 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:35 -0700 Subject: [patch 22/43] ppc64 boot: remove include from lib/zlib_inflate/inflate.c Message-ID: <200510290047.j9T0l68u030060@shell0.pdx.osdl.net> From: Olaf Hering There is no need to include module.h in inflate.c Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- lib/zlib_inflate/inflate.c | 1 - 1 files changed, 1 deletion(-) diff -puN lib/zlib_inflate/inflate.c~ppc64-boot-remove-include-from-lib-zlib_inflate-inflatec lib/zlib_inflate/inflate.c --- 
devel/lib/zlib_inflate/inflate.c~ppc64-boot-remove-include-from-lib-zlib_inflate-inflatec 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/lib/zlib_inflate/inflate.c 2005-10-28 17:44:04.000000000 -0700 @@ -3,7 +3,6 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include #include #include "infblock.h" #include "infutil.h" _ From akpm at osdl.org Sat Oct 29 10:46:33 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:33 -0700 Subject: [patch 20/43] ppc64: Support retreiving missing SMU partitions Message-ID: <200510290047.j9T0l4tk030049@shell0.pdx.osdl.net> From: Benjamin Herrenschmidt The SMU chip has an EEPROM that contains various information about the motherboard, like thermal calibration info, etc... This EEPROM is divided into "partitions", and the firmware only extracts some of these and publishes them in the device-tree. This patch adds a mechanism to retrieve the missing ones which is necessary for the upcoming thermal control patch. In order to make this accessible to userland as well, the patch adds the ability to the /proc/device-tree code to get new properties added at runtime and simplifies the code.
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton --- arch/ppc/syslib/prom.c | 4 - arch/ppc64/kernel/prom.c | 24 +++++- drivers/macintosh/smu.c | 164 ++++++++++++++++++++++++++++++++++++++++++++--- fs/proc/proc_devtree.c | 57 ++++++++++------ include/asm-ppc/prom.h | 2 include/asm-ppc64/prom.h | 2 include/asm-ppc64/smu.h | 51 +++++++++++++- include/linux/proc_fs.h | 9 -- 8 files changed, 266 insertions(+), 47 deletions(-) diff -puN arch/ppc64/kernel/prom.c~ppc64-support-retreiving-missing-smu-partitions arch/ppc64/kernel/prom.c --- devel/arch/ppc64/kernel/prom.c~ppc64-support-retreiving-missing-smu-partitions 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc64/kernel/prom.c 2005-10-28 17:44:03.000000000 -0700 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1893,17 +1894,32 @@ get_property(struct device_node *np, con EXPORT_SYMBOL(get_property); /* - * Add a property to a node + * Add a property to a node. */ -void +int prom_add_property(struct device_node* np, struct property* prop) { - struct property **next = &np->properties; + struct property **next; prop->next = NULL; - while (*next) + write_lock(&devtree_lock); + next = &np->properties; + while (*next) { + if (strcmp(prop->name, (*next)->name) == 0) { + /* duplicate ! 
don't insert it */ + write_unlock(&devtree_lock); + return -1; + } next = &(*next)->next; + } *next = prop; + write_unlock(&devtree_lock); + + /* try to add to proc as well if it was initialized */ + if (np->pde) + proc_device_tree_add_prop(np->pde, prop); + + return 0; } #if 0 diff -puN arch/ppc/syslib/prom.c~ppc64-support-retreiving-missing-smu-partitions arch/ppc/syslib/prom.c --- devel/arch/ppc/syslib/prom.c~ppc64-support-retreiving-missing-smu-partitions 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/syslib/prom.c 2005-10-28 17:44:03.000000000 -0700 @@ -1165,7 +1165,7 @@ get_property(struct device_node *np, con /* * Add a property to a node */ -void __openfirmware +int __openfirmware prom_add_property(struct device_node* np, struct property* prop) { struct property **next = &np->properties; @@ -1174,6 +1174,8 @@ prom_add_property(struct device_node* np while (*next) next = &(*next)->next; *next = prop; + + return 0; } /* I quickly hacked that one, check against spec ! */ diff -puN drivers/macintosh/smu.c~ppc64-support-retreiving-missing-smu-partitions drivers/macintosh/smu.c --- devel/drivers/macintosh/smu.c~ppc64-support-retreiving-missing-smu-partitions 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/macintosh/smu.c 2005-10-28 17:45:23.000000000 -0700 @@ -47,13 +47,13 @@ #include #include -#define VERSION "0.6" +#define VERSION "0.7" #define AUTHOR "(c) 2005 Benjamin Herrenschmidt, IBM Corp." #undef DEBUG_SMU #ifdef DEBUG_SMU -#define DPRINTK(fmt, args...) do { printk(KERN_DEBUG fmt , ##args); } while (0) +#define DPRINTK(fmt, args...) do { udbg_printf(KERN_DEBUG fmt , ##args); } while (0) #else #define DPRINTK(fmt, args...) 
do { } while (0) #endif @@ -92,7 +92,7 @@ struct smu_device { * for now, just hard code that */ static struct smu_device *smu; - +static DECLARE_MUTEX(smu_part_access); /* * SMU driver low level stuff @@ -113,9 +113,11 @@ static void smu_start_cmd(void) DPRINTK("SMU: starting cmd %x, %d bytes data\n", cmd->cmd, cmd->data_len); - DPRINTK("SMU: data buffer: %02x %02x %02x %02x ...\n", + DPRINTK("SMU: data buffer: %02x %02x %02x %02x %02x %02x %02x %02x\n", ((u8 *)cmd->data_buf)[0], ((u8 *)cmd->data_buf)[1], - ((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3]); + ((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3], + ((u8 *)cmd->data_buf)[4], ((u8 *)cmd->data_buf)[5], + ((u8 *)cmd->data_buf)[6], ((u8 *)cmd->data_buf)[7]); /* Fill the SMU command buffer */ smu->cmd_buf->cmd = cmd->cmd; @@ -440,7 +442,7 @@ int smu_present(void) EXPORT_SYMBOL(smu_present); -int smu_init (void) +int __init smu_init (void) { struct device_node *np; u32 *data; @@ -845,16 +847,154 @@ int smu_queue_i2c(struct smu_i2c_cmd *cm return 0; } -struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +/* + * Handling of "partitions" + */ + +static int smu_read_datablock(u8 *dest, unsigned int addr, unsigned int len) +{ + DECLARE_COMPLETION(comp); + unsigned int chunk; + struct smu_cmd cmd; + int rc; + u8 params[8]; + + /* We currently use a chunk size of 0xe. 
We could check the + * SMU firmware version and use bigger sizes though + */ + chunk = 0xe; + + while (len) { + unsigned int clen = min(len, chunk); + + cmd.cmd = SMU_CMD_MISC_ee_COMMAND; + cmd.data_len = 7; + cmd.data_buf = params; + cmd.reply_len = chunk; + cmd.reply_buf = dest; + cmd.done = smu_done_complete; + cmd.misc = &comp; + params[0] = SMU_CMD_MISC_ee_GET_DATABLOCK_REC; + params[1] = 0x4; + *((u32 *)&params[2]) = addr; + params[6] = clen; + + rc = smu_queue_cmd(&cmd); + if (rc) + return rc; + wait_for_completion(&comp); + if (cmd.status != 0) + return rc; + if (cmd.reply_len != clen) { + printk(KERN_DEBUG "SMU: short read in " + "smu_read_datablock, got: %d, want: %d\n", + cmd.reply_len, clen); + return -EIO; + } + len -= clen; + addr += clen; + dest += clen; + } + return 0; +} + +static struct smu_sdbp_header *smu_create_sdb_partition(int id) +{ + DECLARE_COMPLETION(comp); + struct smu_simple_cmd cmd; + unsigned int addr, len, tlen; + struct smu_sdbp_header *hdr; + struct property *prop; + + /* First query the partition info */ + smu_queue_simple(&cmd, SMU_CMD_PARTITION_COMMAND, 2, + smu_done_complete, &comp, + SMU_CMD_PARTITION_LATEST, id); + wait_for_completion(&comp); + + /* Partition doesn't exist (or other error) */ + if (cmd.cmd.status != 0 || cmd.cmd.reply_len != 6) + return NULL; + + /* Fetch address and length from reply */ + addr = *((u16 *)cmd.buffer); + len = cmd.buffer[3] << 2; + /* Calucluate total length to allocate, including the 17 bytes + * for "sdb-partition-XX" that we append at the end of the buffer + */ + tlen = sizeof(struct property) + len + 18; + + prop = kcalloc(tlen, 1, GFP_KERNEL); + if (prop == NULL) + return NULL; + hdr = (struct smu_sdbp_header *)(prop + 1); + prop->name = ((char *)prop) + tlen - 18; + sprintf(prop->name, "sdb-partition-%02x", id); + prop->length = len; + prop->value = (unsigned char *)hdr; + prop->next = NULL; + + /* Read the datablock */ + if (smu_read_datablock((u8 *)hdr, addr, len)) { + printk(KERN_DEBUG "SMU:
datablock read failed while reading " + "partition %02x !\n", id); + goto failure; + } + + /* Got it, check a few things and create the property */ + if (hdr->id != id) { + printk(KERN_DEBUG "SMU: Reading partition %02x and got " + "%02x !\n", id, hdr->id); + goto failure; + } + if (prom_add_property(smu->of_node, prop)) { + printk(KERN_DEBUG "SMU: Failed creating sdb-partition-%02x " + "property !\n", id); + goto failure; + } + + return hdr; + failure: + kfree(prop); + return NULL; +} + +/* Note: Only allowed to return error code in pointers (using ERR_PTR) + * when interruptible is 1 + */ +struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size, + int interruptible) { char pname[32]; + struct smu_sdbp_header *part; if (!smu) return NULL; sprintf(pname, "sdb-partition-%02x", id); - return (struct smu_sdbp_header *)get_property(smu->of_node, + + if (interruptible) { + int rc; + rc = down_interruptible(&smu_part_access); + if (rc) + return ERR_PTR(rc); + } else + down(&smu_part_access); + + part = (struct smu_sdbp_header *)get_property(smu->of_node, pname, size); + if (part == NULL) { + part = smu_create_sdb_partition(id); + if (part != NULL && size) + *size = part->len << 2; + } + up(&smu_part_access); + return part; +} + +struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +{ + return __smu_get_sdb_partition(id, size, 0); } EXPORT_SYMBOL(smu_get_sdb_partition); @@ -930,6 +1070,14 @@ static ssize_t smu_write(struct file *fi else if (hdr.cmdtype == SMU_CMDTYPE_WANTS_EVENTS) { pp->mode = smu_file_events; return 0; + } else if (hdr.cmdtype == SMU_CMDTYPE_GET_PARTITION) { + struct smu_sdbp_header *part; + part = __smu_get_sdb_partition(hdr.cmd, NULL, 1); + if (part == NULL) + return -EINVAL; + else if (IS_ERR(part)) + return PTR_ERR(part); + return 0; } else if (hdr.cmdtype != SMU_CMDTYPE_SMU) return -EINVAL; else if (pp->mode != smu_file_commands) diff -puN fs/proc/proc_devtree.c~ppc64-support-retreiving-missing-smu-partitions 
fs/proc/proc_devtree.c --- devel/fs/proc/proc_devtree.c~ppc64-support-retreiving-missing-smu-partitions 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/fs/proc/proc_devtree.c 2005-10-28 17:44:03.000000000 -0700 @@ -49,6 +49,39 @@ static int property_read_proc(char *page */ /* + * Add a property to a node + */ +static struct proc_dir_entry * +__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp) +{ + struct proc_dir_entry *ent; + + /* + * Unfortunately proc_register puts each new entry + * at the beginning of the list. So we rearrange them. + */ + ent = create_proc_read_entry(pp->name, + strncmp(pp->name, "security-", 9) + ? S_IRUGO : S_IRUSR, de, + property_read_proc, pp); + if (ent == NULL) + return NULL; + + if (!strncmp(pp->name, "security-", 9)) + ent->size = 0; /* don't leak number of password chars */ + else + ent->size = pp->length; + + return ent; +} + + +void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop) +{ + __proc_device_tree_add_prop(pde, prop); +} + +/* * Process a node, adding entries for its children and its properties. */ void proc_device_tree_add_node(struct device_node *np, @@ -57,11 +90,9 @@ void proc_device_tree_add_node(struct de struct property *pp; struct proc_dir_entry *ent; struct device_node *child; - struct proc_dir_entry *list = NULL, **lastp; const char *p; set_node_proc_entry(np, de); - lastp = &list; for (child = NULL; (child = of_get_next_child(np, child));) { p = strrchr(child->full_name, '/'); if (!p) @@ -71,9 +102,6 @@ void proc_device_tree_add_node(struct de ent = proc_mkdir(p, de); if (ent == 0) break; - *lastp = ent; - ent->next = NULL; - lastp = &ent->next; proc_device_tree_add_node(child, ent); } of_node_put(child); @@ -84,7 +112,7 @@ void proc_device_tree_add_node(struct de * properties are quite unimportant for us though, thus we * simply "skip" them here, but we do have to check. 
*/ - for (ent = list; ent != NULL; ent = ent->next) + for (ent = de->subdir; ent != NULL; ent = ent->next) if (!strcmp(ent->name, pp->name)) break; if (ent != NULL) { @@ -94,25 +122,10 @@ void proc_device_tree_add_node(struct de continue; } - /* - * Unfortunately proc_register puts each new entry - * at the beginning of the list. So we rearrange them. - */ - ent = create_proc_read_entry(pp->name, - strncmp(pp->name, "security-", 9) - ? S_IRUGO : S_IRUSR, de, - property_read_proc, pp); + ent = __proc_device_tree_add_prop(de, pp); if (ent == 0) break; - if (!strncmp(pp->name, "security-", 9)) - ent->size = 0; /* don't leak number of password chars */ - else - ent->size = pp->length; - ent->next = NULL; - *lastp = ent; - lastp = &ent->next; } - de->subdir = list; } /* diff -puN include/asm-ppc64/prom.h~ppc64-support-retreiving-missing-smu-partitions include/asm-ppc64/prom.h --- devel/include/asm-ppc64/prom.h~ppc64-support-retreiving-missing-smu-partitions 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc64/prom.h 2005-10-28 17:44:03.000000000 -0700 @@ -201,6 +201,6 @@ extern int prom_n_addr_cells(struct devi extern int prom_n_size_cells(struct device_node* np); extern int prom_n_intr_cells(struct device_node* np); extern void prom_get_irq_senses(unsigned char *senses, int off, int max); -extern void prom_add_property(struct device_node* np, struct property* prop); +extern int prom_add_property(struct device_node* np, struct property* prop); #endif /* _PPC64_PROM_H */ diff -puN include/asm-ppc64/smu.h~ppc64-support-retreiving-missing-smu-partitions include/asm-ppc64/smu.h --- devel/include/asm-ppc64/smu.h~ppc64-support-retreiving-missing-smu-partitions 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc64/smu.h 2005-10-28 17:45:23.000000000 -0700 @@ -20,10 +20,23 @@ /* * Partition info commands * - * I do not know what those are for at this point + * These commands are used to retreive the sdb-partition-XX datas from + * the SMU. 
The lenght is always 2. First byte is the subcommand code + * and second byte is the partition ID. + * + * The reply is 6 bytes: + * + * - 0..1 : partition address + * - 2 : a byte containing the partition ID + * - 3 : length (maybe other bits are rest of header ?) + * + * The data must then be obtained with calls to another command: + * SMU_CMD_MISC_ee_GET_DATABLOCK_REC (described below). */ #define SMU_CMD_PARTITION_COMMAND 0x3e - +#define SMU_CMD_PARTITION_LATEST 0x01 +#define SMU_CMD_PARTITION_BASE 0x02 +#define SMU_CMD_PARTITION_UPDATE 0x03 /* * Fan control @@ -176,6 +189,25 @@ * Misc commands * * This command seem to be a grab bag of various things + * + * SMU_CMD_MISC_ee_GET_DATABLOCK_REC is used, among others, to + * transfer blocks of data from the SMU. So far, I've decrypted it's + * usage to retreive partition data. In order to do that, you have to + * break your transfer in "chunks" since that command cannot transfer + * more than a chunk at a time. The chunk size used by OF is 0xe bytes, + * but it seems that the darwin driver will let you do 0x1e bytes if + * your "PMU" version is >= 0x30. You can get the "PMU" version apparently + * either in the last 16 bits of property "smu-version-pmu" or as the 16 + * bytes at offset 1 of "smu-version-info" + * + * For each chunk, the command takes 7 bytes of arguments: + * byte 0: subcommand code (0x02) + * byte 1: 0x04 (always, I don't know what it means, maybe the address + * space to use or some other nicety. 
It's hard coded in OF) + * byte 2..5: SMU address of the chunk (big endian 32 bits) + * byte 6: size to transfer (up to max chunk size) + * + * The data is returned directly */ #define SMU_CMD_MISC_ee_COMMAND 0xee #define SMU_CMD_MISC_ee_GET_DATABLOCK_REC 0x02 @@ -357,13 +389,13 @@ struct smu_sdbp_header { * 32 bits integers are usually encoded with 2x16 bits swapped, * this demangles them */ -#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) +//#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) /* This is the definition of the SMU sdb-partition-0x12 table (called * CPU F/V/T operating points in Darwin). The definition for all those * SMU tables should be moved to some separate file */ -#define SMU_SDB_FVT_ID 0x12 +#define SMU_SDB_FVT_ID 0x12 struct smu_sdbp_fvt { __u32 sysclk; /* Base SysClk frequency in Hz for @@ -380,6 +412,9 @@ struct smu_sdbp_fvt { */ }; +/* Other partitions without known structures */ +#define SMU_SDB_DEBUG_SWITCHES_ID 0x05 + #ifdef __KERNEL__ /* * This returns the pointer to an SMU "sdb" partition data or NULL @@ -417,14 +452,22 @@ extern struct smu_sdbp_header *smu_get_s * It is illegal to send SMU commands through a file descriptor configured * for events reception * + * The special SMU_CMDTYPE_GET_PARTITION command can be used to retreive + * SMU sdb-partition's from the SMU when not available. The command will also + * cause the new partition to be added to the device-tree. That command has + * a data_len of 0, you pass the partition ID in the "cmd" field. It will + * not trigger any reply and is not asynchronous. Just fetch the partition + * from the device-tree after it's done. 
*/ struct smu_user_cmd_hdr { __u32 cmdtype; #define SMU_CMDTYPE_SMU 0 /* SMU command */ #define SMU_CMDTYPE_WANTS_EVENTS 1 /* switch fd to events mode */ +#define SMU_CMDTYPE_GET_PARTITION 2 /* retreive an sdb partition */ __u8 cmd; /* SMU command byte */ + __u8 pad[3]; /* padding */ __u32 data_len; /* Lenght of data following */ }; diff -puN include/asm-ppc/prom.h~ppc64-support-retreiving-missing-smu-partitions include/asm-ppc/prom.h --- devel/include/asm-ppc/prom.h~ppc64-support-retreiving-missing-smu-partitions 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc/prom.h 2005-10-28 17:44:03.000000000 -0700 @@ -93,7 +93,7 @@ extern int device_is_compatible(struct d extern int machine_is_compatible(const char *compat); extern unsigned char *get_property(struct device_node *node, const char *name, int *lenp); -extern void prom_add_property(struct device_node* np, struct property* prop); +extern int prom_add_property(struct device_node* np, struct property* prop); extern void prom_get_irq_senses(unsigned char *, int, int); extern int prom_n_addr_cells(struct device_node* np); extern int prom_n_size_cells(struct device_node* np); diff -puN include/linux/proc_fs.h~ppc64-support-retreiving-missing-smu-partitions include/linux/proc_fs.h --- devel/include/linux/proc_fs.h~ppc64-support-retreiving-missing-smu-partitions 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/linux/proc_fs.h 2005-10-28 17:44:03.000000000 -0700 @@ -139,15 +139,12 @@ extern void proc_tty_unregister_driver(s /* * proc_devtree.c */ +#ifdef CONFIG_PROC_DEVICETREE struct device_node; +struct property; extern void proc_device_tree_init(void); -#ifdef CONFIG_PROC_DEVICETREE extern void proc_device_tree_add_node(struct device_node *, struct proc_dir_entry *); -#else /* !CONFIG_PROC_DEVICETREE */ -static inline void proc_device_tree_add_node(struct device_node *np, struct proc_dir_entry *pde) -{ - return; -} +extern void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct 
property *prop); #endif /* CONFIG_PROC_DEVICETREE */ extern struct proc_dir_entry *proc_symlink(const char *, _ From akpm at osdl.org Sat Oct 29 10:46:25 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:25 -0700 Subject: [patch 14/43] chrp_pegasos_eth: Added Marvell Discovery II SRAM support Message-ID: <200510290046.j9T0ktdF030029@shell0.pdx.osdl.net> From: Nicolas DET Add proper entry to support the Marvell MV64361 (Marvell Discovery II) SRAM. This feature may be used by the mv643xx_eth driver. Signed-off-by: Nicolas DET Signed-off-by: Andrew Morton --- arch/ppc/platforms/chrp_pegasos_eth.c | 124 ++++++++++++++++++++++++++++++++-- 1 files changed, 118 insertions(+), 6 deletions(-) diff -puN arch/ppc/platforms/chrp_pegasos_eth.c~chrp_pegasos_eth-added-marvell-discovery arch/ppc/platforms/chrp_pegasos_eth.c --- devel/arch/ppc/platforms/chrp_pegasos_eth.c~chrp_pegasos_eth-added-marvell-discovery 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/chrp_pegasos_eth.c 2005-10-28 17:44:03.000000000 -0700 @@ -17,7 +17,20 @@ #include #include -/* Pegasos 2 specific Marvell MV 64361 gigabit ethernet port setup */ +#define PEGASOS2_MARVELL_REGBASE (0xf1000000) +#define PEGASOS2_MARVELL_REGSIZE (0x00004000) +#define PEGASOS2_SRAM_BASE (0xf2000000) +#define PEGASOS2_SRAM_SIZE (256*1024) + +#define PEGASOS2_SRAM_BASE_ETH0 (PEGASOS2_SRAM_BASE) +#define PEGASOS2_SRAM_BASE_ETH1 (PEGASOS2_SRAM_BASE_ETH0 + (PEGASOS2_SRAM_SIZE / 2) ) + + +#define PEGASOS2_SRAM_RXRING_SIZE (PEGASOS2_SRAM_SIZE/4) +#define PEGASOS2_SRAM_TXRING_SIZE (PEGASOS2_SRAM_SIZE/4) + +#undef BE_VERBOSE + static struct resource mv643xx_eth_shared_resources[] = { [0] = { .name = "ethernet shared base", @@ -44,7 +57,16 @@ static struct resource mv643xx_eth0_reso }, }; -static struct mv643xx_eth_platform_data eth0_pd; + +static struct mv643xx_eth_platform_data eth0_pd = { + .tx_sram_addr = PEGASOS2_SRAM_BASE_ETH0, + .tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE, + 
.tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16, + + .rx_sram_addr = PEGASOS2_SRAM_BASE_ETH0 + PEGASOS2_SRAM_TXRING_SIZE, + .rx_sram_size = PEGASOS2_SRAM_RXRING_SIZE, + .rx_queue_size = PEGASOS2_SRAM_RXRING_SIZE/16, +}; static struct platform_device eth0_device = { .name = MV643XX_ETH_NAME, @@ -65,7 +87,15 @@ static struct resource mv643xx_eth1_reso }, }; -static struct mv643xx_eth_platform_data eth1_pd; +static struct mv643xx_eth_platform_data eth1_pd = { + .tx_sram_addr = PEGASOS2_SRAM_BASE_ETH1, + .tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE, + .tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16, + + .rx_sram_addr = PEGASOS2_SRAM_BASE_ETH1 + PEGASOS2_SRAM_TXRING_SIZE, + .rx_sram_size = PEGASOS2_SRAM_RXRING_SIZE, + .rx_queue_size = PEGASOS2_SRAM_RXRING_SIZE/16, +}; static struct platform_device eth1_device = { .name = MV643XX_ETH_NAME, @@ -83,9 +113,62 @@ static struct platform_device *mv643xx_e &eth1_device, }; +/***********/ +/***********/ +#define MV_READ(offset,val) { val = readl(mv643xx_reg_base + offset); } +#define MV_WRITE(offset,data) writel(data, mv643xx_reg_base + offset) + +static void __iomem *mv643xx_reg_base; + +static int Enable_SRAM(void) +{ + u32 ALong; + + if (mv643xx_reg_base == NULL) + mv643xx_reg_base = ioremap(PEGASOS2_MARVELL_REGBASE, + PEGASOS2_MARVELL_REGSIZE); + + if (mv643xx_reg_base == NULL) + return -ENOMEM; + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: register remapped from %p to %p\n", + (void *)PEGASOS2_MARVELL_REGBASE, (void *)mv643xx_reg_base); +#endif + + MV_WRITE(MV64340_SRAM_CONFIG, 0); -int -mv643xx_eth_add_pds(void) + MV_WRITE(MV64340_INTEGRATED_SRAM_BASE_ADDR, PEGASOS2_SRAM_BASE >> 16); + + MV_READ(MV64340_BASE_ADDR_ENABLE, ALong); + ALong &= ~(1 << 19); + MV_WRITE(MV64340_BASE_ADDR_ENABLE, ALong); + + ALong = 0x02; + ALong |= PEGASOS2_SRAM_BASE & 0xffff0000; + MV_WRITE(MV643XX_ETH_BAR_4, ALong); + + MV_WRITE(MV643XX_ETH_SIZE_REG_4, (PEGASOS2_SRAM_SIZE-1) & 0xffff0000); + + MV_READ(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong); 
+ ALong &= ~(1 << 4); + MV_WRITE(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong); + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: register unmapped\n"); + printk("Pegasos II/Marvell MV64361: SRAM at %p, size=%x\n", (void*) PEGASOS2_SRAM_BASE, PEGASOS2_SRAM_SIZE); +#endif + + iounmap(mv643xx_reg_base); + mv643xx_reg_base = NULL; + + return 1; +} + + +/***********/ +/***********/ +int mv643xx_eth_add_pds(void) { int ret = 0; static struct pci_device_id pci_marvell_mv64360[] = { @@ -93,9 +176,38 @@ mv643xx_eth_add_pds(void) { } }; +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: init\n"); +#endif + if (pci_dev_present(pci_marvell_mv64360)) { - ret = platform_add_devices(mv643xx_eth_pd_devs, ARRAY_SIZE(mv643xx_eth_pd_devs)); + ret = platform_add_devices(mv643xx_eth_pd_devs, + ARRAY_SIZE(mv643xx_eth_pd_devs)); + + if ( Enable_SRAM() < 0) + { + eth0_pd.tx_sram_addr = 0; + eth0_pd.tx_sram_size = 0; + eth0_pd.rx_sram_addr = 0; + eth0_pd.rx_sram_size = 0; + + eth1_pd.tx_sram_addr = 0; + eth1_pd.tx_sram_size = 0; + eth1_pd.rx_sram_addr = 0; + eth1_pd.rx_sram_size = 0; + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: Can't enable the " + "SRAM\n"); +#endif + } } + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: init is over\n"); +#endif + return ret; } + device_initcall(mv643xx_eth_add_pds); _ From akpm at osdl.org Sat Oct 29 10:46:30 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:30 -0700 Subject: [patch 17/43] mv643xx_eth_showsram: Added information message when using the SRAM Message-ID: <200510290047.j9T0l08S030038@shell0.pdx.osdl.net> From: Nicolas DET Added information message when using the SRAM in mv643xx_eth_probe() Signed-off-by: Nicolas DET Signed-off-by: Sven Luther Signed-off-by: Andrew Morton --- drivers/net/mv643xx_eth.c | 3 +++ 1 files changed, 3 insertions(+) diff -puN drivers/net/mv643xx_eth.c~mv643xx_eth_showsram-added-information drivers/net/mv643xx_eth.c --- 
devel/drivers/net/mv643xx_eth.c~mv643xx_eth_showsram-added-information 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/mv643xx_eth.c 2005-10-28 17:45:23.000000000 -0700 @@ -1533,6 +1533,9 @@ static int mv643xx_eth_probe(struct devi printk(KERN_NOTICE "%s: RX NAPI Enabled \n", dev->name); #endif + if (mp->tx_sram_size > 0) + printk(KERN_NOTICE "%s: Using SRAM\n", dev->name); + return 0; out: _ From akpm at osdl.org Sat Oct 29 10:46:37 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:37 -0700 Subject: [patch 24/43] ppc64 boot: missing include for size_t Message-ID: <200510290047.j9T0l8UZ030067@shell0.pdx.osdl.net> From: Olaf Hering string.h needs definition of size_t, but not the one from linux/include Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/string.h | 1 + 1 files changed, 1 insertion(+) diff -puN arch/ppc64/boot/string.h~ppc64-boot-missing-include-for-size_t arch/ppc64/boot/string.h --- devel/arch/ppc64/boot/string.h~ppc64-boot-missing-include-for-size_t 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/string.h 2005-10-28 17:44:04.000000000 -0700 @@ -1,5 +1,6 @@ #ifndef _PPC_BOOT_STRING_H_ #define _PPC_BOOT_STRING_H_ +#include extern char *strcpy(char *dest, const char *src); extern char *strncpy(char *dest, const char *src, size_t n); _ From akpm at osdl.org Sat Oct 29 10:46:37 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:37 -0700 Subject: [patch 23/43] ppc64 boot: remove include from include/linux/zutil.h Message-ID: <200510290047.j9T0l7Ml030063@shell0.pdx.osdl.net> From: Olaf Hering zutil.h does not need errno.h Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- include/linux/zutil.h | 1 - 1 files changed, 1 deletion(-) diff -puN 
include/linux/zutil.h~ppc64-boot-remove-include-from-include-linux-zutilh include/linux/zutil.h --- devel/include/linux/zutil.h~ppc64-boot-remove-include-from-include-linux-zutilh 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/include/linux/zutil.h 2005-10-28 17:44:04.000000000 -0700 @@ -15,7 +15,6 @@ #include #include -#include #include typedef unsigned char uch; _ From akpm at osdl.org Sat Oct 29 10:46:57 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:57 -0700 Subject: [patch 42/43] ppc64: remove duplicate local variable in set_preferred_console Message-ID: <200510290047.j9T0lSZ2030129@shell0.pdx.osdl.net> From: Olaf Hering remove duplicate local variable, saves 2 asm instructions. Signed-off-by: Olaf Hering Signed-off-by: Andrew Morton --- arch/ppc64/kernel/setup.c | 1 - 1 files changed, 1 deletion(-) diff -puN arch/ppc64/kernel/setup.c~ppc64-remove-duplicate-local-variable-in-set_preferred_console arch/ppc64/kernel/setup.c --- devel/arch/ppc64/kernel/setup.c~ppc64-remove-duplicate-local-variable-in-set_preferred_console 2005-10-28 17:44:05.000000000 -0700 +++ devel-akpm/arch/ppc64/kernel/setup.c 2005-10-28 17:44:05.000000000 -0700 @@ -881,7 +881,6 @@ static int __init set_preferred_console( if (reg && compat && (strcmp(compat, "hvterm-protocol") == 0)) { /* Host Virtual Serial Interface */ - int offset; switch (reg[0]) { case 0x30000000: offset = 0; _ From akpm at osdl.org Sat Oct 29 10:46:52 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:52 -0700 Subject: [patch 37/43] ppc64: make dma_addr_t 64 bits Message-ID: <200510290047.j9T0lMO5030114@shell0.pdx.osdl.net> From: Stephen Rothwell There has been a need expressed for dma_addr_t to be 64 bits on PPC64. This patch does that. I have built it for pSeries and iSeries and booted a virtual only iSeries partition. 
Signed-off-by: Anton Blanchard Acked-by: Dave Boutcher Signed-off-by: Andrew Morton --- include/asm-ppc64/scatterlist.h | 2 +- include/asm-ppc64/types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff -puN include/asm-ppc64/scatterlist.h~ppc64-make-dma_addr_t-64-bits include/asm-ppc64/scatterlist.h --- devel/include/asm-ppc64/scatterlist.h~ppc64-make-dma_addr_t-64-bits 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/include/asm-ppc64/scatterlist.h 2005-10-28 17:44:04.000000000 -0700 @@ -19,7 +19,7 @@ struct scatterlist { unsigned int length; /* For TCE support */ - u32 dma_address; + dma_addr_t dma_address; u32 dma_length; }; diff -puN include/asm-ppc64/types.h~ppc64-make-dma_addr_t-64-bits include/asm-ppc64/types.h --- devel/include/asm-ppc64/types.h~ppc64-make-dma_addr_t-64-bits 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/include/asm-ppc64/types.h 2005-10-28 17:44:04.000000000 -0700 @@ -63,7 +63,7 @@ typedef unsigned long u64; typedef __vector128 vector128; -typedef u32 dma_addr_t; +typedef u64 dma_addr_t; typedef u64 dma64_addr_t; typedef struct { _ From akpm at osdl.org Sat Oct 29 10:46:34 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:34 -0700 Subject: [patch 21/43] ppc64: Thermal control for SMU based machines Message-ID: <200510290047.j9T0l5k8030057@shell0.pdx.osdl.net> From: Benjamin Herrenschmidt This is the actual thermal control support for PowerMac8,1, PowerMac8,2 and PowerMac9,1 machines (SMU based), that is iMac G5 and single CPU desktop. It requires CPUFREQ to be enabled to properly deal with overtemp conditions. The new thermal control code implements a new framework (nicknamed "windfarm") to which I expect to port the old G5 thermal control, and possibly some of the powerbook thermal control drivers as well in the future. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton --- arch/ppc64/kernel/pmac_cpufreq.c | 44 - drivers/macintosh/Kconfig | 10 drivers/macintosh/Makefile | 5 drivers/macintosh/smu.c | 2 drivers/macintosh/windfarm.h | 122 ++ drivers/macintosh/windfarm_core.c | 428 ++++++++++ drivers/macintosh/windfarm_cpufreq_clamp.c | 105 ++ drivers/macintosh/windfarm_lm75_sensor.c | 255 ++++++ drivers/macintosh/windfarm_pid.c | 146 +++ drivers/macintosh/windfarm_pid.h | 84 + drivers/macintosh/windfarm_smu.c | 1220 +++++++++++++++++++++++++++++ drivers/macintosh/windfarm_smu_controls.c | 274 ++++++ drivers/macintosh/windfarm_smu_sensors.c | 471 +++++++++++ include/asm-ppc64/smu.h | 109 ++ 14 files changed, 3260 insertions(+), 15 deletions(-) diff -puN arch/ppc64/kernel/pmac_cpufreq.c~ppc64-thermal-control-for-smu-based-machines arch/ppc64/kernel/pmac_cpufreq.c --- devel/arch/ppc64/kernel/pmac_cpufreq.c~ppc64-thermal-control-for-smu-based-machines 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/arch/ppc64/kernel/pmac_cpufreq.c 2005-10-28 17:44:03.000000000 -0700 @@ -84,7 +84,8 @@ static struct freq_attr* g5_cpu_freqs_at static u32 *g5_pmode_data; static int g5_pmode_max; static int g5_pmode_cur; - +static int g5_driver_active; +static DECLARE_MUTEX(g5_switch_mutex); static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ static int g5_fvt_count; /* number of op. 
points */ @@ -105,11 +106,20 @@ static void g5_switch_volt(int speed_mod static int g5_switch_freq(int speed_mode) { + struct cpufreq_freqs freqs; int to; if (g5_pmode_cur == speed_mode) return 0; + down(&g5_switch_mutex); + + freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency; + freqs.new = g5_cpu_freqs[speed_mode].frequency; + freqs.cpu = 0; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + /* If frequency is going up, first ramp up the voltage */ if (speed_mode < g5_pmode_cur) g5_switch_volt(speed_mode); @@ -143,6 +153,10 @@ static int g5_switch_freq(int speed_mode g5_pmode_cur = speed_mode; ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + up(&g5_switch_mutex); + return 0; } @@ -159,12 +173,12 @@ static int g5_query_freq(void) } /* ----------------- cpufreq bookkeeping */ -static int __pmac g5_cpufreq_verify(struct cpufreq_policy *policy) +static int g5_cpufreq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); } -static int __pmac g5_cpufreq_target(struct cpufreq_policy *policy, +static int g5_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int newstate = 0; @@ -173,10 +187,20 @@ static int __pmac g5_cpufreq_target(stru target_freq, relation, &newstate)) return -EINVAL; + DBG("g5_cpufreq: Request to switch to %d state: %d\n", + target_freq, newstate); + return g5_switch_freq(newstate); } -static int __pmac g5_cpufreq_cpu_init(struct cpufreq_policy *policy) +static unsigned int g5_cpufreq_get_speed(unsigned int cpu) +{ + DBG("g5_cpufreq: Get speed %d\n", + g5_cpu_freqs[g5_pmode_cur].frequency); + return g5_cpu_freqs[g5_pmode_cur].frequency; +} + +static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy) { if (policy->cpu != 0) return -ENODEV; @@ -198,6 +222,7 @@ static struct cpufreq_driver g5_cpufreq_ .init = g5_cpufreq_cpu_init, .verify = g5_cpufreq_verify, .target = 
g5_cpufreq_target, + .get = g5_cpufreq_get_speed, .attr = g5_cpu_freqs_attr, }; @@ -266,11 +291,14 @@ static int __init g5_cpufreq_init(void) /* Check current frequency */ g5_pmode_cur = g5_query_freq(); - if (g5_pmode_cur > 1) { + if (g5_pmode_cur > 1) /* We don't support anything but 1:1 and 1:2, fixup ... */ - g5_switch_freq(1); g5_pmode_cur = 1; - } + + /* Force apply current frequency to make sure everything is in + * sync (voltage is right for example) + */ + g5_switch_freq(g5_pmode_cur); printk(KERN_INFO "Registering G5 CPU frequency driver\n"); printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", @@ -279,6 +307,8 @@ static int __init g5_cpufreq_init(void) g5_cpu_freqs[g5_pmode_cur].frequency/1000); rc = cpufreq_register_driver(&g5_cpufreq_driver); + if (rc == 0) + g5_driver_active = 1; /* We keep the CPU node on hold... hopefully, Apple G5 don't have * hotplug CPU with a dynamic device-tree ... diff -puN drivers/macintosh/Kconfig~ppc64-thermal-control-for-smu-based-machines drivers/macintosh/Kconfig --- devel/drivers/macintosh/Kconfig~ppc64-thermal-control-for-smu-based-machines 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/macintosh/Kconfig 2005-10-28 17:44:03.000000000 -0700 @@ -169,6 +169,16 @@ config THERM_PM72 This driver provides thermostat and fan control for the desktop G5 machines. 
+config WINDFARM + tristate "New PowerMac thermal control infrastructure" + +config WINDFARM_SMU + tristate "Support for thermal management on SMU based PowerMacs" + depends on WINDFARM && I2C && CPU_FREQ_PMAC && PMAC_SMU + select I2C_PMAC_SMU + help + This driver provides thermal control for iMacG5 and newer + config ANSLCD tristate "Support for ANS LCD display" depends on ADB_CUDA && PPC_PMAC diff -puN drivers/macintosh/Makefile~ppc64-thermal-control-for-smu-based-machines drivers/macintosh/Makefile --- devel/drivers/macintosh/Makefile~ppc64-thermal-control-for-smu-based-machines 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/macintosh/Makefile 2005-10-28 17:44:03.000000000 -0700 @@ -26,3 +26,8 @@ obj-$(CONFIG_ADB_MACIO) += macio-adb.o obj-$(CONFIG_THERM_PM72) += therm_pm72.o obj-$(CONFIG_THERM_WINDTUNNEL) += therm_windtunnel.o obj-$(CONFIG_THERM_ADT746X) += therm_adt746x.o +obj-$(CONFIG_WINDFARM) += windfarm_core.o +obj-$(CONFIG_WINDFARM_SMU) += windfarm_smu_controls.o \ + windfarm_smu_sensors.o \ + windfarm_lm75_sensor.o windfarm_pid.o \ + windfarm_cpufreq_clamp.o windfarm_smu.o diff -puN drivers/macintosh/smu.c~ppc64-thermal-control-for-smu-based-machines drivers/macintosh/smu.c --- devel/drivers/macintosh/smu.c~ppc64-thermal-control-for-smu-based-machines 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/macintosh/smu.c 2005-10-28 17:44:03.000000000 -0700 @@ -590,6 +590,8 @@ static void smu_expose_childs(void *unus sprintf(name, "smu-i2c-%02x", *reg); of_platform_device_create(np, name, &smu->of_dev->dev); } + if (device_is_compatible(np, "smu-sensors")) + of_platform_device_create(np, "smu-sensors", &smu->of_dev->dev); } } diff -puN /dev/null drivers/macintosh/windfarm_core.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm_core.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,428 @@ +/* + * Windfarm PowerMac thermal control. Core + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. 
+ * + * + * Released under the term of the GNU GPL v2. + * + * This core code tracks the list of sensors & controls, register + * clients, and holds the kernel thread used for control. + * + * TODO: + * + * Add some information about sensor/control type and data format to + * sensors/controls, and have the sysfs attribute stuff be moved + * generically here instead of hard coded in the platform specific + * driver as it us currently + * + * This however requires solving some annoying lifetime issues with + * sysfs which doesn't seem to have lifetime rules for struct attribute, + * I may have to create full features kobjects for every sensor/control + * instead which is a bit of an overkill imho + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.2" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +static LIST_HEAD(wf_controls); +static LIST_HEAD(wf_sensors); +static DECLARE_MUTEX(wf_lock); +static struct notifier_block *wf_client_list; +static int wf_client_count; +static unsigned int wf_overtemp; +static unsigned int wf_overtemp_counter; +struct task_struct *wf_thread; + +/* + * Utilities & tick thread + */ + +static inline void wf_notify(int event, void *param) +{ + notifier_call_chain(&wf_client_list, event, param); +} + +int wf_critical_overtemp(void) +{ + static char * critical_overtemp_path = "/sbin/critical_overtemp"; + char *argv[] = { critical_overtemp_path, NULL }; + static char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + + return call_usermodehelper(critical_overtemp_path, argv, envp, 0); +} +EXPORT_SYMBOL_GPL(wf_critical_overtemp); + +static int wf_thread_func(void *data) +{ + unsigned long next, delay; + + next = jiffies; + + DBG("wf: thread started\n"); + + while(!kthread_should_stop()) { + try_to_freeze(); + + if 
(time_after_eq(jiffies, next)) { + wf_notify(WF_EVENT_TICK, NULL); + if (wf_overtemp) { + wf_overtemp_counter++; + /* 10 seconds overtemp, notify userland */ + if (wf_overtemp_counter > 10) + wf_critical_overtemp(); + /* 30 seconds, shutdown */ + if (wf_overtemp_counter > 30) { + printk(KERN_ERR "windfarm: Overtemp " + "for more than 30" + " seconds, shutting down\n"); + machine_power_off(); + } + } + next += HZ; + } + + set_current_state(TASK_INTERRUPTIBLE); + delay = next - jiffies; + if (delay <= HZ) + schedule_timeout(delay); + set_current_state(TASK_RUNNING); + + /* there should be no signal, but oh well */ + if (signal_pending(current)) { + printk(KERN_WARNING "windfarm: thread got sigl !\n"); + break; + } + } + + DBG("wf: thread stopped\n"); + + return 0; +} + +static void wf_start_thread(void) +{ + wf_thread = kthread_run(wf_thread_func, NULL, "kwindfarm"); + if (IS_ERR(wf_thread)) { + printk(KERN_ERR "windfarm: failed to create thread,err %ld\n", + PTR_ERR(wf_thread)); + wf_thread = NULL; + } +} + + +static void wf_stop_thread(void) +{ + if (wf_thread) + kthread_stop(wf_thread); + wf_thread = NULL; +} + +/* + * Controls + */ + +static void wf_control_release(struct kref *kref) +{ + struct wf_control *ct = container_of(kref, struct wf_control, ref); + + DBG("wf: Deleting control %s\n", ct->name); + + if (ct->ops && ct->ops->release) + ct->ops->release(ct); + else + kfree(ct); +} + +int wf_register_control(struct wf_control *new_ct) +{ + struct wf_control *ct; + + down(&wf_lock); + list_for_each_entry(ct, &wf_controls, link) { + if (!strcmp(ct->name, new_ct->name)) { + printk(KERN_WARNING "windfarm: trying to register" + " duplicate control %s\n", ct->name); + up(&wf_lock); + return -EEXIST; + } + } + kref_init(&new_ct->ref); + list_add(&new_ct->link, &wf_controls); + + DBG("wf: Registered control %s\n", new_ct->name); + + wf_notify(WF_EVENT_NEW_CONTROL, new_ct); + up(&wf_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(wf_register_control); + +void 
wf_unregister_control(struct wf_control *ct) +{ + down(&wf_lock); + list_del(&ct->link); + up(&wf_lock); + + DBG("wf: Unregistered control %s\n", ct->name); + + kref_put(&ct->ref, wf_control_release); +} +EXPORT_SYMBOL_GPL(wf_unregister_control); + +struct wf_control * wf_find_control(const char *name) +{ + struct wf_control *ct; + + down(&wf_lock); + list_for_each_entry(ct, &wf_controls, link) { + if (!strcmp(ct->name, name)) { + if (wf_get_control(ct)) + ct = NULL; + up(&wf_lock); + return ct; + } + } + up(&wf_lock); + return NULL; +} +EXPORT_SYMBOL_GPL(wf_find_control); + +int wf_get_control(struct wf_control *ct) +{ + if (!try_module_get(ct->ops->owner)) + return -ENODEV; + kref_get(&ct->ref); + return 0; +} +EXPORT_SYMBOL_GPL(wf_get_control); + +void wf_put_control(struct wf_control *ct) +{ + struct module *mod = ct->ops->owner; + kref_put(&ct->ref, wf_control_release); + module_put(mod); +} +EXPORT_SYMBOL_GPL(wf_put_control); + + +/* + * Sensors + */ + + +static void wf_sensor_release(struct kref *kref) +{ + struct wf_sensor *sr = container_of(kref, struct wf_sensor, ref); + + DBG("wf: Deleting sensor %s\n", sr->name); + + if (sr->ops && sr->ops->release) + sr->ops->release(sr); + else + kfree(sr); +} + +int wf_register_sensor(struct wf_sensor *new_sr) +{ + struct wf_sensor *sr; + + down(&wf_lock); + list_for_each_entry(sr, &wf_sensors, link) { + if (!strcmp(sr->name, new_sr->name)) { + printk(KERN_WARNING "windfarm: trying to register" + " duplicate sensor %s\n", sr->name); + up(&wf_lock); + return -EEXIST; + } + } + kref_init(&new_sr->ref); + list_add(&new_sr->link, &wf_sensors); + + DBG("wf: Registered sensor %s\n", new_sr->name); + + wf_notify(WF_EVENT_NEW_SENSOR, new_sr); + up(&wf_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(wf_register_sensor); + +void wf_unregister_sensor(struct wf_sensor *sr) +{ + down(&wf_lock); + list_del(&sr->link); + up(&wf_lock); + + DBG("wf: Unregistered sensor %s\n", sr->name); + + wf_put_sensor(sr); +} 
+EXPORT_SYMBOL_GPL(wf_unregister_sensor); + +struct wf_sensor * wf_find_sensor(const char *name) +{ + struct wf_sensor *sr; + + down(&wf_lock); + list_for_each_entry(sr, &wf_sensors, link) { + if (!strcmp(sr->name, name)) { + if (wf_get_sensor(sr)) + sr = NULL; + up(&wf_lock); + return sr; + } + } + up(&wf_lock); + return NULL; +} +EXPORT_SYMBOL_GPL(wf_find_sensor); + +int wf_get_sensor(struct wf_sensor *sr) +{ + if (!try_module_get(sr->ops->owner)) + return -ENODEV; + kref_get(&sr->ref); + return 0; +} +EXPORT_SYMBOL_GPL(wf_get_sensor); + +void wf_put_sensor(struct wf_sensor *sr) +{ + struct module *mod = sr->ops->owner; + kref_put(&sr->ref, wf_sensor_release); + module_put(mod); +} +EXPORT_SYMBOL_GPL(wf_put_sensor); + + +/* + * Client & notification + */ + +int wf_register_client(struct notifier_block *nb) +{ + int rc; + struct wf_control *ct; + struct wf_sensor *sr; + + down(&wf_lock); + rc = notifier_chain_register(&wf_client_list, nb); + if (rc != 0) + goto bail; + wf_client_count++; + list_for_each_entry(ct, &wf_controls, link) + wf_notify(WF_EVENT_NEW_CONTROL, ct); + list_for_each_entry(sr, &wf_sensors, link) + wf_notify(WF_EVENT_NEW_SENSOR, sr); + if (wf_client_count == 1) + wf_start_thread(); + bail: + up(&wf_lock); + return rc; +} +EXPORT_SYMBOL_GPL(wf_register_client); + +int wf_unregister_client(struct notifier_block *nb) +{ + down(&wf_lock); + notifier_chain_unregister(&wf_client_list, nb); + wf_client_count--; /* undo the increment done in wf_register_client(); the tick thread is stopped when the last client leaves */ + if (wf_client_count == 0) + wf_stop_thread(); + up(&wf_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(wf_unregister_client); + +void wf_set_overtemp(void) +{ + down(&wf_lock); + wf_overtemp++; + if (wf_overtemp == 1) { + printk(KERN_WARNING "windfarm: Overtemp condition detected !\n"); + wf_overtemp_counter = 0; + wf_notify(WF_EVENT_OVERTEMP, NULL); + } + up(&wf_lock); +} +EXPORT_SYMBOL_GPL(wf_set_overtemp); + +void wf_clear_overtemp(void) +{ + down(&wf_lock); + WARN_ON(wf_overtemp == 0); + if (wf_overtemp == 0) { + up(&wf_lock); + return; + } +
wf_overtemp--; + if (wf_overtemp == 0) { + printk(KERN_WARNING "windfarm: Overtemp condition cleared !\n"); + wf_notify(WF_EVENT_NORMALTEMP, NULL); + } + up(&wf_lock); +} +EXPORT_SYMBOL_GPL(wf_clear_overtemp); + +int wf_is_overtemp(void) +{ + return (wf_overtemp != 0); +} +EXPORT_SYMBOL_GPL(wf_is_overtemp); + +static struct platform_device wf_platform_device = { + .name = "windfarm", +}; + +static int __init windfarm_core_init(void) +{ + DBG("wf: core loaded\n"); + + platform_device_register(&wf_platform_device); + return 0; +} + +static void __exit windfarm_core_exit(void) +{ + BUG_ON(wf_client_count != 0); + + DBG("wf: core unloaded\n"); + + platform_device_unregister(&wf_platform_device); +} + + +module_init(windfarm_core_init); +module_exit(windfarm_core_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("Core component of PowerMac thermal control"); +MODULE_LICENSE("GPL"); + diff -puN /dev/null drivers/macintosh/windfarm_cpufreq_clamp.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm_cpufreq_clamp.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.3" + +static int clamped; +static struct wf_control *clamp_control; + +static int clamp_notifier_call(struct notifier_block *self, + unsigned long event, void *data) +{ + struct cpufreq_policy *p = data; + unsigned long max_freq; + + if (event != CPUFREQ_ADJUST) + return 0; + + max_freq = clamped ? 
(p->cpuinfo.min_freq) : (p->cpuinfo.max_freq); + cpufreq_verify_within_limits(p, 0, max_freq); + + return 0; +} + +static struct notifier_block clamp_notifier = { + .notifier_call = clamp_notifier_call, +}; + +static int clamp_set(struct wf_control *ct, s32 value) +{ + if (value) + printk(KERN_INFO "windfarm: Clamping CPU frequency to " + "minimum !\n"); + else + printk(KERN_INFO "windfarm: CPU frequency unclamped !\n"); + clamped = value; + cpufreq_update_policy(0); + return 0; +} + +static int clamp_get(struct wf_control *ct, s32 *value) +{ + *value = clamped; + return 0; +} + +static s32 clamp_min(struct wf_control *ct) +{ + return 0; +} + +static s32 clamp_max(struct wf_control *ct) +{ + return 1; +} + +static struct wf_control_ops clamp_ops = { + .set_value = clamp_set, + .get_value = clamp_get, + .get_min = clamp_min, + .get_max = clamp_max, + .owner = THIS_MODULE, +}; + +static int __init wf_cpufreq_clamp_init(void) +{ + struct wf_control *clamp; + + clamp = kmalloc(sizeof(struct wf_control), GFP_KERNEL); + if (clamp == NULL) + return -ENOMEM; + cpufreq_register_notifier(&clamp_notifier, CPUFREQ_POLICY_NOTIFIER); + clamp->ops = &clamp_ops; + clamp->name = "cpufreq-clamp"; + if (wf_register_control(clamp)) + goto fail; + clamp_control = clamp; + return 0; + fail: + kfree(clamp); + return -ENODEV; +} + +static void __exit wf_cpufreq_clamp_exit(void) +{ + if (clamp_control) + wf_unregister_control(clamp_control); +} + + +module_init(wf_cpufreq_clamp_init); +module_exit(wf_cpufreq_clamp_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("CPU frequency clamp for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + diff -puN /dev/null drivers/macintosh/windfarm.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm.h 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,122 @@ +#ifndef __WINDFARM_H__ +#define __WINDFARM_H__ + +#include +#include +#include +#include + +/* Display a 16.16 fixed point value */ 
+#define FIX32TOPRINT(f) ((f) >> 16),((((f) & 0xffff) * 1000) >> 16) + +/* + * Control objects + */ + +struct wf_control; + +struct wf_control_ops { + int (*set_value)(struct wf_control *ct, s32 val); + int (*get_value)(struct wf_control *ct, s32 *val); + s32 (*get_min)(struct wf_control *ct); + s32 (*get_max)(struct wf_control *ct); + void (*release)(struct wf_control *ct); + struct module *owner; +}; + +struct wf_control { + struct list_head link; + struct wf_control_ops *ops; + char *name; + int type; + struct kref ref; +}; + +#define WF_CONTROL_TYPE_GENERIC 0 +#define WF_CONTROL_RPM_FAN 1 +#define WF_CONTROL_PWM_FAN 2 + + +/* Note about lifetime rules: wf_register_control() will initialize + * the kref and wf_unregister_control will decrement it, thus the + * object creating/disposing a given control shouldn't assume it + * still exists after wf_unregister_control has been called. + * wf_find_control will inc the refcount for you + */ +extern int wf_register_control(struct wf_control *ct); +extern void wf_unregister_control(struct wf_control *ct); +extern struct wf_control * wf_find_control(const char *name); +extern int wf_get_control(struct wf_control *ct); +extern void wf_put_control(struct wf_control *ct); + +static inline int wf_control_set_max(struct wf_control *ct) +{ + s32 vmax = ct->ops->get_max(ct); + return ct->ops->set_value(ct, vmax); +} + +static inline int wf_control_set_min(struct wf_control *ct) +{ + s32 vmin = ct->ops->get_min(ct); + return ct->ops->set_value(ct, vmin); +} + +/* + * Sensor objects + */ + +struct wf_sensor; + +struct wf_sensor_ops { + int (*get_value)(struct wf_sensor *sr, s32 *val); + void (*release)(struct wf_sensor *sr); + struct module *owner; +}; + +struct wf_sensor { + struct list_head link; + struct wf_sensor_ops *ops; + char *name; + struct kref ref; +}; + +/* Same lifetime rules as controls */ +extern int wf_register_sensor(struct wf_sensor *sr); +extern void wf_unregister_sensor(struct wf_sensor *sr); +extern struct 
wf_sensor * wf_find_sensor(const char *name); +extern int wf_get_sensor(struct wf_sensor *sr); +extern void wf_put_sensor(struct wf_sensor *sr); + +/* For use by clients. Note that we are a bit racy here since + * notifier_block doesn't have a module owner field. I may fix + * it one day ... + * + * LOCKING NOTE ! + * + * All "events" except WF_EVENT_TICK are called with an internal mutex + * held which will deadlock if you call basically any core routine. + * So don't ! Just take note of the event and do your actual operations + * from the ticker. + * + */ +extern int wf_register_client(struct notifier_block *nb); +extern int wf_unregister_client(struct notifier_block *nb); + +/* Overtemp conditions. Those are refcounted */ +extern void wf_set_overtemp(void); +extern void wf_clear_overtemp(void); +extern int wf_is_overtemp(void); + +#define WF_EVENT_NEW_CONTROL 0 /* param is wf_control * */ +#define WF_EVENT_NEW_SENSOR 1 /* param is wf_sensor * */ +#define WF_EVENT_OVERTEMP 2 /* no param */ +#define WF_EVENT_NORMALTEMP 3 /* overtemp condition cleared */ +#define WF_EVENT_TICK 4 /* 1 second tick */ + +/* Note: If that driver gets more broad use, we could replace the + * simplistic overtemp bits with "environmental conditions". That + * could then be used to also notify of things like fan failure, + * case open, battery conditions, ... + */ + +#endif /* __WINDFARM_H__ */ diff -puN /dev/null drivers/macintosh/windfarm_lm75_sensor.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm_lm75_sensor.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,255 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.1" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) 
do { } while(0) +#endif + +struct wf_lm75_sensor { + int ds1775 : 1; + int inited : 1; + struct i2c_client i2c; + struct wf_sensor sens; +}; +#define wf_to_lm75(c) container_of(c, struct wf_lm75_sensor, sens) +#define i2c_to_lm75(c) container_of(c, struct wf_lm75_sensor, i2c) + +static int wf_lm75_attach(struct i2c_adapter *adapter); +static int wf_lm75_detach(struct i2c_client *client); + +static struct i2c_driver wf_lm75_driver = { + .owner = THIS_MODULE, + .name = "wf_lm75", + .flags = I2C_DF_NOTIFY, + .attach_adapter = wf_lm75_attach, + .detach_client = wf_lm75_detach, +}; + +static int wf_lm75_get(struct wf_sensor *sr, s32 *value) +{ + struct wf_lm75_sensor *lm = wf_to_lm75(sr); + s32 data; + + if (lm->i2c.adapter == NULL) + return -ENODEV; + + /* Init chip if necessary */ + if (!lm->inited) { + u8 cfg_new, cfg = (u8)i2c_smbus_read_byte_data(&lm->i2c, 1); + + DBG("wf_lm75: Initializing %s, cfg was: %02x\n", + sr->name, cfg); + + /* clear shutdown bit, keep other settings as left by + * the firmware for now + */ + cfg_new = cfg & ~0x01; + i2c_smbus_write_byte_data(&lm->i2c, 1, cfg_new); + lm->inited = 1; + + /* If we just powered it up, let's wait 200 ms */ + msleep(200); + } + + /* Read temperature register */ + data = (s32)le16_to_cpu(i2c_smbus_read_word_data(&lm->i2c, 0)); + data <<= 8; + *value = data; + + return 0; +} + +static void wf_lm75_release(struct wf_sensor *sr) +{ + struct wf_lm75_sensor *lm = wf_to_lm75(sr); + + /* check if client is registered and detach from i2c */ + if (lm->i2c.adapter) { + i2c_detach_client(&lm->i2c); + lm->i2c.adapter = NULL; + } + + kfree(lm); +} + +static struct wf_sensor_ops wf_lm75_ops = { + .get_value = wf_lm75_get, + .release = wf_lm75_release, + .owner = THIS_MODULE, +}; + +static struct wf_lm75_sensor *wf_lm75_create(struct i2c_adapter *adapter, + u8 addr, int ds1775, + const char *loc) +{ + struct wf_lm75_sensor *lm; + + DBG("wf_lm75: creating %s device at address 0x%02x\n", + ds1775 ? 
"ds1775" : "lm75", addr); + + lm = kmalloc(sizeof(struct wf_lm75_sensor), GFP_KERNEL); + if (lm == NULL) + return NULL; + memset(lm, 0, sizeof(struct wf_lm75_sensor)); + + /* Usual rant about sensor names not beeing very consistent in + * the device-tree, oh well ... + * Add more entries below as you deal with more setups + */ + if (!strcmp(loc, "Hard drive") || !strcmp(loc, "DRIVE BAY")) + lm->sens.name = "hd-temp"; + else + goto fail; + + lm->inited = 0; + lm->sens.ops = &wf_lm75_ops; + lm->ds1775 = ds1775; + lm->i2c.addr = (addr >> 1) & 0x7f; + lm->i2c.adapter = adapter; + lm->i2c.driver = &wf_lm75_driver; + strncpy(lm->i2c.name, lm->sens.name, I2C_NAME_SIZE-1); + + if (i2c_attach_client(&lm->i2c)) { + printk(KERN_ERR "windfarm: failed to attach %s %s to i2c\n", + ds1775 ? "ds1775" : "lm75", lm->i2c.name); + goto fail; + } + + if (wf_register_sensor(&lm->sens)) { + i2c_detach_client(&lm->i2c); + goto fail; + } + + return lm; + fail: + kfree(lm); + return NULL; +} + +static int wf_lm75_attach(struct i2c_adapter *adapter) +{ + u8 bus_id; + struct device_node *smu, *bus, *dev; + + /* We currently only deal with LM75's hanging off the SMU + * i2c busses. If we extend that driver to other/older + * machines, we should split this function into SMU-i2c, + * keywest-i2c, PMU-i2c, ... 
+ */ + + DBG("wf_lm75: adapter %s detected\n", adapter->name); + + if (strncmp(adapter->name, "smu-i2c-", 8) != 0) + return 0; + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return 0; + + /* Look for the bus in the device-tree */ + bus_id = (u8)simple_strtoul(adapter->name + 8, NULL, 16); + + DBG("wf_lm75: bus ID is %x\n", bus_id); + + /* Look for sensors subdir */ + for (bus = NULL; + (bus = of_get_next_child(smu, bus)) != NULL;) { + u32 *reg; + + if (strcmp(bus->name, "i2c")) + continue; + reg = (u32 *)get_property(bus, "reg", NULL); + if (reg == NULL) + continue; + if (bus_id == *reg) + break; + } + of_node_put(smu); + if (bus == NULL) { + printk(KERN_WARNING "windfarm: SMU i2c bus 0x%x not found" + " in device-tree !\n", bus_id); + return 0; + } + + DBG("wf_lm75: bus found, looking for device...\n"); + + /* Now look for lm75(s) in there */ + for (dev = NULL; + (dev = of_get_next_child(bus, dev)) != NULL;) { + const char *loc = + get_property(dev, "hwsensor-location", NULL); + u32 *reg = (u32 *)get_property(dev, "reg", NULL); + DBG(" dev: %s... 
(loc: %p, reg: %p)\n", dev->name, loc, reg); + if (loc == NULL || reg == NULL) + continue; + /* real lm75 */ + if (device_is_compatible(dev, "lm75")) + wf_lm75_create(adapter, *reg, 0, loc); + /* ds1775 (compatible, better resolution */ + else if (device_is_compatible(dev, "ds1775")) + wf_lm75_create(adapter, *reg, 1, loc); + } + + of_node_put(bus); + + return 0; +} + +static int wf_lm75_detach(struct i2c_client *client) +{ + struct wf_lm75_sensor *lm = i2c_to_lm75(client); + + DBG("wf_lm75: i2c detatch called for %s\n", lm->sens.name); + + /* Mark client detached */ + lm->i2c.adapter = NULL; + + /* release sensor */ + wf_unregister_sensor(&lm->sens); + + return 0; +} + +static int __init wf_lm75_sensor_init(void) +{ + int rc; + + rc = i2c_add_driver(&wf_lm75_driver); + if (rc < 0) + return rc; + return 0; +} + +static void __exit wf_lm75_sensor_exit(void) +{ + i2c_del_driver(&wf_lm75_driver); +} + + +module_init(wf_lm75_sensor_init); +module_exit(wf_lm75_sensor_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("LM75 sensor objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + diff -puN /dev/null drivers/macintosh/windfarm_pid.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm_pid.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,146 @@ +/* + * Windfarm PowerMac thermal control. Generic PID helpers + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + */ + +#include +#include +#include +#include +#include +#include + +#include "windfarm_pid.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) 
do { } while(0) +#endif + +void wf_pid_init(struct wf_pid_state *st, struct wf_pid_param *param) +{ + memset(st, 0, sizeof(struct wf_pid_state)); + st->param = *param; + st->first = 1; +} +EXPORT_SYMBOL_GPL(wf_pid_init); + +s32 wf_pid_run(struct wf_pid_state *st, s32 new_sample) +{ + s64 error, integ, deriv; + s32 target; + int i, hlen = st->param.history_len; + + /* Calculate error term */ + error = new_sample - st->param.itarget; + + /* Get samples into our history buffer */ + if (st->first) { + for (i = 0; i < hlen; i++) { + st->samples[i] = new_sample; + st->errors[i] = error; + } + st->first = 0; + st->index = 0; + } else { + st->index = (st->index + 1) % hlen; + st->samples[st->index] = new_sample; + st->errors[st->index] = error; + } + + /* Calculate integral term */ + for (i = 0, integ = 0; i < hlen; i++) + integ += st->errors[(st->index + hlen - i) % hlen]; + integ *= st->param.interval; + + /* Calculate derivative term */ + deriv = st->errors[st->index] - + st->errors[(st->index + hlen - 1) % hlen]; + deriv /= st->param.interval; + + /* Calculate target */ + target = (s32)((integ * (s64)st->param.gr + deriv * (s64)st->param.gd + + error * (s64)st->param.gp) >> 36); + if (st->param.additive) + target += st->target; + target = max(target, st->param.min); + target = min(target, st->param.max); + st->target = target; + + return st->target; +} +EXPORT_SYMBOL_GPL(wf_pid_run); + +void wf_cpu_pid_init(struct wf_cpu_pid_state *st, + struct wf_cpu_pid_param *param) +{ + memset(st, 0, sizeof(struct wf_cpu_pid_state)); + st->param = *param; + st->first = 1; +} +EXPORT_SYMBOL_GPL(wf_cpu_pid_init); + +s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 new_power, s32 new_temp) +{ + s64 error, integ, deriv, prop; + s32 target, sval, adj; + int i, hlen = st->param.history_len; + + /* Calculate error term */ + error = st->param.pmaxadj - new_power; + + /* Get samples into our history buffer */ + if (st->first) { + for (i = 0; i < hlen; i++) { + st->powers[i] = new_power; 
+ st->errors[i] = error; + } + st->temps[0] = st->temps[1] = new_temp; + st->first = 0; + st->index = st->tindex = 0; + } else { + st->index = (st->index + 1) % hlen; + st->powers[st->index] = new_power; + st->errors[st->index] = error; + st->tindex = (st->tindex + 1) % 2; + st->temps[st->tindex] = new_temp; + } + + /* Calculate integral term */ + for (i = 0, integ = 0; i < hlen; i++) + integ += st->errors[(st->index + hlen - i) % hlen]; + integ *= st->param.interval; + integ *= st->param.gr; + sval = st->param.tmax - ((integ >> 20) & 0xffffffff); + adj = min(st->param.ttarget, sval); + + DBG("integ: %lx, sval: %lx, adj: %lx\n", integ, sval, adj); + + /* Calculate derivative term */ + deriv = st->temps[st->tindex] - + st->temps[(st->tindex + 2 - 1) % 2]; + deriv /= st->param.interval; + deriv *= st->param.gd; + + /* Calculate proportional term */ + prop = (new_temp - adj); + prop *= st->param.gp; + + DBG("deriv: %lx, prop: %lx\n", deriv, prop); + + /* Calculate target */ + target = st->target + (s32)((deriv + prop) >> 36); + target = max(target, st->param.min); + target = min(target, st->param.max); + st->target = target; + + return st->target; +} +EXPORT_SYMBOL_GPL(wf_cpu_pid_run); diff -puN /dev/null drivers/macintosh/windfarm_pid.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm_pid.h 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,84 @@ +/* + * Windfarm PowerMac thermal control. Generic PID helpers + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + * + * This is a pair of generic PID helpers that can be used by + * control loops. One is the basic PID implementation, the + * other one is more specifically tailored to the loops used + * for CPU control with 2 input sample types (temp and power) + */ + +/* + * *** Simple PID *** + */ + +#define WF_PID_MAX_HISTORY 32 + +/* This parameter array is passed to the PID algorithm. 
Currently, + * we don't support changing parameters on the fly as it's not needed + * but could be implemented (with necessary adjustment of the history + * buffer + */ +struct wf_pid_param { + int interval; /* Interval between samples in seconds */ + int history_len; /* Size of history buffer */ + int additive; /* 1: target relative to previous value */ + s32 gd, gp, gr; /* PID gains */ + s32 itarget; /* PID input target */ + s32 min,max; /* min and max target values */ +}; + +struct wf_pid_state { + int first; /* first run of the loop */ + int index; /* index of current sample */ + s32 target; /* current target value */ + s32 samples[WF_PID_MAX_HISTORY]; /* samples history buffer */ + s32 errors[WF_PID_MAX_HISTORY]; /* error history buffer */ + + struct wf_pid_param param; +}; + +extern void wf_pid_init(struct wf_pid_state *st, struct wf_pid_param *param); +extern s32 wf_pid_run(struct wf_pid_state *st, s32 sample); + + +/* + * *** CPU PID *** + */ + +#define WF_CPU_PID_MAX_HISTORY 32 + +/* This parameter array is passed to the CPU PID algorithm. Currently, + * we don't support changing parameters on the fly as it's not needed + * but could be implemented (with necessary adjustment of the history + * buffer + */ +struct wf_cpu_pid_param { + int interval; /* Interval between samples in seconds */ + int history_len; /* Size of history buffer */ + s32 gd, gp, gr; /* PID gains */ + s32 pmaxadj; /* PID max power adjust */ + s32 ttarget; /* PID input target */ + s32 tmax; /* PID input max */ + s32 min,max; /* min and max target values */ +}; + +struct wf_cpu_pid_state { + int first; /* first run of the loop */ + int index; /* index of current power */ + int tindex; /* index of current temp */ + s32 target; /* current target value */ + s32 powers[WF_PID_MAX_HISTORY]; /* power history buffer */ + s32 errors[WF_PID_MAX_HISTORY]; /* error history buffer */ + s32 temps[2]; /* temp. 
history buffer */ + + struct wf_cpu_pid_param param; +}; + +extern void wf_cpu_pid_init(struct wf_cpu_pid_state *st, + struct wf_cpu_pid_param *param); +extern s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 power, s32 temp); diff -puN /dev/null drivers/macintosh/windfarm_smu.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm_smu.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,1220 @@ +/* + * Windfarm PowerMac thermal control. SMU based machines control loops + * + * (c) Copyright 2005 Benjamin Herrenschmidt, IBM Corp. + * + * + * Released under the term of the GNU GPL v2. + * + * The algorithm used is the PID control algorithm, used the same + * way the published Darwin code does, using the same values that + * are present in the Darwin 8.2 snapshot property lists (note however + * that none of the code has been re-used, it's a complete re-implementation + * + * The various control loops found in Darwin config file are: + * + * PowerMac8,1 and PowerMac8,2 + * =========================== + * + * System Fans control loop. Different based on models. In addition to the + * usual PID algorithm, the control loop gets 2 additional pairs of linear + * scaling factors (scale/offsets) expressed as 4.12 fixed point values + * signed offset, unsigned scale) + * + * The targets are modified such as: + * - the linked control (second control) gets the target value as-is + * (typically the drive fan) + * - the main control (first control) gets the target value scaled with + * the first pair of factors, and is then modified as below + * - the value of the target of the CPU Fan control loop is retreived, + * scaled with the second pair of factors, and the max of that and + * the scaled target is applied to the main control. 
+ * + * # model_id: 2 + * controls : system-fan, drive-bay-fan + * sensors : hd-temp + * PID params : G_d = 0x15400000 + * G_p = 0x00200000 + * G_r = 0x000002fd + * History = 2 entries + * Input target = 0x3a0000 + * Interval = 5s + * linear-factors : offset = 0xff38 scale = 0x0ccd + * offset = 0x0208 scale = 0x07ae + * + * # model_id: 3 + * controls : system-fan, drive-bay-fan + * sensors : hd-temp + * PID params : G_d = 0x08e00000 + * G_p = 0x00566666 + * G_r = 0x0000072b + * History = 2 entries + * Input target = 0x350000 + * Interval = 5s + * linear-factors : offset = 0xff38 scale = 0x0ccd + * offset = 0x0000 scale = 0x0000 + * + * # model_id: 5 + * controls : system-fan + * sensors : hd-temp + * PID params : G_d = 0x15400000 + * G_p = 0x00233333 + * G_r = 0x000002fd + * History = 2 entries + * Input target = 0x3a0000 + * Interval = 5s + * linear-factors : offset = 0x0000 scale = 0x1000 + * offset = 0x0091 scale = 0x0bae + * + * CPU Fan control loop. The loop is identical for all models. It + * has an additional pair of scaling factors. This is used to scale the + * systems fan control loop target result (the one before it gets scaled + * by the System Fans control loop itself). Then, the max value of the + * calculated target value and system fan value is sent to the fans + * + * controls : cpu-fan + * sensors : cpu-temp cpu-power + * PID params : From SMU sdb partition + * linear-factors : offset = 0xfb50 scale = 0x1000 + * + * CPU Slew control loop. Not implemented. The cpufreq driver in linux is + * completely separate for now, though we could find a way to link it, either + * as a client reacting to overtemp notifications, or directly monitoring + * the CPU temperature + * + * WARNING ! The CPU control loop requires the CPU tmax for the current + * operating point. However, we currently are completely separated from + * the cpufreq driver and thus do not know what the current operating + * point is.
Fortunately, we also do not have any hardware supporting anything + * but operating point 0 at the moment, thus we just peek that value directly + * from the SDB partition. If we ever end up with actually slewing the system + * clock and thus changing operating points, we'll have to find a way to + * communicate with the CPU freq driver; + * + * PowerMac9,1 + * =========== + * + * Has 3 control loops: CPU fans is similar to PowerMac8,1 (though it doesn't + * try to play with other control loops fans). Drive bay is rather basic PID + * with one sensor and one fan. Slots area is a bit different as the Darwin + * driver is supposed to be capable of working in a special "AGP" mode which + * involves the presence of an AGP sensor and an AGP fan (possibly on the + * AGP card itself). I can't deal with that special mode as I don't have + * access to those additional sensor/fans for now (though ultimately, it would + * be possible to add sensor objects for them) so I'm only implementing the + * basic PCI slot control loop + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" +#include "windfarm_pid.h" + +#define VERSION "0.3" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) 
do { } while(0) +#endif + +/* define this to force CPU overtemp to 74 degree, useful for testing + * the overtemp code + */ +#undef HACKED_OVERTEMP + +/* Machine identification */ +#define MACHINE_PM81 81 /* PM81, PM82 */ +#define MACHINE_PM91 91 /* PM91 */ + +static int wf_smu_machine; /* machine */ +static int wf_smu_mach_model; /* machine model id */ + +static struct device *wf_smu_dev; + +/* Controls & sensors */ +static struct wf_sensor *sensor_cpu_power; +static struct wf_sensor *sensor_cpu_temp; +static struct wf_sensor *sensor_hd_temp; +static struct wf_sensor *sensor_slots_power; +static struct wf_control *fan_cpu_main; +static struct wf_control *fan_cpu_second; +static struct wf_control *fan_cpu_third; +static struct wf_control *fan_hd; +static struct wf_control *fan_system; +static struct wf_control *fan_slots; +static struct wf_control *cpufreq_clamp; + +/* Set to kick the control loop into life */ +static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok, wf_smu_started; + +/* Failure handling.. could be nicer */ +#define FAILURE_FAN 0x01 +#define FAILURE_SENSOR 0x02 +#define FAILURE_OVERTEMP 0x04 + +static unsigned int wf_smu_failure_state; +static int wf_smu_readjust, wf_smu_skipping; + +/* + * ****** System Fans Control Loop ****** + * + * (PowerMac8,1 and 8,2 only) + */ + +/* Parameters for the System Fans control loop. Parameters + * not in this table such as interval, history size, ... + * are common to all versions and thus hard coded for now. 
+ */ +struct wf_smu_sys_fans_param { + int model_id; + s32 itarget; + s32 gd, gp, gr; + + s16 offset0; + u16 scale0; + s16 offset1; + u16 scale1; +}; + +#define WF_SMU_SYS_FANS_INTERVAL 5 +#define WF_SMU_SYS_FANS_HISTORY_SIZE 2 + +/* State data used by the system fans control loop + * (MACHINE_PM81 only) + */ +struct wf_smu_sys_fans_state { + int ticks; + s32 sys_setpoint; + s32 hd_setpoint; + s16 offset0; + u16 scale0; + s16 offset1; + u16 scale1; + struct wf_pid_state pid; +}; + +/* + * Configs for SMU System Fan control loop + */ +static struct wf_smu_sys_fans_param wf_smu_sys_all_params[] = { + /* Model ID 2 */ + { + .model_id = 2, + .itarget = 0x3a0000, + .gd = 0x15400000, + .gp = 0x00200000, + .gr = 0x000002fd, + .offset0 = 0xff38, + .scale0 = 0x0ccd, + .offset1 = 0x0208, + .scale1 = 0x07ae, + }, + /* Model ID 3 */ + { + .model_id = 3, + .itarget = 0x350000, + .gd = 0x08e00000, + .gp = 0x00566666, + .gr = 0x0000072b, + .offset0 = 0xff38, + .scale0 = 0x0ccd, + .offset1 = 0x0000, + .scale1 = 0x0000, + }, + /* Model ID 5 */ + { + .model_id = 5, + .itarget = 0x3a0000, + .gd = 0x15400000, + .gp = 0x00233333, + .gr = 0x000002fd, + .offset0 = 0x0000, + .scale0 = 0x1000, + .offset1 = 0x0091, + .scale1 = 0x0bae, + }, +}; +#define WF_SMU_SYS_FANS_NUM_CONFIGS ARRAY_SIZE(wf_smu_sys_all_params) + +static struct wf_smu_sys_fans_state *wf_smu_sys_fans; + +/* + * ****** CPU Fans Control Loop ****** + * + */ + + +#define WF_SMU_CPU_FANS_INTERVAL 1 +#define WF_SMU_CPU_FANS_MAX_HISTORY 16 +#define WF_SMU_CPU_FANS_SIBLING_SCALE 0x00001000 +#define WF_SMU_CPU_FANS_SIBLING_OFFSET 0xfffffb50 + +/* State data used by the cpu fans control loop + */ +struct wf_smu_cpu_fans_state { + int ticks; + s32 cpu_setpoint; + s32 scale; + s32 offset; + struct wf_cpu_pid_state pid; +}; + +static struct wf_smu_cpu_fans_state *wf_smu_cpu_fans; + + + +/* + * ****** Drive Fan Control Loop ****** + * + */ + +struct wf_smu_drive_fans_state { + int ticks; + s32 setpoint; + struct wf_pid_state pid; +}; +
+static struct wf_smu_drive_fans_state *wf_smu_drive_fans; + +/* + * ****** Slots Fan Control Loop ****** + * + */ + +struct wf_smu_slots_fans_state { + int ticks; + s32 setpoint; + struct wf_pid_state pid; +}; + +static struct wf_smu_slots_fans_state *wf_smu_slots_fans; + +/* + * ***** Implementation ***** + * + */ + +static void wf_smu_create_sys_fans(void) +{ + struct wf_smu_sys_fans_param *param = NULL; + struct wf_pid_param pid_param; + int i; + + /* First, locate the params for this model */ + for (i = 0; i < WF_SMU_SYS_FANS_NUM_CONFIGS; i++) + if (wf_smu_sys_all_params[i].model_id == wf_smu_mach_model) { + param = &wf_smu_sys_all_params[i]; + break; + } + + /* No params found, put fans to max */ + if (param == NULL) { + printk(KERN_WARNING "windfarm: System fan config not found " + "for this machine model, max fan speed\n"); + goto fail; + } + + /* Alloc & initialize state */ + wf_smu_sys_fans = kmalloc(sizeof(struct wf_smu_sys_fans_state), + GFP_KERNEL); + if (wf_smu_sys_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_sys_fans->ticks = 1; + wf_smu_sys_fans->scale0 = param->scale0; + wf_smu_sys_fans->offset0 = param->offset0; + wf_smu_sys_fans->scale1 = param->scale1; + wf_smu_sys_fans->offset1 = param->offset1; + + /* Fill PID params */ + pid_param.gd = param->gd; + pid_param.gp = param->gp; + pid_param.gr = param->gr; + pid_param.interval = WF_SMU_SYS_FANS_INTERVAL; + pid_param.history_len = WF_SMU_SYS_FANS_HISTORY_SIZE; + pid_param.itarget = param->itarget; + pid_param.min = fan_system->ops->get_min(fan_system); + pid_param.max = fan_system->ops->get_max(fan_system); + if (fan_hd) { + pid_param.min =max(pid_param.min,fan_hd->ops->get_min(fan_hd)); + pid_param.max =min(pid_param.max,fan_hd->ops->get_max(fan_hd)); + } + wf_pid_init(&wf_smu_sys_fans->pid, &pid_param); + + DBG("wf: System Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + 
FIX32TOPRINT(pid_param.itarget), pid_param.min, pid_param.max); + return; + + fail: + + if (fan_system) + wf_control_set_max(fan_system); + if (fan_hd) + wf_control_set_max(fan_hd); +} + +static void wf_smu_sys_fans_tick(struct wf_smu_sys_fans_state *st) +{ + s32 new_setpoint, temp, scaled, cputarget; + int rc; + + if (--st->ticks != 0) { + if (wf_smu_readjust) + goto readjust; + return; + } + st->ticks = WF_SMU_SYS_FANS_INTERVAL; + + rc = sensor_hd_temp->ops->get_value(sensor_hd_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: HD temp sensor error %d\n", + rc); + wf_smu_failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: System Fans tick ! HD temp: %d.%03d\n", + FIX32TOPRINT(temp)); + + if (temp > (st->pid.param.itarget + 0x50000)) + wf_smu_failure_state |= FAILURE_OVERTEMP; + + new_setpoint = wf_pid_run(&st->pid, temp); + + DBG("wf_smu: new_setpoint: %d RPM\n", (int)new_setpoint); + + scaled = ((((s64)new_setpoint) * (s64)st->scale0) >> 12) + st->offset0; + + DBG("wf_smu: scaled setpoint: %d RPM\n", (int)scaled); + + cputarget = wf_smu_cpu_fans ? 
wf_smu_cpu_fans->pid.target : 0; + cputarget = ((((s64)cputarget) * (s64)st->scale1) >> 12) + st->offset1; + scaled = max(scaled, cputarget); + scaled = max(scaled, st->pid.param.min); + scaled = min(scaled, st->pid.param.max); + + DBG("wf_smu: adjusted setpoint: %d RPM\n", (int)scaled); + + if (st->sys_setpoint == scaled && new_setpoint == st->hd_setpoint) + return; + st->sys_setpoint = scaled; + st->hd_setpoint = new_setpoint; + readjust: + if (fan_system && wf_smu_failure_state == 0) { + rc = fan_system->ops->set_value(fan_system, st->sys_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: Sys fan error %d\n", + rc); + wf_smu_failure_state |= FAILURE_FAN; + } + } + if (fan_hd && wf_smu_failure_state == 0) { + rc = fan_hd->ops->set_value(fan_hd, st->hd_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: HD fan error %d\n", + rc); + wf_smu_failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_cpu_fans(void) +{ + struct wf_cpu_pid_param pid_param; + struct smu_sdbp_header *hdr; + struct smu_sdbp_cpupiddata *piddata; + struct smu_sdbp_fvt *fvt; + s32 tmax, tdelta, maxpow, powadj; + + /* First, locate the PID params in SMU SBD */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); + if (hdr == 0) { + printk(KERN_WARNING "windfarm: CPU PID fan config not found " + "max fan speed\n"); + goto fail; + } + piddata = (struct smu_sdbp_cpupiddata *)&hdr[1]; + + /* Get the FVT params for operating point 0 (the only supported one + * for now) in order to get tmax + */ + hdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); + if (hdr) { + fvt = (struct smu_sdbp_fvt *)&hdr[1]; + tmax = ((s32)fvt->maxtemp) << 16; + } else + tmax = 0x5e0000; /* 94 degree default */ + + /* Alloc & initialize state */ + wf_smu_cpu_fans = kmalloc(sizeof(struct wf_smu_cpu_fans_state), + GFP_KERNEL); + if (wf_smu_cpu_fans == NULL) + goto fail; + wf_smu_cpu_fans->ticks = 1; + + if (wf_smu_machine == MACHINE_PM81) { + wf_smu_cpu_fans->scale = WF_SMU_CPU_FANS_SIBLING_SCALE; + 
wf_smu_cpu_fans->offset = WF_SMU_CPU_FANS_SIBLING_OFFSET; + } + + /* Fill PID params */ + pid_param.interval = WF_SMU_CPU_FANS_INTERVAL; + pid_param.history_len = piddata->history_len; + if (pid_param.history_len > WF_CPU_PID_MAX_HISTORY) { + printk(KERN_WARNING "windfarm: History size overflow on " + "CPU control loop (%d)\n", piddata->history_len); + pid_param.history_len = WF_CPU_PID_MAX_HISTORY; + } + pid_param.gd = piddata->gd; + pid_param.gp = piddata->gp; + pid_param.gr = piddata->gr / pid_param.history_len; + + tdelta = ((s32)piddata->target_temp_delta) << 16; + maxpow = ((s32)piddata->max_power) << 16; + powadj = ((s32)piddata->power_adj) << 16; + + pid_param.tmax = tmax; + pid_param.ttarget = tmax - tdelta; + pid_param.pmaxadj = maxpow - powadj; + + pid_param.min = fan_cpu_main->ops->get_min(fan_cpu_main); + pid_param.max = fan_cpu_main->ops->get_max(fan_cpu_main); + + wf_cpu_pid_init(&wf_smu_cpu_fans->pid, &pid_param); + + DBG("wf: CPU Fan control initialized.\n"); + DBG(" ttarged=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(pid_param.ttarget), FIX32TOPRINT(pid_param.tmax), + pid_param.min, pid_param.max); + + return; + + fail: + printk(KERN_WARNING "windfarm: CPU fan config not found\n" + "for this machine model, max fan speed\n"); + + if (cpufreq_clamp) + wf_control_set_max(cpufreq_clamp); + if (fan_cpu_main) + wf_control_set_max(fan_cpu_main); +} + +static void wf_smu_cpu_fans_tick(struct wf_smu_cpu_fans_state *st) +{ + s32 new_setpoint, temp, power, systarget; + int rc; + + if (--st->ticks != 0) { + if (wf_smu_readjust) + goto readjust; + return; + } + st->ticks = WF_SMU_CPU_FANS_INTERVAL; + + rc = sensor_cpu_temp->ops->get_value(sensor_cpu_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: CPU temp sensor error %d\n", + rc); + wf_smu_failure_state |= FAILURE_SENSOR; + return; + } + + rc = sensor_cpu_power->ops->get_value(sensor_cpu_power, &power); + if (rc) { + printk(KERN_WARNING "windfarm: CPU power sensor error %d\n", 
+ rc); + wf_smu_failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: CPU Fans tick ! CPU temp: %d.%03d, power: %d.%03d\n", + FIX32TOPRINT(temp), FIX32TOPRINT(power)); + +#ifdef HACKED_OVERTEMP + if (temp > 0x4a0000) + wf_smu_failure_state |= FAILURE_OVERTEMP; +#else + if (temp > st->pid.param.tmax) + wf_smu_failure_state |= FAILURE_OVERTEMP; +#endif + new_setpoint = wf_cpu_pid_run(&st->pid, power, temp); + + DBG("wf_smu: new_setpoint: %d RPM\n", (int)new_setpoint); + + if (wf_smu_machine == MACHINE_PM81) { + systarget = wf_smu_sys_fans ? wf_smu_sys_fans->pid.target : 0; + systarget = ((((s64)systarget) * (s64)st->scale) >> 12) + + st->offset; + new_setpoint = max(new_setpoint, systarget); + new_setpoint = max(new_setpoint, st->pid.param.min); + new_setpoint = min(new_setpoint, st->pid.param.max); + + DBG("wf_smu: adjusted setpoint: %d RPM\n", (int)new_setpoint); + } + if (st->cpu_setpoint == new_setpoint) + return; + st->cpu_setpoint = new_setpoint; + readjust: + if (fan_cpu_main && wf_smu_failure_state == 0) { + rc = fan_cpu_main->ops->set_value(fan_cpu_main, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU main fan" + " error %d\n", rc); + wf_smu_failure_state |= FAILURE_FAN; + } + } + if (fan_cpu_second && wf_smu_failure_state == 0) { + rc = fan_cpu_second->ops->set_value(fan_cpu_second, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU second fan" + " error %d\n", rc); + wf_smu_failure_state |= FAILURE_FAN; + } + } + if (fan_cpu_third && wf_smu_failure_state == 0) { + rc = fan_cpu_third->ops->set_value(fan_cpu_third, + st->cpu_setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: CPU third fan" + " error %d\n", rc); + wf_smu_failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_drive_fans(void) +{ + struct wf_pid_param param = { + .interval = 5, + .history_len = 2, + .gd = 0x01e00000, + .gp = 0x00500000, + .gr = 0x00000000, + .itarget = 0x00200000, + }; + + /* Alloc & initialize state */ + 
wf_smu_drive_fans = kmalloc(sizeof(struct wf_smu_drive_fans_state), + GFP_KERNEL); + if (wf_smu_drive_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_drive_fans->ticks = 1; + + /* Fill PID params */ + param.additive = (fan_hd->type == WF_CONTROL_RPM_FAN); + param.min = fan_hd->ops->get_min(fan_hd); + param.max = fan_hd->ops->get_max(fan_hd); + wf_pid_init(&wf_smu_drive_fans->pid, &param); + + DBG("wf: Drive Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(param.itarget), param.min, param.max); + return; + + fail: + if (fan_hd) + wf_control_set_max(fan_hd); +} + +static void wf_smu_drive_fans_tick(struct wf_smu_drive_fans_state *st) +{ + s32 new_setpoint, temp; + int rc; + + if (--st->ticks != 0) { + if (wf_smu_readjust) + goto readjust; + return; + } + st->ticks = st->pid.param.interval; + + rc = sensor_hd_temp->ops->get_value(sensor_hd_temp, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: HD temp sensor error %d\n", + rc); + wf_smu_failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: Drive Fans tick ! 
HD temp: %d.%03d\n", + FIX32TOPRINT(temp)); + + if (temp > (st->pid.param.itarget + 0x50000)) + wf_smu_failure_state |= FAILURE_OVERTEMP; + + new_setpoint = wf_pid_run(&st->pid, temp); + + DBG("wf_smu: new_setpoint: %d\n", (int)new_setpoint); + + if (st->setpoint == new_setpoint) + return; + st->setpoint = new_setpoint; + readjust: + if (fan_hd && wf_smu_failure_state == 0) { + rc = fan_hd->ops->set_value(fan_hd, st->setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: HD fan error %d\n", + rc); + wf_smu_failure_state |= FAILURE_FAN; + } + } +} + +static void wf_smu_create_slots_fans(void) +{ + struct wf_pid_param param = { + .interval = 1, + .history_len = 8, + .gd = 0x00000000, + .gp = 0x00000000, + .gr = 0x00020000, + .itarget = 0x00000000 + }; + + /* Alloc & initialize state */ + wf_smu_slots_fans = kmalloc(sizeof(struct wf_smu_slots_fans_state), + GFP_KERNEL); + if (wf_smu_slots_fans == NULL) { + printk(KERN_WARNING "windfarm: Memory allocation error" + " max fan speed\n"); + goto fail; + } + wf_smu_slots_fans->ticks = 1; + + /* Fill PID params */ + param.additive = (fan_slots->type == WF_CONTROL_RPM_FAN); + param.min = fan_slots->ops->get_min(fan_slots); + param.max = fan_slots->ops->get_max(fan_slots); + wf_pid_init(&wf_smu_slots_fans->pid, &param); + + DBG("wf: Slots Fan control initialized.\n"); + DBG(" itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + FIX32TOPRINT(param.itarget), param.min, param.max); + return; + + fail: + if (fan_slots) + wf_control_set_max(fan_slots); +} + +static void wf_smu_slots_fans_tick(struct wf_smu_slots_fans_state *st) +{ + s32 new_setpoint, power; + int rc; + + if (--st->ticks != 0) { + if (wf_smu_readjust) + goto readjust; + return; + } + st->ticks = st->pid.param.interval; + + rc = sensor_slots_power->ops->get_value(sensor_slots_power, &power); + if (rc) { + printk(KERN_WARNING "windfarm: Slots power sensor error %d\n", + rc); + wf_smu_failure_state |= FAILURE_SENSOR; + return; + } + + DBG("wf_smu: Slots Fans tick ! 
Slots power: %d.%03d\n", + FIX32TOPRINT(power)); + +#if 0 /* Check what makes a good overtemp condition */ + if (power > (st->pid.param.itarget + 0x50000)) + wf_smu_failure_state |= FAILURE_OVERTEMP; +#endif + + new_setpoint = wf_pid_run(&st->pid, power); + + DBG("wf_smu: new_setpoint: %d\n", (int)new_setpoint); + + if (st->setpoint == new_setpoint) + return; + st->setpoint = new_setpoint; + readjust: + if (fan_slots && wf_smu_failure_state == 0) { + rc = fan_slots->ops->set_value(fan_slots, st->setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: Slots fan error %d\n", + rc); + wf_smu_failure_state |= FAILURE_FAN; + } + } +} + + +/* + * ****** Attributes ****** + * + */ + +#define BUILD_SHOW_FUNC_FIX(name, data) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + ssize_t r; \ + s32 val = 0; \ + data->ops->get_value(data, &val); \ + r = sprintf(buf, "%d.%03d", FIX32TOPRINT(val)); \ + return r; \ +} \ +static DEVICE_ATTR(name,S_IRUGO,show_##name, NULL); + + +#define BUILD_SHOW_FUNC_INT(name, data) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + s32 val = 0; \ + data->ops->get_value(data, &val); \ + return sprintf(buf, "%d", val); \ +} \ +static DEVICE_ATTR(name,S_IRUGO,show_##name, NULL); + +BUILD_SHOW_FUNC_INT(cpu_fan, fan_cpu_main); +BUILD_SHOW_FUNC_INT(sys_fan, fan_system); +BUILD_SHOW_FUNC_INT(hd_fan, fan_hd); +BUILD_SHOW_FUNC_INT(slots_fan, fan_slots); + +BUILD_SHOW_FUNC_FIX(cpu_temp, sensor_cpu_temp); +BUILD_SHOW_FUNC_FIX(cpu_power, sensor_cpu_power); +BUILD_SHOW_FUNC_FIX(hd_temp, sensor_hd_temp); +BUILD_SHOW_FUNC_FIX(slots_power, sensor_slots_power); + +/* + * ****** Setup / Init / Misc ... 
****** + * + */ + +static void wf_smu_tick(void) +{ + unsigned int last_failure = wf_smu_failure_state; + unsigned int new_failure; + + if (!wf_smu_started) { + DBG("wf: creating control loops !\n"); + if (wf_smu_machine == MACHINE_PM81) { + wf_smu_create_sys_fans(); + wf_smu_create_cpu_fans(); + } else if (wf_smu_machine == MACHINE_PM91) { + wf_smu_create_drive_fans(); + wf_smu_create_slots_fans(); + wf_smu_create_cpu_fans(); + } + wf_smu_started = 1; + } + + /* Skipping ticks */ + if (wf_smu_skipping && --wf_smu_skipping) + return; + + wf_smu_failure_state = 0; + if (wf_smu_sys_fans) + wf_smu_sys_fans_tick(wf_smu_sys_fans); + if (wf_smu_drive_fans) + wf_smu_drive_fans_tick(wf_smu_drive_fans); + if (wf_smu_slots_fans) + wf_smu_slots_fans_tick(wf_smu_slots_fans); + if (wf_smu_cpu_fans) + wf_smu_cpu_fans_tick(wf_smu_cpu_fans); + + wf_smu_readjust = 0; + new_failure = wf_smu_failure_state & ~last_failure; + + /* If entering failure mode, clamp cpufreq and ramp all + * fans to full speed. + */ + if (wf_smu_failure_state && !last_failure) { + if (cpufreq_clamp) + wf_control_set_max(cpufreq_clamp); + if (fan_system) + wf_control_set_max(fan_system); + if (fan_cpu_main) + wf_control_set_max(fan_cpu_main); + if (fan_cpu_second) + wf_control_set_max(fan_cpu_second); + if (fan_cpu_third) + wf_control_set_max(fan_cpu_third); + if (fan_hd) + wf_control_set_max(fan_hd); + if (fan_slots) + wf_control_set_max(fan_slots); + } + + /* If leaving failure mode, unclamp cpufreq and readjust + * all fans on next iteration + */ + if (!wf_smu_failure_state && last_failure) { + if (cpufreq_clamp) + wf_control_set_min(cpufreq_clamp); + wf_smu_readjust = 1; + } + + /* Overtemp condition detected, notify and start skipping a couple + * ticks to let the temperature go down + */ + if (new_failure & FAILURE_OVERTEMP) { + wf_set_overtemp(); + wf_smu_skipping = 2; + } + + /* We only clear the overtemp condition if overtemp is cleared + * _and_ no other failure is present. 
Since a sensor error will + * clear the overtemp condition (can't measure temperature) at + * the control loop levels, but we don't want to keep it clear + * here in this case + */ + if (new_failure == 0 && last_failure & FAILURE_OVERTEMP) + wf_clear_overtemp(); +} + +static void wf_smu_new_control81(struct wf_control *ct) +{ + if (wf_smu_all_controls_ok) + return; + + if (fan_cpu_main == NULL && !strcmp(ct->name, "cpu-fan")) { + if (wf_get_control(ct) == 0) { + fan_cpu_main = ct; + device_create_file(wf_smu_dev, &dev_attr_cpu_fan); + } + } + + if (fan_system == NULL && !strcmp(ct->name, "system-fan")) { + if (wf_get_control(ct) == 0) { + fan_system = ct; + device_create_file(wf_smu_dev, &dev_attr_sys_fan); + } + } + + if (cpufreq_clamp == NULL && !strcmp(ct->name, "cpufreq-clamp")) { + if (wf_get_control(ct) == 0) + cpufreq_clamp = ct; + } + + /* Darwin property list says the HD fan is only for model ID + * 0, 1, 2 and 3 + */ + + if (wf_smu_mach_model > 3) { + if (fan_system && fan_cpu_main && cpufreq_clamp) + wf_smu_all_controls_ok = 1; + return; + } + + if (fan_hd == NULL && !strcmp(ct->name, "drive-bay-fan")) { + if (wf_get_control(ct) == 0) { + fan_hd = ct; + device_create_file(wf_smu_dev, &dev_attr_hd_fan); + } + } + + if (fan_system && fan_hd && fan_cpu_main && cpufreq_clamp) + wf_smu_all_controls_ok = 1; +} + +static void wf_smu_new_control91(struct wf_control *ct) +{ + if (wf_smu_all_controls_ok) + return; + + if (fan_cpu_main == NULL && !strcmp(ct->name, "cpu-rear-fan-0")) { + if (wf_get_control(ct) == 0) { + fan_cpu_main = ct; + device_create_file(wf_smu_dev, &dev_attr_cpu_fan); + } + } + + if (fan_cpu_second == NULL && !strcmp(ct->name, "cpu-rear-fan-1")) { + if (wf_get_control(ct) == 0) + fan_cpu_second = ct; + } + + if (fan_cpu_third == NULL && !strcmp(ct->name, "cpu-front-fan-0")) { + if (wf_get_control(ct) == 0) + fan_cpu_third = ct; + } + + if (cpufreq_clamp == NULL && !strcmp(ct->name, "cpufreq-clamp")) { + if (wf_get_control(ct) == 0) + 
cpufreq_clamp = ct; + } + + if (fan_hd == NULL && !strcmp(ct->name, "drive-bay-fan")) { + if (wf_get_control(ct) == 0) { + fan_hd = ct; + device_create_file(wf_smu_dev, &dev_attr_hd_fan); + } + } + + if (fan_slots == NULL && !strcmp(ct->name, "slots-fan")) { + if (wf_get_control(ct) == 0) { + fan_slots = ct; + device_create_file(wf_smu_dev, &dev_attr_slots_fan); + } + } + + if (fan_cpu_main && (fan_cpu_second || fan_cpu_third) && fan_hd && + fan_slots && cpufreq_clamp) + wf_smu_all_controls_ok = 1; +} + +static void wf_smu_new_sensor(struct wf_sensor *sr) +{ + if (wf_smu_all_sensors_ok) + return; + + if (sensor_cpu_power == NULL && !strcmp(sr->name, "cpu-power")) { + if (wf_get_sensor(sr) == 0) { + sensor_cpu_power = sr; + device_create_file(wf_smu_dev, &dev_attr_cpu_power); + } + } + + if (sensor_cpu_temp == NULL && !strcmp(sr->name, "cpu-temp")) { + if (wf_get_sensor(sr) == 0) { + sensor_cpu_temp = sr; + device_create_file(wf_smu_dev, &dev_attr_cpu_temp); + } + } + + if (sensor_hd_temp == NULL && !strcmp(sr->name, "hd-temp")) { + if (wf_get_sensor(sr) == 0) { + sensor_hd_temp = sr; + device_create_file(wf_smu_dev, &dev_attr_hd_temp); + } + } + + if (sensor_slots_power == NULL && !strcmp(sr->name, "slots-power")) { + if (wf_get_sensor(sr) == 0) { + sensor_slots_power = sr; + device_create_file(wf_smu_dev, &dev_attr_slots_power); + } + } + + if (wf_smu_machine == MACHINE_PM81 && sensor_cpu_power && + sensor_cpu_temp && sensor_hd_temp) + wf_smu_all_sensors_ok = 1; + + if (wf_smu_machine == MACHINE_PM91 && sensor_cpu_power && + sensor_cpu_temp && sensor_hd_temp && sensor_slots_power) + wf_smu_all_sensors_ok = 1; +} + + +static int wf_smu_notify(struct notifier_block *self, + unsigned long event, void *data) +{ + switch(event) { + case WF_EVENT_NEW_CONTROL: + DBG("wf: new control %s detected\n", + ((struct wf_control *)data)->name); + if (wf_smu_machine == MACHINE_PM81) + wf_smu_new_control81(data); + else + wf_smu_new_control91(data); + wf_smu_readjust = 1; + break; 
+ case WF_EVENT_NEW_SENSOR: + DBG("wf: new sensor %s detected\n", + ((struct wf_sensor *)data)->name); + wf_smu_new_sensor(data); + break; + case WF_EVENT_TICK: + if (wf_smu_all_controls_ok && wf_smu_all_sensors_ok) + wf_smu_tick(); + } + + return 0; +} + +static struct notifier_block wf_smu_events = { + .notifier_call = wf_smu_notify, +}; + +static int wf_init_pm81(void) +{ + struct smu_sdbp_header *hdr; + + wf_smu_machine = MACHINE_PM81; + + hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); + if (hdr != 0) { + struct smu_sdbp_sensortree *st = + (struct smu_sdbp_sensortree *)&hdr[1]; + wf_smu_mach_model = st->model_id; + } + + printk(KERN_INFO "windfarm: Initializing for iMacG5 model ID %d\n", + wf_smu_mach_model); + + return 0; +} + +static int wf_init_pm91(void) +{ + wf_smu_machine = MACHINE_PM91; + + printk(KERN_INFO "windfarm: Initializing for Desktop G5 model\n"); + + return 0; +} + +static int wf_smu_probe(struct device *ddev) +{ + wf_smu_dev = ddev; + + wf_register_client(&wf_smu_events); + + return 0; +} + +static int wf_smu_remove(struct device *ddev) +{ + wf_unregister_client(&wf_smu_events); + + /* XXX We don't have yet a guarantee that our callback isn't + * in progress when returning from wf_unregister_client, so + * we add an arbitrary delay. I'll have to fix that in the core + */ + msleep(1000); + + /* Release all sensors */ + /* One more crappy race: I don't think we have any guarantee here + * that the attribute callback won't race with the sensor beeing + * disposed of, and I'm not 100% certain what best way to deal + * with that except by adding locks all over... I'll do that + * eventually but heh, who ever rmmod this module anyway ? 
+ */ + if (sensor_cpu_power) { + device_remove_file(wf_smu_dev, &dev_attr_cpu_power); + wf_put_sensor(sensor_cpu_power); + } + if (sensor_cpu_temp) { + device_remove_file(wf_smu_dev, &dev_attr_cpu_temp); + wf_put_sensor(sensor_cpu_temp); + } + if (sensor_hd_temp) { + device_remove_file(wf_smu_dev, &dev_attr_hd_temp); + wf_put_sensor(sensor_hd_temp); + } + if (sensor_slots_power) { + device_remove_file(wf_smu_dev, &dev_attr_slots_power); + wf_put_sensor(sensor_slots_power); + } + + /* Release all controls */ + if (fan_cpu_main) { + device_remove_file(wf_smu_dev, &dev_attr_cpu_fan); + wf_put_control(fan_cpu_main); + } + if (fan_cpu_second) + wf_put_control(fan_cpu_second); + if (fan_cpu_third) + wf_put_control(fan_cpu_third); + if (fan_hd) { + device_remove_file(wf_smu_dev, &dev_attr_hd_fan); + wf_put_control(fan_hd); + } + if (fan_system) { + device_remove_file(wf_smu_dev, &dev_attr_sys_fan); + wf_put_control(fan_system); + } + if (fan_slots) { + device_remove_file(wf_smu_dev, &dev_attr_slots_fan); + wf_put_control(fan_slots); + } + if (cpufreq_clamp) + wf_put_control(cpufreq_clamp); + + /* Destroy control loops state structures (each loop owns its own allocation) */ + if (wf_smu_sys_fans) + kfree(wf_smu_sys_fans); + if (wf_smu_slots_fans) + kfree(wf_smu_slots_fans); + if (wf_smu_drive_fans) + kfree(wf_smu_drive_fans); + if (wf_smu_cpu_fans) + kfree(wf_smu_cpu_fans); + + wf_smu_dev = NULL; + + return 0; +} + +static struct device_driver wf_smu_driver = { + .name = "windfarm", + .bus = &platform_bus_type, + .probe = wf_smu_probe, + .remove = wf_smu_remove, +}; + + +static int __init wf_smu_init(void) +{ + int rc = -ENODEV; + + if (machine_is_compatible("PowerMac8,1") || + machine_is_compatible("PowerMac8,2")) + rc = wf_init_pm81(); + else if (machine_is_compatible("PowerMac9,1")) + rc = wf_init_pm91(); + + if (rc == 0) { +#ifdef MODULE + request_module("windfarm_smu_controls"); + request_module("windfarm_smu_sensors"); + request_module("windfarm_lm75_sensor"); + +#endif /* MODULE */ + 
driver_register(&wf_smu_driver); + } + + return rc; +} + +static void __exit wf_smu_exit(void) +{ + + driver_unregister(&wf_smu_driver); +} + + +module_init(wf_smu_init); +module_exit(wf_smu_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("Thermal control logic for SMU based PowerMacs"); +MODULE_LICENSE("GPL"); + diff -puN /dev/null drivers/macintosh/windfarm_smu_controls.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm_smu_controls.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,274 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.3" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) do { } while(0) +#endif + +/* + * SMU fans control object + */ + +static LIST_HEAD(smu_fans); + +struct smu_fan_control { + struct list_head link; + int fan_type; /* 0 = rpm, 1 = pwm */ + u32 reg; /* index in SMU */ + s32 value; /* current value */ + s32 min, max; /* min/max values */ + struct wf_control ctrl; +}; +#define to_smu_fan(c) container_of(c, struct smu_fan_control, ctrl) + +static int smu_set_fan(int pwm, u8 id, u16 value) +{ + struct smu_cmd cmd; + u8 buffer[16]; + DECLARE_COMPLETION(comp); + int rc; + + /* Fill SMU command structure */ + cmd.cmd = SMU_CMD_FAN_COMMAND; + cmd.data_len = 14; + cmd.reply_len = 16; + cmd.data_buf = cmd.reply_buf = buffer; + cmd.status = 0; + cmd.done = smu_done_complete; + cmd.misc = &comp; + + /* Fill argument buffer */ + memset(buffer, 0, 16); + buffer[0] = pwm ? 
0x10 : 0x00; + buffer[1] = 0x01 << id; + *((u16 *)&buffer[2 + id * 2]) = value; + + rc = smu_queue_cmd(&cmd); + if (rc) + return rc; + wait_for_completion(&comp); + return cmd.status; +} + +static void smu_fan_release(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + + kfree(fct); +} + +static int smu_fan_set(struct wf_control *ct, s32 value) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + + if (value < fct->min) + value = fct->min; + if (value > fct->max) + value = fct->max; + fct->value = value; + + return smu_set_fan(fct->fan_type, fct->reg, value); +} + +static int smu_fan_get(struct wf_control *ct, s32 *value) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + *value = fct->value; /* todo: read from SMU */ + return 0; +} + +static s32 smu_fan_min(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + return fct->min; +} + +static s32 smu_fan_max(struct wf_control *ct) +{ + struct smu_fan_control *fct = to_smu_fan(ct); + return fct->max; +} + +static struct wf_control_ops smu_fan_ops = { + .set_value = smu_fan_set, + .get_value = smu_fan_get, + .get_min = smu_fan_min, + .get_max = smu_fan_max, + .release = smu_fan_release, + .owner = THIS_MODULE, +}; + +static struct smu_fan_control *smu_fan_create(struct device_node *node, + int pwm_fan) +{ + struct smu_fan_control *fct; + s32 *v; u32 *reg; + char *l; + + fct = kmalloc(sizeof(struct smu_fan_control), GFP_KERNEL); + if (fct == NULL) + return NULL; + fct->ctrl.ops = &smu_fan_ops; + l = (char *)get_property(node, "location", NULL); + if (l == NULL) + goto fail; + + fct->fan_type = pwm_fan; + fct->ctrl.type = pwm_fan ? WF_CONTROL_PWM_FAN : WF_CONTROL_RPM_FAN; + + /* We use the name & location here the same way we do for SMU sensors, + * see the comment in windfarm_smu_sensors.c. The locations are a bit + * less consistent here between the iMac and the desktop models, but + * that is good enough for our needs for now at least. 
+ * + * One problem though is that Apple seem to be inconsistent with case + * and the kernel doesn't have strcasecmp =P + */ + + fct->ctrl.name = NULL; + + /* Names used on desktop models */ + if (!strcmp(l, "Rear Fan 0") || !strcmp(l, "Rear Fan") || + !strcmp(l, "Rear fan 0") || !strcmp(l, "Rear fan")) + fct->ctrl.name = "cpu-rear-fan-0"; + else if (!strcmp(l, "Rear Fan 1") || !strcmp(l, "Rear fan 1")) + fct->ctrl.name = "cpu-rear-fan-1"; + else if (!strcmp(l, "Front Fan 0") || !strcmp(l, "Front Fan") || + !strcmp(l, "Front fan 0") || !strcmp(l, "Front fan")) + fct->ctrl.name = "cpu-front-fan-0"; + else if (!strcmp(l, "Front Fan 1") || !strcmp(l, "Front fan 1")) + fct->ctrl.name = "cpu-front-fan-1"; + else if (!strcmp(l, "Slots Fan") || !strcmp(l, "Slots fan")) + fct->ctrl.name = "slots-fan"; + else if (!strcmp(l, "Drive Bay") || !strcmp(l, "Drive bay")) + fct->ctrl.name = "drive-bay-fan"; + + /* Names used on iMac models */ + if (!strcmp(l, "System Fan") || !strcmp(l, "System fan")) + fct->ctrl.name = "system-fan"; + else if (!strcmp(l, "CPU Fan") || !strcmp(l, "CPU fan")) + fct->ctrl.name = "cpu-fan"; + else if (!strcmp(l, "Hard Drive") || !strcmp(l, "Hard drive")) + fct->ctrl.name = "drive-bay-fan"; + + /* Unrecognized fan, bail out */ + if (fct->ctrl.name == NULL) + goto fail; + + /* Get min & max values*/ + v = (s32 *)get_property(node, "min-value", NULL); + if (v == NULL) + goto fail; + fct->min = *v; + v = (s32 *)get_property(node, "max-value", NULL); + if (v == NULL) + goto fail; + fct->max = *v; + + /* Get "reg" value */ + reg = (u32 *)get_property(node, "reg", NULL); + if (reg == NULL) + goto fail; + fct->reg = *reg; + + if (wf_register_control(&fct->ctrl)) + goto fail; + + return fct; + fail: + kfree(fct); + return NULL; +} + + +static int __init smu_controls_init(void) +{ + struct device_node *smu, *fans, *fan; + + if (!smu_present()) + return -ENODEV; + + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return -ENODEV; + + /* Look for 
RPM fans */ + for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + if (!strcmp(fans->name, "rpm-fans")) + break; + for (fan = NULL; + fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + struct smu_fan_control *fct; + + fct = smu_fan_create(fan, 0); + if (fct == NULL) { + printk(KERN_WARNING "windfarm: Failed to create SMU " + "RPM fan %s\n", fan->name); + continue; + } + list_add(&fct->link, &smu_fans); + } + of_node_put(fans); + + + /* Look for PWM fans */ + for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + if (!strcmp(fans->name, "pwm-fans")) + break; + for (fan = NULL; + fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + struct smu_fan_control *fct; + + fct = smu_fan_create(fan, 1); + if (fct == NULL) { + printk(KERN_WARNING "windfarm: Failed to create SMU " + "PWM fan %s\n", fan->name); + continue; + } + list_add(&fct->link, &smu_fans); + } + of_node_put(fans); + of_node_put(smu); + + return 0; +} + +static void __exit smu_controls_exit(void) +{ + struct smu_fan_control *fct; + + while (!list_empty(&smu_fans)) { + fct = list_entry(smu_fans.next, struct smu_fan_control, link); + list_del(&fct->link); + wf_unregister_control(&fct->ctrl); + } +} + + +module_init(smu_controls_init); +module_exit(smu_controls_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("SMU control objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + diff -puN /dev/null drivers/macintosh/windfarm_smu_sensors.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/macintosh/windfarm_smu_sensors.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,471 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windfarm.h" + +#define VERSION "0.2" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(args...) printk(args) +#else +#define DBG(args...) 
do { } while(0) +#endif + +/* + * Various SMU "partitions" calibration objects for which we + * keep pointers here for use by bits & pieces of the driver + */ +static struct smu_sdbp_cpuvcp *cpuvcp; +static int cpuvcp_version; +static struct smu_sdbp_cpudiode *cpudiode; +static struct smu_sdbp_slotspow *slotspow; +static u8 *debugswitches; + +/* + * SMU basic sensors objects + */ + +static LIST_HEAD(smu_ads); + +struct smu_ad_sensor { + struct list_head link; + u32 reg; /* index in SMU */ + struct wf_sensor sens; +}; +#define to_smu_ads(c) container_of(c, struct smu_ad_sensor, sens) + +static void smu_ads_release(struct wf_sensor *sr) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + + kfree(ads); +} + +static int smu_read_adc(u8 id, s32 *value) +{ + struct smu_simple_cmd cmd; + DECLARE_COMPLETION(comp); + int rc; + + rc = smu_queue_simple(&cmd, SMU_CMD_READ_ADC, 1, + smu_done_complete, &comp, id); + if (rc) + return rc; + wait_for_completion(&comp); + if (cmd.cmd.status != 0) + return cmd.cmd.status; + if (cmd.cmd.reply_len != 2) { + printk(KERN_ERR "winfarm: read ADC 0x%x returned %d bytes !\n", + id, cmd.cmd.reply_len); + return -EIO; + } + *value = *((u16 *)cmd.buffer); + return 0; +} + +static int smu_cputemp_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + int rc; + s32 val; + s64 scaled; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU temp failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s64)(((u64)val) * (u64)cpudiode->m_value); + scaled >>= 3; + scaled += ((s64)cpudiode->b_value) << 9; + *value = (s32)(scaled << 1); + + return 0; +} + +static int smu_cpuamp_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU current failed, err %d\n", + rc); + return rc; 
+ } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)cpuvcp->curr_scale); + scaled += (s32)cpuvcp->curr_offset; + *value = scaled << 4; + + return 0; +} + +static int smu_cpuvolt_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read CPU voltage failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)cpuvcp->volt_scale); + scaled += (s32)cpuvcp->volt_offset; + *value = scaled << 4; + + return 0; +} + +static int smu_slotspow_get(struct wf_sensor *sr, s32 *value) +{ + struct smu_ad_sensor *ads = to_smu_ads(sr); + s32 val, scaled; + int rc; + + rc = smu_read_adc(ads->reg, &val); + if (rc) { + printk(KERN_ERR "windfarm: read slots power failed, err %d\n", + rc); + return rc; + } + + /* Ok, we have to scale & adjust, taking units into account */ + scaled = (s32)(val * (u32)slotspow->pow_scale); + scaled += (s32)slotspow->pow_offset; + *value = scaled << 4; + + return 0; +} + + +static struct wf_sensor_ops smu_cputemp_ops = { + .get_value = smu_cputemp_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_cpuamp_ops = { + .get_value = smu_cpuamp_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_cpuvolt_ops = { + .get_value = smu_cpuvolt_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; +static struct wf_sensor_ops smu_slotspow_ops = { + .get_value = smu_slotspow_get, + .release = smu_ads_release, + .owner = THIS_MODULE, +}; + + +static struct smu_ad_sensor *smu_ads_create(struct device_node *node) +{ + struct smu_ad_sensor *ads; + char *c, *l; + u32 *v; + + ads = kmalloc(sizeof(struct smu_ad_sensor), GFP_KERNEL); + if (ads == NULL) + return NULL; + c = (char *)get_property(node, "device_type", NULL); + 
l = (char *)get_property(node, "location", NULL); + if (c == NULL || l == NULL) + goto fail; + + /* We currently pick the sensors based on the OF name and location + * properties, while Darwin uses the sensor-id's. + * The problem with the IDs is that they are model specific while it + * looks like apple has been doing a reasonably good job at keeping + * the names and locations consistents so I'll stick with the names + * and locations for now. + */ + if (!strcmp(c, "temp-sensor") && + !strcmp(l, "CPU T-Diode")) { + ads->sens.ops = &smu_cputemp_ops; + ads->sens.name = "cpu-temp"; + } else if (!strcmp(c, "current-sensor") && + !strcmp(l, "CPU Current")) { + ads->sens.ops = &smu_cpuamp_ops; + ads->sens.name = "cpu-current"; + } else if (!strcmp(c, "voltage-sensor") && + !strcmp(l, "CPU Voltage")) { + ads->sens.ops = &smu_cpuvolt_ops; + ads->sens.name = "cpu-voltage"; + } else if (!strcmp(c, "power-sensor") && + !strcmp(l, "Slots Power")) { + ads->sens.ops = &smu_slotspow_ops; + ads->sens.name = "slots-power"; + if (slotspow == NULL) { + DBG("wf: slotspow partition (%02x) not found\n", + SMU_SDB_SLOTSPOW_ID); + goto fail; + } + } else + goto fail; + + v = (u32 *)get_property(node, "reg", NULL); + if (v == NULL) + goto fail; + ads->reg = *v; + + if (wf_register_sensor(&ads->sens)) + goto fail; + return ads; + fail: + kfree(ads); + return NULL; +} + +/* + * SMU Power combo sensor object + */ + +struct smu_cpu_power_sensor { + struct list_head link; + struct wf_sensor *volts; + struct wf_sensor *amps; + int fake_volts : 1; + int quadratic : 1; + struct wf_sensor sens; +}; +#define to_smu_cpu_power(c) container_of(c, struct smu_cpu_power_sensor, sens) + +static struct smu_cpu_power_sensor *smu_cpu_power; + +static void smu_cpu_power_release(struct wf_sensor *sr) +{ + struct smu_cpu_power_sensor *pow = to_smu_cpu_power(sr); + + if (pow->volts) + wf_put_sensor(pow->volts); + if (pow->amps) + wf_put_sensor(pow->amps); + kfree(pow); +} + +static int smu_cpu_power_get(struct 
wf_sensor *sr, s32 *value) +{ + struct smu_cpu_power_sensor *pow = to_smu_cpu_power(sr); + s32 volts, amps, power; + u64 tmps, tmpa, tmpb; + int rc; + + rc = pow->amps->ops->get_value(pow->amps, &amps); + if (rc) + return rc; + + if (pow->fake_volts) { + *value = amps * 12 - 0x30000; + return 0; + } + + rc = pow->volts->ops->get_value(pow->volts, &volts); + if (rc) + return rc; + + power = (s32)((((u64)volts) * ((u64)amps)) >> 16); + if (!pow->quadratic) { + *value = power; + return 0; + } + tmps = (((u64)power) * ((u64)power)) >> 16; + tmpa = ((u64)cpuvcp->power_quads[0]) * tmps; + tmpb = ((u64)cpuvcp->power_quads[1]) * ((u64)power); + *value = (tmpa >> 28) + (tmpb >> 28) + (cpuvcp->power_quads[2] >> 12); + + return 0; +} + +static struct wf_sensor_ops smu_cpu_power_ops = { + .get_value = smu_cpu_power_get, + .release = smu_cpu_power_release, + .owner = THIS_MODULE, +}; + + +static struct smu_cpu_power_sensor * +smu_cpu_power_create(struct wf_sensor *volts, struct wf_sensor *amps) +{ + struct smu_cpu_power_sensor *pow; + + pow = kmalloc(sizeof(struct smu_cpu_power_sensor), GFP_KERNEL); + if (pow == NULL) + return NULL; + pow->sens.ops = &smu_cpu_power_ops; + pow->sens.name = "cpu-power"; + + wf_get_sensor(volts); + pow->volts = volts; + wf_get_sensor(amps); + pow->amps = amps; + + /* Some early machines need a faked voltage */ + if (debugswitches && ((*debugswitches) & 0x80)) { + printk(KERN_INFO "windfarm: CPU Power sensor using faked" + " voltage !\n"); + pow->fake_volts = 1; + } else + pow->fake_volts = 0; + + /* Try to use quadratic transforms on PowerMac8,1 and 9,1 for now, + * I yet have to figure out what's up with 8,2 and will have to + * adjust for later, unless we can 100% trust the SDB partition...
+ */ + if ((machine_is_compatible("PowerMac8,1") || + machine_is_compatible("PowerMac8,2") || + machine_is_compatible("PowerMac9,1")) && + cpuvcp_version >= 2) { + pow->quadratic = 1; + DBG("windfarm: CPU Power using quadratic transform\n"); + } else + pow->quadratic = 0; + + if (wf_register_sensor(&pow->sens)) + goto fail; + return pow; + fail: + kfree(pow); + return NULL; +} + +static int smu_fetch_param_partitions(void) +{ + struct smu_sdbp_header *hdr; + + /* Get CPU voltage/current/power calibration data */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUVCP_ID, NULL); + if (hdr == NULL) { + DBG("wf: cpuvcp partition (%02x) not found\n", + SMU_SDB_CPUVCP_ID); + return -ENODEV; + } + cpuvcp = (struct smu_sdbp_cpuvcp *)&hdr[1]; + /* Keep version around */ + cpuvcp_version = hdr->version; + + /* Get CPU diode calibration data */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUDIODE_ID, NULL); + if (hdr == NULL) { + DBG("wf: cpudiode partition (%02x) not found\n", + SMU_SDB_CPUDIODE_ID); + return -ENODEV; + } + cpudiode = (struct smu_sdbp_cpudiode *)&hdr[1]; + + /* Get slots power calibration data if any */ + hdr = smu_get_sdb_partition(SMU_SDB_SLOTSPOW_ID, NULL); + if (hdr != NULL) + slotspow = (struct smu_sdbp_slotspow *)&hdr[1]; + + /* Get debug switches if any */ + hdr = smu_get_sdb_partition(SMU_SDB_DEBUG_SWITCHES_ID, NULL); + if (hdr != NULL) + debugswitches = (u8 *)&hdr[1]; + + return 0; +} + +static int __init smu_sensors_init(void) +{ + struct device_node *smu, *sensors, *s; + struct smu_ad_sensor *volt_sensor = NULL, *curr_sensor = NULL; + int rc; + + if (!smu_present()) + return -ENODEV; + + /* Get parameters partitions */ + rc = smu_fetch_param_partitions(); + if (rc) + return rc; + + smu = of_find_node_by_type(NULL, "smu"); + if (smu == NULL) + return -ENODEV; + + /* Look for sensors subdir */ + for (sensors = NULL; + (sensors = of_get_next_child(smu, sensors)) != NULL;) + if (!strcmp(sensors->name, "sensors")) + break; + + of_node_put(smu); + + /* Create basic 
sensors */ + for (s = NULL; + sensors && (s = of_get_next_child(sensors, s)) != NULL;) { + struct smu_ad_sensor *ads; + + ads = smu_ads_create(s); + if (ads == NULL) + continue; + list_add(&ads->link, &smu_ads); + /* keep track of cpu voltage & current */ + if (!strcmp(ads->sens.name, "cpu-voltage")) + volt_sensor = ads; + else if (!strcmp(ads->sens.name, "cpu-current")) + curr_sensor = ads; + } + + of_node_put(sensors); + + /* Create CPU power sensor if possible */ + if (volt_sensor && curr_sensor) + smu_cpu_power = smu_cpu_power_create(&volt_sensor->sens, + &curr_sensor->sens); + + return 0; +} + +static void __exit smu_sensors_exit(void) +{ + struct smu_ad_sensor *ads; + + /* dispose of power sensor */ + if (smu_cpu_power) + wf_unregister_sensor(&smu_cpu_power->sens); + + /* dispose of basic sensors */ + while (!list_empty(&smu_ads)) { + ads = list_entry(smu_ads.next, struct smu_ad_sensor, link); + list_del(&ads->link); + wf_unregister_sensor(&ads->sens); + } +} + + +module_init(smu_sensors_init); +module_exit(smu_sensors_exit); + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("SMU sensor objects for PowerMacs thermal control"); +MODULE_LICENSE("GPL"); + diff -puN include/asm-ppc64/smu.h~ppc64-thermal-control-for-smu-based-machines include/asm-ppc64/smu.h --- devel/include/asm-ppc64/smu.h~ppc64-thermal-control-for-smu-based-machines 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/include/asm-ppc64/smu.h 2005-10-28 17:44:03.000000000 -0700 @@ -41,8 +41,30 @@ /* * Fan control * - * This is a "mux" for fan control commands, first byte is the - * "sub" command. + * This is a "mux" for fan control commands. The command seem to + * act differently based on the number of arguments. With 1 byte + * of argument, this seem to be queries for fans status, setpoint, + * etc..., while with 0xe arguments, we will set the fans speeds. 
+ * + * Queries (1 byte arg): + * --------------------- + * + * arg=0x01: read RPM fans status + * arg=0x02: read RPM fans setpoint + * arg=0x11: read PWM fans status + * arg=0x12: read PWM fans setpoint + * + * the "status" queries return the current speed while the "setpoint" ones + * return the programmed/target speed. It _seems_ that the result is a bit + * mask in the first byte of active/available fans, followed by 6 words (16 + * bits) containing the requested speed. + * + * Setpoint (14 bytes arg): + * ------------------------ + * + * first arg byte is 0 for RPM fans and 0x10 for PWM. Second arg byte is the + * mask of fans affected by the command. Followed by 6 words containing the + * setpoint value for selected fans in the mask (or 0 if mask value is 0) */ #define SMU_CMD_FAN_COMMAND 0x4a @@ -169,7 +191,16 @@ #define SMU_CMD_POWER_SHUTDOWN "SHUTDOWN" #define SMU_CMD_POWER_VOLTAGE_SLEW "VSLEW" -/* Misc commands +/* + * Read ADC sensors + * + * This command takes one byte of parameter: the sensor ID (or "reg" + * value in the device-tree) and returns a 16 bits value + */ +#define SMU_CMD_READ_ADC 0xd8 + +/* + * Misc commands * * This command seem to be a grab bag of various things */ @@ -386,10 +417,12 @@ struct smu_sdbp_header { }; /* - * 32 bits integers are usually encoded with 2x16 bits swapped, - * this demangles them + * demangle 16 and 32 bits integer in some SMU partitions + * (currently, afaik, this concerns only the FVT partition + * (0x12) */ -//#define SMU_U32_MIX(x) ((((x) << 16) & 0xffff0000u) | (((x) >> 16) & 0xffffu)) +#define SMU_U16_MIX(x) le16_to_cpu(x); +#define SMU_U32_MIX(x) ((((x) & 0xff00ff00u) >> 8)|(((x) & 0x00ff00ffu) << 8)) /* This is the definition of the SMU sdb-partition-0x12 table (called * CPU F/V/T operating points in Darwin). The definition for all those @@ -399,7 +432,8 @@ struct smu_sdbp_header { struct smu_sdbp_fvt { __u32 sysclk; /* Base SysClk frequency in Hz for - * this operating point + * this operating point. 
Value need to + * be unmixed with SMU_U32_MIX() */ __u8 pad; __u8 maxtemp; /* Max temp. supported by this @@ -408,10 +442,69 @@ struct smu_sdbp_fvt { __u16 volts[3]; /* CPU core voltage for the 3 * PowerTune modes, a mode with - * 0V = not supported. + * 0V = not supported. Value need + * to be unmixed with SMU_U16_MIX() */ }; +/* This partition contains voltage & current sensor calibration + * informations + */ +#define SMU_SDB_CPUVCP_ID 0x21 + +struct smu_sdbp_cpuvcp { + __u16 volt_scale; /* u4.12 fixed point */ + __s16 volt_offset; /* s4.12 fixed point */ + __u16 curr_scale; /* u4.12 fixed point */ + __s16 curr_offset; /* s4.12 fixed point */ + __s32 power_quads[3]; /* s4.28 fixed point */ +}; + +/* This partition contains CPU thermal diode calibration + */ +#define SMU_SDB_CPUDIODE_ID 0x18 + +struct smu_sdbp_cpudiode { + __u16 m_value; /* u1.15 fixed point */ + __s16 b_value; /* s10.6 fixed point */ + +}; + +/* This partition contains Slots power calibration + */ +#define SMU_SDB_SLOTSPOW_ID 0x78 + +struct smu_sdbp_slotspow { + __u16 pow_scale; /* u4.12 fixed point */ + __s16 pow_offset; /* s4.12 fixed point */ +}; + +/* This partition contains machine specific version information about + * the sensor/control layout + */ +#define SMU_SDB_SENSORTREE_ID 0x25 + +struct smu_sdbp_sensortree { + u8 model_id; + u8 unknown[3]; +}; + +/* This partition contains CPU thermal control PID informations. 
So far + * only single CPU machines have been seen with an SMU, so we assume this + * carries only informations for those + */ +#define SMU_SDB_CPUPIDDATA_ID 0x17 + +struct smu_sdbp_cpupiddata { + u8 unknown1; + u8 target_temp_delta; + u8 unknown2; + u8 history_len; + s16 power_adj; + u16 max_power; + s32 gp,gr,gd; +}; + /* Other partitions without known structures */ #define SMU_SDB_DEBUG_SWITCHES_ID 0x05 _ From akpm at osdl.org Sat Oct 29 10:46:58 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:58 -0700 Subject: [patch 43/43] ppc64 memory model depends on NUMA Message-ID: <200510290047.j9T0lT8B030132@shell0.pdx.osdl.net> From: Andy Whitcroft Currently we first select the memory model (FLAT, DISCONTIG, SPARSE) and then select whether the machine is NUMA. However NUMA systems may not be FLAT. This constraint is not honoured and we may configure a NUMA/FLAT system. Reorder the configuration such that we choose NUMA first which allows us to only list the memory models which are valid. We now default NUMA for known NUMA systems. Note that this new order also matches that used in x86. Signed-off-by: Andy Whitcroft Signed-off-by: Joel Schopp Signed-off-by: Andrew Morton --- arch/ppc64/Kconfig | 11 ++++------- 1 files changed, 4 insertions(+), 7 deletions(-) diff -puN arch/ppc64/Kconfig~ppc64-memory-model-depends-on-numa arch/ppc64/Kconfig --- devel/arch/ppc64/Kconfig~ppc64-memory-model-depends-on-numa 2005-10-28 17:44:05.000000000 -0700 +++ devel-akpm/arch/ppc64/Kconfig 2005-10-28 17:44:05.000000000 -0700 @@ -245,6 +245,10 @@ config HMT This option enables hardware multithreading on RS64 cpus. pSeries systems p620 and p660 have such a cpu type.
+config NUMA + bool "NUMA support" + default y if SMP && PPC_PSERIES + config ARCH_SELECT_MEMORY_MODEL def_bool y @@ -260,9 +264,6 @@ config ARCH_DISCONTIGMEM_DEFAULT def_bool y depends on ARCH_DISCONTIGMEM_ENABLE -config ARCH_FLATMEM_ENABLE - def_bool y - config ARCH_SPARSEMEM_ENABLE def_bool y depends on ARCH_DISCONTIGMEM_ENABLE @@ -285,10 +286,6 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES -config NUMA - bool "NUMA support" - default y if DISCONTIGMEM || SPARSEMEM - config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SMP _ From akpm at osdl.org Sat Oct 29 10:46:41 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:41 -0700 Subject: [patch 27/43] ppc64 boot: move gunzip function before use Message-ID: <200510290047.j9T0lCb5030078@shell0.pdx.osdl.net> From: Olaf Hering Move the gunzip function up. Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/main.c | 115 ++++++++++++++++++++++++------------------------- 1 files changed, 57 insertions(+), 58 deletions(-) diff -puN arch/ppc64/boot/main.c~ppc64-boot-move-gunzip-function-before-use arch/ppc64/boot/main.c --- devel/arch/ppc64/boot/main.c~ppc64-boot-move-gunzip-function-before-use 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/main.c 2005-10-28 17:45:22.000000000 -0700 @@ -17,7 +17,6 @@ #include "prom.h" #include "zlib.h" -static void gunzip(void *, int, unsigned char *, int *); extern void flush_cache(void *, unsigned long); @@ -56,6 +55,63 @@ typedef void (*kernel_entry_t)( unsigned static unsigned long claim_base; +#define HEAD_CRC 2 +#define EXTRA_FIELD 4 +#define ORIG_NAME 8 +#define COMMENT 0x10 +#define RESERVED 0xe0 + +static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +{ + z_stream s; + int r, i, flags; + + /* skip header */ + i = 10; + flags = src[3]; + if (src[2] != Z_DEFLATED || (flags & 
RESERVED) != 0) { + printf("bad gzipped data\n\r"); + exit(); + } + if ((flags & EXTRA_FIELD) != 0) + i = 12 + src[10] + (src[11] << 8); + if ((flags & ORIG_NAME) != 0) + while (src[i++] != 0) + ; + if ((flags & COMMENT) != 0) + while (src[i++] != 0) + ; + if ((flags & HEAD_CRC) != 0) + i += 2; + if (i >= *lenp) { + printf("gunzip: ran out of data in header\n\r"); + exit(); + } + + if (zlib_inflate_workspacesize() > sizeof(scratch)) { + printf("gunzip needs more mem\n"); + exit(); + } + memset(&s, 0, sizeof(s)); + s.workspace = scratch; + r = zlib_inflateInit2(&s, -MAX_WBITS); + if (r != Z_OK) { + printf("inflateInit2 returned %d\n\r", r); + exit(); + } + s.next_in = src + i; + s.avail_in = *lenp - i; + s.next_out = dst; + s.avail_out = dstlen; + r = zlib_inflate(&s, Z_FULL_FLUSH); + if (r != Z_OK && r != Z_STREAM_END) { + printf("inflate returned %d msg: %s\n\r", r, s.msg); + exit(); + } + *lenp = s.next_out - (unsigned char *) dst; + zlib_inflateEnd(&s); +} + static unsigned long try_claim(unsigned long size) { unsigned long addr = 0; @@ -213,60 +269,3 @@ void start(unsigned long a1, unsigned lo exit(); } -#define HEAD_CRC 2 -#define EXTRA_FIELD 4 -#define ORIG_NAME 8 -#define COMMENT 0x10 -#define RESERVED 0xe0 - -static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) -{ - z_stream s; - int r, i, flags; - - /* skip header */ - i = 10; - flags = src[3]; - if (src[2] != Z_DEFLATED || (flags & RESERVED) != 0) { - printf("bad gzipped data\n\r"); - exit(); - } - if ((flags & EXTRA_FIELD) != 0) - i = 12 + src[10] + (src[11] << 8); - if ((flags & ORIG_NAME) != 0) - while (src[i++] != 0) - ; - if ((flags & COMMENT) != 0) - while (src[i++] != 0) - ; - if ((flags & HEAD_CRC) != 0) - i += 2; - if (i >= *lenp) { - printf("gunzip: ran out of data in header\n\r"); - exit(); - } - - if (zlib_inflate_workspacesize() > sizeof(scratch)) { - printf("gunzip needs more mem\n"); - exit(); - } - memset(&s, 0, sizeof(s)); - s.workspace = scratch; - r = 
zlib_inflateInit2(&s, -MAX_WBITS); - if (r != Z_OK) { - printf("inflateInit2 returned %d\n\r", r); - exit(); - } - s.next_in = src + i; - s.avail_in = *lenp - i; - s.next_out = dst; - s.avail_out = dstlen; - r = zlib_inflate(&s, Z_FULL_FLUSH); - if (r != Z_OK && r != Z_STREAM_END) { - printf("inflate returned %d msg: %s\n\r", r, s.msg); - exit(); - } - *lenp = s.next_out - (unsigned char *) dst; - zlib_inflateEnd(&s); -} - _ From akpm at osdl.org Sat Oct 29 10:46:43 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:43 -0700 Subject: [patch 28/43] ppc64 boot: bootfiles depend on linker script Message-ID: <200510290047.j9T0lDNb030084@shell0.pdx.osdl.net> From: Olaf Hering bootfiles must be relinked if the linker script changes Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/Makefile | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff -puN arch/ppc64/boot/Makefile~ppc64-boot-bootfiles-depend-on-linker-script arch/ppc64/boot/Makefile --- devel/arch/ppc64/boot/Makefile~ppc64-boot-bootfiles-depend-on-linker-script 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/Makefile 2005-10-28 17:45:21.000000000 -0700 @@ -127,11 +127,11 @@ $(call obj-sec, $(required) $(initrd)): $(call cmd,addsection) $(obj)/zImage.vmode: obj-boot += $(call obj-sec, $(required)) -$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) +$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) $(srctree)/$(src)/zImage.lds $(call cmd,bootld,$(obj-boot)) $(obj)/zImage.initrd.vmode: obj-boot += $(call obj-sec, $(required) $(initrd)) -$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) +$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) $(srctree)/$(src)/zImage.lds $(call cmd,bootld,$(obj-boot)) $(obj)/zImage: $(obj)/zImage.vmode $(obj)/addnote _ From akpm at osdl.org Sat Oct 29 10:46:40 2005 From:
akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:40 -0700 Subject: [patch 26/43] ppc64 boot: remove need for imagesize.c Message-ID: <200510290047.j9T0lB2u030075@shell0.pdx.osdl.net> From: Olaf Hering Compute the vmlinux size at runtime. Use Z_FULL_FLUSH instead of Z_FINISH, to extract only the ELF header and ELF program header. ->p_memsz is the required memory range for the executable, including bss ->p_filesz is the size of .text, .data and other runtime sections These values must be used for the claim call. All additional memory needed by the kernel is claimed in prom_init, remove the extra Mb. Pass the full memsize as target area to gunzip, otherwise not everything will be uncompressed. flush_cache has to flush all runtime sections, do not reduce the memrange by the ->p_offset value because it's just that: an offset. Remove the Makefile code to produce an imagesize.c; it's not needed anymore. Remove all FORCE flags, to not rebuild the zImage if vmlinux was not changed.
Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/Makefile | 32 ++++++------------ arch/ppc64/boot/main.c | 80 ++++++++++++++++++++++------------------------- 2 files changed, 50 insertions(+), 62 deletions(-) diff -puN arch/ppc64/boot/main.c~ppc64-boot-remove-need-for-imagesizec arch/ppc64/boot/main.c --- devel/arch/ppc64/boot/main.c~ppc64-boot-remove-need-for-imagesizec 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/main.c 2005-10-28 17:45:22.000000000 -0700 @@ -32,8 +32,6 @@ extern char _vmlinux_start[]; extern char _vmlinux_end[]; extern char _initrd_start[]; extern char _initrd_end[]; -extern unsigned long vmlinux_filesize; -extern unsigned long vmlinux_memsize; struct addr_range { unsigned long addr; @@ -45,6 +43,7 @@ static struct addr_range vmlinuz = {0, 0 static struct addr_range initrd = {0, 0, 0}; static char scratch[46912]; /* scratch space for gunzip, from zlib_inflate_workspacesize() */ +static char elfheader[256]; typedef void (*kernel_entry_t)( unsigned long, @@ -78,6 +77,7 @@ static unsigned long try_claim(unsigned void start(unsigned long a1, unsigned long a2, void *promptr) { unsigned long i; + int len; kernel_entry_t kernel_entry; Elf64_Ehdr *elf64; Elf64_Phdr *elf64ph; @@ -113,25 +113,45 @@ void start(unsigned long a1, unsigned lo claim_base = PROG_START; #endif - /* - * Now we try to claim some memory for the kernel itself - * our "vmlinux_memsize" is the memory footprint in RAM, _HOWEVER_, what - * our Makefile stuffs in is an image containing all sort of junk including - * an ELF header. We need to do some calculations here to find the right - * size... In practice we add 1Mb, that is enough, but we should really - * consider fixing the Makefile to put a _raw_ kernel in there ! 
- */ - vmlinux_memsize += ONE_MB; - printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux_memsize); - vmlinux.addr = try_claim(vmlinux_memsize); + vmlinuz.addr = (unsigned long)_vmlinux_start; + vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start); + + /* gunzip the ELF header of the kernel */ + if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { + len = vmlinuz.size; + gunzip(elfheader, sizeof(elfheader), + (unsigned char *)vmlinuz.addr, &len); + } else + memcpy(elfheader, (const void *)vmlinuz.addr, sizeof(elfheader)); + + elf64 = (Elf64_Ehdr *)elfheader; + if ( elf64->e_ident[EI_MAG0] != ELFMAG0 || + elf64->e_ident[EI_MAG1] != ELFMAG1 || + elf64->e_ident[EI_MAG2] != ELFMAG2 || + elf64->e_ident[EI_MAG3] != ELFMAG3 || + elf64->e_ident[EI_CLASS] != ELFCLASS64 || + elf64->e_ident[EI_DATA] != ELFDATA2MSB || + elf64->e_type != ET_EXEC || + elf64->e_machine != EM_PPC64 ) + { + printf("Error: not a valid PPC64 ELF file!\n\r"); + exit(); + } + + elf64ph = (Elf64_Phdr *)((unsigned long)elf64 + + (unsigned long)elf64->e_phoff); + for(i=0; i < (unsigned int)elf64->e_phnum ;i++,elf64ph++) { + if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) + break; + } + vmlinux.size = (unsigned long)elf64ph->p_filesz; + vmlinux.memsize = (unsigned long)elf64ph->p_memsz; + printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux.memsize); + vmlinux.addr = try_claim(vmlinux.memsize); if (vmlinux.addr == 0) { printf("Can't allocate memory for kernel image !\n\r"); exit(); } - vmlinuz.addr = (unsigned long)_vmlinux_start; - vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start); - vmlinux.size = PAGE_ALIGN(vmlinux_filesize); - vmlinux.memsize = vmlinux_memsize; /* * Now we try to claim memory for the initrd (and copy it there) @@ -155,11 +175,10 @@ void start(unsigned long a1, unsigned lo /* Eventually gunzip the kernel */ if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { - int len; printf("gunzipping (0x%lx <- 0x%lx:0x%0lx)...", vmlinux.addr, vmlinuz.addr, 
vmlinuz.addr+vmlinuz.size); len = vmlinuz.size; - gunzip((void *)vmlinux.addr, vmlinux.size, + gunzip((void *)vmlinux.addr, vmlinux.memsize, (unsigned char *)vmlinuz.addr, &len); printf("done 0x%lx bytes\n\r", len); } else { @@ -167,32 +186,11 @@ void start(unsigned long a1, unsigned lo } /* Skip over the ELF header */ - elf64 = (Elf64_Ehdr *)vmlinux.addr; - if ( elf64->e_ident[EI_MAG0] != ELFMAG0 || - elf64->e_ident[EI_MAG1] != ELFMAG1 || - elf64->e_ident[EI_MAG2] != ELFMAG2 || - elf64->e_ident[EI_MAG3] != ELFMAG3 || - elf64->e_ident[EI_CLASS] != ELFCLASS64 || - elf64->e_ident[EI_DATA] != ELFDATA2MSB || - elf64->e_type != ET_EXEC || - elf64->e_machine != EM_PPC64 ) - { - printf("Error: not a valid PPC64 ELF file!\n\r"); - exit(); - } - - elf64ph = (Elf64_Phdr *)((unsigned long)elf64 + - (unsigned long)elf64->e_phoff); - for(i=0; i < (unsigned int)elf64->e_phnum ;i++,elf64ph++) { - if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) - break; - } #ifdef DEBUG printf("... skipping 0x%lx bytes of ELF header\n\r", (unsigned long)elf64ph->p_offset); #endif vmlinux.addr += (unsigned long)elf64ph->p_offset; - vmlinux.size -= (unsigned long)elf64ph->p_offset; flush_cache((void *)vmlinux.addr, vmlinux.size); @@ -263,7 +261,7 @@ static void gunzip(void *dst, int dstlen s.avail_in = *lenp - i; s.next_out = dst; s.avail_out = dstlen; - r = zlib_inflate(&s, Z_FINISH); + r = zlib_inflate(&s, Z_FULL_FLUSH); if (r != Z_OK && r != Z_STREAM_END) { printf("inflate returned %d msg: %s\n\r", r, s.msg); exit(); diff -puN arch/ppc64/boot/Makefile~ppc64-boot-remove-need-for-imagesizec arch/ppc64/boot/Makefile --- devel/arch/ppc64/boot/Makefile~ppc64-boot-remove-need-for-imagesizec 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/Makefile 2005-10-28 17:45:22.000000000 -0700 @@ -34,7 +34,7 @@ zliblinuxheader := zlib.h zconf.h zutil. 
$(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) #$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h) -src-boot := crt0.S string.S prom.c main.c imagesize.c div64.S +src-boot := crt0.S string.S prom.c main.c div64.S src-boot += $(zlib) src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) @@ -87,7 +87,7 @@ src-sec = $(foreach section, $(1), $(pat gz-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.gz, $(section))) hostprogs-y := addnote addRamDisk -targets += zImage.vmode zImage.initrd.vmode zImage zImage.initrd imagesize.c \ +targets += zImage.vmode zImage.initrd.vmode zImage zImage.initrd \ $(patsubst $(obj)/%,%, $(call obj-sec, $(required) $(initrd))) \ $(patsubst $(obj)/%,%, $(call src-sec, $(required) $(initrd))) \ $(patsubst $(obj)/%,%, $(call gz-sec, $(required) $(initrd))) \ @@ -100,9 +100,9 @@ quiet_cmd_ramdisk = RAMDISK $@ quiet_cmd_stripvm = STRIP $@ cmd_stripvm = $(STRIP) -s $< -o $@ -vmlinux.strip: vmlinux FORCE +vmlinux.strip: vmlinux $(call if_changed,stripvm) -$(obj)/vmlinux.initrd: vmlinux.strip $(obj)/addRamDisk $(obj)/ramdisk.image.gz FORCE +$(obj)/vmlinux.initrd: vmlinux.strip $(obj)/addRamDisk $(obj)/ramdisk.image.gz $(call if_changed,ramdisk) quiet_cmd_addsection = ADDSEC $@ @@ -110,48 +110,38 @@ quiet_cmd_addsection = ADDSEC $@ --add-section=.kernel:$(strip $(patsubst $(obj)/kernel-%.o,%, $@))=$(patsubst %.o,%.gz, $@) \ --set-section-flags=.kernel:$(strip $(patsubst $(obj)/kernel-%.o,%, $@))=$(OBJCOPYFLAGS) -quiet_cmd_imagesize = GENSIZE $@ - cmd_imagesize = ls -l vmlinux.strip | \ - awk '{printf "/* generated -- do not edit! 
*/\n" "unsigned long vmlinux_filesize = %d;\n", $$5}' \ - > $(obj)/imagesize.c && \ - $(CROSS_COMPILE)nm -n vmlinux | tail -n 1 | \ - awk '{printf "unsigned long vmlinux_memsize = 0x%s;\n", substr($$1,8)}' >> $(obj)/imagesize.c - quiet_cmd_addnote = ADDNOTE $@ cmd_addnote = $(obj)/addnote $@ -$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % FORCE +$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % $(call if_changed,gzip) $(obj)/kernel-initrd.gz: $(obj)/ramdisk.image.gz cp -f $(obj)/ramdisk.image.gz $@ -$(call src-sec, $(required) $(initrd)): $(obj)/kernel-%.c: $(obj)/kernel-%.gz FORCE +$(call src-sec, $(required) $(initrd)): $(obj)/kernel-%.c: $(obj)/kernel-%.gz @touch $@ -$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c FORCE +$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c $(call if_changed_dep,bootcc) $(call cmd,addsection) $(obj)/zImage.vmode: obj-boot += $(call obj-sec, $(required)) -$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) FORCE +$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) $(call cmd,bootld,$(obj-boot)) $(obj)/zImage.initrd.vmode: obj-boot += $(call obj-sec, $(required) $(initrd)) -$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) FORCE +$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) $(call cmd,bootld,$(obj-boot)) -$(obj)/zImage: $(obj)/zImage.vmode $(obj)/addnote FORCE +$(obj)/zImage: $(obj)/zImage.vmode $(obj)/addnote @cp -f $< $@ $(call if_changed,addnote) -$(obj)/zImage.initrd: $(obj)/zImage.initrd.vmode $(obj)/addnote FORCE +$(obj)/zImage.initrd: $(obj)/zImage.initrd.vmode $(obj)/addnote @cp -f $< $@ $(call if_changed,addnote) -$(obj)/imagesize.c: vmlinux.strip - $(call cmd,imagesize) - install: $(CONFIGURE) $(BOOTIMAGE) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" "$(BOOTIMAGE)" _ From akpm at osdl.org Sat Oct 29 10:46:45 2005 From: akpm at osdl.org 
(akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:45 -0700 Subject: [patch 30/43] ppc64 boot: use memset to clear bss Message-ID: <200510290047.j9T0lGUx030093@shell0.pdx.osdl.net> From: Olaf Hering Use memset to clear bss, instead of own version. Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/crt0.S | 19 ------------------- arch/ppc64/boot/main.c | 3 +++ 2 files changed, 3 insertions(+), 19 deletions(-) diff -puN arch/ppc64/boot/crt0.S~ppc64-boot-use-memset-to-clear-bss arch/ppc64/boot/crt0.S --- devel/arch/ppc64/boot/crt0.S~ppc64-boot-use-memset-to-clear-bss 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/crt0.S 2005-10-28 17:45:21.000000000 -0700 @@ -25,24 +25,5 @@ _start: sync isync - ## Clear out the BSS as per ANSI C requirements - - lis r7,_end@ha - addi r7,r7,_end@l # r7 = &_end - lis r8,__bss_start@ha # - addi r8,r8,__bss_start@l # r8 = &_bss_start - - ## Determine how large an area, in number of words, to clear - - subf r7,r8,r7 # r7 = &_end - &_bss_start + 1 - addi r7,r7,3 # r7 += 3 - srwi. r7,r7,2 # r7 = size in words.
- beq 3f # If the size is zero, don't bother - addi r8,r8,-4 # r8 -= 4 - mtctr r7 # SPRN_CTR = number of words to clear - li r0,0 # r0 = 0 -2: stwu r0,4(r8) # Clear out a word - bdnz 2b # Keep clearing until done -3: b start diff -puN arch/ppc64/boot/main.c~ppc64-boot-use-memset-to-clear-bss arch/ppc64/boot/main.c --- devel/arch/ppc64/boot/main.c~ppc64-boot-use-memset-to-clear-bss 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/main.c 2005-10-28 17:45:22.000000000 -0700 @@ -26,6 +26,7 @@ extern void flush_cache(void *, unsigned #define ONE_MB 0x100000 extern char _start[]; +extern char __bss_start[]; extern char _end[]; extern char _vmlinux_start[]; extern char _vmlinux_end[]; @@ -138,6 +139,8 @@ void start(unsigned long a1, unsigned lo Elf64_Ehdr *elf64; Elf64_Phdr *elf64ph; + memset(__bss_start, 0, _end - __bss_start); + prom = (int (*)(void *)) promptr; chosen_handle = finddevice("/chosen"); if (chosen_handle == (void *) -1) _ From akpm at osdl.org Sat Oct 29 10:46:46 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:46 -0700 Subject: [patch 31/43] ppc64 boot: fix typo in asm comments Message-ID: <200510290047.j9T0lH68030096@shell0.pdx.osdl.net> From: Olaf Hering Update comment in memcpy, r7 contains the byte count. Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/string.S | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff -puN arch/ppc64/boot/string.S~ppc64-boot-fix-typo-in-asm-comments arch/ppc64/boot/string.S --- devel/arch/ppc64/boot/string.S~ppc64-boot-fix-typo-in-asm-comments 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/string.S 2005-10-28 17:44:04.000000000 -0700 @@ -104,7 +104,7 @@ memmove: .globl memcpy memcpy: - rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + rlwinm. 
r7,r5,32-3,3,31 /* r7 = r5 >> 3 */ addi r6,r3,-4 addi r4,r4,-4 beq 2f /* if less than 8 bytes to do */ @@ -146,7 +146,7 @@ memcpy: .globl backwards_memcpy backwards_memcpy: - rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */ add r6,r3,r5 add r4,r4,r5 beq 2f _ From akpm at osdl.org Sat Oct 29 10:46:44 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:44 -0700 Subject: [patch 29/43] ppc64 boot: cleanup linker script Message-ID: <200510290047.j9T0lFdo030090@shell0.pdx.osdl.net> From: Olaf Hering Remove userland-related stuff from ld.script; it is not required for the zImage. Use wildcards for some sections. Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/zImage.lds | 59 +++------------------------------------------ 1 files changed, 5 insertions(+), 54 deletions(-) diff -puN arch/ppc64/boot/zImage.lds~ppc64-boot-cleanup-linker-script arch/ppc64/boot/zImage.lds --- devel/arch/ppc64/boot/zImage.lds~ppc64-boot-cleanup-linker-script 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/zImage.lds 2005-10-28 17:45:21.000000000 -0700 @@ -1,62 +1,19 @@ OUTPUT_ARCH(powerpc:common) -SEARCH_DIR(/lib); SEARCH_DIR(/usr/lib); SEARCH_DIR(/usr/local/lib); SEARCH_DIR(/usr/local/powerpc-any-elf/lib); -/* Do we need any of these for elf? - __DYNAMIC = 0; */ SECTIONS { - /* Read-only sections, merged into text segment: */ - . 
= + SIZEOF_HEADERS; - .interp : { *(.interp) } - .hash : { *(.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .rel.text : { *(.rel.text) } - .rela.text : { *(.rela.text) } - .rel.data : { *(.rel.data) } - .rela.data : { *(.rela.data) } - .rel.rodata : { *(.rel.rodata) } - .rela.rodata : { *(.rela.rodata) } - .rel.got : { *(.rel.got) } - .rela.got : { *(.rela.got) } - .rel.ctors : { *(.rel.ctors) } - .rela.ctors : { *(.rela.ctors) } - .rel.dtors : { *(.rel.dtors) } - .rela.dtors : { *(.rela.dtors) } - .rel.bss : { *(.rel.bss) } - .rela.bss : { *(.rela.bss) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } - .plt : { *(.plt) } .text : { *(.text) *(.fixup) - *(.got1) } - . = ALIGN(4096); _etext = .; - PROVIDE (etext = .); - .rodata : - { - *(.rodata) - *(.rodata1) - } - .kstrtab : { *(.kstrtab) } - __vermagic : { *(__vermagic) } - .fini : { *(.fini) } =0 - .ctors : { *(.ctors) } - .dtors : { *(.dtors) } - /* Read-write section, merged into data segment: */ . = ALIGN(4096); .data : { - *(.data) - *(.data1) - *(.sdata) - *(.sdata2) - *(.got.plt) *(.got) - *(.dynamic) - CONSTRUCTORS + *(.rodata*) + *(.data*) + *(.sdata*) + *(.got*) } . = ALIGN(4096); @@ -71,20 +28,14 @@ SECTIONS . = ALIGN(4096); _edata = .; - PROVIDE (edata = .); - - .fixup : { *(.fixup) } . = ALIGN(4096); __bss_start = .; .bss : { - *(.sbss) *(.scommon) - *(.dynbss) + *(.sbss) *(.bss) - *(COMMON) } . = ALIGN(4096); _end = . ; - PROVIDE (end = .); } _ From akpm at osdl.org Sat Oct 29 10:46:47 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:47 -0700 Subject: [patch 32/43] ppc64 boot: remove global initializers Message-ID: <200510290047.j9T0lIrL030099@shell0.pdx.osdl.net> From: Olaf Hering No need to initialize global variables. 
Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/main.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff -puN arch/ppc64/boot/main.c~ppc64-boot-remove-global-initializers arch/ppc64/boot/main.c --- devel/arch/ppc64/boot/main.c~ppc64-boot-remove-global-initializers 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/main.c 2005-10-28 17:45:21.000000000 -0700 @@ -38,9 +38,9 @@ struct addr_range { unsigned long size; unsigned long memsize; }; -static struct addr_range vmlinux = {0, 0, 0}; -static struct addr_range vmlinuz = {0, 0, 0}; -static struct addr_range initrd = {0, 0, 0}; +static struct addr_range vmlinux; +static struct addr_range vmlinuz; +static struct addr_range initrd; static char scratch[46912]; /* scratch space for gunzip, from zlib_inflate_workspacesize() */ static char elfheader[256]; _ From akpm at osdl.org Sat Oct 29 10:46:38 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:38 -0700 Subject: [patch 25/43] ppc64 boot: remove zlib Message-ID: <200510290047.j9T0l94u030072@shell0.pdx.osdl.net> From: Olaf Hering Switch ppc64 to the in-kernel zlib; it has fewer bugs than the current one. The code in arch/ppc64/boot is compiled as 32bit, so it cannot use the includes from include/asm. Copy all zlib-related header files and convert them with sed. Reduce the scratch size to 47k and check for possible changes at runtime. 
Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- dev/null | 2627 ----------------------------------------------- arch/ppc64/boot/Makefile | 33 arch/ppc64/boot/main.c | 82 - 3 files changed, 44 insertions(+), 2698 deletions(-) diff -puN arch/ppc64/boot/main.c~ppc64-boot-remove-zlib arch/ppc64/boot/main.c --- devel/arch/ppc64/boot/main.c~ppc64-boot-remove-zlib 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/main.c 2005-10-28 17:45:22.000000000 -0700 @@ -26,12 +26,6 @@ extern void flush_cache(void *, unsigned #define RAM_END (512<<20) // Fixme: use OF */ #define ONE_MB 0x100000 -static char *avail_ram; -static char *begin_avail, *end_avail; -static char *avail_high; -static unsigned int heap_use; -static unsigned int heap_max; - extern char _start[]; extern char _end[]; extern char _vmlinux_start[]; @@ -50,7 +44,8 @@ static struct addr_range vmlinux = {0, 0 static struct addr_range vmlinuz = {0, 0, 0}; static struct addr_range initrd = {0, 0, 0}; -static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ +static char scratch[46912]; /* scratch space for gunzip, from zlib_inflate_workspacesize() */ + typedef void (*kernel_entry_t)( unsigned long, unsigned long, @@ -161,17 +156,12 @@ void start(unsigned long a1, unsigned lo /* Eventually gunzip the kernel */ if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { int len; - avail_ram = scratch; - begin_avail = avail_high = avail_ram; - end_avail = scratch + sizeof(scratch); printf("gunzipping (0x%lx <- 0x%lx:0x%0lx)...", vmlinux.addr, vmlinuz.addr, vmlinuz.addr+vmlinuz.size); len = vmlinuz.size; gunzip((void *)vmlinux.addr, vmlinux.size, (unsigned char *)vmlinuz.addr, &len); printf("done 0x%lx bytes\n\r", len); - printf("0x%x bytes of heap consumed, max in use 0x%x\n\r", - (unsigned)(avail_high - begin_avail), heap_max); } else { memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,vmlinuz.size); } @@ -225,64 +215,12 @@ 
void start(unsigned long a1, unsigned lo exit(); } -struct memchunk { - unsigned int size; - unsigned int pad; - struct memchunk *next; -}; - -static struct memchunk *freechunks; - -void *zalloc(void *x, unsigned items, unsigned size) -{ - void *p; - struct memchunk **mpp, *mp; - - size *= items; - size = _ALIGN(size, sizeof(struct memchunk)); - heap_use += size; - if (heap_use > heap_max) - heap_max = heap_use; - for (mpp = &freechunks; (mp = *mpp) != 0; mpp = &mp->next) { - if (mp->size == size) { - *mpp = mp->next; - return mp; - } - } - p = avail_ram; - avail_ram += size; - if (avail_ram > avail_high) - avail_high = avail_ram; - if (avail_ram > end_avail) { - printf("oops... out of memory\n\r"); - pause(); - } - return p; -} - -void zfree(void *x, void *addr, unsigned nb) -{ - struct memchunk *mp = addr; - - nb = _ALIGN(nb, sizeof(struct memchunk)); - heap_use -= nb; - if (avail_ram == addr + nb) { - avail_ram = addr; - return; - } - mp->size = nb; - mp->next = freechunks; - freechunks = mp; -} - #define HEAD_CRC 2 #define EXTRA_FIELD 4 #define ORIG_NAME 8 #define COMMENT 0x10 #define RESERVED 0xe0 -#define DEFLATED 8 - static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) { z_stream s; @@ -291,7 +229,7 @@ static void gunzip(void *dst, int dstlen /* skip header */ i = 10; flags = src[3]; - if (src[2] != DEFLATED || (flags & RESERVED) != 0) { + if (src[2] != Z_DEFLATED || (flags & RESERVED) != 0) { printf("bad gzipped data\n\r"); exit(); } @@ -310,9 +248,13 @@ static void gunzip(void *dst, int dstlen exit(); } - s.zalloc = zalloc; - s.zfree = zfree; - r = inflateInit2(&s, -MAX_WBITS); + if (zlib_inflate_workspacesize() > sizeof(scratch)) { + printf("gunzip needs more mem\n"); + exit(); + } + memset(&s, 0, sizeof(s)); + s.workspace = scratch; + r = zlib_inflateInit2(&s, -MAX_WBITS); if (r != Z_OK) { printf("inflateInit2 returned %d\n\r", r); exit(); @@ -321,12 +263,12 @@ static void gunzip(void *dst, int dstlen s.avail_in = *lenp - i; 
s.next_out = dst; s.avail_out = dstlen; - r = inflate(&s, Z_FINISH); + r = zlib_inflate(&s, Z_FINISH); if (r != Z_OK && r != Z_STREAM_END) { printf("inflate returned %d msg: %s\n\r", r, s.msg); exit(); } *lenp = s.next_out - (unsigned char *) dst; - inflateEnd(&s); + zlib_inflateEnd(&s); } diff -puN arch/ppc64/boot/Makefile~ppc64-boot-remove-zlib arch/ppc64/boot/Makefile --- devel/arch/ppc64/boot/Makefile~ppc64-boot-remove-zlib 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/Makefile 2005-10-28 17:45:22.000000000 -0700 @@ -27,10 +27,41 @@ BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAG BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data -src-boot := crt0.S string.S prom.c main.c zlib.c imagesize.c div64.S +zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c +zlibheader := infblock.h infcodes.h inffast.h inftrees.h infutil.h +zliblinuxheader := zlib.h zconf.h zutil.h + +$(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) +#$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h) + +src-boot := crt0.S string.S prom.c main.c imagesize.c div64.S +src-boot += $(zlib) src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) + +quiet_cmd_copy_zlib = COPY $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ + +quiet_cmd_copy_zlibheader = COPY $@ + cmd_copy_zlibheader = sed "s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ +# stddef.h for NULL +quiet_cmd_copy_zliblinuxheader = COPY $@ + cmd_copy_zliblinuxheader = sed "s@<linux/string.h>@\"string.h\"@;s@<linux/kernel.h>@<stddef.h>@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ + +$(addprefix $(obj)/,$(zlib)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlib) + +$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlibheader) + +$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: 
$(srctree)/include/linux/% + $(call cmd,copy_zliblinuxheader) + +clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) + + quiet_cmd_bootcc = BOOTCC $@ cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< diff -L arch/ppc64/boot/zlib.c -puN arch/ppc64/boot/zlib.c~ppc64-boot-remove-zlib /dev/null --- devel/arch/ppc64/boot/zlib.c +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,2195 +0,0 @@ -/* - * This file is derived from various .h and .c files from the zlib-0.95 - * distribution by Jean-loup Gailly and Mark Adler, with some additions - * by Paul Mackerras to aid in implementing Deflate compression and - * decompression for PPP packets. See zlib.h for conditions of - * distribution and use. - * - * Changes that have been made include: - * - changed functions not used outside this file to "local" - * - added minCompression parameter to deflateInit2 - * - added Z_PACKET_FLUSH (see zlib.h for details) - * - added inflateIncomp - * - Copyright (C) 1995 Jean-loup Gailly and Mark Adler - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. 
- - Jean-loup Gailly Mark Adler - gzip at prep.ai.mit.edu madler at alumni.caltech.edu - - * - * - */ - -/*+++++*/ -/* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* From: zutil.h,v 1.9 1995/05/03 17:27:12 jloup Exp */ - -#define _Z_UTIL_H - -#include "zlib.h" - -#ifndef local -# define local static -#endif -/* compile with -Dlocal if your debugger can't find static symbols */ - -#define FAR - -typedef unsigned char uch; -typedef uch FAR uchf; -typedef unsigned short ush; -typedef ush FAR ushf; -typedef unsigned long ulg; - -extern char *z_errmsg[]; /* indexed by 1-zlib_error */ - -#define ERR_RETURN(strm,err) return (strm->msg=z_errmsg[1-err], err) -/* To be used only when the state is known to be valid */ - -#ifndef NULL -#define NULL ((void *) 0) -#endif - - /* common constants */ - -#define DEFLATED 8 - -#ifndef DEF_WBITS -# define DEF_WBITS MAX_WBITS -#endif -/* default windowBits for decompression. 
MAX_WBITS is for compression only */ - -#if MAX_MEM_LEVEL >= 8 -# define DEF_MEM_LEVEL 8 -#else -# define DEF_MEM_LEVEL MAX_MEM_LEVEL -#endif -/* default memLevel */ - -#define STORED_BLOCK 0 -#define STATIC_TREES 1 -#define DYN_TREES 2 -/* The three kinds of block type */ - -#define MIN_MATCH 3 -#define MAX_MATCH 258 -/* The minimum and maximum match lengths */ - - /* functions */ - -extern void *memcpy(void *, const void *, unsigned long); -#define zmemcpy memcpy - -/* Diagnostic functions */ -#ifdef DEBUG_ZLIB -# include "stdio.h" -# ifndef verbose -# define verbose 0 -# endif -# define Assert(cond,msg) {if(!(cond)) z_error(msg);} -# define Trace(x) fprintf x -# define Tracev(x) {if (verbose) fprintf x ;} -# define Tracevv(x) {if (verbose>1) fprintf x ;} -# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} -# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} -#else -# define Assert(cond,msg) -# define Trace(x) -# define Tracev(x) -# define Tracevv(x) -# define Tracec(c,x) -# define Tracecv(c,x) -#endif - - -typedef uLong (*check_func) OF((uLong check, Bytef *buf, uInt len)); - -/* voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); */ -/* void zcfree OF((voidpf opaque, voidpf ptr)); */ - -#define ZALLOC(strm, items, size) \ - (*((strm)->zalloc))((strm)->opaque, (items), (size)) -#define ZFREE(strm, addr, size) \ - (*((strm)->zfree))((strm)->opaque, (voidpf)(addr), (size)) -#define TRY_FREE(s, p, n) {if (p) ZFREE(s, p, n);} - -/* deflate.h -- internal compression state - * Copyright (C) 1995 Jean-loup Gailly - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -/*+++++*/ -/* infblock.h -- header to use infblock.c - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -struct inflate_blocks_state; -typedef struct inflate_blocks_state FAR inflate_blocks_statef; - -local inflate_blocks_statef * inflate_blocks_new OF(( - z_stream *z, - check_func c, /* check function */ - uInt w)); /* window size */ - -local int inflate_blocks OF(( - inflate_blocks_statef *, - z_stream *, - int)); /* initial return code */ - -local void inflate_blocks_reset OF(( - inflate_blocks_statef *, - z_stream *, - uLongf *)); /* check value on output */ - -local int inflate_blocks_free OF(( - inflate_blocks_statef *, - z_stream *, - uLongf *)); /* check value on output */ - -local int inflate_addhistory OF(( - inflate_blocks_statef *, - z_stream *)); - -local int inflate_packet_flush OF(( - inflate_blocks_statef *)); - -/*+++++*/ -/* inftrees.h -- header to use inftrees.c - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* Huffman code lookup table entry--this entry is four bytes for machines - that have 16-bit pointers (e.g. PC's in the small or medium model). 
*/ - -typedef struct inflate_huft_s FAR inflate_huft; - -struct inflate_huft_s { - union { - struct { - Byte Exop; /* number of extra bits or operation */ - Byte Bits; /* number of bits in this code or subcode */ - } what; - uInt Nalloc; /* number of these allocated here */ - Bytef *pad; /* pad structure to a power of 2 (4 bytes for */ - } word; /* 16-bit, 8 bytes for 32-bit machines) */ - union { - uInt Base; /* literal, length base, or distance base */ - inflate_huft *Next; /* pointer to next level of table */ - } more; -}; - -#ifdef DEBUG_ZLIB - local uInt inflate_hufts; -#endif - -local int inflate_trees_bits OF(( - uIntf *, /* 19 code lengths */ - uIntf *, /* bits tree desired/actual depth */ - inflate_huft * FAR *, /* bits tree result */ - z_stream *)); /* for zalloc, zfree functions */ - -local int inflate_trees_dynamic OF(( - uInt, /* number of literal/length codes */ - uInt, /* number of distance codes */ - uIntf *, /* that many (total) code lengths */ - uIntf *, /* literal desired/actual bit depth */ - uIntf *, /* distance desired/actual bit depth */ - inflate_huft * FAR *, /* literal/length tree result */ - inflate_huft * FAR *, /* distance tree result */ - z_stream *)); /* for zalloc, zfree functions */ - -local int inflate_trees_fixed OF(( - uIntf *, /* literal desired/actual bit depth */ - uIntf *, /* distance desired/actual bit depth */ - inflate_huft * FAR *, /* literal/length tree result */ - inflate_huft * FAR *)); /* distance tree result */ - -local int inflate_trees_free OF(( - inflate_huft *, /* tables to free */ - z_stream *)); /* for zfree function */ - - -/*+++++*/ -/* infcodes.h -- header to use infcodes.c - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -struct inflate_codes_state; -typedef struct inflate_codes_state FAR inflate_codes_statef; - -local inflate_codes_statef *inflate_codes_new OF(( - uInt, uInt, - inflate_huft *, inflate_huft *, - z_stream *)); - -local int inflate_codes OF(( - inflate_blocks_statef *, - z_stream *, - int)); - -local void inflate_codes_free OF(( - inflate_codes_statef *, - z_stream *)); - - -/*+++++*/ -/* inflate.c -- zlib interface to inflate modules - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* inflate private state */ -struct internal_state { - - /* mode */ - enum { - METHOD, /* waiting for method byte */ - FLAG, /* waiting for flag byte */ - BLOCKS, /* decompressing blocks */ - CHECK4, /* four check bytes to go */ - CHECK3, /* three check bytes to go */ - CHECK2, /* two check bytes to go */ - CHECK1, /* one check byte to go */ - DONE, /* finished check, done */ - BAD} /* got an error--stay here */ - mode; /* current inflate mode */ - - /* mode dependent information */ - union { - uInt method; /* if FLAGS, method byte */ - struct { - uLong was; /* computed check value */ - uLong need; /* stream check value */ - } check; /* if CHECK, check values to compare */ - uInt marker; /* if BAD, inflateSync's marker bytes count */ - } sub; /* submode */ - - /* mode independent information */ - int nowrap; /* flag for no wrapper */ - uInt wbits; /* log2(window size) (8..15, defaults to 15) */ - inflate_blocks_statef - *blocks; /* current inflate_blocks state */ - -}; - - -int inflateReset( - z_stream *z -) -{ - uLong c; - - if (z == Z_NULL || z->state == Z_NULL) - return Z_STREAM_ERROR; - z->total_in = z->total_out = 0; - z->msg = Z_NULL; - z->state->mode = z->state->nowrap ? 
BLOCKS : METHOD; - inflate_blocks_reset(z->state->blocks, z, &c); - Trace((stderr, "inflate: reset\n")); - return Z_OK; -} - - -int inflateEnd( - z_stream *z -) -{ - uLong c; - - if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL) - return Z_STREAM_ERROR; - if (z->state->blocks != Z_NULL) - inflate_blocks_free(z->state->blocks, z, &c); - ZFREE(z, z->state, sizeof(struct internal_state)); - z->state = Z_NULL; - Trace((stderr, "inflate: end\n")); - return Z_OK; -} - - -int inflateInit2( - z_stream *z, - int w -) -{ - /* initialize state */ - if (z == Z_NULL) - return Z_STREAM_ERROR; -/* if (z->zalloc == Z_NULL) z->zalloc = zcalloc; */ -/* if (z->zfree == Z_NULL) z->zfree = zcfree; */ - if ((z->state = (struct internal_state FAR *) - ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL) - return Z_MEM_ERROR; - z->state->blocks = Z_NULL; - - /* handle undocumented nowrap option (no zlib header or check) */ - z->state->nowrap = 0; - if (w < 0) - { - w = - w; - z->state->nowrap = 1; - } - - /* set window size */ - if (w < 8 || w > 15) - { - inflateEnd(z); - return Z_STREAM_ERROR; - } - z->state->wbits = (uInt)w; - - /* create inflate_blocks state */ - if ((z->state->blocks = - inflate_blocks_new(z, z->state->nowrap ? 
Z_NULL : adler32, 1 << w)) - == Z_NULL) - { - inflateEnd(z); - return Z_MEM_ERROR; - } - Trace((stderr, "inflate: allocated\n")); - - /* reset state */ - inflateReset(z); - return Z_OK; -} - - -int inflateInit( - z_stream *z -) -{ - return inflateInit2(z, DEF_WBITS); -} - - -#define NEEDBYTE {if(z->avail_in==0)goto empty;r=Z_OK;} -#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++) - -int inflate( - z_stream *z, - int f -) -{ - int r; - uInt b; - - if (z == Z_NULL || z->next_in == Z_NULL) - return Z_STREAM_ERROR; - r = Z_BUF_ERROR; - while (1) switch (z->state->mode) - { - case METHOD: - NEEDBYTE - if (((z->state->sub.method = NEXTBYTE) & 0xf) != DEFLATED) - { - z->state->mode = BAD; - z->msg = "unknown compression method"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - if ((z->state->sub.method >> 4) + 8 > z->state->wbits) - { - z->state->mode = BAD; - z->msg = "invalid window size"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - z->state->mode = FLAG; - case FLAG: - NEEDBYTE - if ((b = NEXTBYTE) & 0x20) - { - z->state->mode = BAD; - z->msg = "invalid reserved bit"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - if (((z->state->sub.method << 8) + b) % 31) - { - z->state->mode = BAD; - z->msg = "incorrect header check"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - Trace((stderr, "inflate: zlib header ok\n")); - z->state->mode = BLOCKS; - case BLOCKS: - r = inflate_blocks(z->state->blocks, z, r); - if (f == Z_PACKET_FLUSH && z->avail_in == 0 && z->avail_out != 0) - r = inflate_packet_flush(z->state->blocks); - if (r == Z_DATA_ERROR) - { - z->state->mode = BAD; - z->state->sub.marker = 0; /* can try inflateSync */ - break; - } - if (r != Z_STREAM_END) - return r; - r = Z_OK; - inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was); - if (z->state->nowrap) - { - z->state->mode = DONE; - break; - } - z->state->mode = CHECK4; - case CHECK4: - NEEDBYTE 
- z->state->sub.check.need = (uLong)NEXTBYTE << 24; - z->state->mode = CHECK3; - case CHECK3: - NEEDBYTE - z->state->sub.check.need += (uLong)NEXTBYTE << 16; - z->state->mode = CHECK2; - case CHECK2: - NEEDBYTE - z->state->sub.check.need += (uLong)NEXTBYTE << 8; - z->state->mode = CHECK1; - case CHECK1: - NEEDBYTE - z->state->sub.check.need += (uLong)NEXTBYTE; - - if (z->state->sub.check.was != z->state->sub.check.need) - { - z->state->mode = BAD; - z->msg = "incorrect data check"; - z->state->sub.marker = 5; /* can't try inflateSync */ - break; - } - Trace((stderr, "inflate: zlib check ok\n")); - z->state->mode = DONE; - case DONE: - return Z_STREAM_END; - case BAD: - return Z_DATA_ERROR; - default: - return Z_STREAM_ERROR; - } - - empty: - if (f != Z_PACKET_FLUSH) - return r; - z->state->mode = BAD; - z->state->sub.marker = 0; /* can try inflateSync */ - return Z_DATA_ERROR; -} - -/* - * This subroutine adds the data at next_in/avail_in to the output history - * without performing any output. The output buffer must be "caught up"; - * i.e. no pending output (hence s->read equals s->write), and the state must - * be BLOCKS (i.e. we should be willing to see the start of a series of - * BLOCKS). On exit, the output will also be caught up, and the checksum - * will have been updated if need be. 
- */ - -int inflateIncomp( - z_stream *z -) -{ - if (z->state->mode != BLOCKS) - return Z_DATA_ERROR; - return inflate_addhistory(z->state->blocks, z); -} - - -int inflateSync( - z_stream *z -) -{ - uInt n; /* number of bytes to look at */ - Bytef *p; /* pointer to bytes */ - uInt m; /* number of marker bytes found in a row */ - uLong r, w; /* temporaries to save total_in and total_out */ - - /* set up */ - if (z == Z_NULL || z->state == Z_NULL) - return Z_STREAM_ERROR; - if (z->state->mode != BAD) - { - z->state->mode = BAD; - z->state->sub.marker = 0; - } - if ((n = z->avail_in) == 0) - return Z_BUF_ERROR; - p = z->next_in; - m = z->state->sub.marker; - - /* search */ - while (n && m < 4) - { - if (*p == (Byte)(m < 2 ? 0 : 0xff)) - m++; - else if (*p) - m = 0; - else - m = 4 - m; - p++, n--; - } - - /* restore */ - z->total_in += p - z->next_in; - z->next_in = p; - z->avail_in = n; - z->state->sub.marker = m; - - /* return no joy or set up to restart on a new block */ - if (m != 4) - return Z_DATA_ERROR; - r = z->total_in; w = z->total_out; - inflateReset(z); - z->total_in = r; z->total_out = w; - z->state->mode = BLOCKS; - return Z_OK; -} - -#undef NEEDBYTE -#undef NEXTBYTE - -/*+++++*/ -/* infutil.h -- types and macros common to blocks and codes - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -/* inflate blocks semi-private state */ -struct inflate_blocks_state { - - /* mode */ - enum { - TYPE, /* get type bits (3, including end bit) */ - LENS, /* get lengths for stored */ - STORED, /* processing stored block */ - TABLE, /* get table lengths */ - BTREE, /* get bit lengths tree for a dynamic block */ - DTREE, /* get length, distance trees for a dynamic block */ - CODES, /* processing fixed or dynamic block */ - DRY, /* output remaining window bytes */ - DONEB, /* finished last block, done */ - BADB} /* got a data error--stuck here */ - mode; /* current inflate_block mode */ - - /* mode dependent information */ - union { - uInt left; /* if STORED, bytes left to copy */ - struct { - uInt table; /* table lengths (14 bits) */ - uInt index; /* index into blens (or border) */ - uIntf *blens; /* bit lengths of codes */ - uInt bb; /* bit length tree depth */ - inflate_huft *tb; /* bit length decoding tree */ - int nblens; /* # elements allocated at blens */ - } trees; /* if DTREE, decoding info for trees */ - struct { - inflate_huft *tl, *td; /* trees to free */ - inflate_codes_statef - *codes; - } decode; /* if CODES, current state */ - } sub; /* submode */ - uInt last; /* true if this block is the last block */ - - /* mode independent information */ - uInt bitk; /* bits in bit buffer */ - uLong bitb; /* bit buffer */ - Bytef *window; /* sliding window */ - Bytef *end; /* one byte after sliding window */ - Bytef *read; /* window read pointer */ - Bytef *write; /* window write pointer */ - check_func checkfn; /* check function */ - uLong check; /* check on output */ - -}; - - -/* defines for inflate input/output */ -/* update pointers and return */ -#define UPDBITS {s->bitb=b;s->bitk=k;} -#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} -#define UPDOUT {s->write=q;} -#define UPDATE {UPDBITS UPDIN UPDOUT} -#define LEAVE {UPDATE return inflate_flush(s,z,r);} -/* get bytes and bits */ -#define LOADIN 
{p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} -#define NEEDBYTE {if(n)r=Z_OK;else LEAVE} -#define NEXTBYTE (n--,*p++) -#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}} -#define DUMPBITS(j) {b>>=(j);k-=(j);} -/* output bytes */ -#define WAVAIL (q<s->read?s->read-q-1:s->end-q) -#define LOADOUT {q=s->write;m=WAVAIL;} -#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=WAVAIL;}} -#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} -#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;} -#define OUTBYTE(a) {*q++=(Byte)(a);m--;} -/* load local pointers */ -#define LOAD {LOADIN LOADOUT} - -/* And'ing with mask[n] masks the lower n bits */ -local uInt inflate_mask[] = { - 0x0000, - 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, - 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff -}; - -/* copy as much as possible from the sliding window to the output area */ -local int inflate_flush OF(( - inflate_blocks_statef *, - z_stream *, - int)); - -/*+++++*/ -/* inffast.h -- header to use inffast.c - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -local int inflate_fast OF(( - uInt, - uInt, - inflate_huft *, - inflate_huft *, - inflate_blocks_statef *, - z_stream *)); - - -/*+++++*/ -/* infblock.c -- interpret and process block types to last block - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* Table for deflate from PKZIP's appnote.txt. */ -local uInt border[] = { /* Order of the bit length code lengths */ - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -/* - Notes beyond the 1.93a appnote.txt: - - 1. Distance pointers never point before the beginning of the output - stream. 
- 2. Distance pointers can point back across blocks, up to 32k away. - 3. There is an implied maximum of 7 bits for the bit length table and - 15 bits for the actual data. - 4. If only one code exists, then it is encoded using one bit. (Zero - would be more efficient, but perhaps a little confusing.) If two - codes exist, they are coded using one bit each (0 and 1). - 5. There is no way of sending zero distance codes--a dummy must be - sent if there are none. (History: a pre 2.0 version of PKZIP would - store blocks with no distance codes, but this was discovered to be - too harsh a criterion.) Valid only for 1.93a. 2.04c does allow - zero distance codes, which is sent as one code of zero bits in - length. - 6. There are up to 286 literal/length codes. Code 256 represents the - end-of-block. Note however that the static length tree defines - 288 codes just to fill out the Huffman codes. Codes 286 and 287 - cannot be used though, since there is no length base or extra bits - defined for them. Similarily, there are up to 30 distance codes. - However, static trees define 32 codes (all 5 bits) to fill out the - Huffman codes, but the last two had better not show up in the data. - 7. Unzip can check dynamic Huffman blocks for complete code sets. - The exception is that a single code would not be complete (see #4). - 8. The five bits following the block type is really the number of - literal codes sent minus 257. - 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits - (1+6+6). Therefore, to output three times the length, you output - three codes (1+1+1), whereas to output four times the same length, - you only need two codes (1+3). Hmm. - 10. In the tree reconstruction algorithm, Code = Code + Increment - only if BitLength(i) is not zero. (Pretty obvious.) - 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) - 12. Note: length code 284 can represent 227-258, but length code 285 - really is 258. 
The last length deserves its own, short code - since it gets used a lot in very redundant files. The length - 258 is special since 258 - 3 (the min match length) is 255. - 13. The literal/length and distance code bit lengths are read as a - single stream of lengths. It is possible (and advantageous) for - a repeat code (16, 17, or 18) to go across the boundary between - the two sets of lengths. - */ - - -local void inflate_blocks_reset( - inflate_blocks_statef *s, - z_stream *z, - uLongf *c -) -{ - if (s->checkfn != Z_NULL) - *c = s->check; - if (s->mode == BTREE || s->mode == DTREE) - ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt)); - if (s->mode == CODES) - { - inflate_codes_free(s->sub.decode.codes, z); - inflate_trees_free(s->sub.decode.td, z); - inflate_trees_free(s->sub.decode.tl, z); - } - s->mode = TYPE; - s->bitk = 0; - s->bitb = 0; - s->read = s->write = s->window; - if (s->checkfn != Z_NULL) - s->check = (*s->checkfn)(0L, Z_NULL, 0); - Trace((stderr, "inflate: blocks reset\n")); -} - - -local inflate_blocks_statef *inflate_blocks_new( - z_stream *z, - check_func c, - uInt w -) -{ - inflate_blocks_statef *s; - - if ((s = (inflate_blocks_statef *)ZALLOC - (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) - return s; - if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) - { - ZFREE(z, s, sizeof(struct inflate_blocks_state)); - return Z_NULL; - } - s->end = s->window + w; - s->checkfn = c; - s->mode = TYPE; - Trace((stderr, "inflate: blocks allocated\n")); - inflate_blocks_reset(s, z, &s->check); - return s; -} - - -local int inflate_blocks( - inflate_blocks_statef *s, - z_stream *z, - int r -) -{ - uInt t; /* temporary storage */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - - /* copy input/output information to locals (UPDATE macro restores) 
*/ - LOAD - - /* process input based on current state */ - while (1) switch (s->mode) - { - case TYPE: - NEEDBITS(3) - t = (uInt)b & 7; - s->last = t & 1; - switch (t >> 1) - { - case 0: /* stored */ - Trace((stderr, "inflate: stored block%s\n", - s->last ? " (last)" : "")); - DUMPBITS(3) - t = k & 7; /* go to byte boundary */ - DUMPBITS(t) - s->mode = LENS; /* get length of stored block */ - break; - case 1: /* fixed */ - Trace((stderr, "inflate: fixed codes block%s\n", - s->last ? " (last)" : "")); - { - uInt bl, bd; - inflate_huft *tl, *td; - - inflate_trees_fixed(&bl, &bd, &tl, &td); - s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); - if (s->sub.decode.codes == Z_NULL) - { - r = Z_MEM_ERROR; - LEAVE - } - s->sub.decode.tl = Z_NULL; /* don't try to free these */ - s->sub.decode.td = Z_NULL; - } - DUMPBITS(3) - s->mode = CODES; - break; - case 2: /* dynamic */ - Trace((stderr, "inflate: dynamic codes block%s\n", - s->last ? " (last)" : "")); - DUMPBITS(3) - s->mode = TABLE; - break; - case 3: /* illegal */ - DUMPBITS(3) - s->mode = BADB; - z->msg = "invalid block type"; - r = Z_DATA_ERROR; - LEAVE - } - break; - case LENS: - NEEDBITS(32) - if (((~b) >> 16) != (b & 0xffff)) - { - s->mode = BADB; - z->msg = "invalid stored block lengths"; - r = Z_DATA_ERROR; - LEAVE - } - s->sub.left = (uInt)b & 0xffff; - b = k = 0; /* dump bits */ - Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); - s->mode = s->sub.left ? STORED : TYPE; - break; - case STORED: - if (n == 0) - LEAVE - NEEDOUT - t = s->sub.left; - if (t > n) t = n; - if (t > m) t = m; - zmemcpy(q, p, t); - p += t; n -= t; - q += t; m -= t; - if ((s->sub.left -= t) != 0) - break; - Tracev((stderr, "inflate: stored end, %lu total out\n", - z->total_out + (q >= s->read ? q - s->read : - (s->end - s->read) + (q - s->window)))); - s->mode = s->last ? 
DRY : TYPE; - break; - case TABLE: - NEEDBITS(14) - s->sub.trees.table = t = (uInt)b & 0x3fff; -#ifndef PKZIP_BUG_WORKAROUND - if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) - { - s->mode = BADB; - z->msg = "too many length or distance symbols"; - r = Z_DATA_ERROR; - LEAVE - } -#endif - t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); - if (t < 19) - t = 19; - if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) - { - r = Z_MEM_ERROR; - LEAVE - } - s->sub.trees.nblens = t; - DUMPBITS(14) - s->sub.trees.index = 0; - Tracev((stderr, "inflate: table sizes ok\n")); - s->mode = BTREE; - case BTREE: - while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) - { - NEEDBITS(3) - s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; - DUMPBITS(3) - } - while (s->sub.trees.index < 19) - s->sub.trees.blens[border[s->sub.trees.index++]] = 0; - s->sub.trees.bb = 7; - t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, - &s->sub.trees.tb, z); - if (t != Z_OK) - { - r = t; - if (r == Z_DATA_ERROR) - s->mode = BADB; - LEAVE - } - s->sub.trees.index = 0; - Tracev((stderr, "inflate: bits tree ok\n")); - s->mode = DTREE; - case DTREE: - while (t = s->sub.trees.table, - s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) - { - inflate_huft *h; - uInt i, j, c; - - t = s->sub.trees.bb; - NEEDBITS(t) - h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); - t = h->word.what.Bits; - c = h->more.Base; - if (c < 16) - { - DUMPBITS(t) - s->sub.trees.blens[s->sub.trees.index++] = c; - } - else /* c == 16..18 */ - { - i = c == 18 ? 7 : c - 14; - j = c == 18 ? 11 : 3; - NEEDBITS(t + i) - DUMPBITS(t) - j += (uInt)b & inflate_mask[i]; - DUMPBITS(i) - i = s->sub.trees.index; - t = s->sub.trees.table; - if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || - (c == 16 && i < 1)) - { - s->mode = BADB; - z->msg = "invalid bit length repeat"; - r = Z_DATA_ERROR; - LEAVE - } - c = c == 16 ? 
s->sub.trees.blens[i - 1] : 0; - do { - s->sub.trees.blens[i++] = c; - } while (--j); - s->sub.trees.index = i; - } - } - inflate_trees_free(s->sub.trees.tb, z); - s->sub.trees.tb = Z_NULL; - { - uInt bl, bd; - inflate_huft *tl, *td; - inflate_codes_statef *c; - - bl = 9; /* must be <= 9 for lookahead assumptions */ - bd = 6; /* must be <= 9 for lookahead assumptions */ - t = s->sub.trees.table; - t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), - s->sub.trees.blens, &bl, &bd, &tl, &td, z); - if (t != Z_OK) - { - if (t == (uInt)Z_DATA_ERROR) - s->mode = BADB; - r = t; - LEAVE - } - Tracev((stderr, "inflate: trees ok\n")); - if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) - { - inflate_trees_free(td, z); - inflate_trees_free(tl, z); - r = Z_MEM_ERROR; - LEAVE - } - ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt)); - s->sub.decode.codes = c; - s->sub.decode.tl = tl; - s->sub.decode.td = td; - } - s->mode = CODES; - case CODES: - UPDATE - if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) - return inflate_flush(s, z, r); - r = Z_OK; - inflate_codes_free(s->sub.decode.codes, z); - inflate_trees_free(s->sub.decode.td, z); - inflate_trees_free(s->sub.decode.tl, z); - LOAD - Tracev((stderr, "inflate: codes end, %lu total out\n", - z->total_out + (q >= s->read ? 
q - s->read : - (s->end - s->read) + (q - s->window)))); - if (!s->last) - { - s->mode = TYPE; - break; - } - if (k > 7) /* return unused byte, if any */ - { - Assert(k < 16, "inflate_codes grabbed too many bytes") - k -= 8; - n++; - p--; /* can always return one */ - } - s->mode = DRY; - case DRY: - FLUSH - if (s->read != s->write) - LEAVE - s->mode = DONEB; - case DONEB: - r = Z_STREAM_END; - LEAVE - case BADB: - r = Z_DATA_ERROR; - LEAVE - default: - r = Z_STREAM_ERROR; - LEAVE - } -} - - -local int inflate_blocks_free( - inflate_blocks_statef *s, - z_stream *z, - uLongf *c -) -{ - inflate_blocks_reset(s, z, c); - ZFREE(z, s->window, s->end - s->window); - ZFREE(z, s, sizeof(struct inflate_blocks_state)); - Trace((stderr, "inflate: blocks freed\n")); - return Z_OK; -} - -/* - * This subroutine adds the data at next_in/avail_in to the output history - * without performing any output. The output buffer must be "caught up"; - * i.e. no pending output (hence s->read equals s->write), and the state must - * be BLOCKS (i.e. we should be willing to see the start of a series of - * BLOCKS). On exit, the output will also be caught up, and the checksum - * will have been updated if need be. - */ -local int inflate_addhistory( - inflate_blocks_statef *s, - z_stream *z -) -{ - uLong b; /* bit buffer */ /* NOT USED HERE */ - uInt k; /* bits in bit buffer */ /* NOT USED HERE */ - uInt t; /* temporary storage */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - - if (s->read != s->write) - return Z_STREAM_ERROR; - if (s->mode != TYPE) - return Z_DATA_ERROR; - - /* we're ready to rock */ - LOAD - /* while there is input ready, copy to output buffer, moving - * pointers as needed. - */ - while (n) { - t = n; /* how many to do */ - /* is there room until end of buffer? 
*/ - if (t > m) t = m; - /* update check information */ - if (s->checkfn != Z_NULL) - s->check = (*s->checkfn)(s->check, q, t); - zmemcpy(q, p, t); - q += t; - p += t; - n -= t; - z->total_out += t; - s->read = q; /* drag read pointer forward */ -/* WRAP */ /* expand WRAP macro by hand to handle s->read */ - if (q == s->end) { - s->read = q = s->window; - m = WAVAIL; - } - } - UPDATE - return Z_OK; -} - - -/* - * At the end of a Deflate-compressed PPP packet, we expect to have seen - * a `stored' block type value but not the (zero) length bytes. - */ -local int inflate_packet_flush( - inflate_blocks_statef *s -) -{ - if (s->mode != LENS) - return Z_DATA_ERROR; - s->mode = TYPE; - return Z_OK; -} - - -/*+++++*/ -/* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* simplify the use of the inflate_huft type with some defines */ -#define base more.Base -#define next more.Next -#define exop word.what.Exop -#define bits word.what.Bits - - -local int huft_build OF(( - uIntf *, /* code lengths in bits */ - uInt, /* number of codes */ - uInt, /* number of "simple" codes */ - uIntf *, /* list of base values for non-simple codes */ - uIntf *, /* list of extra bits for non-simple codes */ - inflate_huft * FAR*,/* result: starting table */ - uIntf *, /* maximum lookup bits (returns actual) */ - z_stream *)); /* for zalloc function */ - -local voidpf falloc OF(( - voidpf, /* opaque pointer (not used) */ - uInt, /* number of items */ - uInt)); /* size of item */ - -local void ffree OF(( - voidpf q, /* opaque pointer (not used) */ - voidpf p, /* what to free (not used) */ - uInt n)); /* number of bytes (not used) */ - -/* Tables for deflate from PKZIP's appnote.txt. 
*/ -local uInt cplens[] = { /* Copy lengths for literal codes 257..285 */ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - /* actually lengths - 2; also see note #13 above about 258 */ -local uInt cplext[] = { /* Extra bits for literal codes 257..285 */ - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, - 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 192, 192}; /* 192==invalid */ -local uInt cpdist[] = { /* Copy offsets for distance codes 0..29 */ - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, - 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, - 8193, 12289, 16385, 24577}; -local uInt cpdext[] = { /* Extra bits for distance codes */ - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, - 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, - 12, 12, 13, 13}; - -/* - Huffman code decoding is performed using a multi-level table lookup. - The fastest way to decode is to simply build a lookup table whose - size is determined by the longest code. However, the time it takes - to build this table can also be a factor if the data being decoded - is not very long. The most common codes are necessarily the - shortest codes, so those codes dominate the decoding time, and hence - the speed. The idea is you can have a shorter table that decodes the - shorter, more probable codes, and then point to subsidiary tables for - the longer codes. The time it costs to decode the longer codes is - then traded against the time it takes to make longer tables. - - This results of this trade are in the variables lbits and dbits - below. lbits is the number of bits the first level table for literal/ - length codes can decode in one step, and dbits is the same thing for - the distance codes. Subsequent tables are also less than or equal to - those sizes. 
These values may be adjusted either when all of the - codes are shorter than that, in which case the longest code length in - bits is used, or when the shortest code is *longer* than the requested - table size, in which case the length of the shortest code in bits is - used. - - There are two different values for the two tables, since they code a - different number of possibilities each. The literal/length table - codes 286 possible values, or in a flat code, a little over eight - bits. The distance table codes 30 possible values, or a little less - than five bits, flat. The optimum values for speed end up being - about one bit more than those, so lbits is 8+1 and dbits is 5+1. - The optimum values may differ though from machine to machine, and - possibly even between compilers. Your mileage may vary. - */ - - -/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ -#define BMAX 15 /* maximum bit length of any code */ -#define N_MAX 288 /* maximum number of codes in any set */ - -#ifdef DEBUG_ZLIB - uInt inflate_hufts; -#endif - -local int huft_build( - uIntf *b, /* code lengths in bits (all assumed <= BMAX) */ - uInt n, /* number of codes (assumed <= N_MAX) */ - uInt s, /* number of simple-valued codes (0..s-1) */ - uIntf *d, /* list of base values for non-simple codes */ - uIntf *e, /* list of extra bits for non-simple codes */ - inflate_huft * FAR *t, /* result: starting table */ - uIntf *m, /* maximum lookup bits, returns actual */ - z_stream *zs /* for zalloc function */ -) -/* Given a list of code lengths and a maximum table size, make a set of - tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR - if the given code set is incomplete (the tables are still built in this - case), Z_DATA_ERROR if the input is invalid (all zero length codes or an - over-subscribed set of lengths), or Z_MEM_ERROR if not enough memory. 
*/ -{ - - uInt a; /* counter for codes of length k */ - uInt c[BMAX+1]; /* bit length count table */ - uInt f; /* i repeats in table every f entries */ - int g; /* maximum code length */ - int h; /* table level */ - register uInt i; /* counter, current code */ - register uInt j; /* counter */ - register int k; /* number of bits in current code */ - int l; /* bits per table (returned in m) */ - register uIntf *p; /* pointer into c[], b[], or v[] */ - inflate_huft *q; /* points to current table */ - struct inflate_huft_s r; /* table entry for structure assignment */ - inflate_huft *u[BMAX]; /* table stack */ - uInt v[N_MAX]; /* values in order of bit length */ - register int w; /* bits before this table == (l * h) */ - uInt x[BMAX+1]; /* bit offsets, then code stack */ - uIntf *xp; /* pointer into x */ - int y; /* number of dummy codes added */ - uInt z; /* number of entries in current table */ - - - /* Generate counts for each bit length */ - p = c; -#define C0 *p++ = 0; -#define C2 C0 C0 C0 C0 -#define C4 C2 C2 C2 C2 - C4 /* clear c[]--assume BMAX+1 is 16 */ - p = b; i = n; - do { - c[*p++]++; /* assume all entries <= BMAX */ - } while (--i); - if (c[0] == n) /* null input--all zero length codes */ - { - *t = (inflate_huft *)Z_NULL; - *m = 0; - return Z_DATA_ERROR; - } - - - /* Find minimum and maximum length, bound *m by those */ - l = *m; - for (j = 1; j <= BMAX; j++) - if (c[j]) - break; - k = j; /* minimum code length */ - if ((uInt)l < j) - l = j; - for (i = BMAX; i; i--) - if (c[i]) - break; - g = i; /* maximum code length */ - if ((uInt)l > i) - l = i; - *m = l; - - - /* Adjust last length count to fill out codes, if needed */ - for (y = 1 << j; j < i; j++, y <<= 1) - if ((y -= c[j]) < 0) - return Z_DATA_ERROR; - if ((y -= c[i]) < 0) - return Z_DATA_ERROR; - c[i] += y; - - - /* Generate starting offsets into the value table for each length */ - x[1] = j = 0; - p = c + 1; xp = x + 2; - while (--i) { /* note that i == g from above */ - *xp++ = (j += *p++); - } 
- - - /* Make a table of values in order of bit lengths */ - p = b; i = 0; - do { - if ((j = *p++) != 0) - v[x[j]++] = i; - } while (++i < n); - n = x[g]; /* set n to length of v */ - - - /* Generate the Huffman codes and for each, make the table entries */ - x[0] = i = 0; /* first Huffman code is zero */ - p = v; /* grab values in bit order */ - h = -1; /* no tables yet--level -1 */ - w = -l; /* bits decoded == (l * h) */ - u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ - q = (inflate_huft *)Z_NULL; /* ditto */ - z = 0; /* ditto */ - - /* go through the bit lengths (k already is bits in shortest code) */ - for (; k <= g; k++) - { - a = c[k]; - while (a--) - { - /* here i is the Huffman code of length k bits for value *p */ - /* make tables up to required level */ - while (k > w + l) - { - h++; - w += l; /* previous table always l bits */ - - /* compute minimum size table less than or equal to l bits */ - z = (z = g - w) > (uInt)l ? l : z; /* table size upper limit */ - if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ - { /* too few codes for k-w bit table */ - f -= a + 1; /* deduct codes from patterns left */ - xp = c + k; - if (j < z) - while (++j < z) /* try smaller tables up to z bits */ - { - if ((f <<= 1) <= *++xp) - break; /* enough codes to use up j bits */ - f -= *xp; /* else deduct codes from patterns */ - } - } - z = 1 << j; /* table entries for j-bit table */ - - /* allocate and link in new table */ - if ((q = (inflate_huft *)ZALLOC - (zs,z + 1,sizeof(inflate_huft))) == Z_NULL) - { - if (h) - inflate_trees_free(u[0], zs); - return Z_MEM_ERROR; /* not enough memory */ - } - q->word.Nalloc = z + 1; -#ifdef DEBUG_ZLIB - inflate_hufts += z + 1; -#endif - *t = q + 1; /* link to list for huft_free() */ - *(t = &(q->next)) = Z_NULL; - u[h] = ++q; /* table starts after link */ - - /* connect to last table, if there is one */ - if (h) - { - x[h] = i; /* save pattern for backing up */ - r.bits = (Byte)l; /* bits to dump before this 
table */ - r.exop = (Byte)j; /* bits in this table */ - r.next = q; /* pointer to this table */ - j = i >> (w - l); /* (get around Turbo C bug) */ - u[h-1][j] = r; /* connect to last table */ - } - } - - /* set up table entry in r */ - r.bits = (Byte)(k - w); - if (p >= v + n) - r.exop = 128 + 64; /* out of values--invalid code */ - else if (*p < s) - { - r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ - r.base = *p++; /* simple code is just the value */ - } - else - { - r.exop = (Byte)e[*p - s] + 16 + 64; /* non-simple--look up in lists */ - r.base = d[*p++ - s]; - } - - /* fill code-like entries with r */ - f = 1 << (k - w); - for (j = i >> w; j < z; j += f) - q[j] = r; - - /* backwards increment the k-bit code i */ - for (j = 1 << (k - 1); i & j; j >>= 1) - i ^= j; - i ^= j; - - /* backup over finished tables */ - while ((i & ((1 << w) - 1)) != x[h]) - { - h--; /* don't need to update q */ - w -= l; - } - } - } - - - /* Return Z_BUF_ERROR if we were given an incomplete table */ - return y != 0 && g != 1 ? 
Z_BUF_ERROR : Z_OK; -} - - -local int inflate_trees_bits( - uIntf *c, /* 19 code lengths */ - uIntf *bb, /* bits tree desired/actual depth */ - inflate_huft * FAR *tb, /* bits tree result */ - z_stream *z /* for zfree function */ -) -{ - int r; - - r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, tb, bb, z); - if (r == Z_DATA_ERROR) - z->msg = "oversubscribed dynamic bit lengths tree"; - else if (r == Z_BUF_ERROR) - { - inflate_trees_free(*tb, z); - z->msg = "incomplete dynamic bit lengths tree"; - r = Z_DATA_ERROR; - } - return r; -} - - -local int inflate_trees_dynamic( - uInt nl, /* number of literal/length codes */ - uInt nd, /* number of distance codes */ - uIntf *c, /* that many (total) code lengths */ - uIntf *bl, /* literal desired/actual bit depth */ - uIntf *bd, /* distance desired/actual bit depth */ - inflate_huft * FAR *tl, /* literal/length tree result */ - inflate_huft * FAR *td, /* distance tree result */ - z_stream *z /* for zfree function */ -) -{ - int r; - - /* build literal/length tree */ - if ((r = huft_build(c, nl, 257, cplens, cplext, tl, bl, z)) != Z_OK) - { - if (r == Z_DATA_ERROR) - z->msg = "oversubscribed literal/length tree"; - else if (r == Z_BUF_ERROR) - { - inflate_trees_free(*tl, z); - z->msg = "incomplete literal/length tree"; - r = Z_DATA_ERROR; - } - return r; - } - - /* build distance tree */ - if ((r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, z)) != Z_OK) - { - if (r == Z_DATA_ERROR) - z->msg = "oversubscribed literal/length tree"; - else if (r == Z_BUF_ERROR) { -#ifdef PKZIP_BUG_WORKAROUND - r = Z_OK; - } -#else - inflate_trees_free(*td, z); - z->msg = "incomplete literal/length tree"; - r = Z_DATA_ERROR; - } - inflate_trees_free(*tl, z); - return r; -#endif - } - - /* done */ - return Z_OK; -} - - -/* build fixed tables only once--keep them here */ -local int fixed_lock = 0; -local int fixed_built = 0; -#define FIXEDH 530 /* number of hufts used by fixed tables */ -local uInt fixed_left = FIXEDH; -local 
inflate_huft fixed_mem[FIXEDH]; -local uInt fixed_bl; -local uInt fixed_bd; -local inflate_huft *fixed_tl; -local inflate_huft *fixed_td; - - -local voidpf falloc( - voidpf q, /* opaque pointer (not used) */ - uInt n, /* number of items */ - uInt s /* size of item */ -) -{ - Assert(s == sizeof(inflate_huft) && n <= fixed_left, - "inflate_trees falloc overflow"); - if (q) s++; /* to make some compilers happy */ - fixed_left -= n; - return (voidpf)(fixed_mem + fixed_left); -} - - -local void ffree( - voidpf q, - voidpf p, - uInt n -) -{ - Assert(0, "inflate_trees ffree called!"); - if (q) q = p; /* to make some compilers happy */ -} - - -local int inflate_trees_fixed( - uIntf *bl, /* literal desired/actual bit depth */ - uIntf *bd, /* distance desired/actual bit depth */ - inflate_huft * FAR *tl, /* literal/length tree result */ - inflate_huft * FAR *td /* distance tree result */ -) -{ - /* build fixed tables if not built already--lock out other instances */ - while (++fixed_lock > 1) - fixed_lock--; - if (!fixed_built) - { - int k; /* temporary variable */ - unsigned c[288]; /* length list for huft_build */ - z_stream z; /* for falloc function */ - - /* set up fake z_stream for memory routines */ - z.zalloc = falloc; - z.zfree = ffree; - z.opaque = Z_NULL; - - /* literal table */ - for (k = 0; k < 144; k++) - c[k] = 8; - for (; k < 256; k++) - c[k] = 9; - for (; k < 280; k++) - c[k] = 7; - for (; k < 288; k++) - c[k] = 8; - fixed_bl = 7; - huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, &z); - - /* distance table */ - for (k = 0; k < 30; k++) - c[k] = 5; - fixed_bd = 5; - huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, &z); - - /* done */ - fixed_built = 1; - } - fixed_lock--; - *bl = fixed_bl; - *bd = fixed_bd; - *tl = fixed_tl; - *td = fixed_td; - return Z_OK; -} - - -local int inflate_trees_free( - inflate_huft *t, /* table to free */ - z_stream *z /* for zfree function */ -) -/* Free the malloc'ed tables built by huft_build(), which 
makes a linked - list of the tables it made, with the links in a dummy first entry of - each table. */ -{ - register inflate_huft *p, *q; - - /* Go through linked list, freeing from the malloced (t[-1]) address. */ - p = t; - while (p != Z_NULL) - { - q = (--p)->next; - ZFREE(z, p, p->word.Nalloc * sizeof(inflate_huft)); - p = q; - } - return Z_OK; -} - -/*+++++*/ -/* infcodes.c -- process literals and length/distance pairs - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* simplify the use of the inflate_huft type with some defines */ -#define base more.Base -#define next more.Next -#define exop word.what.Exop -#define bits word.what.Bits - -/* inflate codes private state */ -struct inflate_codes_state { - - /* mode */ - enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ - START, /* x: set up for LEN */ - LEN, /* i: get length/literal/eob next */ - LENEXT, /* i: getting length extra (have base) */ - DIST, /* i: get distance next */ - DISTEXT, /* i: getting distance extra */ - COPY, /* o: copying bytes in window, waiting for space */ - LIT, /* o: got literal, waiting for output space */ - WASH, /* o: got eob, possibly still output waiting */ - END, /* x: got eob and all data flushed */ - BADCODE} /* x: got error */ - mode; /* current inflate_codes mode */ - - /* mode dependent information */ - uInt len; - union { - struct { - inflate_huft *tree; /* pointer into tree */ - uInt need; /* bits needed */ - } code; /* if LEN or DIST, where in tree */ - uInt lit; /* if LIT, literal */ - struct { - uInt get; /* bits to get for extra */ - uInt dist; /* distance back to copy from */ - } copy; /* if EXT or COPY, where and how much */ - } sub; /* submode */ - - /* mode independent information */ - Byte lbits; /* ltree bits decoded per branch */ - Byte dbits; /* dtree bits decoder per branch */ - inflate_huft *ltree; /* literal/length/eob tree */ - inflate_huft *dtree; /* distance tree */ - -}; - - 
-local inflate_codes_statef *inflate_codes_new( - uInt bl, - uInt bd, - inflate_huft *tl, - inflate_huft *td, - z_stream *z -) -{ - inflate_codes_statef *c; - - if ((c = (inflate_codes_statef *) - ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) - { - c->mode = START; - c->lbits = (Byte)bl; - c->dbits = (Byte)bd; - c->ltree = tl; - c->dtree = td; - Tracev((stderr, "inflate: codes new\n")); - } - return c; -} - - -local int inflate_codes( - inflate_blocks_statef *s, - z_stream *z, - int r -) -{ - uInt j; /* temporary storage */ - inflate_huft *t; /* temporary pointer */ - uInt e; /* extra bits or operation */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - Bytef *f; /* pointer to copy strings from */ - inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ - - /* copy input/output information to locals (UPDATE macro restores) */ - LOAD - - /* process input and output based on current state */ - while (1) switch (c->mode) - { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ - case START: /* x: set up for LEN */ -#ifndef SLOW - if (m >= 258 && n >= 10) - { - UPDATE - r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); - LOAD - if (r != Z_OK) - { - c->mode = r == Z_STREAM_END ? WASH : BADCODE; - break; - } - } -#endif /* !SLOW */ - c->sub.code.need = c->lbits; - c->sub.code.tree = c->ltree; - c->mode = LEN; - case LEN: /* i: get length/literal/eob next */ - j = c->sub.code.need; - NEEDBITS(j) - t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); - DUMPBITS(t->bits) - e = (uInt)(t->exop); - if (e == 0) /* literal */ - { - c->sub.lit = t->base; - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
- "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", t->base)); - c->mode = LIT; - break; - } - if (e & 16) /* length */ - { - c->sub.copy.get = e & 15; - c->len = t->base; - c->mode = LENEXT; - break; - } - if ((e & 64) == 0) /* next table */ - { - c->sub.code.need = e; - c->sub.code.tree = t->next; - break; - } - if (e & 32) /* end of block */ - { - Tracevv((stderr, "inflate: end of block\n")); - c->mode = WASH; - break; - } - c->mode = BADCODE; /* invalid code */ - z->msg = "invalid literal/length code"; - r = Z_DATA_ERROR; - LEAVE - case LENEXT: /* i: getting length extra (have base) */ - j = c->sub.copy.get; - NEEDBITS(j) - c->len += (uInt)b & inflate_mask[j]; - DUMPBITS(j) - c->sub.code.need = c->dbits; - c->sub.code.tree = c->dtree; - Tracevv((stderr, "inflate: length %u\n", c->len)); - c->mode = DIST; - case DIST: /* i: get distance next */ - j = c->sub.code.need; - NEEDBITS(j) - t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); - DUMPBITS(t->bits) - e = (uInt)(t->exop); - if (e & 16) /* distance */ - { - c->sub.copy.get = e & 15; - c->sub.copy.dist = t->base; - c->mode = DISTEXT; - break; - } - if ((e & 64) == 0) /* next table */ - { - c->sub.code.need = e; - c->sub.code.tree = t->next; - break; - } - c->mode = BADCODE; /* invalid code */ - z->msg = "invalid distance code"; - r = Z_DATA_ERROR; - LEAVE - case DISTEXT: /* i: getting distance extra */ - j = c->sub.copy.get; - NEEDBITS(j) - c->sub.copy.dist += (uInt)b & inflate_mask[j]; - DUMPBITS(j) - Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist)); - c->mode = COPY; - case COPY: /* o: copying bytes in window, waiting for space */ -#ifndef __TURBOC__ /* Turbo C bug for following expression */ - f = (uInt)(q - s->window) < c->sub.copy.dist ? 
- s->end - (c->sub.copy.dist - (q - s->window)) : - q - c->sub.copy.dist; -#else - f = q - c->sub.copy.dist; - if ((uInt)(q - s->window) < c->sub.copy.dist) - f = s->end - (c->sub.copy.dist - (q - s->window)); -#endif - while (c->len) - { - NEEDOUT - OUTBYTE(*f++) - if (f == s->end) - f = s->window; - c->len--; - } - c->mode = START; - break; - case LIT: /* o: got literal, waiting for output space */ - NEEDOUT - OUTBYTE(c->sub.lit) - c->mode = START; - break; - case WASH: /* o: got eob, possibly more output */ - FLUSH - if (s->read != s->write) - LEAVE - c->mode = END; - case END: - r = Z_STREAM_END; - LEAVE - case BADCODE: /* x: got error */ - r = Z_DATA_ERROR; - LEAVE - default: - r = Z_STREAM_ERROR; - LEAVE - } -} - - -local void inflate_codes_free( - inflate_codes_statef *c, - z_stream *z -) -{ - ZFREE(z, c, sizeof(struct inflate_codes_state)); - Tracev((stderr, "inflate: codes free\n")); -} - -/*+++++*/ -/* inflate_util.c -- data and routines common to blocks and codes - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* copy as much as possible from the sliding window to the output area */ -local int inflate_flush( - inflate_blocks_statef *s, - z_stream *z, - int r -) -{ - uInt n; - Bytef *p, *q; - - /* local copies of source and destination pointers */ - p = z->next_out; - q = s->read; - - /* compute number of bytes to copy as far as end of window */ - n = (uInt)((q <= s->write ? 
s->write : s->end) - q); - if (n > z->avail_out) n = z->avail_out; - if (n && r == Z_BUF_ERROR) r = Z_OK; - - /* update counters */ - z->avail_out -= n; - z->total_out += n; - - /* update check information */ - if (s->checkfn != Z_NULL) - s->check = (*s->checkfn)(s->check, q, n); - - /* copy as far as end of window */ - zmemcpy(p, q, n); - p += n; - q += n; - - /* see if more to copy at beginning of window */ - if (q == s->end) - { - /* wrap pointers */ - q = s->window; - if (s->write == s->end) - s->write = s->window; - - /* compute bytes to copy */ - n = (uInt)(s->write - q); - if (n > z->avail_out) n = z->avail_out; - if (n && r == Z_BUF_ERROR) r = Z_OK; - - /* update counters */ - z->avail_out -= n; - z->total_out += n; - - /* update check information */ - if (s->checkfn != Z_NULL) - s->check = (*s->checkfn)(s->check, q, n); - - /* copy */ - zmemcpy(p, q, n); - p += n; - q += n; - } - - /* update pointers */ - z->next_out = p; - s->read = q; - - /* done */ - return r; -} - - -/*+++++*/ -/* inffast.c -- process literals and length/distance pairs fast - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* simplify the use of the inflate_huft type with some defines */ -#define base more.Base -#define next more.Next -#define exop word.what.Exop -#define bits word.what.Bits - -/* macros for bit input with no checking and for returning unused bytes */ -#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<>3);p-=c;k&=7;} - -/* Called with number of bytes left to write in window at least 258 - (the maximum string length) and number of input bytes available - at least ten. The ten bytes are six bytes for the longest length/ - distance pair plus four bytes for overloading the bit buffer. 
*/ - -local int inflate_fast( - uInt bl, - uInt bd, - inflate_huft *tl, - inflate_huft *td, - inflate_blocks_statef *s, - z_stream *z -) -{ - inflate_huft *t; /* temporary pointer */ - uInt e; /* extra bits or operation */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - uInt ml; /* mask for literal/length tree */ - uInt md; /* mask for distance tree */ - uInt c; /* bytes to copy */ - uInt d; /* distance back to copy from */ - Bytef *r; /* copy source pointer */ - - /* load input, output, bit values */ - LOAD - - /* initialize masks */ - ml = inflate_mask[bl]; - md = inflate_mask[bd]; - - /* do until not enough input or output space for fast loop */ - do { /* assume called with m >= 258 && n >= 10 */ - /* get literal/length code */ - GRABBITS(20) /* max bits for literal/length code */ - if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) - { - DUMPBITS(t->bits) - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
- "inflate: * literal '%c'\n" : - "inflate: * literal 0x%02x\n", t->base)); - *q++ = (Byte)t->base; - m--; - continue; - } - do { - DUMPBITS(t->bits) - if (e & 16) - { - /* get extra bits for length */ - e &= 15; - c = t->base + ((uInt)b & inflate_mask[e]); - DUMPBITS(e) - Tracevv((stderr, "inflate: * length %u\n", c)); - - /* decode distance base of block to copy */ - GRABBITS(15); /* max bits for distance code */ - e = (t = td + ((uInt)b & md))->exop; - do { - DUMPBITS(t->bits) - if (e & 16) - { - /* get extra bits to add to distance base */ - e &= 15; - GRABBITS(e) /* get extra bits (up to 13) */ - d = t->base + ((uInt)b & inflate_mask[e]); - DUMPBITS(e) - Tracevv((stderr, "inflate: * distance %u\n", d)); - - /* do the copy */ - m -= c; - if ((uInt)(q - s->window) >= d) /* offset before dest */ - { /* just copy */ - r = q - d; - *q++ = *r++; c--; /* minimum count is three, */ - *q++ = *r++; c--; /* so unroll loop a little */ - } - else /* else offset after destination */ - { - e = d - (q - s->window); /* bytes from offset to end */ - r = s->end - e; /* pointer to offset */ - if (c > e) /* if source crosses, */ - { - c -= e; /* copy to end of window */ - do { - *q++ = *r++; - } while (--e); - r = s->window; /* copy rest from start of window */ - } - } - do { /* copy all or what's left */ - *q++ = *r++; - } while (--c); - break; - } - else if ((e & 64) == 0) - e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop; - else - { - z->msg = "invalid distance code"; - UNGRAB - UPDATE - return Z_DATA_ERROR; - } - } while (1); - break; - } - if ((e & 64) == 0) - { - if ((e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop) == 0) - { - DUMPBITS(t->bits) - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
- "inflate: * literal '%c'\n" : - "inflate: * literal 0x%02x\n", t->base)); - *q++ = (Byte)t->base; - m--; - break; - } - } - else if (e & 32) - { - Tracevv((stderr, "inflate: * end of block\n")); - UNGRAB - UPDATE - return Z_STREAM_END; - } - else - { - z->msg = "invalid literal/length code"; - UNGRAB - UPDATE - return Z_DATA_ERROR; - } - } while (1); - } while (m >= 258 && n >= 10); - - /* not enough input or output--restore pointers and return */ - UNGRAB - UPDATE - return Z_OK; -} - - -/*+++++*/ -/* zutil.c -- target dependent utility functions for the compression library - * Copyright (C) 1995 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* From: zutil.c,v 1.8 1995/05/03 17:27:12 jloup Exp */ - -char *zlib_version = ZLIB_VERSION; - -char *z_errmsg[] = { -"stream end", /* Z_STREAM_END 1 */ -"", /* Z_OK 0 */ -"file error", /* Z_ERRNO (-1) */ -"stream error", /* Z_STREAM_ERROR (-2) */ -"data error", /* Z_DATA_ERROR (-3) */ -"insufficient memory", /* Z_MEM_ERROR (-4) */ -"buffer error", /* Z_BUF_ERROR (-5) */ -""}; - - -/*+++++*/ -/* adler32.c -- compute the Adler-32 checksum of a data stream - * Copyright (C) 1995 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* From: adler32.c,v 1.6 1995/05/03 17:27:08 jloup Exp */ - -#define BASE 65521L /* largest prime smaller than 65536 */ -#define NMAX 5552 -/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ - -#define DO1(buf) {s1 += *buf++; s2 += s1;} -#define DO2(buf) DO1(buf); DO1(buf); -#define DO4(buf) DO2(buf); DO2(buf); -#define DO8(buf) DO4(buf); DO4(buf); -#define DO16(buf) DO8(buf); DO8(buf); - -/* ========================================================================= */ -uLong adler32( - uLong adler, - Bytef *buf, - uInt len -) -{ - unsigned long s1 = adler & 0xffff; - unsigned long s2 = (adler >> 16) & 0xffff; - int k; - - if (buf == Z_NULL) return 1L; - - while (len > 0) { - k = 
len < NMAX ? len : NMAX; - len -= k; - while (k >= 16) { - DO16(buf); - k -= 16; - } - if (k != 0) do { - DO1(buf); - } while (--k); - s1 %= BASE; - s2 %= BASE; - } - return (s2 << 16) | s1; -} diff -L arch/ppc64/boot/zlib.h -puN arch/ppc64/boot/zlib.h~ppc64-boot-remove-zlib /dev/null --- devel/arch/ppc64/boot/zlib.h +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,432 +0,0 @@ -/* */ - -/* - * This file is derived from zlib.h and zconf.h from the zlib-0.95 - * distribution by Jean-loup Gailly and Mark Adler, with some additions - * by Paul Mackerras to aid in implementing Deflate compression and - * decompression for PPP packets. - */ - -/* - * ==FILEVERSION 960122== - * - * This marker is used by the Linux installation script to determine - * whether an up-to-date version of this file is already installed. - */ - -/* zlib.h -- interface of the 'zlib' general purpose compression library - version 0.95, Aug 16th, 1995. - - Copyright (C) 1995 Jean-loup Gailly and Mark Adler - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. 
- - Jean-loup Gailly Mark Adler - gzip at prep.ai.mit.edu madler at alumni.caltech.edu - */ - -#ifndef _ZLIB_H -#define _ZLIB_H - -/* #include "zconf.h" */ /* included directly here */ - -/* zconf.h -- configuration of the zlib compression library - * Copyright (C) 1995 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* From: zconf.h,v 1.12 1995/05/03 17:27:12 jloup Exp */ - -/* - The library does not install any signal handler. It is recommended to - add at least a handler for SIGSEGV when decompressing; the library checks - the consistency of the input data whenever possible but may go nuts - for some forms of corrupted input. - */ - -/* - * Compile with -DMAXSEG_64K if the alloc function cannot allocate more - * than 64k bytes at a time (needed on systems with 16-bit int). - * Compile with -DUNALIGNED_OK if it is OK to access shorts or ints - * at addresses which are not a multiple of their size. - * Under DOS, -DFAR=far or -DFAR=__far may be needed. - */ - -#ifndef STDC -# if defined(MSDOS) || defined(__STDC__) || defined(__cplusplus) -# define STDC -# endif -#endif - -#ifdef __MWERKS__ /* Metrowerks CodeWarrior declares fileno() in unix.h */ -# include -#endif - -/* Maximum value for memLevel in deflateInit2 */ -#ifndef MAX_MEM_LEVEL -# ifdef MAXSEG_64K -# define MAX_MEM_LEVEL 8 -# else -# define MAX_MEM_LEVEL 9 -# endif -#endif - -#ifndef FAR -# define FAR -#endif - -/* Maximum value for windowBits in deflateInit2 and inflateInit2 */ -#ifndef MAX_WBITS -# define MAX_WBITS 15 /* 32K LZ77 window */ -#endif - -/* The memory requirements for deflate are (in bytes): - 1 << (windowBits+2) + 1 << (memLevel+9) - that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) - plus a few kilobytes for small objects. 
For example, if you want to reduce - the default memory requirements from 256K to 128K, compile with - make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" - Of course this will generally degrade compression (there's no free lunch). - - The memory requirements for inflate are (in bytes) 1 << windowBits - that is, 32K for windowBits=15 (default value) plus a few kilobytes - for small objects. -*/ - - /* Type declarations */ - -#ifndef OF /* function prototypes */ -# ifdef STDC -# define OF(args) args -# else -# define OF(args) () -# endif -#endif - -typedef unsigned char Byte; /* 8 bits */ -typedef unsigned int uInt; /* 16 bits or more */ -typedef unsigned long uLong; /* 32 bits or more */ - -typedef Byte FAR Bytef; -typedef char FAR charf; -typedef int FAR intf; -typedef uInt FAR uIntf; -typedef uLong FAR uLongf; - -#ifdef STDC - typedef void FAR *voidpf; - typedef void *voidp; -#else - typedef Byte FAR *voidpf; - typedef Byte *voidp; -#endif - -/* end of original zconf.h */ - -#define ZLIB_VERSION "0.95P" - -/* - The 'zlib' compression library provides in-memory compression and - decompression functions, including integrity checks of the uncompressed - data. This version of the library supports only one compression method - (deflation) but other algorithms may be added later and will have the same - stream interface. - - For compression the application must provide the output buffer and - may optionally provide the input buffer for optimization. For decompression, - the application must provide the input buffer and may optionally provide - the output buffer for optimization. - - Compression can be done in a single step if the buffers are large - enough (for example if an input file is mmap'ed), or can be done by - repeated calls of the compression function. In the latter case, the - application must provide more input and/or consume the output - (providing more output space) before each call. 
-*/ - -typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); -typedef void (*free_func) OF((voidpf opaque, voidpf address, uInt nbytes)); - -struct internal_state; - -typedef struct z_stream_s { - Bytef *next_in; /* next input byte */ - uInt avail_in; /* number of bytes available at next_in */ - uLong total_in; /* total nb of input bytes read so far */ - - Bytef *next_out; /* next output byte should be put there */ - uInt avail_out; /* remaining free space at next_out */ - uLong total_out; /* total nb of bytes output so far */ - - char *msg; /* last error message, NULL if no error */ - struct internal_state FAR *state; /* not visible by applications */ - - alloc_func zalloc; /* used to allocate the internal state */ - free_func zfree; /* used to free the internal state */ - voidp opaque; /* private data object passed to zalloc and zfree */ - - Byte data_type; /* best guess about the data type: ascii or binary */ - -} z_stream; - -/* - The application must update next_in and avail_in when avail_in has - dropped to zero. It must update next_out and avail_out when avail_out - has dropped to zero. The application must initialize zalloc, zfree and - opaque before calling the init function. All other fields are set by the - compression library and must not be updated by the application. - - The opaque value provided by the application will be passed as the first - parameter for calls of zalloc and zfree. This can be useful for custom - memory management. The compression library attaches no meaning to the - opaque value. - - zalloc must return Z_NULL if there is not enough memory for the object. - On 16-bit systems, the functions zalloc and zfree must be able to allocate - exactly 65536 bytes, but will not be required to allocate more than this - if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, - pointers returned by zalloc for objects of exactly 65536 bytes *must* - have their offset normalized to zero. 
The default allocation function - provided by this library ensures this (see zutil.c). To reduce memory - requirements and avoid any allocation of 64K objects, at the expense of - compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). - - The fields total_in and total_out can be used for statistics or - progress reports. After compression, total_in holds the total size of - the uncompressed data and may be saved for use in the decompressor - (particularly if the decompressor wants to decompress everything in - a single step). -*/ - - /* constants */ - -#define Z_NO_FLUSH 0 -#define Z_PARTIAL_FLUSH 1 -#define Z_FULL_FLUSH 2 -#define Z_SYNC_FLUSH 3 /* experimental: partial_flush + byte align */ -#define Z_FINISH 4 -#define Z_PACKET_FLUSH 5 -/* See deflate() below for the usage of these constants */ - -#define Z_OK 0 -#define Z_STREAM_END 1 -#define Z_ERRNO (-1) -#define Z_STREAM_ERROR (-2) -#define Z_DATA_ERROR (-3) -#define Z_MEM_ERROR (-4) -#define Z_BUF_ERROR (-5) -/* error codes for the compression/decompression functions */ - -#define Z_BEST_SPEED 1 -#define Z_BEST_COMPRESSION 9 -#define Z_DEFAULT_COMPRESSION (-1) -/* compression levels */ - -#define Z_FILTERED 1 -#define Z_HUFFMAN_ONLY 2 -#define Z_DEFAULT_STRATEGY 0 - -#define Z_BINARY 0 -#define Z_ASCII 1 -#define Z_UNKNOWN 2 -/* Used to set the data_type field */ - -#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ - -extern char *zlib_version; -/* The application can compare zlib_version and ZLIB_VERSION for consistency. - If the first character differs, the library code actually used is - not compatible with the zlib.h header file used by the application. - */ - - /* basic functions */ - -extern int inflateInit OF((z_stream *strm)); -/* - Initializes the internal stream state for decompression. The fields - zalloc and zfree must be initialized before by the caller. If zalloc and - zfree are set to Z_NULL, inflateInit updates them to use default allocation - functions. 
- - inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory. msg is set to null if there is no error message. - inflateInit does not perform any decompression: this will be done by - inflate(). -*/ - - -extern int inflate OF((z_stream *strm, int flush)); -/* - Performs one or both of the following actions: - - - Decompress more input starting at next_in and update next_in and avail_in - accordingly. If not all input can be processed (because there is not - enough room in the output buffer), next_in is updated and processing - will resume at this point for the next call of inflate(). - - - Provide more output starting at next_out and update next_out and avail_out - accordingly. inflate() always provides as much output as possible - (until there is no more input data or no more space in the output buffer). - - Before the call of inflate(), the application should ensure that at least - one of the actions is possible, by providing more input and/or consuming - more output, and updating the next_* and avail_* values accordingly. - The application can consume the uncompressed output when it wants, for - example when the output buffer is full (avail_out == 0), or after each - call of inflate(). - - If the parameter flush is set to Z_PARTIAL_FLUSH or Z_PACKET_FLUSH, - inflate flushes as much output as possible to the output buffer. The - flushing behavior of inflate is not specified for values of the flush - parameter other than Z_PARTIAL_FLUSH, Z_PACKET_FLUSH or Z_FINISH, but the - current implementation actually flushes as much output as possible - anyway. For Z_PACKET_FLUSH, inflate checks that once all the input data - has been consumed, it is expecting to see the length field of a stored - block; if not, it returns Z_DATA_ERROR. - - inflate() should normally be called until it returns Z_STREAM_END or an - error. 
However if all decompression is to be performed in a single step - (a single call of inflate), the parameter flush should be set to - Z_FINISH. In this case all pending input is processed and all pending - output is flushed; avail_out must be large enough to hold all the - uncompressed data. (The size of the uncompressed data may have been saved - by the compressor for this purpose.) The next operation on this stream must - be inflateEnd to deallocate the decompression state. The use of Z_FINISH - is never required, but can be used to inform inflate that a faster routine - may be used for the single inflate() call. - - inflate() returns Z_OK if some progress has been made (more input - processed or more output produced), Z_STREAM_END if the end of the - compressed data has been reached and all uncompressed output has been - produced, Z_DATA_ERROR if the input data was corrupted, Z_STREAM_ERROR if - the stream structure was inconsistent (for example if next_in or next_out - was NULL), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if no - progress is possible or if there was not enough room in the output buffer - when Z_FINISH is used. In the Z_DATA_ERROR case, the application may then - call inflateSync to look for a good compression block. */ - - -extern int inflateEnd OF((z_stream *strm)); -/* - All dynamically allocated data structures for this stream are freed. - This function discards any unprocessed input and does not flush any - pending output. - - inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state - was inconsistent. In the error case, msg may be set but then points to a - static string (which must not be deallocated). -*/ - - /* advanced functions */ - -extern int inflateInit2 OF((z_stream *strm, - int windowBits)); -/* - This is another version of inflateInit with more compression options. The - fields next_out, zalloc and zfree must be initialized before by the caller. 
- - The windowBits parameter is the base two logarithm of the maximum window - size (the size of the history buffer). It should be in the range 8..15 for - this version of the library (the value 16 will be allowed soon). The - default value is 15 if inflateInit is used instead. If a compressed stream - with a larger window size is given as input, inflate() will return with - the error code Z_DATA_ERROR instead of trying to allocate a larger window. - - If next_out is not null, the library will use this buffer for the history - buffer; the buffer must either be large enough to hold the entire output - data, or have at least 1< From: Olaf Hering Make the zImage relocateable. So yaboot could just load and run any ELF binary, without worrying about its load address. Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/Makefile | 2 +- arch/ppc64/boot/crt0.S | 29 +++++++++++++++++++++++++++++ arch/ppc64/boot/zImage.lds | 4 +++- 3 files changed, 33 insertions(+), 2 deletions(-) diff -puN arch/ppc64/boot/crt0.S~ppc64-boot-make-the-zimage-relocateable arch/ppc64/boot/crt0.S --- devel/arch/ppc64/boot/crt0.S~ppc64-boot-make-the-zimage-relocateable 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/crt0.S 2005-10-28 17:45:21.000000000 -0700 @@ -14,9 +14,38 @@ .text .globl _start _start: + bl reloc_offset + +reloc_offset: + mflr r0 + lis r9,reloc_offset at ha + addi r9,r9,reloc_offset at l + subf. r0,r9,r0 + beq clear_caches + +reloc_got2: + lis r9,__got2_start at ha + addi r9,r9,__got2_start at l + lis r8,__got2_end at ha + addi r8,r8,__got2_end at l + subf. r8,r9,r8 + beq clear_caches + srwi. 
r8,r8,2 + mtctr r8 + add r9,r0,r9 +reloc_got2_loop: + lwz r8,0(r9) + add r8,r8,r0 + stw r8,0(r9) + addi r9,r9,4 + bdnz reloc_got2_loop + +clear_caches: lis r9,_start at h + add r9,r0,r9 lis r8,_etext at ha addi r8,r8,_etext at l + add r8,r0,r8 1: dcbf r0,r9 icbi r0,r9 addi r9,r9,0x20 diff -puN arch/ppc64/boot/Makefile~ppc64-boot-make-the-zimage-relocateable arch/ppc64/boot/Makefile --- devel/arch/ppc64/boot/Makefile~ppc64-boot-make-the-zimage-relocateable 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/Makefile 2005-10-28 17:45:21.000000000 -0700 @@ -22,7 +22,7 @@ HOSTCC := gcc -BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) +BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) -fPIC BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data diff -puN arch/ppc64/boot/zImage.lds~ppc64-boot-make-the-zimage-relocateable arch/ppc64/boot/zImage.lds --- devel/arch/ppc64/boot/zImage.lds~ppc64-boot-make-the-zimage-relocateable 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/zImage.lds 2005-10-28 17:45:21.000000000 -0700 @@ -13,7 +13,9 @@ SECTIONS *(.rodata*) *(.data*) *(.sdata*) - *(.got*) + __got2_start = .; + *(.got2) + __got2_end = .; } . = ALIGN(4096); _ From akpm at osdl.org Sat Oct 29 10:46:50 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:50 -0700 Subject: [patch 35/43] ppc64 boot: print firmware provided stackpointer Message-ID: <200510290047.j9T0lLdN030108@shell0.pdx.osdl.net> From: Olaf Hering Show firmware provided stackpointer during boot. This helps to find the "taboo" areas on the various boards. claim tends to fail for these memory areas, but some jokers return success anyway. Use %p to print the load address, its a pointer. 
Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/crt0.S | 1 + arch/ppc64/boot/main.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff -puN arch/ppc64/boot/crt0.S~ppc64-boot-print-firmware-provided-stackpointer arch/ppc64/boot/crt0.S --- devel/arch/ppc64/boot/crt0.S~ppc64-boot-print-firmware-provided-stackpointer 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/crt0.S 2005-10-28 17:44:04.000000000 -0700 @@ -54,5 +54,6 @@ clear_caches: sync isync + mr r6,r1 b start diff -puN arch/ppc64/boot/main.c~ppc64-boot-print-firmware-provided-stackpointer arch/ppc64/boot/main.c --- devel/arch/ppc64/boot/main.c~ppc64-boot-print-firmware-provided-stackpointer 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/main.c 2005-10-28 17:44:04.000000000 -0700 @@ -131,7 +131,7 @@ static unsigned long try_claim(unsigned return addr; } -void start(unsigned long a1, unsigned long a2, void *promptr) +void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) { unsigned long i; int len; @@ -151,7 +151,7 @@ void start(unsigned long a1, unsigned lo if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) exit(); - printf("\n\rzImage starting: loaded at 0x%lx\n\r", (unsigned long) _start); + printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r", _start, sp); /* * The first available claim_base must be above the end of the _ From akpm at osdl.org Sat Oct 29 10:46:51 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:51 -0700 Subject: [patch 36/43] ppc64: AC Power handling broken for desktops Message-ID: <200510290047.j9T0lMmf030111@shell0.pdx.osdl.net> From: Olaf Hering Currently, AC Power is 0 on a desktop G4. No batteries present should mean AC Power == 1. 
Signed-off-by: Olaf Hering Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton --- drivers/macintosh/apm_emu.c | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) diff -puN drivers/macintosh/apm_emu.c~ppc64-ac-power-handling-broken-for-desktops drivers/macintosh/apm_emu.c --- devel/drivers/macintosh/apm_emu.c~ppc64-ac-power-handling-broken-for-desktops 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/drivers/macintosh/apm_emu.c 2005-10-28 17:44:04.000000000 -0700 @@ -430,8 +430,8 @@ static int apm_emu_get_info(char *buf, c -1: Unknown 8) min = minutes; sec = seconds */ - unsigned short ac_line_status = 0xff; - unsigned short battery_status = 0xff; + unsigned short ac_line_status; + unsigned short battery_status = 0; unsigned short battery_flag = 0xff; int percentage = -1; int time_units = -1; @@ -446,6 +446,7 @@ static int apm_emu_get_info(char *buf, c ac_line_status = ((pmu_power_flags & PMU_PWR_AC_PRESENT) != 0); for (i=0; i From: Olaf Hering To prove that the relocation works, move the crt0.o away from the beginning. Move linker options from command line into linker script. rename entry point because '_start' is referenced in printf output. 
Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/Makefile | 4 ++-- arch/ppc64/boot/crt0.S | 4 ++-- arch/ppc64/boot/zImage.lds | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff -puN arch/ppc64/boot/crt0.S~ppc64-boot-proof-that-reloc-works arch/ppc64/boot/crt0.S --- devel/arch/ppc64/boot/crt0.S~ppc64-boot-proof-that-reloc-works 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/crt0.S 2005-10-28 17:45:21.000000000 -0700 @@ -12,8 +12,8 @@ #include "ppc_asm.h" .text - .globl _start -_start: + .globl _zimage_start +_zimage_start: bl reloc_offset reloc_offset: diff -puN arch/ppc64/boot/Makefile~ppc64-boot-proof-that-reloc-works arch/ppc64/boot/Makefile --- devel/arch/ppc64/boot/Makefile~ppc64-boot-proof-that-reloc-works 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/Makefile 2005-10-28 17:44:04.000000000 -0700 @@ -24,7 +24,7 @@ HOSTCC := gcc BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) -fPIC BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc -BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds +BOOTLFLAGS := -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c @@ -34,7 +34,7 @@ zliblinuxheader := zlib.h zconf.h zutil. 
$(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) #$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h) -src-boot := crt0.S string.S prom.c main.c div64.S +src-boot := string.S prom.c main.c div64.S crt0.S src-boot += $(zlib) src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) diff -puN arch/ppc64/boot/zImage.lds~ppc64-boot-proof-that-reloc-works arch/ppc64/boot/zImage.lds --- devel/arch/ppc64/boot/zImage.lds~ppc64-boot-proof-that-reloc-works 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/zImage.lds 2005-10-28 17:44:04.000000000 -0700 @@ -1,6 +1,9 @@ OUTPUT_ARCH(powerpc:common) +ENTRY(_zimage_start) SECTIONS { + . = (4*1024*1024); + _start = .; .text : { *(.text) _ From akpm at osdl.org Sat Oct 29 10:46:54 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:54 -0700 Subject: [patch 39/43] ppc64: compile nls_cp437 and nls_iso8859_1 into the kernel in defconfig Message-ID: <200510290047.j9T0lOho030120@shell0.pdx.osdl.net> From: Olaf Hering compile nls_cp437 and nls_iso8859_1 into the kernel in defconfig. This is already enabled in pSeries_defconfig. Reason: if one just boots the new shiny zImage and the root filesystem is on a filesystem not readable by yaboot (like jfs, raid or lvm) upgrading the bootloader will fail because the FAT bootpartition can not be mounted. 
Signed-off-by: Olaf Hering Signed-off-by: Andrew Morton --- arch/ppc64/defconfig | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff -puN arch/ppc64/defconfig~ppc64-compile-nls_cp437-and-nls_iso8859_1-into-the-kernel-in-defconfig arch/ppc64/defconfig --- devel/arch/ppc64/defconfig~ppc64-compile-nls_cp437-and-nls_iso8859_1-into-the-kernel-in-defconfig 2005-10-28 17:44:05.000000000 -0700 +++ devel-akpm/arch/ppc64/defconfig 2005-10-28 17:44:05.000000000 -0700 @@ -1318,7 +1318,7 @@ CONFIG_MSDOS_PARTITION=y # CONFIG_NLS=y CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=m CONFIG_NLS_CODEPAGE_775=m CONFIG_NLS_CODEPAGE_850=m @@ -1342,7 +1342,7 @@ CONFIG_NLS_ISO8859_8=m CONFIG_NLS_CODEPAGE_1250=m CONFIG_NLS_CODEPAGE_1251=m CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_2=m CONFIG_NLS_ISO8859_3=m CONFIG_NLS_ISO8859_4=m _ From akpm at osdl.org Sat Oct 29 10:46:55 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:55 -0700 Subject: [patch 40/43] ppc64: reenable make install with defconfig Message-ID: <200510290047.j9T0lPEf030123@shell0.pdx.osdl.net> From: Olaf Hering 'make ARCH=ppc64 O=../O install' does not work with the defconfig. CONFIG_PPC_BPA is part of it, but the BPA bootimage variable is wrong: make[2]: *** No rule to make target `zImage', needed by `install'. Stop. 
Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/Makefile | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) diff -puN arch/ppc64/Makefile~ppc64-reenable-make-install-with-defconfig arch/ppc64/Makefile --- devel/arch/ppc64/Makefile~ppc64-reenable-make-install-with-defconfig 2005-10-28 17:44:05.000000000 -0700 +++ devel-akpm/arch/ppc64/Makefile 2005-10-28 17:44:05.000000000 -0700 @@ -103,7 +103,7 @@ $(boottargets-y): vmlinux bootimage-$(CONFIG_PPC_PSERIES) := $(boot)/zImage bootimage-$(CONFIG_PPC_PMAC) := vmlinux bootimage-$(CONFIG_PPC_MAPLE) := $(boot)/zImage -bootimage-$(CONFIG_PPC_BPA) := zImage +bootimage-$(CONFIG_PPC_BPA) := $(boot)/zImage bootimage-$(CONFIG_PPC_ISERIES) := vmlinux BOOTIMAGE := $(bootimage-y) install: vmlinux _ From akpm at osdl.org Sat Oct 29 10:46:56 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:56 -0700 Subject: [patch 41/43] ppc64: change name of target file during make install Message-ID: <200510290047.j9T0lRGc030126@shell0.pdx.osdl.net> From: Olaf Hering 'make install' creates a /boot/zImage[.vmode] file when the defconfig is used. It uses the second arg as file content, which is the vmlinux, and the 5th arg as file name, which is the BOOTIMAGE name. A comment in an earlier patch to install.sh states that yaboot can not load a zImage+initrd combo. This was true in kernel 2.6.5 because it did use bi_recs to pass the initrd info. But this concept was always broken. Register r3 holds the initrd address and r4 holds the initrd size. This works with all kernel versions. The current code in main.c leaves r3 and r4 alone, so the kernel should be able to see and use the memory range with the initrd content. If one wants to rerun mkinitrd, it is currently hard to get the uname -r value for the installed zImage. Without this info, mkinitrd can not know what modules to use. 
This would be fixable by including the /proc/version output of the new kernel. But it is simpler to just use the plain vmlinux. So all this patch does is to write to /boot/vmlinux instead of /boot/zImage Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/boot/install.sh | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) diff -puN arch/ppc64/boot/install.sh~ppc64-change-name-of-target-file-during-make-install arch/ppc64/boot/install.sh --- devel/arch/ppc64/boot/install.sh~ppc64-change-name-of-target-file-during-make-install 2005-10-28 17:44:05.000000000 -0700 +++ devel-akpm/arch/ppc64/boot/install.sh 2005-10-28 17:44:05.000000000 -0700 @@ -28,7 +28,7 @@ if [ -x /sbin/${CROSS_COMPILE}installker # Default install # this should work for both the pSeries zImage and the iSeries vmlinux.sm -image_name=`basename $5` +image_name=`basename $2` if [ -f $4/$image_name ]; then mv $4/$image_name $4/$image_name.old _ From akpm at osdl.org Sat Oct 29 10:46:53 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:53 -0700 Subject: [patch 38/43] Various powerpc 32bit ppc64 build fixes Message-ID: <200510290047.j9T0lNNJ030117@shell0.pdx.osdl.net> From: Sven Luther Find here attached two small build fixes for the 32bit ppc64 kernels. I know the support for those kernels will soon go away, and Debian is not using them anymore, but until then it is nice to not have it broken, since other users may need it.
Signed-off-by: Sven Luther Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton --- arch/ppc/boot/simple/misc-prep.c | 2 ++ arch/ppc/platforms/Makefile | 6 ++++++ 2 files changed, 8 insertions(+) diff -puN arch/ppc/boot/simple/misc-prep.c~various-powerpc-32bit-ppc64-build-fixes arch/ppc/boot/simple/misc-prep.c --- devel/arch/ppc/boot/simple/misc-prep.c~various-powerpc-32bit-ppc64-build-fixes 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc/boot/simple/misc-prep.c 2005-10-28 17:44:04.000000000 -0700 @@ -152,9 +152,11 @@ load_kernel(unsigned long load_addr, int hold_residual->VitalProductData.Reserved5 = 0xdeadbeef; } +#if defined(CONFIG_6xx) /* Now go and clear out the BATs and ensure that our MSR is * correct .*/ disable_6xx_mmu(); +#endif /* Make r3 be a pointer to the residual data. */ return (unsigned long)hold_residual; diff -puN arch/ppc/platforms/Makefile~various-powerpc-32bit-ppc64-build-fixes arch/ppc/platforms/Makefile --- devel/arch/ppc/platforms/Makefile~various-powerpc-32bit-ppc64-build-fixes 2005-10-28 17:44:04.000000000 -0700 +++ devel-akpm/arch/ppc/platforms/Makefile 2005-10-28 17:44:04.000000000 -0700 @@ -9,9 +9,15 @@ obj-$(CONFIG_APUS) += apus_setup.o ifeq ($(CONFIG_APUS),y) obj-$(CONFIG_PCI) += apus_pci.o endif +ifeq ($(CONFIG_6xx),y) obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ pmac_feature.o pmac_pci.o pmac_sleep.o \ pmac_low_i2c.o pmac_cache.o +else +obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ + pmac_feature.o pmac_pci.o pmac_sleep.o \ + pmac_low_i2c.o +endif obj-$(CONFIG_PPC_CHRP) += chrp_setup.o chrp_time.o chrp_pci.o \ chrp_pegasos_eth.o ifeq ($(CONFIG_PPC_CHRP),y) _ From paulus at samba.org Sat Oct 29 13:24:28 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 29 Oct 2005 13:24:28 +1000 Subject: [patch 13/43] Add MAINTAINER entry for the new PowerPC 4xx on-chip ethernet controller driver In-Reply-To: <20051029012413.GA1371@gate.ebshome.net> References: 
<200510290046.j9T0ks7c030026@shell0.pdx.osdl.net> <20051029012413.GA1371@gate.ebshome.net> Message-ID: <17250.60268.703445.281015@cargo.ozlabs.ibm.com> Eugene Surovegin writes: > Paul, drop this and the previous patch. They will go in through netdev > tree and aren't affected by the ppc/ppc64 merge. Sure, that makes it easier for me. :) Paul. From akpm at osdl.org Sat Oct 29 10:46:22 2005 From: akpm at osdl.org (akpm at osdl.org) Date: Fri, 28 Oct 2005 17:46:22 -0700 Subject: [patch 12/43] New PowerPC 4xx on-chip ethernet controller driver Message-ID: <200510290046.j9T0krpC030023@shell0.pdx.osdl.net> From: Eugene Surovegin This patch replaces the current PowerPC 4xx EMAC driver with a new version, rewritten from scratch. This patch is quite big (~234K) because there is virtually 0% of common code between the old and new versions. New driver uses NAPI, it solves stability problems under heavy packet load and low memory, corrects chip register access and fixes numerous small bugs I don't even remember now. This patch has been tested on all PPC 4xx boards supported in 2.6. It's been used in production for almost a year now on custom 4xx hardware. PPC32 specific parts are already upstream. Patch was acked by the current EMAC driver maintainer (Matt Porter). I will be maintaining this new version.
Signed-off-by: Eugene Surovegin Signed-off-by: Andrew Morton --- drivers/net/Kconfig | 72 drivers/net/ibm_emac/Makefile | 11 drivers/net/ibm_emac/ibm_emac.h | 418 ++-- drivers/net/ibm_emac/ibm_emac_core.c | 3414 ++++++++++++++++++---------------- drivers/net/ibm_emac/ibm_emac_core.h | 313 +-- drivers/net/ibm_emac/ibm_emac_debug.c | 377 +-- drivers/net/ibm_emac/ibm_emac_debug.h | 63 drivers/net/ibm_emac/ibm_emac_mal.c | 674 +++--- drivers/net/ibm_emac/ibm_emac_mal.h | 336 ++- drivers/net/ibm_emac/ibm_emac_phy.c | 335 +-- drivers/net/ibm_emac/ibm_emac_phy.h | 105 - drivers/net/ibm_emac/ibm_emac_rgmii.c | 201 ++ drivers/net/ibm_emac/ibm_emac_rgmii.h | 68 drivers/net/ibm_emac/ibm_emac_tah.c | 111 + drivers/net/ibm_emac/ibm_emac_tah.h | 96 drivers/net/ibm_emac/ibm_emac_zmii.c | 255 ++ drivers/net/ibm_emac/ibm_emac_zmii.h | 114 - 17 files changed, 4113 insertions(+), 2850 deletions(-) diff -puN drivers/net/ibm_emac/ibm_emac_core.c~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_core.c --- devel/drivers/net/ibm_emac/ibm_emac_core.c~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_core.c 2005-10-28 17:44:03.000000000 -0700 @@ -1,13 +1,14 @@ /* - * ibm_emac_core.c + * drivers/net/ibm_emac/ibm_emac_core.c * - * Ethernet driver for the built in ethernet on the IBM 4xx PowerPC - * processors. - * - * (c) 2003 Benjamin Herrenschmidt + * Driver for PowerPC 4xx on-chip ethernet controller. * - * Based on original work by + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin or * + * Based on original work by + * Matt Porter + * (c) 2003 Benjamin Herrenschmidt * Armin Kuster * Johnnie Peters * @@ -15,29 +16,24 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. 
- * TODO - * - Check for races in the "remove" code path - * - Add some Power Management to the MAC and the PHY - * - Audit remaining of non-rewritten code (--BenH) - * - Cleanup message display using msglevel mecanism - * - Address all errata - * - Audit all register update paths to ensure they - * are being written post soft reset if required. + * */ + +#include #include #include #include #include -#include -#include #include -#include -#include #include #include #include #include -#include +#include +#include +#include +#include +#include #include #include #include @@ -45,1691 +41,1893 @@ #include #include #include -#include #include #include -#include -#include -#include -#include - #include "ibm_emac_core.h" - -//#define MDIO_DEBUG(fmt) printk fmt -#define MDIO_DEBUG(fmt) - -//#define LINK_DEBUG(fmt) printk fmt -#define LINK_DEBUG(fmt) - -//#define PKT_DEBUG(fmt) printk fmt -#define PKT_DEBUG(fmt) - -#define DRV_NAME "emac" -#define DRV_VERSION "2.0" -#define DRV_AUTHOR "Benjamin Herrenschmidt " -#define DRV_DESC "IBM EMAC Ethernet driver" +#include "ibm_emac_debug.h" /* - * When mdio_idx >= 0, contains a list of emac ocp_devs - * that have had their initialization deferred until the - * common MDIO controller has been initialized. + * Lack of dma_unmap_???? calls is intentional. + * + * API-correct usage requires additional support state information to be + * maintained for every RX and TX buffer descriptor (BD). Unfortunately, due to + * EMAC design (e.g. TX buffer passed from network stack can be split into + * several BDs, dma_map_single/dma_map_page can be used to map particular BD), + * maintaining such information will add additional overhead. + * Current DMA API implementation for 4xx processors only ensures cache coherency + * and dma_unmap_???? routines are empty and are likely to stay this way. + * I decided to omit dma_unmap_??? 
calls because I don't want to add additional + * complexity just for the sake of following some abstract API, when it doesn't + * add any real benefit to the driver. I understand that this decision maybe + * controversial, but I really tried to make code API-correct and efficient + * at the same time and didn't come up with code I liked :(. --ebs */ -LIST_HEAD(emac_init_list); -MODULE_AUTHOR(DRV_AUTHOR); +#define DRV_NAME "emac" +#define DRV_VERSION "3.53" +#define DRV_DESC "PPC 4xx OCP EMAC driver" + MODULE_DESCRIPTION(DRV_DESC); +MODULE_AUTHOR + ("Eugene Surovegin or "); MODULE_LICENSE("GPL"); -static int skb_res = SKB_RES; -module_param(skb_res, int, 0444); -MODULE_PARM_DESC(skb_res, "Amount of data to reserve on skb buffs\n" - "The 405 handles a misaligned IP header fine but\n" - "this can help if you are routing to a tunnel or a\n" - "device that needs aligned data. 0..2"); - -#define RGMII_PRIV(ocpdev) ((struct ibm_ocp_rgmii*)ocp_get_drvdata(ocpdev)) - -static unsigned int rgmii_enable[] = { - RGMII_RTBI, - RGMII_RGMII, - RGMII_TBI, - RGMII_GMII -}; +/* minimum number of free TX descriptors required to wake up TX process */ +#define EMAC_TX_WAKEUP_THRESH (NUM_TX_BUFF / 4) -static unsigned int rgmii_speed_mask[] = { - RGMII_MII2_SPDMASK, - RGMII_MII3_SPDMASK -}; - -static unsigned int rgmii_speed100[] = { - RGMII_MII2_100MB, - RGMII_MII3_100MB -}; - -static unsigned int rgmii_speed1000[] = { - RGMII_MII2_1000MB, - RGMII_MII3_1000MB -}; - -#define ZMII_PRIV(ocpdev) ((struct ibm_ocp_zmii*)ocp_get_drvdata(ocpdev)) - -static unsigned int zmii_enable[][4] = { - {ZMII_SMII0, ZMII_RMII0, ZMII_MII0, - ~(ZMII_MDI1 | ZMII_MDI2 | ZMII_MDI3)}, - {ZMII_SMII1, ZMII_RMII1, ZMII_MII1, - ~(ZMII_MDI0 | ZMII_MDI2 | ZMII_MDI3)}, - {ZMII_SMII2, ZMII_RMII2, ZMII_MII2, - ~(ZMII_MDI0 | ZMII_MDI1 | ZMII_MDI3)}, - {ZMII_SMII3, ZMII_RMII3, ZMII_MII3, ~(ZMII_MDI0 | ZMII_MDI1 | ZMII_MDI2)} -}; - -static unsigned int mdi_enable[] = { - ZMII_MDI0, - ZMII_MDI1, - ZMII_MDI2, - ZMII_MDI3 -}; - 
-static unsigned int zmii_speed = 0x0; -static unsigned int zmii_speed100[] = { - ZMII_MII0_100MB, - ZMII_MII1_100MB, - ZMII_MII2_100MB, - ZMII_MII3_100MB -}; +/* If packet size is less than this number, we allocate small skb and copy packet + * contents into it instead of just sending original big skb up + */ +#define EMAC_RX_COPY_THRESH CONFIG_IBM_EMAC_RX_COPY_THRESHOLD /* Since multiple EMACs share MDIO lines in various ways, we need * to avoid re-using the same PHY ID in cases where the arch didn't * setup precise phy_map entries */ -static u32 busy_phy_map = 0; +static u32 busy_phy_map; -/* If EMACs share a common MDIO device, this points to it */ -static struct net_device *mdio_ndev = NULL; - -struct emac_def_dev { - struct list_head link; - struct ocp_device *ocpdev; - struct ibm_ocp_mal *mal; -}; - -static struct net_device_stats *emac_stats(struct net_device *dev) -{ - struct ocp_enet_private *fep = dev->priv; - return &fep->stats; -}; - -static int -emac_init_rgmii(struct ocp_device *rgmii_dev, int input, int phy_mode) +#if defined(CONFIG_IBM_EMAC_PHY_RX_CLK_FIX) && (defined(CONFIG_405EP) || defined(CONFIG_440EP)) +/* 405EP has "EMAC to PHY Control Register" (CPC0_EPCTL) which can help us + * with PHY RX clock problem. 
+ * 440EP has more sane SDR0_MFR register implementation than 440GX, which + * also allows controlling each EMAC clock + */ +static inline void EMAC_RX_CLK_TX(int idx) { - struct ibm_ocp_rgmii *rgmii = RGMII_PRIV(rgmii_dev); - const char *mode_name[] = { "RTBI", "RGMII", "TBI", "GMII" }; - int mode = -1; + unsigned long flags; + local_irq_save(flags); - if (!rgmii) { - rgmii = kmalloc(sizeof(struct ibm_ocp_rgmii), GFP_KERNEL); +#if defined(CONFIG_405EP) + mtdcr(0xf3, mfdcr(0xf3) | (1 << idx)); +#else /* CONFIG_440EP */ + SDR_WRITE(DCRN_SDR_MFR, SDR_READ(DCRN_SDR_MFR) | (0x08000000 >> idx)); +#endif - if (rgmii == NULL) { - printk(KERN_ERR - "rgmii%d: Out of memory allocating RGMII structure!\n", - rgmii_dev->def->index); - return -ENOMEM; - } + local_irq_restore(flags); +} - memset(rgmii, 0, sizeof(*rgmii)); +static inline void EMAC_RX_CLK_DEFAULT(int idx) +{ + unsigned long flags; + local_irq_save(flags); - rgmii->base = - (struct rgmii_regs *)ioremap(rgmii_dev->def->paddr, - sizeof(*rgmii->base)); - if (rgmii->base == NULL) { - printk(KERN_ERR - "rgmii%d: Cannot ioremap bridge registers!\n", - rgmii_dev->def->index); +#if defined(CONFIG_405EP) + mtdcr(0xf3, mfdcr(0xf3) & ~(1 << idx)); +#else /* CONFIG_440EP */ + SDR_WRITE(DCRN_SDR_MFR, SDR_READ(DCRN_SDR_MFR) & ~(0x08000000 >> idx)); +#endif - kfree(rgmii); - return -ENOMEM; - } - ocp_set_drvdata(rgmii_dev, rgmii); - } + local_irq_restore(flags); +} +#else +#define EMAC_RX_CLK_TX(idx) ((void)0) +#define EMAC_RX_CLK_DEFAULT(idx) ((void)0) +#endif - if (phy_mode) { - switch (phy_mode) { - case PHY_MODE_GMII: - mode = GMII; - break; - case PHY_MODE_TBI: - mode = TBI; - break; - case PHY_MODE_RTBI: - mode = RTBI; - break; - case PHY_MODE_RGMII: - default: - mode = RGMII; - } - rgmii->base->fer &= ~RGMII_FER_MASK(input); - rgmii->base->fer |= rgmii_enable[mode] << (4 * input); - } else { - switch ((rgmii->base->fer & RGMII_FER_MASK(input)) >> (4 * - input)) { - case RGMII_RTBI: - mode = RTBI; - break; - case 
RGMII_RGMII: - mode = RGMII; - break; - case RGMII_TBI: - mode = TBI; - break; - case RGMII_GMII: - mode = GMII; - } - } +#if defined(CONFIG_IBM_EMAC_PHY_RX_CLK_FIX) && defined(CONFIG_440GX) +/* We can switch Ethernet clock to the internal source through SDR0_MFR[ECS], + * unfortunately this is less flexible than 440EP case, because it's a global + * setting for all EMACs, therefore we do this clock trick only during probe. + */ +#define EMAC_CLK_INTERNAL SDR_WRITE(DCRN_SDR_MFR, \ + SDR_READ(DCRN_SDR_MFR) | 0x08000000) +#define EMAC_CLK_EXTERNAL SDR_WRITE(DCRN_SDR_MFR, \ + SDR_READ(DCRN_SDR_MFR) & ~0x08000000) +#else +#define EMAC_CLK_INTERNAL ((void)0) +#define EMAC_CLK_EXTERNAL ((void)0) +#endif - /* Set mode to RGMII if nothing valid is detected */ - if (mode < 0) - mode = RGMII; +/* I don't want to litter system log with timeout errors + * when we have brain-damaged PHY. + */ +static inline void emac_report_timeout_error(struct ocp_enet_private *dev, + const char *error) +{ +#if defined(CONFIG_IBM_EMAC_PHY_RX_CLK_FIX) + DBG("%d: %s" NL, dev->def->index, error); +#else + if (net_ratelimit()) + printk(KERN_ERR "emac%d: %s\n", dev->def->index, error); +#endif +} - printk(KERN_NOTICE "rgmii%d: input %d in %s mode\n", - rgmii_dev->def->index, input, mode_name[mode]); +/* PHY polling intervals */ +#define PHY_POLL_LINK_ON HZ +#define PHY_POLL_LINK_OFF (HZ / 5) + +/* Please, keep in sync with struct ibm_emac_stats/ibm_emac_error_stats */ +static const char emac_stats_keys[EMAC_ETHTOOL_STATS_COUNT][ETH_GSTRING_LEN] = { + "rx_packets", "rx_bytes", "tx_packets", "tx_bytes", "rx_packets_csum", + "tx_packets_csum", "tx_undo", "rx_dropped_stack", "rx_dropped_oom", + "rx_dropped_error", "rx_dropped_resize", "rx_dropped_mtu", + "rx_stopped", "rx_bd_errors", "rx_bd_overrun", "rx_bd_bad_packet", + "rx_bd_runt_packet", "rx_bd_short_event", "rx_bd_alignment_error", + "rx_bd_bad_fcs", "rx_bd_packet_too_long", "rx_bd_out_of_range", + "rx_bd_in_range", "rx_parity", 
"rx_fifo_overrun", "rx_overrun", + "rx_bad_packet", "rx_runt_packet", "rx_short_event", + "rx_alignment_error", "rx_bad_fcs", "rx_packet_too_long", + "rx_out_of_range", "rx_in_range", "tx_dropped", "tx_bd_errors", + "tx_bd_bad_fcs", "tx_bd_carrier_loss", "tx_bd_excessive_deferral", + "tx_bd_excessive_collisions", "tx_bd_late_collision", + "tx_bd_multple_collisions", "tx_bd_single_collision", + "tx_bd_underrun", "tx_bd_sqe", "tx_parity", "tx_underrun", "tx_sqe", + "tx_errors" +}; - rgmii->mode[input] = mode; - rgmii->users++; +static irqreturn_t emac_irq(int irq, void *dev_instance, struct pt_regs *regs); +static void emac_clean_tx_ring(struct ocp_enet_private *dev); - return 0; +static inline int emac_phy_supports_gige(int phy_mode) +{ + return phy_mode == PHY_MODE_GMII || + phy_mode == PHY_MODE_RGMII || + phy_mode == PHY_MODE_TBI || + phy_mode == PHY_MODE_RTBI; } -static void -emac_rgmii_port_speed(struct ocp_device *ocpdev, int input, int speed) +static inline int emac_phy_gpcs(int phy_mode) { - struct ibm_ocp_rgmii *rgmii = RGMII_PRIV(ocpdev); - unsigned int rgmii_speed; + return phy_mode == PHY_MODE_TBI || + phy_mode == PHY_MODE_RTBI; +} - rgmii_speed = in_be32(&rgmii->base->ssr); +static inline void emac_tx_enable(struct ocp_enet_private *dev) +{ + struct emac_regs *p = dev->emacp; + unsigned long flags; + u32 r; - rgmii_speed &= ~rgmii_speed_mask[input]; + local_irq_save(flags); - if (speed == 1000) - rgmii_speed |= rgmii_speed1000[input]; - else if (speed == 100) - rgmii_speed |= rgmii_speed100[input]; + DBG("%d: tx_enable" NL, dev->def->index); - out_be32(&rgmii->base->ssr, rgmii_speed); + r = in_be32(&p->mr0); + if (!(r & EMAC_MR0_TXE)) + out_be32(&p->mr0, r | EMAC_MR0_TXE); + local_irq_restore(flags); } -static void emac_close_rgmii(struct ocp_device *ocpdev) +static void emac_tx_disable(struct ocp_enet_private *dev) { - struct ibm_ocp_rgmii *rgmii = RGMII_PRIV(ocpdev); - BUG_ON(!rgmii || rgmii->users == 0); + struct emac_regs *p = dev->emacp; + unsigned 
long flags; + u32 r; + + local_irq_save(flags); - if (!--rgmii->users) { - ocp_set_drvdata(ocpdev, NULL); - iounmap((void *)rgmii->base); - kfree(rgmii); + DBG("%d: tx_disable" NL, dev->def->index); + + r = in_be32(&p->mr0); + if (r & EMAC_MR0_TXE) { + int n = 300; + out_be32(&p->mr0, r & ~EMAC_MR0_TXE); + while (!(in_be32(&p->mr0) & EMAC_MR0_TXI) && n) + --n; + if (unlikely(!n)) + emac_report_timeout_error(dev, "TX disable timeout"); } + local_irq_restore(flags); } -static int emac_init_zmii(struct ocp_device *zmii_dev, int input, int phy_mode) +static void emac_rx_enable(struct ocp_enet_private *dev) { - struct ibm_ocp_zmii *zmii = ZMII_PRIV(zmii_dev); - const char *mode_name[] = { "SMII", "RMII", "MII" }; - int mode = -1; + struct emac_regs *p = dev->emacp; + unsigned long flags; + u32 r; - if (!zmii) { - zmii = kmalloc(sizeof(struct ibm_ocp_zmii), GFP_KERNEL); - if (zmii == NULL) { - printk(KERN_ERR - "zmii%d: Out of memory allocating ZMII structure!\n", - zmii_dev->def->index); - return -ENOMEM; - } - memset(zmii, 0, sizeof(*zmii)); - - zmii->base = - (struct zmii_regs *)ioremap(zmii_dev->def->paddr, - sizeof(*zmii->base)); - if (zmii->base == NULL) { - printk(KERN_ERR - "zmii%d: Cannot ioremap bridge registers!\n", - zmii_dev->def->index); + local_irq_save(flags); + if (unlikely(dev->commac.rx_stopped)) + goto out; - kfree(zmii); - return -ENOMEM; - } - ocp_set_drvdata(zmii_dev, zmii); - } + DBG("%d: rx_enable" NL, dev->def->index); - if (phy_mode) { - switch (phy_mode) { - case PHY_MODE_MII: - mode = MII; - break; - case PHY_MODE_RMII: - mode = RMII; - break; - case PHY_MODE_SMII: - default: - mode = SMII; - } - zmii->base->fer &= ~ZMII_FER_MASK(input); - zmii->base->fer |= zmii_enable[input][mode]; - } else { - switch ((zmii->base->fer & ZMII_FER_MASK(input)) << (4 * input)) { - case ZMII_MII0: - mode = MII; - break; - case ZMII_RMII0: - mode = RMII; - break; - case ZMII_SMII0: - mode = SMII; + r = in_be32(&p->mr0); + if (!(r & EMAC_MR0_RXE)) { + if 
(unlikely(!(r & EMAC_MR0_RXI))) { + /* Wait if previous async disable is still in progress */ + int n = 100; + while (!(r = in_be32(&p->mr0) & EMAC_MR0_RXI) && n) + --n; + if (unlikely(!n)) + emac_report_timeout_error(dev, + "RX disable timeout"); } + out_be32(&p->mr0, r | EMAC_MR0_RXE); } - - /* Set mode to SMII if nothing valid is detected */ - if (mode < 0) - mode = SMII; - - printk(KERN_NOTICE "zmii%d: input %d in %s mode\n", - zmii_dev->def->index, input, mode_name[mode]); - - zmii->mode[input] = mode; - zmii->users++; - - return 0; + out: + local_irq_restore(flags); } -static void emac_enable_zmii_port(struct ocp_device *ocpdev, int input) +static void emac_rx_disable(struct ocp_enet_private *dev) { - u32 mask; - struct ibm_ocp_zmii *zmii = ZMII_PRIV(ocpdev); - - mask = in_be32(&zmii->base->fer); - mask &= zmii_enable[input][MDI]; /* turn all non enabled MDI's off */ - mask |= zmii_enable[input][zmii->mode[input]] | mdi_enable[input]; - out_be32(&zmii->base->fer, mask); -} + struct emac_regs *p = dev->emacp; + unsigned long flags; + u32 r; -static void -emac_zmii_port_speed(struct ocp_device *ocpdev, int input, int speed) -{ - struct ibm_ocp_zmii *zmii = ZMII_PRIV(ocpdev); + local_irq_save(flags); - if (speed == 100) - zmii_speed |= zmii_speed100[input]; - else - zmii_speed &= ~zmii_speed100[input]; + DBG("%d: rx_disable" NL, dev->def->index); - out_be32(&zmii->base->ssr, zmii_speed); + r = in_be32(&p->mr0); + if (r & EMAC_MR0_RXE) { + int n = 300; + out_be32(&p->mr0, r & ~EMAC_MR0_RXE); + while (!(in_be32(&p->mr0) & EMAC_MR0_RXI) && n) + --n; + if (unlikely(!n)) + emac_report_timeout_error(dev, "RX disable timeout"); + } + local_irq_restore(flags); } -static void emac_close_zmii(struct ocp_device *ocpdev) +static inline void emac_rx_disable_async(struct ocp_enet_private *dev) { - struct ibm_ocp_zmii *zmii = ZMII_PRIV(ocpdev); - BUG_ON(!zmii || zmii->users == 0); + struct emac_regs *p = dev->emacp; + unsigned long flags; + u32 r; - if (!--zmii->users) { - 
ocp_set_drvdata(ocpdev, NULL); - iounmap((void *)zmii->base); - kfree(zmii); - } + local_irq_save(flags); + + DBG("%d: rx_disable_async" NL, dev->def->index); + + r = in_be32(&p->mr0); + if (r & EMAC_MR0_RXE) + out_be32(&p->mr0, r & ~EMAC_MR0_RXE); + local_irq_restore(flags); } -int emac_phy_read(struct net_device *dev, int mii_id, int reg) +static int emac_reset(struct ocp_enet_private *dev) { - int count; - uint32_t stacr; - struct ocp_enet_private *fep = dev->priv; - emac_t *emacp = fep->emacp; + struct emac_regs *p = dev->emacp; + unsigned long flags; + int n = 20; - MDIO_DEBUG(("%s: phy_read, id: 0x%x, reg: 0x%x\n", dev->name, mii_id, - reg)); + DBG("%d: reset" NL, dev->def->index); - /* Enable proper ZMII port */ - if (fep->zmii_dev) - emac_enable_zmii_port(fep->zmii_dev, fep->zmii_input); + local_irq_save(flags); - /* Use the EMAC that has the MDIO port */ - if (fep->mdio_dev) { - dev = fep->mdio_dev; - fep = dev->priv; - emacp = fep->emacp; + if (!dev->reset_failed) { + /* 40x erratum suggests stopping RX channel before reset, + * we stop TX as well + */ + emac_rx_disable(dev); + emac_tx_disable(dev); } - count = 0; - while ((((stacr = in_be32(&emacp->em0stacr)) & EMAC_STACR_OC) == 0) - && (count++ < MDIO_DELAY)) - udelay(1); - MDIO_DEBUG((" (count was %d)\n", count)); + out_be32(&p->mr0, EMAC_MR0_SRST); + while ((in_be32(&p->mr0) & EMAC_MR0_SRST) && n) + --n; + local_irq_restore(flags); - if ((stacr & EMAC_STACR_OC) == 0) { - printk(KERN_WARNING "%s: PHY read timeout #1!\n", dev->name); - return -1; + if (n) { + dev->reset_failed = 0; + return 0; + } else { + emac_report_timeout_error(dev, "reset timeout"); + dev->reset_failed = 1; + return -ETIMEDOUT; } +} - /* Clear the speed bits and make a read request to the PHY */ - stacr = ((EMAC_STACR_READ | (reg & 0x1f)) & ~EMAC_STACR_CLK_100MHZ); - stacr |= ((mii_id & 0x1F) << 5); +static void emac_hash_mc(struct ocp_enet_private *dev) +{ + struct emac_regs *p = dev->emacp; + u16 gaht[4] = { 0 }; + struct 
dev_mc_list *dmi; - out_be32(&emacp->em0stacr, stacr); + DBG("%d: hash_mc %d" NL, dev->def->index, dev->ndev->mc_count); - count = 0; - while ((((stacr = in_be32(&emacp->em0stacr)) & EMAC_STACR_OC) == 0) - && (count++ < MDIO_DELAY)) - udelay(1); - MDIO_DEBUG((" (count was %d)\n", count)); + for (dmi = dev->ndev->mc_list; dmi; dmi = dmi->next) { + int bit; + DBG2("%d: mc %02x:%02x:%02x:%02x:%02x:%02x" NL, + dev->def->index, + dmi->dmi_addr[0], dmi->dmi_addr[1], dmi->dmi_addr[2], + dmi->dmi_addr[3], dmi->dmi_addr[4], dmi->dmi_addr[5]); - if ((stacr & EMAC_STACR_OC) == 0) { - printk(KERN_WARNING "%s: PHY read timeout #2!\n", dev->name); - return -1; + bit = 63 - (ether_crc(ETH_ALEN, dmi->dmi_addr) >> 26); + gaht[bit >> 4] |= 0x8000 >> (bit & 0x0f); } + out_be32(&p->gaht1, gaht[0]); + out_be32(&p->gaht2, gaht[1]); + out_be32(&p->gaht3, gaht[2]); + out_be32(&p->gaht4, gaht[3]); +} - /* Check for a read error */ - if (stacr & EMAC_STACR_PHYE) { - MDIO_DEBUG(("EMAC MDIO PHY error !\n")); - return -1; - } +static inline u32 emac_iff2rmr(struct net_device *ndev) +{ + u32 r = EMAC_RMR_SP | EMAC_RMR_SFCS | EMAC_RMR_IAE | EMAC_RMR_BAE | + EMAC_RMR_BASE; - MDIO_DEBUG((" -> 0x%x\n", stacr >> 16)); + if (ndev->flags & IFF_PROMISC) + r |= EMAC_RMR_PME; + else if (ndev->flags & IFF_ALLMULTI || ndev->mc_count > 32) + r |= EMAC_RMR_PMME; + else if (ndev->mc_count > 0) + r |= EMAC_RMR_MAE; - return (stacr >> 16); + return r; } -void emac_phy_write(struct net_device *dev, int mii_id, int reg, int data) +static inline int emac_opb_mhz(void) { - int count; - uint32_t stacr; - struct ocp_enet_private *fep = dev->priv; - emac_t *emacp = fep->emacp; + return (ocp_sys_info.opb_bus_freq + 500000) / 1000000; +} - MDIO_DEBUG(("%s phy_write, id: 0x%x, reg: 0x%x, data: 0x%x\n", - dev->name, mii_id, reg, data)); +/* BHs disabled */ +static int emac_configure(struct ocp_enet_private *dev) +{ + struct emac_regs *p = dev->emacp; + struct net_device *ndev = dev->ndev; + int gige; + u32 r; - /* Enable 
proper ZMII port */ - if (fep->zmii_dev) - emac_enable_zmii_port(fep->zmii_dev, fep->zmii_input); + DBG("%d: configure" NL, dev->def->index); - /* Use the EMAC that has the MDIO port */ - if (fep->mdio_dev) { - dev = fep->mdio_dev; - fep = dev->priv; - emacp = fep->emacp; - } + if (emac_reset(dev) < 0) + return -ETIMEDOUT; - count = 0; - while ((((stacr = in_be32(&emacp->em0stacr)) & EMAC_STACR_OC) == 0) - && (count++ < MDIO_DELAY)) - udelay(1); - MDIO_DEBUG((" (count was %d)\n", count)); + tah_reset(dev->tah_dev); - if ((stacr & EMAC_STACR_OC) == 0) { - printk(KERN_WARNING "%s: PHY write timeout #2!\n", dev->name); - return; - } + /* Mode register */ + r = EMAC_MR1_BASE(emac_opb_mhz()) | EMAC_MR1_VLE | EMAC_MR1_IST; + if (dev->phy.duplex == DUPLEX_FULL) + r |= EMAC_MR1_FDE; + switch (dev->phy.speed) { + case SPEED_1000: + if (emac_phy_gpcs(dev->phy.mode)) { + r |= EMAC_MR1_MF_1000GPCS | + EMAC_MR1_MF_IPPA(dev->phy.address); - /* Clear the speed bits and make a read request to the PHY */ + /* Put some arbitrary OUI, Manuf & Rev IDs so we can + * identify this GPCS PHY later. 
+ */ + out_be32(&p->ipcr, 0xdeadbeef); + } else + r |= EMAC_MR1_MF_1000; + r |= EMAC_MR1_RFS_16K; + gige = 1; - stacr = ((EMAC_STACR_WRITE | (reg & 0x1f)) & ~EMAC_STACR_CLK_100MHZ); - stacr |= ((mii_id & 0x1f) << 5) | ((data & 0xffff) << 16); + if (dev->ndev->mtu > ETH_DATA_LEN) + r |= EMAC_MR1_JPSM; + break; + case SPEED_100: + r |= EMAC_MR1_MF_100; + /* Fall through */ + default: + r |= EMAC_MR1_RFS_4K; + gige = 0; + break; + } - out_be32(&emacp->em0stacr, stacr); + if (dev->rgmii_dev) + rgmii_set_speed(dev->rgmii_dev, dev->rgmii_input, + dev->phy.speed); + else + zmii_set_speed(dev->zmii_dev, dev->zmii_input, dev->phy.speed); - count = 0; - while ((((stacr = in_be32(&emacp->em0stacr)) & EMAC_STACR_OC) == 0) - && (count++ < MDIO_DELAY)) - udelay(1); - MDIO_DEBUG((" (count was %d)\n", count)); +#if !defined(CONFIG_40x) + /* on 40x erratum forces us to NOT use integrated flow control, + * let's hope it works on 44x ;) + */ + if (dev->phy.duplex == DUPLEX_FULL) { + if (dev->phy.pause) + r |= EMAC_MR1_EIFC | EMAC_MR1_APP; + else if (dev->phy.asym_pause) + r |= EMAC_MR1_APP; + } +#endif + out_be32(&p->mr1, r); - if ((stacr & EMAC_STACR_OC) == 0) - printk(KERN_WARNING "%s: PHY write timeout #2!\n", dev->name); + /* Set individual MAC address */ + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + + /* VLAN Tag Protocol ID */ + out_be32(&p->vtpid, 0x8100); + + /* Receive mode register */ + r = emac_iff2rmr(ndev); + if (r & EMAC_RMR_MAE) + emac_hash_mc(dev); + out_be32(&p->rmr, r); + + /* FIFOs thresholds */ + r = EMAC_TMR1((EMAC_MAL_BURST_SIZE / EMAC_FIFO_ENTRY_SIZE) + 1, + EMAC_TX_FIFO_SIZE / 2 / EMAC_FIFO_ENTRY_SIZE); + out_be32(&p->tmr1, r); + out_be32(&p->trtr, EMAC_TRTR(EMAC_TX_FIFO_SIZE / 2)); + + /* PAUSE frame is sent when RX FIFO reaches its high-water mark, + there should be still enough space in FIFO to allow the our 
link + partner time to process this frame and also time to send PAUSE + frame itself. + + Here is the worst case scenario for the RX FIFO "headroom" + (from "The Switch Book") (100Mbps, without preamble, inter-frame gap): + + 1) One maximum-length frame on TX 1522 bytes + 2) One PAUSE frame time 64 bytes + 3) PAUSE frame decode time allowance 64 bytes + 4) One maximum-length frame on RX 1522 bytes + 5) Round-trip propagation delay of the link (100Mb) 15 bytes + ---------- + 3187 bytes - /* Check for a write error */ - if ((stacr & EMAC_STACR_PHYE) != 0) { - MDIO_DEBUG(("EMAC MDIO PHY error !\n")); - } + I chose to set high-water mark to RX_FIFO_SIZE / 4 (1024 bytes) + low-water mark to RX_FIFO_SIZE / 8 (512 bytes) + */ + r = EMAC_RWMR(EMAC_RX_FIFO_SIZE(gige) / 8 / EMAC_FIFO_ENTRY_SIZE, + EMAC_RX_FIFO_SIZE(gige) / 4 / EMAC_FIFO_ENTRY_SIZE); + out_be32(&p->rwmr, r); + + /* Set PAUSE timer to the maximum */ + out_be32(&p->ptr, 0xffff); + + /* IRQ sources */ + out_be32(&p->iser, EMAC_ISR_TXPE | EMAC_ISR_RXPE | /* EMAC_ISR_TXUE | + EMAC_ISR_RXOE | */ EMAC_ISR_OVR | EMAC_ISR_BP | EMAC_ISR_SE | + EMAC_ISR_ALE | EMAC_ISR_BFCS | EMAC_ISR_PTLE | EMAC_ISR_ORE | + EMAC_ISR_IRE | EMAC_ISR_TE); + + /* We need to take GPCS PHY out of isolate mode after EMAC reset */ + if (emac_phy_gpcs(dev->phy.mode)) + mii_reset_phy(&dev->phy); + + return 0; } -static void emac_txeob_dev(void *param, u32 chanmask) +/* BHs disabled */ +static void emac_reinitialize(struct ocp_enet_private *dev) { - struct net_device *dev = param; - struct ocp_enet_private *fep = dev->priv; - unsigned long flags; - - spin_lock_irqsave(&fep->lock, flags); - - PKT_DEBUG(("emac_txeob_dev() entry, tx_cnt: %d\n", fep->tx_cnt)); + DBG("%d: reinitialize" NL, dev->def->index); - while (fep->tx_cnt && - !(fep->tx_desc[fep->ack_slot].ctrl & MAL_TX_CTRL_READY)) { + if (!emac_configure(dev)) { + emac_tx_enable(dev); + emac_rx_enable(dev); + } +} - if (fep->tx_desc[fep->ack_slot].ctrl & MAL_TX_CTRL_LAST) { - /* Tell the system 
the transmit completed. */ - dma_unmap_single(&fep->ocpdev->dev, - fep->tx_desc[fep->ack_slot].data_ptr, - fep->tx_desc[fep->ack_slot].data_len, - DMA_TO_DEVICE); - dev_kfree_skb_irq(fep->tx_skb[fep->ack_slot]); +/* BHs disabled */ +static void emac_full_tx_reset(struct net_device *ndev) +{ + struct ocp_enet_private *dev = ndev->priv; + struct ocp_func_emac_data *emacdata = dev->def->additions; - if (fep->tx_desc[fep->ack_slot].ctrl & - (EMAC_TX_ST_EC | EMAC_TX_ST_MC | EMAC_TX_ST_SC)) - fep->stats.collisions++; - } + DBG("%d: full_tx_reset" NL, dev->def->index); - fep->tx_skb[fep->ack_slot] = (struct sk_buff *)NULL; - if (++fep->ack_slot == NUM_TX_BUFF) - fep->ack_slot = 0; + emac_tx_disable(dev); + mal_disable_tx_channel(dev->mal, emacdata->mal_tx_chan); + emac_clean_tx_ring(dev); + dev->tx_cnt = dev->tx_slot = dev->ack_slot = 0; - fep->tx_cnt--; - } - if (fep->tx_cnt < NUM_TX_BUFF) - netif_wake_queue(dev); + emac_configure(dev); - PKT_DEBUG(("emac_txeob_dev() exit, tx_cnt: %d\n", fep->tx_cnt)); + mal_enable_tx_channel(dev->mal, emacdata->mal_tx_chan); + emac_tx_enable(dev); + emac_rx_enable(dev); - spin_unlock_irqrestore(&fep->lock, flags); + netif_wake_queue(ndev); } -/* - Fill/Re-fill the rx chain with valid ctrl/ptrs. - This function will fill from rx_slot up to the parm end. - So to completely fill the chain pre-set rx_slot to 0 and - pass in an end of 0. - */ -static void emac_rx_fill(struct net_device *dev, int end) +static int __emac_mdio_read(struct ocp_enet_private *dev, u8 id, u8 reg) { - int i; - struct ocp_enet_private *fep = dev->priv; + struct emac_regs *p = dev->emacp; + u32 r; + int n; - i = fep->rx_slot; - do { - /* We don't want the 16 bytes skb_reserve done by dev_alloc_skb, - * it breaks our cache line alignement. However, we still allocate - * +16 so that we end up allocating the exact same size as - * dev_alloc_skb() would do. 
- * Also, because of the skb_res, the max DMA size we give to EMAC - * is slighly wrong, causing it to potentially DMA 2 more bytes - * from a broken/oversized packet. These 16 bytes will take care - * that we don't walk on somebody else toes with that. - */ - fep->rx_skb[i] = - alloc_skb(fep->rx_buffer_size + 16, GFP_ATOMIC); + DBG2("%d: mdio_read(%02x,%02x)" NL, dev->def->index, id, reg); - if (fep->rx_skb[i] == NULL) { - /* Keep rx_slot here, the next time clean/fill is called - * we will try again before the MAL wraps back here - * If the MAL tries to use this descriptor with - * the EMPTY bit off it will cause the - * rxde interrupt. That is where we will - * try again to allocate an sk_buff. - */ - break; + /* Enable proper MDIO port */ + zmii_enable_mdio(dev->zmii_dev, dev->zmii_input); - } - - if (skb_res) - skb_reserve(fep->rx_skb[i], skb_res); + /* Wait for management interface to become idle */ + n = 10; + while (!(in_be32(&p->stacr) & EMAC_STACR_OC)) { + udelay(1); + if (!--n) + goto to; + } - /* We must NOT dma_map_single the cache line right after the - * buffer, so we must crop our sync size to account for the - * reserved space - */ - fep->rx_desc[i].data_ptr = - (unsigned char *)dma_map_single(&fep->ocpdev->dev, - (void *)fep->rx_skb[i]-> - data, - fep->rx_buffer_size - - skb_res, DMA_FROM_DEVICE); - - /* - * Some 4xx implementations use the previously - * reserved bits in data_len to encode the MS - * 4-bits of a 36-bit physical address (ERPN) - * This must be initialized. - */ - fep->rx_desc[i].data_len = 0; - fep->rx_desc[i].ctrl = MAL_RX_CTRL_EMPTY | MAL_RX_CTRL_INTR | - (i == (NUM_RX_BUFF - 1) ? 
MAL_RX_CTRL_WRAP : 0); + /* Issue read command */ + out_be32(&p->stacr, + EMAC_STACR_BASE(emac_opb_mhz()) | EMAC_STACR_STAC_READ | + (reg & EMAC_STACR_PRA_MASK) + | ((id & EMAC_STACR_PCDA_MASK) << EMAC_STACR_PCDA_SHIFT)); + + /* Wait for read to complete */ + n = 100; + while (!((r = in_be32(&p->stacr)) & EMAC_STACR_OC)) { + udelay(1); + if (!--n) + goto to; + } - } while ((i = (i + 1) % NUM_RX_BUFF) != end); + if (unlikely(r & EMAC_STACR_PHYE)) { + DBG("%d: mdio_read(%02x, %02x) failed" NL, dev->def->index, + id, reg); + return -EREMOTEIO; + } - fep->rx_slot = i; + r = ((r >> EMAC_STACR_PHYD_SHIFT) & EMAC_STACR_PHYD_MASK); + DBG2("%d: mdio_read -> %04x" NL, dev->def->index, r); + return r; + to: + DBG("%d: MII management interface timeout (read)" NL, dev->def->index); + return -ETIMEDOUT; } -static void -emac_rx_csum(struct net_device *dev, unsigned short ctrl, struct sk_buff *skb) +static void __emac_mdio_write(struct ocp_enet_private *dev, u8 id, u8 reg, + u16 val) { - struct ocp_enet_private *fep = dev->priv; + struct emac_regs *p = dev->emacp; + int n; + + DBG2("%d: mdio_write(%02x,%02x,%04x)" NL, dev->def->index, id, reg, + val); + + /* Enable proper MDIO port */ + zmii_enable_mdio(dev->zmii_dev, dev->zmii_input); - /* Exit if interface has no TAH engine */ - if (!fep->tah_dev) { - skb->ip_summed = CHECKSUM_NONE; - return; + /* Wait for management interface to be idle */ + n = 10; + while (!(in_be32(&p->stacr) & EMAC_STACR_OC)) { + udelay(1); + if (!--n) + goto to; } - /* Check for TCP/UDP/IP csum error */ - if (ctrl & EMAC_CSUM_VER_ERROR) { - /* Let the stack verify checksum errors */ - skb->ip_summed = CHECKSUM_NONE; -/* adapter->hw_csum_err++; */ - } else { - /* Csum is good */ - skb->ip_summed = CHECKSUM_UNNECESSARY; -/* adapter->hw_csum_good++; */ + /* Issue write command */ + out_be32(&p->stacr, + EMAC_STACR_BASE(emac_opb_mhz()) | EMAC_STACR_STAC_WRITE | + (reg & EMAC_STACR_PRA_MASK) | + ((id & EMAC_STACR_PCDA_MASK) << EMAC_STACR_PCDA_SHIFT) | + (val << 
EMAC_STACR_PHYD_SHIFT)); + + /* Wait for write to complete */ + n = 100; + while (!(in_be32(&p->stacr) & EMAC_STACR_OC)) { + udelay(1); + if (!--n) + goto to; } + return; + to: + DBG("%d: MII management interface timeout (write)" NL, dev->def->index); } -static int emac_rx_clean(struct net_device *dev) +static int emac_mdio_read(struct net_device *ndev, int id, int reg) { - int i, b, bnum = 0, buf[6]; - int error, frame_length; - struct ocp_enet_private *fep = dev->priv; - unsigned short ctrl; - - i = fep->rx_slot; - - PKT_DEBUG(("emac_rx_clean() entry, rx_slot: %d\n", fep->rx_slot)); - - do { - if (fep->rx_skb[i] == NULL) - continue; /*we have already handled the packet but haved failed to alloc */ - /* - since rx_desc is in uncached mem we don't keep reading it directly - we pull out a local copy of ctrl and do the checks on the copy. - */ - ctrl = fep->rx_desc[i].ctrl; - if (ctrl & MAL_RX_CTRL_EMPTY) - break; /*we don't have any more ready packets */ - - if (EMAC_IS_BAD_RX_PACKET(ctrl)) { - fep->stats.rx_errors++; - fep->stats.rx_dropped++; - - if (ctrl & EMAC_RX_ST_OE) - fep->stats.rx_fifo_errors++; - if (ctrl & EMAC_RX_ST_AE) - fep->stats.rx_frame_errors++; - if (ctrl & EMAC_RX_ST_BFCS) - fep->stats.rx_crc_errors++; - if (ctrl & (EMAC_RX_ST_RP | EMAC_RX_ST_PTL | - EMAC_RX_ST_ORE | EMAC_RX_ST_IRE)) - fep->stats.rx_length_errors++; - } else { - if ((ctrl & (MAL_RX_CTRL_FIRST | MAL_RX_CTRL_LAST)) == - (MAL_RX_CTRL_FIRST | MAL_RX_CTRL_LAST)) { - /* Single descriptor packet */ - emac_rx_csum(dev, ctrl, fep->rx_skb[i]); - /* Send the skb up the chain. 
*/ - frame_length = fep->rx_desc[i].data_len - 4; - skb_put(fep->rx_skb[i], frame_length); - fep->rx_skb[i]->dev = dev; - fep->rx_skb[i]->protocol = - eth_type_trans(fep->rx_skb[i], dev); - error = netif_rx(fep->rx_skb[i]); - - if ((error == NET_RX_DROP) || - (error == NET_RX_BAD)) { - fep->stats.rx_dropped++; - } else { - fep->stats.rx_packets++; - fep->stats.rx_bytes += frame_length; - } - fep->rx_skb[i] = NULL; - } else { - /* Multiple descriptor packet */ - if (ctrl & MAL_RX_CTRL_FIRST) { - if (fep->rx_desc[(i + 1) % NUM_RX_BUFF]. - ctrl & MAL_RX_CTRL_EMPTY) - break; - bnum = 0; - buf[bnum] = i; - ++bnum; - continue; - } - if (((ctrl & MAL_RX_CTRL_FIRST) != - MAL_RX_CTRL_FIRST) && - ((ctrl & MAL_RX_CTRL_LAST) != - MAL_RX_CTRL_LAST)) { - if (fep->rx_desc[(i + 1) % - NUM_RX_BUFF].ctrl & - MAL_RX_CTRL_EMPTY) { - i = buf[0]; - break; - } - buf[bnum] = i; - ++bnum; - continue; - } - if (ctrl & MAL_RX_CTRL_LAST) { - buf[bnum] = i; - ++bnum; - skb_put(fep->rx_skb[buf[0]], - fep->rx_desc[buf[0]].data_len); - for (b = 1; b < bnum; b++) { - /* - * MAL is braindead, we need - * to copy the remainder - * of the packet from the - * latter descriptor buffers - * to the first skb. Then - * dispose of the source - * skbs. - * - * Once the stack is fixed - * to handle frags on most - * protocols we can generate - * a fragmented skb with - * no copies. - */ - memcpy(fep->rx_skb[buf[0]]-> - data + - fep->rx_skb[buf[0]]->len, - fep->rx_skb[buf[b]]-> - data, - fep->rx_desc[buf[b]]. - data_len); - skb_put(fep->rx_skb[buf[0]], - fep->rx_desc[buf[b]]. - data_len); - dma_unmap_single(&fep->ocpdev-> - dev, - fep-> - rx_desc[buf - [b]]. - data_ptr, - fep-> - rx_desc[buf - [b]]. 
- data_len, - DMA_FROM_DEVICE); - dev_kfree_skb(fep-> - rx_skb[buf[b]]); - } - emac_rx_csum(dev, ctrl, - fep->rx_skb[buf[0]]); - - fep->rx_skb[buf[0]]->dev = dev; - fep->rx_skb[buf[0]]->protocol = - eth_type_trans(fep->rx_skb[buf[0]], - dev); - error = netif_rx(fep->rx_skb[buf[0]]); - - if ((error == NET_RX_DROP) - || (error == NET_RX_BAD)) { - fep->stats.rx_dropped++; - } else { - fep->stats.rx_packets++; - fep->stats.rx_bytes += - fep->rx_skb[buf[0]]->len; - } - for (b = 0; b < bnum; b++) - fep->rx_skb[buf[b]] = NULL; - } - } - } - } while ((i = (i + 1) % NUM_RX_BUFF) != fep->rx_slot); - - PKT_DEBUG(("emac_rx_clean() exit, rx_slot: %d\n", fep->rx_slot)); + struct ocp_enet_private *dev = ndev->priv; + int res; - return i; + local_bh_disable(); + res = __emac_mdio_read(dev->mdio_dev ? dev->mdio_dev : dev, (u8) id, + (u8) reg); + local_bh_enable(); + return res; } -static void emac_rxeob_dev(void *param, u32 chanmask) +static void emac_mdio_write(struct net_device *ndev, int id, int reg, int val) { - struct net_device *dev = param; - struct ocp_enet_private *fep = dev->priv; - unsigned long flags; - int n; + struct ocp_enet_private *dev = ndev->priv; - spin_lock_irqsave(&fep->lock, flags); - if ((n = emac_rx_clean(dev)) != fep->rx_slot) - emac_rx_fill(dev, n); - spin_unlock_irqrestore(&fep->lock, flags); + local_bh_disable(); + __emac_mdio_write(dev->mdio_dev ? dev->mdio_dev : dev, (u8) id, + (u8) reg, (u16) val); + local_bh_enable(); } -/* - * This interrupt should never occurr, we don't program - * the MAL for contiunous mode. 
- */ -static void emac_txde_dev(void *param, u32 chanmask) +/* BHs disabled */ +static void emac_set_multicast_list(struct net_device *ndev) { - struct net_device *dev = param; - struct ocp_enet_private *fep = dev->priv; - - printk(KERN_WARNING "%s: transmit descriptor error\n", dev->name); + struct ocp_enet_private *dev = ndev->priv; + struct emac_regs *p = dev->emacp; + u32 rmr = emac_iff2rmr(ndev); - emac_mac_dump(dev); - emac_mal_dump(dev); + DBG("%d: multicast %08x" NL, dev->def->index, rmr); + BUG_ON(!netif_running(dev->ndev)); - /* Reenable the transmit channel */ - mal_enable_tx_channels(fep->mal, fep->commac.tx_chan_mask); + /* I decided to relax register access rules here to avoid + * full EMAC reset. + * + * There is a real problem with EMAC4 core if we use MWSW_001 bit + * in MR1 register and do a full EMAC reset. + * One TX BD status update is delayed and, after EMAC reset, it + * never happens, resulting in TX hung (it'll be recovered by TX + * timeout handler eventually, but this is just gross). + * So we either have to do full TX reset or try to cheat here :) + * + * The only required change is to RX mode register, so I *think* all + * we need is just to stop RX channel. This seems to work on all + * tested SoCs. --ebs + */ + emac_rx_disable(dev); + if (rmr & EMAC_RMR_MAE) + emac_hash_mc(dev); + out_be32(&p->rmr, rmr); + emac_rx_enable(dev); } -/* - * This interrupt should be very rare at best. This occurs when - * the hardware has a problem with the receive descriptors. The manual - * states that it occurs when the hardware cannot the receive descriptor - * empty bit is not set. The recovery mechanism will be to - * traverse through the descriptors, handle any that are marked to be - * handled and reinitialize each along the way. At that point the driver - * will be restarted. 
- */ -static void emac_rxde_dev(void *param, u32 chanmask) +/* BHs disabled */ +static int emac_resize_rx_ring(struct ocp_enet_private *dev, int new_mtu) { - struct net_device *dev = param; - struct ocp_enet_private *fep = dev->priv; - unsigned long flags; + struct ocp_func_emac_data *emacdata = dev->def->additions; + int rx_sync_size = emac_rx_sync_size(new_mtu); + int rx_skb_size = emac_rx_skb_size(new_mtu); + int i, ret = 0; - if (net_ratelimit()) { - printk(KERN_WARNING "%s: receive descriptor error\n", - fep->ndev->name); + emac_rx_disable(dev); + mal_disable_rx_channel(dev->mal, emacdata->mal_rx_chan); - emac_mac_dump(dev); - emac_mal_dump(dev); - emac_desc_dump(dev); + if (dev->rx_sg_skb) { + ++dev->estats.rx_dropped_resize; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; } - /* Disable RX channel */ - spin_lock_irqsave(&fep->lock, flags); - mal_disable_rx_channels(fep->mal, fep->commac.rx_chan_mask); + /* Make a first pass over RX ring and mark BDs ready, dropping + * non-processed packets on the way. We need this as a separate pass + * to simplify error recovery in the case of allocation failure later. + */ + for (i = 0; i < NUM_RX_BUFF; ++i) { + if (dev->rx_desc[i].ctrl & MAL_RX_CTRL_FIRST) + ++dev->estats.rx_dropped_resize; - /* For now, charge the error against all emacs */ - fep->stats.rx_errors++; + dev->rx_desc[i].data_len = 0; + dev->rx_desc[i].ctrl = MAL_RX_CTRL_EMPTY | + (i == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0); + } - /* so do we have any good packets still? */ - emac_rx_clean(dev); + /* Reallocate RX ring only if bigger skb buffers are required */ + if (rx_skb_size <= dev->rx_skb_size) + goto skip; - /* When the interface is restarted it resets processing to the - * first descriptor in the table. 
- */ + /* Second pass, allocate new skbs */ + for (i = 0; i < NUM_RX_BUFF; ++i) { + struct sk_buff *skb = alloc_skb(rx_skb_size, GFP_ATOMIC); + if (!skb) { + ret = -ENOMEM; + goto oom; + } + + BUG_ON(!dev->rx_skb[i]); + dev_kfree_skb(dev->rx_skb[i]); + + skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2); + dev->rx_desc[i].data_ptr = + dma_map_single(dev->ldev, skb->data - 2, rx_sync_size, + DMA_FROM_DEVICE) + 2; + dev->rx_skb[i] = skb; + } + skip: + /* Check if we need to change "Jumbo" bit in MR1 */ + if ((new_mtu > ETH_DATA_LEN) ^ (dev->ndev->mtu > ETH_DATA_LEN)) { + /* This is to prevent starting RX channel in emac_rx_enable() */ + dev->commac.rx_stopped = 1; - fep->rx_slot = 0; - emac_rx_fill(dev, 0); + dev->ndev->mtu = new_mtu; + emac_full_tx_reset(dev->ndev); + } - set_mal_dcrn(fep->mal, DCRN_MALRXEOBISR, fep->commac.rx_chan_mask); - set_mal_dcrn(fep->mal, DCRN_MALRXDEIR, fep->commac.rx_chan_mask); + mal_set_rcbs(dev->mal, emacdata->mal_rx_chan, emac_rx_size(new_mtu)); + oom: + /* Restart RX */ + dev->commac.rx_stopped = dev->rx_slot = 0; + mal_enable_rx_channel(dev->mal, emacdata->mal_rx_chan); + emac_rx_enable(dev); - /* Reenable the receive channels */ - mal_enable_rx_channels(fep->mal, fep->commac.rx_chan_mask); - spin_unlock_irqrestore(&fep->lock, flags); + return ret; } -static irqreturn_t -emac_mac_irq(int irq, void *dev_instance, struct pt_regs *regs) +/* Process ctx, rtnl_lock semaphore */ +static int emac_change_mtu(struct net_device *ndev, int new_mtu) { - struct net_device *dev = dev_instance; - struct ocp_enet_private *fep = dev->priv; - emac_t *emacp = fep->emacp; - unsigned long tmp_em0isr; + struct ocp_enet_private *dev = ndev->priv; + int ret = 0; - /* EMAC interrupt */ - tmp_em0isr = in_be32(&emacp->em0isr); - if (tmp_em0isr & (EMAC_ISR_TE0 | EMAC_ISR_TE1)) { - /* This error is a hard transmit error - could retransmit */ - fep->stats.tx_errors++; + if (new_mtu < EMAC_MIN_MTU || new_mtu > EMAC_MAX_MTU) + return -EINVAL; - /* Reenable the transmit 
channel */ - mal_enable_tx_channels(fep->mal, fep->commac.tx_chan_mask); + DBG("%d: change_mtu(%d)" NL, dev->def->index, new_mtu); - } else { - fep->stats.rx_errors++; + local_bh_disable(); + if (netif_running(ndev)) { + /* Check if we really need to reinitalize RX ring */ + if (emac_rx_skb_size(ndev->mtu) != emac_rx_skb_size(new_mtu)) + ret = emac_resize_rx_ring(dev, new_mtu); } - if (tmp_em0isr & EMAC_ISR_RP) - fep->stats.rx_length_errors++; - if (tmp_em0isr & EMAC_ISR_ALE) - fep->stats.rx_frame_errors++; - if (tmp_em0isr & EMAC_ISR_BFCS) - fep->stats.rx_crc_errors++; - if (tmp_em0isr & EMAC_ISR_PTLE) - fep->stats.rx_length_errors++; - if (tmp_em0isr & EMAC_ISR_ORE) - fep->stats.rx_length_errors++; - if (tmp_em0isr & EMAC_ISR_TE0) - fep->stats.tx_aborted_errors++; - - emac_err_dump(dev, tmp_em0isr); - - out_be32(&emacp->em0isr, tmp_em0isr); + if (!ret) { + ndev->mtu = new_mtu; + dev->rx_skb_size = emac_rx_skb_size(new_mtu); + dev->rx_sync_size = emac_rx_sync_size(new_mtu); + } + local_bh_enable(); - return IRQ_HANDLED; + return ret; } -static int emac_start_xmit(struct sk_buff *skb, struct net_device *dev) +static void emac_clean_tx_ring(struct ocp_enet_private *dev) { - unsigned short ctrl; - unsigned long flags; - struct ocp_enet_private *fep = dev->priv; - emac_t *emacp = fep->emacp; - int len = skb->len; - unsigned int offset = 0, size, f, tx_slot_first; - unsigned int nr_frags = skb_shinfo(skb)->nr_frags; - - spin_lock_irqsave(&fep->lock, flags); - - len -= skb->data_len; - - if ((fep->tx_cnt + nr_frags + len / DESC_BUF_SIZE + 1) > NUM_TX_BUFF) { - PKT_DEBUG(("emac_start_xmit() stopping queue\n")); - netif_stop_queue(dev); - spin_unlock_irqrestore(&fep->lock, flags); - return -EBUSY; - } - - tx_slot_first = fep->tx_slot; - - while (len) { - size = min(len, DESC_BUF_SIZE); - - fep->tx_desc[fep->tx_slot].data_len = (short)size; - fep->tx_desc[fep->tx_slot].data_ptr = - (unsigned char *)dma_map_single(&fep->ocpdev->dev, - (void *)((unsigned int)skb-> - data + 
offset), - size, DMA_TO_DEVICE); - - ctrl = EMAC_TX_CTRL_DFLT; - if (fep->tx_slot != tx_slot_first) - ctrl |= MAL_TX_CTRL_READY; - if ((NUM_TX_BUFF - 1) == fep->tx_slot) - ctrl |= MAL_TX_CTRL_WRAP; - if (!nr_frags && (len == size)) { - ctrl |= MAL_TX_CTRL_LAST; - fep->tx_skb[fep->tx_slot] = skb; + int i; + for (i = 0; i < NUM_TX_BUFF; ++i) { + if (dev->tx_skb[i]) { + dev_kfree_skb(dev->tx_skb[i]); + dev->tx_skb[i] = NULL; + if (dev->tx_desc[i].ctrl & MAL_TX_CTRL_READY) + ++dev->estats.tx_dropped; } - if (skb->ip_summed == CHECKSUM_HW) - ctrl |= EMAC_TX_CTRL_TAH_CSUM; - - fep->tx_desc[fep->tx_slot].ctrl = ctrl; - - len -= size; - offset += size; + dev->tx_desc[i].ctrl = 0; + dev->tx_desc[i].data_ptr = 0; + } +} - /* Bump tx count */ - if (++fep->tx_cnt == NUM_TX_BUFF) - netif_stop_queue(dev); +static void emac_clean_rx_ring(struct ocp_enet_private *dev) +{ + int i; + for (i = 0; i < NUM_RX_BUFF; ++i) + if (dev->rx_skb[i]) { + dev->rx_desc[i].ctrl = 0; + dev_kfree_skb(dev->rx_skb[i]); + dev->rx_skb[i] = NULL; + dev->rx_desc[i].data_ptr = 0; + } - /* Next descriptor */ - if (++fep->tx_slot == NUM_TX_BUFF) - fep->tx_slot = 0; + if (dev->rx_sg_skb) { + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; } +} - for (f = 0; f < nr_frags; f++) { - struct skb_frag_struct *frag; +static inline int emac_alloc_rx_skb(struct ocp_enet_private *dev, int slot, + int flags) +{ + struct sk_buff *skb = alloc_skb(dev->rx_skb_size, flags); + if (unlikely(!skb)) + return -ENOMEM; - frag = &skb_shinfo(skb)->frags[f]; - len = frag->size; - offset = 0; + dev->rx_skb[slot] = skb; + dev->rx_desc[slot].data_len = 0; - while (len) { - size = min(len, DESC_BUF_SIZE); + skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2); + dev->rx_desc[slot].data_ptr = + dma_map_single(dev->ldev, skb->data - 2, dev->rx_sync_size, + DMA_FROM_DEVICE) + 2; + barrier(); + dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | + (slot == (NUM_RX_BUFF - 1) ? 
MAL_RX_CTRL_WRAP : 0); - dma_map_page(&fep->ocpdev->dev, - frag->page, - frag->page_offset + offset, - size, DMA_TO_DEVICE); - - ctrl = EMAC_TX_CTRL_DFLT | MAL_TX_CTRL_READY; - if ((NUM_TX_BUFF - 1) == fep->tx_slot) - ctrl |= MAL_TX_CTRL_WRAP; - if ((f == (nr_frags - 1)) && (len == size)) { - ctrl |= MAL_TX_CTRL_LAST; - fep->tx_skb[fep->tx_slot] = skb; - } + return 0; +} - if (skb->ip_summed == CHECKSUM_HW) - ctrl |= EMAC_TX_CTRL_TAH_CSUM; +static void emac_print_link_status(struct ocp_enet_private *dev) +{ + if (netif_carrier_ok(dev->ndev)) + printk(KERN_INFO "%s: link is up, %d %s%s\n", + dev->ndev->name, dev->phy.speed, + dev->phy.duplex == DUPLEX_FULL ? "FDX" : "HDX", + dev->phy.pause ? ", pause enabled" : + dev->phy.asym_pause ? ", assymetric pause enabled" : ""); + else + printk(KERN_INFO "%s: link is down\n", dev->ndev->name); +} - fep->tx_desc[fep->tx_slot].data_len = (short)size; - fep->tx_desc[fep->tx_slot].data_ptr = - (char *)((page_to_pfn(frag->page) << PAGE_SHIFT) + - frag->page_offset + offset); - fep->tx_desc[fep->tx_slot].ctrl = ctrl; +/* Process ctx, rtnl_lock semaphore */ +static int emac_open(struct net_device *ndev) +{ + struct ocp_enet_private *dev = ndev->priv; + struct ocp_func_emac_data *emacdata = dev->def->additions; + int err, i; + + DBG("%d: open" NL, dev->def->index); + + /* Setup error IRQ handler */ + err = request_irq(dev->def->irq, emac_irq, 0, "EMAC", dev); + if (err) { + printk(KERN_ERR "%s: failed to request IRQ %d\n", + ndev->name, dev->def->irq); + return err; + } + + /* Allocate RX ring */ + for (i = 0; i < NUM_RX_BUFF; ++i) + if (emac_alloc_rx_skb(dev, i, GFP_KERNEL)) { + printk(KERN_ERR "%s: failed to allocate RX ring\n", + ndev->name); + goto oom; + } + + local_bh_disable(); + dev->tx_cnt = dev->tx_slot = dev->ack_slot = dev->rx_slot = + dev->commac.rx_stopped = 0; + dev->rx_sg_skb = NULL; + + if (dev->phy.address >= 0) { + int link_poll_interval; + if (dev->phy.def->ops->poll_link(&dev->phy)) { + 
dev->phy.def->ops->read_link(&dev->phy); + EMAC_RX_CLK_DEFAULT(dev->def->index); + netif_carrier_on(dev->ndev); + link_poll_interval = PHY_POLL_LINK_ON; + } else { + EMAC_RX_CLK_TX(dev->def->index); + netif_carrier_off(dev->ndev); + link_poll_interval = PHY_POLL_LINK_OFF; + } + mod_timer(&dev->link_timer, jiffies + link_poll_interval); + emac_print_link_status(dev); + } else + netif_carrier_on(dev->ndev); + + emac_configure(dev); + mal_poll_add(dev->mal, &dev->commac); + mal_enable_tx_channel(dev->mal, emacdata->mal_tx_chan); + mal_set_rcbs(dev->mal, emacdata->mal_rx_chan, emac_rx_size(ndev->mtu)); + mal_enable_rx_channel(dev->mal, emacdata->mal_rx_chan); + emac_tx_enable(dev); + emac_rx_enable(dev); + netif_start_queue(ndev); + local_bh_enable(); - len -= size; - offset += size; + return 0; + oom: + emac_clean_rx_ring(dev); + free_irq(dev->def->irq, dev); + return -ENOMEM; +} - /* Bump tx count */ - if (++fep->tx_cnt == NUM_TX_BUFF) - netif_stop_queue(dev); +/* BHs disabled */ +static int emac_link_differs(struct ocp_enet_private *dev) +{ + u32 r = in_be32(&dev->emacp->mr1); - /* Next descriptor */ - if (++fep->tx_slot == NUM_TX_BUFF) - fep->tx_slot = 0; - } - } + int duplex = r & EMAC_MR1_FDE ? DUPLEX_FULL : DUPLEX_HALF; + int speed, pause, asym_pause; - /* - * Deferred set READY on first descriptor of packet to - * avoid TX MAL race. - */ - fep->tx_desc[tx_slot_first].ctrl |= MAL_TX_CTRL_READY; + if (r & (EMAC_MR1_MF_1000 | EMAC_MR1_MF_1000GPCS)) + speed = SPEED_1000; + else if (r & EMAC_MR1_MF_100) + speed = SPEED_100; + else + speed = SPEED_10; - /* Send the packet out. 
*/ - out_be32(&emacp->em0tmr0, EMAC_TMR0_XMIT); + switch (r & (EMAC_MR1_EIFC | EMAC_MR1_APP)) { + case (EMAC_MR1_EIFC | EMAC_MR1_APP): + pause = 1; + asym_pause = 0; + break; + case EMAC_MR1_APP: + pause = 0; + asym_pause = 1; + break; + default: + pause = asym_pause = 0; + } + return speed != dev->phy.speed || duplex != dev->phy.duplex || + pause != dev->phy.pause || asym_pause != dev->phy.asym_pause; +} - fep->stats.tx_packets++; - fep->stats.tx_bytes += skb->len; +/* BHs disabled */ +static void emac_link_timer(unsigned long data) +{ + struct ocp_enet_private *dev = (struct ocp_enet_private *)data; + int link_poll_interval; - PKT_DEBUG(("emac_start_xmit() exitn")); + DBG2("%d: link timer" NL, dev->def->index); - spin_unlock_irqrestore(&fep->lock, flags); + if (dev->phy.def->ops->poll_link(&dev->phy)) { + if (!netif_carrier_ok(dev->ndev)) { + EMAC_RX_CLK_DEFAULT(dev->def->index); - return 0; -} + /* Get new link parameters */ + dev->phy.def->ops->read_link(&dev->phy); -static int emac_adjust_to_link(struct ocp_enet_private *fep) -{ - emac_t *emacp = fep->emacp; - unsigned long mode_reg; - int full_duplex, speed; + if (dev->tah_dev || emac_link_differs(dev)) + emac_full_tx_reset(dev->ndev); - full_duplex = 0; - speed = SPEED_10; + netif_carrier_on(dev->ndev); + emac_print_link_status(dev); + } + link_poll_interval = PHY_POLL_LINK_ON; + } else { + if (netif_carrier_ok(dev->ndev)) { + EMAC_RX_CLK_TX(dev->def->index); +#if defined(CONFIG_IBM_EMAC_PHY_RX_CLK_FIX) + emac_reinitialize(dev); +#endif + netif_carrier_off(dev->ndev); + emac_print_link_status(dev); + } - /* set mode register 1 defaults */ - mode_reg = EMAC_M1_DEFAULT; + /* Retry reset if the previous attempt failed. + * This is needed mostly for CONFIG_IBM_EMAC_PHY_RX_CLK_FIX + * case, but I left it here because it shouldn't trigger for + * sane PHYs anyway. 
+ */ + if (unlikely(dev->reset_failed)) + emac_reinitialize(dev); - /* Read link mode on PHY */ - if (fep->phy_mii.def->ops->read_link(&fep->phy_mii) == 0) { - /* If an error occurred, we don't deal with it yet */ - full_duplex = (fep->phy_mii.duplex == DUPLEX_FULL); - speed = fep->phy_mii.speed; + link_poll_interval = PHY_POLL_LINK_OFF; } + mod_timer(&dev->link_timer, jiffies + link_poll_interval); +} +/* BHs disabled */ +static void emac_force_link_update(struct ocp_enet_private *dev) +{ + netif_carrier_off(dev->ndev); + if (timer_pending(&dev->link_timer)) + mod_timer(&dev->link_timer, jiffies + PHY_POLL_LINK_OFF); +} - /* set speed (default is 10Mb) */ - switch (speed) { - case SPEED_1000: - mode_reg |= EMAC_M1_RFS_16K; - if (fep->rgmii_dev) { - struct ibm_ocp_rgmii *rgmii = RGMII_PRIV(fep->rgmii_dev); - - if ((rgmii->mode[fep->rgmii_input] == RTBI) - || (rgmii->mode[fep->rgmii_input] == TBI)) - mode_reg |= EMAC_M1_MF_1000GPCS; - else - mode_reg |= EMAC_M1_MF_1000MBPS; +/* Process ctx, rtnl_lock semaphore */ +static int emac_close(struct net_device *ndev) +{ + struct ocp_enet_private *dev = ndev->priv; + struct ocp_func_emac_data *emacdata = dev->def->additions; - emac_rgmii_port_speed(fep->rgmii_dev, fep->rgmii_input, - 1000); - } - break; - case SPEED_100: - mode_reg |= EMAC_M1_MF_100MBPS | EMAC_M1_RFS_4K; - if (fep->rgmii_dev) - emac_rgmii_port_speed(fep->rgmii_dev, fep->rgmii_input, - 100); - if (fep->zmii_dev) - emac_zmii_port_speed(fep->zmii_dev, fep->zmii_input, - 100); - break; - case SPEED_10: - default: - mode_reg = (mode_reg & ~EMAC_M1_MF_100MBPS) | EMAC_M1_RFS_4K; - if (fep->rgmii_dev) - emac_rgmii_port_speed(fep->rgmii_dev, fep->rgmii_input, - 10); - if (fep->zmii_dev) - emac_zmii_port_speed(fep->zmii_dev, fep->zmii_input, - 10); - } + DBG("%d: close" NL, dev->def->index); + + local_bh_disable(); - if (full_duplex) - mode_reg |= EMAC_M1_FDE | EMAC_M1_EIFC | EMAC_M1_IST; - else - mode_reg &= ~(EMAC_M1_FDE | EMAC_M1_EIFC | EMAC_M1_ILE); + if 
(dev->phy.address >= 0) + del_timer_sync(&dev->link_timer); - LINK_DEBUG(("%s: adjust to link, speed: %d, duplex: %d, opened: %d\n", - fep->ndev->name, speed, full_duplex, fep->opened)); + netif_stop_queue(ndev); + emac_rx_disable(dev); + emac_tx_disable(dev); + mal_disable_rx_channel(dev->mal, emacdata->mal_rx_chan); + mal_disable_tx_channel(dev->mal, emacdata->mal_tx_chan); + mal_poll_del(dev->mal, &dev->commac); + local_bh_enable(); - printk(KERN_INFO "%s: Speed: %d, %s duplex.\n", - fep->ndev->name, speed, full_duplex ? "Full" : "Half"); - if (fep->opened) - out_be32(&emacp->em0mr1, mode_reg); + emac_clean_tx_ring(dev); + emac_clean_rx_ring(dev); + free_irq(dev->def->irq, dev); return 0; } -static int emac_set_mac_address(struct net_device *ndev, void *p) +static inline u16 emac_tx_csum(struct ocp_enet_private *dev, + struct sk_buff *skb) { - struct ocp_enet_private *fep = ndev->priv; - emac_t *emacp = fep->emacp; - struct sockaddr *addr = p; +#if defined(CONFIG_IBM_EMAC_TAH) + if (skb->ip_summed == CHECKSUM_HW) { + ++dev->stats.tx_packets_csum; + return EMAC_TX_CTRL_TAH_CSUM; + } +#endif + return 0; +} - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; +static inline int emac_xmit_finish(struct ocp_enet_private *dev, int len) +{ + struct emac_regs *p = dev->emacp; + struct net_device *ndev = dev->ndev; - memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); + /* Send the packet out */ + out_be32(&p->tmr0, EMAC_TMR0_XMIT); - /* set the high address */ - out_be32(&emacp->em0iahr, - (fep->ndev->dev_addr[0] << 8) | fep->ndev->dev_addr[1]); + if (unlikely(++dev->tx_cnt == NUM_TX_BUFF)) { + netif_stop_queue(ndev); + DBG2("%d: stopped TX queue" NL, dev->def->index); + } - /* set the low address */ - out_be32(&emacp->em0ialr, - (fep->ndev->dev_addr[2] << 24) | (fep->ndev->dev_addr[3] << 16) - | (fep->ndev->dev_addr[4] << 8) | fep->ndev->dev_addr[5]); + ndev->trans_start = jiffies; + ++dev->stats.tx_packets; + dev->stats.tx_bytes += len; return 0; } 
-static int emac_change_mtu(struct net_device *dev, int new_mtu) +/* BHs disabled */ +static int emac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { - struct ocp_enet_private *fep = dev->priv; - int old_mtu = dev->mtu; - unsigned long mode_reg; - emac_t *emacp = fep->emacp; - u32 em0mr0; - int i, full; - unsigned long flags; + struct ocp_enet_private *dev = ndev->priv; + unsigned int len = skb->len; + int slot; - if ((new_mtu < EMAC_MIN_MTU) || (new_mtu > EMAC_MAX_MTU)) { - printk(KERN_ERR - "emac: Invalid MTU setting, MTU must be between %d and %d\n", - EMAC_MIN_MTU, EMAC_MAX_MTU); - return -EINVAL; - } - - if (old_mtu != new_mtu && netif_running(dev)) { - /* Stop rx engine */ - em0mr0 = in_be32(&emacp->em0mr0); - out_be32(&emacp->em0mr0, em0mr0 & ~EMAC_M0_RXE); - - /* Wait for descriptors to be empty */ - do { - full = 0; - for (i = 0; i < NUM_RX_BUFF; i++) - if (!(fep->rx_desc[i].ctrl & MAL_RX_CTRL_EMPTY)) { - printk(KERN_NOTICE - "emac: RX ring is still full\n"); - full = 1; - } - } while (full); - - spin_lock_irqsave(&fep->lock, flags); - - mal_disable_rx_channels(fep->mal, fep->commac.rx_chan_mask); - - /* Destroy all old rx skbs */ - for (i = 0; i < NUM_RX_BUFF; i++) { - dma_unmap_single(&fep->ocpdev->dev, - fep->rx_desc[i].data_ptr, - fep->rx_desc[i].data_len, - DMA_FROM_DEVICE); - dev_kfree_skb(fep->rx_skb[i]); - fep->rx_skb[i] = NULL; - } - - /* Set new rx_buffer_size, jumbo cap, and advertise new mtu */ - mode_reg = in_be32(&emacp->em0mr1); - if (new_mtu > ENET_DEF_MTU_SIZE) { - mode_reg |= EMAC_M1_JUMBO_ENABLE; - fep->rx_buffer_size = EMAC_MAX_FRAME; - } else { - mode_reg &= ~EMAC_M1_JUMBO_ENABLE; - fep->rx_buffer_size = ENET_DEF_BUF_SIZE; - } - dev->mtu = new_mtu; - out_be32(&emacp->em0mr1, mode_reg); + u16 ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY | + MAL_TX_CTRL_LAST | emac_tx_csum(dev, skb); - /* Re-init rx skbs */ - fep->rx_slot = 0; - emac_rx_fill(dev, 0); + slot = dev->tx_slot++; + if (dev->tx_slot == NUM_TX_BUFF) 
{ + dev->tx_slot = 0; + ctrl |= MAL_TX_CTRL_WRAP; + } - /* Restart the rx engine */ - mal_enable_rx_channels(fep->mal, fep->commac.rx_chan_mask); - out_be32(&emacp->em0mr0, em0mr0 | EMAC_M0_RXE); + DBG2("%d: xmit(%u) %d" NL, dev->def->index, len, slot); - spin_unlock_irqrestore(&fep->lock, flags); - } + dev->tx_skb[slot] = skb; + dev->tx_desc[slot].data_ptr = dma_map_single(dev->ldev, skb->data, len, + DMA_TO_DEVICE); + dev->tx_desc[slot].data_len = (u16) len; + barrier(); + dev->tx_desc[slot].ctrl = ctrl; - return 0; + return emac_xmit_finish(dev, len); } -static void __emac_set_multicast_list(struct net_device *dev) +#if defined(CONFIG_IBM_EMAC_TAH) +static inline int emac_xmit_split(struct ocp_enet_private *dev, int slot, + u32 pd, int len, int last, u16 base_ctrl) { - struct ocp_enet_private *fep = dev->priv; - emac_t *emacp = fep->emacp; - u32 rmr = in_be32(&emacp->em0rmr); - - /* First clear all special bits, they can be set later */ - rmr &= ~(EMAC_RMR_PME | EMAC_RMR_PMME | EMAC_RMR_MAE); - - if (dev->flags & IFF_PROMISC) { - rmr |= EMAC_RMR_PME; - } else if (dev->flags & IFF_ALLMULTI || 32 < dev->mc_count) { - /* - * Must be setting up to use multicast - * Now check for promiscuous multicast - */ - rmr |= EMAC_RMR_PMME; - } else if (dev->flags & IFF_MULTICAST && 0 < dev->mc_count) { - unsigned short em0gaht[4] = { 0, 0, 0, 0 }; - struct dev_mc_list *dmi; + while (1) { + u16 ctrl = base_ctrl; + int chunk = min(len, MAL_MAX_TX_SIZE); + len -= chunk; - /* Need to hash on the multicast address. 
*/ - for (dmi = dev->mc_list; dmi; dmi = dmi->next) { - unsigned long mc_crc; - unsigned int bit_number; + slot = (slot + 1) % NUM_TX_BUFF; - mc_crc = ether_crc(6, (char *)dmi->dmi_addr); - bit_number = 63 - (mc_crc >> 26); /* MSB: 0 LSB: 63 */ - em0gaht[bit_number >> 4] |= - 0x8000 >> (bit_number & 0x0f); - } - emacp->em0gaht1 = em0gaht[0]; - emacp->em0gaht2 = em0gaht[1]; - emacp->em0gaht3 = em0gaht[2]; - emacp->em0gaht4 = em0gaht[3]; + if (last && !len) + ctrl |= MAL_TX_CTRL_LAST; + if (slot == NUM_TX_BUFF - 1) + ctrl |= MAL_TX_CTRL_WRAP; - /* Turn on multicast addressing */ - rmr |= EMAC_RMR_MAE; + dev->tx_skb[slot] = NULL; + dev->tx_desc[slot].data_ptr = pd; + dev->tx_desc[slot].data_len = (u16) chunk; + dev->tx_desc[slot].ctrl = ctrl; + ++dev->tx_cnt; + + if (!len) + break; + + pd += chunk; } - out_be32(&emacp->em0rmr, rmr); + return slot; } -static int emac_init_tah(struct ocp_enet_private *fep) +/* BHs disabled (SG version for TAH equipped EMACs) */ +static int emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev) { - tah_t *tahp; + struct ocp_enet_private *dev = ndev->priv; + int nr_frags = skb_shinfo(skb)->nr_frags; + int len = skb->len, chunk; + int slot, i; + u16 ctrl; + u32 pd; + + /* This is common "fast" path */ + if (likely(!nr_frags && len <= MAL_MAX_TX_SIZE)) + return emac_start_xmit(skb, ndev); - /* Initialize TAH and enable checksum verification */ - tahp = (tah_t *) ioremap(fep->tah_dev->def->paddr, sizeof(*tahp)); + len -= skb->data_len; - if (tahp == NULL) { - printk(KERN_ERR "tah%d: Cannot ioremap TAH registers!\n", - fep->tah_dev->def->index); + /* Note, this is only an *estimation*, we can still run out of empty + * slots because of the additional fragmentation into + * MAL_MAX_TX_SIZE-sized chunks + */ + if (unlikely(dev->tx_cnt + nr_frags + mal_tx_chunks(len) > NUM_TX_BUFF)) + goto stop_queue; - return -ENOMEM; + ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY | + emac_tx_csum(dev, skb); + slot = dev->tx_slot; + 
+ /* skb data */ + dev->tx_skb[slot] = NULL; + chunk = min(len, MAL_MAX_TX_SIZE); + dev->tx_desc[slot].data_ptr = pd = + dma_map_single(dev->ldev, skb->data, len, DMA_TO_DEVICE); + dev->tx_desc[slot].data_len = (u16) chunk; + len -= chunk; + if (unlikely(len)) + slot = emac_xmit_split(dev, slot, pd + chunk, len, !nr_frags, + ctrl); + /* skb fragments */ + for (i = 0; i < nr_frags; ++i) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + len = frag->size; + + if (unlikely(dev->tx_cnt + mal_tx_chunks(len) >= NUM_TX_BUFF)) + goto undo_frame; + + pd = dma_map_page(dev->ldev, frag->page, frag->page_offset, len, + DMA_TO_DEVICE); + + slot = emac_xmit_split(dev, slot, pd, len, i == nr_frags - 1, + ctrl); } - out_be32(&tahp->tah_mr, TAH_MR_SR); + DBG2("%d: xmit_sg(%u) %d - %d" NL, dev->def->index, skb->len, + dev->tx_slot, slot); - /* wait for reset to complete */ - while (in_be32(&tahp->tah_mr) & TAH_MR_SR) ; + /* Attach skb to the last slot so we don't release it too early */ + dev->tx_skb[slot] = skb; - /* 10KB TAH TX FIFO accomodates the max MTU of 9000 */ - out_be32(&tahp->tah_mr, - TAH_MR_CVR | TAH_MR_ST_768 | TAH_MR_TFS_10KB | TAH_MR_DTFP | - TAH_MR_DIG); + /* Send the packet out */ + if (dev->tx_slot == NUM_TX_BUFF - 1) + ctrl |= MAL_TX_CTRL_WRAP; + barrier(); + dev->tx_desc[dev->tx_slot].ctrl = ctrl; + dev->tx_slot = (slot + 1) % NUM_TX_BUFF; - iounmap(tahp); + return emac_xmit_finish(dev, skb->len); - return 0; + undo_frame: + /* Well, too bad. Our previous estimation was overly optimistic. + * Undo everything. 
+ */ + while (slot != dev->tx_slot) { + dev->tx_desc[slot].ctrl = 0; + --dev->tx_cnt; + if (--slot < 0) + slot = NUM_TX_BUFF - 1; + } + ++dev->estats.tx_undo; + + stop_queue: + netif_stop_queue(ndev); + DBG2("%d: stopped TX queue" NL, dev->def->index); + return 1; +} +#else +# define emac_start_xmit_sg emac_start_xmit +#endif /* !defined(CONFIG_IBM_EMAC_TAH) */ + +/* BHs disabled */ +static void emac_parse_tx_error(struct ocp_enet_private *dev, u16 ctrl) +{ + struct ibm_emac_error_stats *st = &dev->estats; + DBG("%d: BD TX error %04x" NL, dev->def->index, ctrl); + + ++st->tx_bd_errors; + if (ctrl & EMAC_TX_ST_BFCS) + ++st->tx_bd_bad_fcs; + if (ctrl & EMAC_TX_ST_LCS) + ++st->tx_bd_carrier_loss; + if (ctrl & EMAC_TX_ST_ED) + ++st->tx_bd_excessive_deferral; + if (ctrl & EMAC_TX_ST_EC) + ++st->tx_bd_excessive_collisions; + if (ctrl & EMAC_TX_ST_LC) + ++st->tx_bd_late_collision; + if (ctrl & EMAC_TX_ST_MC) + ++st->tx_bd_multple_collisions; + if (ctrl & EMAC_TX_ST_SC) + ++st->tx_bd_single_collision; + if (ctrl & EMAC_TX_ST_UR) + ++st->tx_bd_underrun; + if (ctrl & EMAC_TX_ST_SQE) + ++st->tx_bd_sqe; +} + +static void emac_poll_tx(void *param) +{ + struct ocp_enet_private *dev = param; + DBG2("%d: poll_tx, %d %d" NL, dev->def->index, dev->tx_cnt, + dev->ack_slot); + + if (dev->tx_cnt) { + u16 ctrl; + int slot = dev->ack_slot, n = 0; + again: + ctrl = dev->tx_desc[slot].ctrl; + if (!(ctrl & MAL_TX_CTRL_READY)) { + struct sk_buff *skb = dev->tx_skb[slot]; + ++n; + + if (skb) { + dev_kfree_skb(skb); + dev->tx_skb[slot] = NULL; + } + slot = (slot + 1) % NUM_TX_BUFF; + + if (unlikely(EMAC_IS_BAD_TX(ctrl))) + emac_parse_tx_error(dev, ctrl); + + if (--dev->tx_cnt) + goto again; + } + if (n) { + dev->ack_slot = slot; + if (netif_queue_stopped(dev->ndev) && + dev->tx_cnt < EMAC_TX_WAKEUP_THRESH) + netif_wake_queue(dev->ndev); + + DBG2("%d: tx %d pkts" NL, dev->def->index, n); + } + } } -static void emac_init_rings(struct net_device *dev) +static inline void 
emac_recycle_rx_skb(struct ocp_enet_private *dev, int slot, + int len) { - struct ocp_enet_private *ep = dev->priv; - int loop; + struct sk_buff *skb = dev->rx_skb[slot]; + DBG2("%d: recycle %d %d" NL, dev->def->index, slot, len); - ep->tx_desc = (struct mal_descriptor *)((char *)ep->mal->tx_virt_addr + - (ep->mal_tx_chan * - MAL_DT_ALIGN)); - ep->rx_desc = - (struct mal_descriptor *)((char *)ep->mal->rx_virt_addr + - (ep->mal_rx_chan * MAL_DT_ALIGN)); - - /* Fill in the transmit descriptor ring. */ - for (loop = 0; loop < NUM_TX_BUFF; loop++) { - if (ep->tx_skb[loop]) { - dma_unmap_single(&ep->ocpdev->dev, - ep->tx_desc[loop].data_ptr, - ep->tx_desc[loop].data_len, - DMA_TO_DEVICE); - dev_kfree_skb_irq(ep->tx_skb[loop]); - } - ep->tx_skb[loop] = NULL; - ep->tx_desc[loop].ctrl = 0; - ep->tx_desc[loop].data_len = 0; - ep->tx_desc[loop].data_ptr = NULL; - } - ep->tx_desc[loop - 1].ctrl |= MAL_TX_CTRL_WRAP; - - /* Format the receive descriptor ring. */ - ep->rx_slot = 0; - /* Default is MTU=1500 + Ethernet overhead */ - ep->rx_buffer_size = dev->mtu + ENET_HEADER_SIZE + ENET_FCS_SIZE; - emac_rx_fill(dev, 0); - if (ep->rx_slot != 0) { - printk(KERN_ERR - "%s: Not enough mem for RxChain durning Open?\n", - dev->name); - /*We couldn't fill the ring at startup? - *We could clean up and fail to open but right now we will try to - *carry on. It may be a sign of a bad NUM_RX_BUFF value - */ - } + if (len) + dma_map_single(dev->ldev, skb->data - 2, + EMAC_DMA_ALIGN(len + 2), DMA_FROM_DEVICE); - ep->tx_cnt = 0; - ep->tx_slot = 0; - ep->ack_slot = 0; + dev->rx_desc[slot].data_len = 0; + barrier(); + dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | + (slot == (NUM_RX_BUFF - 1) ? 
MAL_RX_CTRL_WRAP : 0); } -static void emac_reset_configure(struct ocp_enet_private *fep) +static void emac_parse_rx_error(struct ocp_enet_private *dev, u16 ctrl) { - emac_t *emacp = fep->emacp; - int i; + struct ibm_emac_error_stats *st = &dev->estats; + DBG("%d: BD RX error %04x" NL, dev->def->index, ctrl); - mal_disable_tx_channels(fep->mal, fep->commac.tx_chan_mask); - mal_disable_rx_channels(fep->mal, fep->commac.rx_chan_mask); + ++st->rx_bd_errors; + if (ctrl & EMAC_RX_ST_OE) + ++st->rx_bd_overrun; + if (ctrl & EMAC_RX_ST_BP) + ++st->rx_bd_bad_packet; + if (ctrl & EMAC_RX_ST_RP) + ++st->rx_bd_runt_packet; + if (ctrl & EMAC_RX_ST_SE) + ++st->rx_bd_short_event; + if (ctrl & EMAC_RX_ST_AE) + ++st->rx_bd_alignment_error; + if (ctrl & EMAC_RX_ST_BFCS) + ++st->rx_bd_bad_fcs; + if (ctrl & EMAC_RX_ST_PTL) + ++st->rx_bd_packet_too_long; + if (ctrl & EMAC_RX_ST_ORE) + ++st->rx_bd_out_of_range; + if (ctrl & EMAC_RX_ST_IRE) + ++st->rx_bd_in_range; +} - /* - * Check for a link, some PHYs don't provide a clock if - * no link is present. Some EMACs will not come out of - * soft reset without a PHY clock present. 
- */ - if (fep->phy_mii.def->ops->poll_link(&fep->phy_mii)) { - /* Reset the EMAC */ - out_be32(&emacp->em0mr0, EMAC_M0_SRST); - udelay(20); - for (i = 0; i < 100; i++) { - if ((in_be32(&emacp->em0mr0) & EMAC_M0_SRST) == 0) - break; - udelay(10); - } +static inline void emac_rx_csum(struct ocp_enet_private *dev, + struct sk_buff *skb, u16 ctrl) +{ +#if defined(CONFIG_IBM_EMAC_TAH) + if (!ctrl && dev->tah_dev) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + ++dev->stats.rx_packets_csum; + } +#endif +} - if (i >= 100) { - printk(KERN_ERR "%s: Cannot reset EMAC\n", - fep->ndev->name); - return; +static inline int emac_rx_sg_append(struct ocp_enet_private *dev, int slot) +{ + if (likely(dev->rx_sg_skb != NULL)) { + int len = dev->rx_desc[slot].data_len; + int tot_len = dev->rx_sg_skb->len + len; + + if (unlikely(tot_len + 2 > dev->rx_skb_size)) { + ++dev->estats.rx_dropped_mtu; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } else { + cacheable_memcpy(dev->rx_sg_skb->tail, + dev->rx_skb[slot]->data, len); + skb_put(dev->rx_sg_skb, len); + emac_recycle_rx_skb(dev, slot, len); + return 0; } } + emac_recycle_rx_skb(dev, slot, 0); + return -1; +} + +/* BHs disabled */ +static int emac_poll_rx(void *param, int budget) +{ + struct ocp_enet_private *dev = param; + int slot = dev->rx_slot, received = 0; - /* Switch IRQs off for now */ - out_be32(&emacp->em0iser, 0); + DBG2("%d: poll_rx(%d)" NL, dev->def->index, budget); - /* Configure MAL rx channel */ - mal_set_rcbs(fep->mal, fep->mal_rx_chan, DESC_BUF_SIZE_REG); + again: + while (budget > 0) { + int len; + struct sk_buff *skb; + u16 ctrl = dev->rx_desc[slot].ctrl; - /* set the high address */ - out_be32(&emacp->em0iahr, - (fep->ndev->dev_addr[0] << 8) | fep->ndev->dev_addr[1]); + if (ctrl & MAL_RX_CTRL_EMPTY) + break; - /* set the low address */ - out_be32(&emacp->em0ialr, - (fep->ndev->dev_addr[2] << 24) | (fep->ndev->dev_addr[3] << 16) - | (fep->ndev->dev_addr[4] << 8) | fep->ndev->dev_addr[5]); + skb = 
dev->rx_skb[slot]; + barrier(); + len = dev->rx_desc[slot].data_len; + + if (unlikely(!MAL_IS_SINGLE_RX(ctrl))) + goto sg; + + ctrl &= EMAC_BAD_RX_MASK; + if (unlikely(ctrl && ctrl != EMAC_RX_TAH_BAD_CSUM)) { + emac_parse_rx_error(dev, ctrl); + ++dev->estats.rx_dropped_error; + emac_recycle_rx_skb(dev, slot, 0); + len = 0; + goto next; + } + + if (len && len < EMAC_RX_COPY_THRESH) { + struct sk_buff *copy_skb = + alloc_skb(len + EMAC_RX_SKB_HEADROOM + 2, GFP_ATOMIC); + if (unlikely(!copy_skb)) + goto oom; + + skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2); + cacheable_memcpy(copy_skb->data - 2, skb->data - 2, + len + 2); + emac_recycle_rx_skb(dev, slot, len); + skb = copy_skb; + } else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) + goto oom; + + skb_put(skb, len); + push_packet: + skb->dev = dev->ndev; + skb->protocol = eth_type_trans(skb, dev->ndev); + emac_rx_csum(dev, skb, ctrl); + + if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) + ++dev->estats.rx_dropped_stack; + next: + ++dev->stats.rx_packets; + skip: + dev->stats.rx_bytes += len; + slot = (slot + 1) % NUM_RX_BUFF; + --budget; + ++received; + continue; + sg: + if (ctrl & MAL_RX_CTRL_FIRST) { + BUG_ON(dev->rx_sg_skb); + if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) { + DBG("%d: rx OOM %d" NL, dev->def->index, slot); + ++dev->estats.rx_dropped_oom; + emac_recycle_rx_skb(dev, slot, 0); + } else { + dev->rx_sg_skb = skb; + skb_put(skb, len); + } + } else if (!emac_rx_sg_append(dev, slot) && + (ctrl & MAL_RX_CTRL_LAST)) { - /* Adjust to link */ - if (netif_carrier_ok(fep->ndev)) - emac_adjust_to_link(fep); + skb = dev->rx_sg_skb; + dev->rx_sg_skb = NULL; - /* enable broadcast/individual address and RX FIFO defaults */ - out_be32(&emacp->em0rmr, EMAC_RMR_DEFAULT); + ctrl &= EMAC_BAD_RX_MASK; + if (unlikely(ctrl && ctrl != EMAC_RX_TAH_BAD_CSUM)) { + emac_parse_rx_error(dev, ctrl); + ++dev->estats.rx_dropped_error; + dev_kfree_skb(skb); + len = 0; + } else + goto push_packet; + } + goto 
skip; + oom: + DBG("%d: rx OOM %d" NL, dev->def->index, slot); + /* Drop the packet and recycle skb */ + ++dev->estats.rx_dropped_oom; + emac_recycle_rx_skb(dev, slot, 0); + goto next; + } - /* set transmit request threshold register */ - out_be32(&emacp->em0trtr, EMAC_TRTR_DEFAULT); + if (received) { + DBG2("%d: rx %d BDs" NL, dev->def->index, received); + dev->rx_slot = slot; + } - /* Reconfigure multicast */ - __emac_set_multicast_list(fep->ndev); + if (unlikely(budget && dev->commac.rx_stopped)) { + struct ocp_func_emac_data *emacdata = dev->def->additions; - /* Set receiver/transmitter defaults */ - out_be32(&emacp->em0rwmr, EMAC_RWMR_DEFAULT); - out_be32(&emacp->em0tmr0, EMAC_TMR0_DEFAULT); - out_be32(&emacp->em0tmr1, EMAC_TMR1_DEFAULT); + barrier(); + if (!(dev->rx_desc[slot].ctrl & MAL_RX_CTRL_EMPTY)) { + DBG2("%d: rx restart" NL, dev->def->index); + received = 0; + goto again; + } - /* set frame gap */ - out_be32(&emacp->em0ipgvr, CONFIG_IBM_EMAC_FGAP); - - /* set VLAN Tag Protocol Identifier */ - out_be32(&emacp->em0vtpid, 0x8100); + if (dev->rx_sg_skb) { + DBG2("%d: dropping partial rx packet" NL, + dev->def->index); + ++dev->estats.rx_dropped_error; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } - /* Init ring buffers */ - emac_init_rings(fep->ndev); + dev->commac.rx_stopped = 0; + mal_enable_rx_channel(dev->mal, emacdata->mal_rx_chan); + emac_rx_enable(dev); + dev->rx_slot = 0; + } + return received; } -static void emac_kick(struct ocp_enet_private *fep) +/* BHs disabled */ +static int emac_peek_rx(void *param) { - emac_t *emacp = fep->emacp; - unsigned long emac_ier; - - emac_ier = EMAC_ISR_PP | EMAC_ISR_BP | EMAC_ISR_RP | - EMAC_ISR_SE | EMAC_ISR_PTLE | EMAC_ISR_ALE | - EMAC_ISR_BFCS | EMAC_ISR_ORE | EMAC_ISR_IRE; - - out_be32(&emacp->em0iser, emac_ier); - - /* enable all MAL transmit and receive channels */ - mal_enable_tx_channels(fep->mal, fep->commac.tx_chan_mask); - mal_enable_rx_channels(fep->mal, fep->commac.rx_chan_mask); - - 
/* set transmit and receive enable */ - out_be32(&emacp->em0mr0, EMAC_M0_TXE | EMAC_M0_RXE); + struct ocp_enet_private *dev = param; + return !(dev->rx_desc[dev->rx_slot].ctrl & MAL_RX_CTRL_EMPTY); } -static void -emac_start_link(struct ocp_enet_private *fep, struct ethtool_cmd *ep) +/* BHs disabled */ +static int emac_peek_rx_sg(void *param) { - u32 advertise; - int autoneg; - int forced_speed; - int forced_duplex; + struct ocp_enet_private *dev = param; + int slot = dev->rx_slot; + while (1) { + u16 ctrl = dev->rx_desc[slot].ctrl; + if (ctrl & MAL_RX_CTRL_EMPTY) + return 0; + else if (ctrl & MAL_RX_CTRL_LAST) + return 1; + + slot = (slot + 1) % NUM_RX_BUFF; + + /* I'm just being paranoid here :) */ + if (unlikely(slot == dev->rx_slot)) + return 0; + } +} - /* Default advertise */ - advertise = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | - ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | - ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full; - autoneg = fep->want_autoneg; - forced_speed = fep->phy_mii.speed; - forced_duplex = fep->phy_mii.duplex; +/* Hard IRQ */ +static void emac_rxde(void *param) +{ + struct ocp_enet_private *dev = param; + ++dev->estats.rx_stopped; + emac_rx_disable_async(dev); +} + +/* Hard IRQ */ +static irqreturn_t emac_irq(int irq, void *dev_instance, struct pt_regs *regs) +{ + struct ocp_enet_private *dev = dev_instance; + struct emac_regs *p = dev->emacp; + struct ibm_emac_error_stats *st = &dev->estats; + + u32 isr = in_be32(&p->isr); + out_be32(&p->isr, isr); + + DBG("%d: isr = %08x" NL, dev->def->index, isr); + + if (isr & EMAC_ISR_TXPE) + ++st->tx_parity; + if (isr & EMAC_ISR_RXPE) + ++st->rx_parity; + if (isr & EMAC_ISR_TXUE) + ++st->tx_underrun; + if (isr & EMAC_ISR_RXOE) + ++st->rx_fifo_overrun; + if (isr & EMAC_ISR_OVR) + ++st->rx_overrun; + if (isr & EMAC_ISR_BP) + ++st->rx_bad_packet; + if (isr & EMAC_ISR_RP) + ++st->rx_runt_packet; + if (isr & EMAC_ISR_SE) + ++st->rx_short_event; + if (isr & EMAC_ISR_ALE) + 
++st->rx_alignment_error; + if (isr & EMAC_ISR_BFCS) + ++st->rx_bad_fcs; + if (isr & EMAC_ISR_PTLE) + ++st->rx_packet_too_long; + if (isr & EMAC_ISR_ORE) + ++st->rx_out_of_range; + if (isr & EMAC_ISR_IRE) + ++st->rx_in_range; + if (isr & EMAC_ISR_SQE) + ++st->tx_sqe; + if (isr & EMAC_ISR_TE) + ++st->tx_errors; - /* Setup link parameters */ - if (ep) { - if (ep->autoneg == AUTONEG_ENABLE) { - advertise = ep->advertising; - autoneg = 1; - } else { - autoneg = 0; - forced_speed = ep->speed; - forced_duplex = ep->duplex; - } - } + return IRQ_HANDLED; +} - /* Configure PHY & start aneg */ - fep->want_autoneg = autoneg; - if (autoneg) { - LINK_DEBUG(("%s: start link aneg, advertise: 0x%x\n", - fep->ndev->name, advertise)); - fep->phy_mii.def->ops->setup_aneg(&fep->phy_mii, advertise); - } else { - LINK_DEBUG(("%s: start link forced, speed: %d, duplex: %d\n", - fep->ndev->name, forced_speed, forced_duplex)); - fep->phy_mii.def->ops->setup_forced(&fep->phy_mii, forced_speed, - forced_duplex); - } - fep->timer_ticks = 0; - mod_timer(&fep->link_timer, jiffies + HZ); +static struct net_device_stats *emac_stats(struct net_device *ndev) +{ + struct ocp_enet_private *dev = ndev->priv; + struct ibm_emac_stats *st = &dev->stats; + struct ibm_emac_error_stats *est = &dev->estats; + struct net_device_stats *nst = &dev->nstats; + + DBG2("%d: stats" NL, dev->def->index); + + /* Compute "legacy" statistics */ + local_irq_disable(); + nst->rx_packets = (unsigned long)st->rx_packets; + nst->rx_bytes = (unsigned long)st->rx_bytes; + nst->tx_packets = (unsigned long)st->tx_packets; + nst->tx_bytes = (unsigned long)st->tx_bytes; + nst->rx_dropped = (unsigned long)(est->rx_dropped_oom + + est->rx_dropped_error + + est->rx_dropped_resize + + est->rx_dropped_mtu); + nst->tx_dropped = (unsigned long)est->tx_dropped; + + nst->rx_errors = (unsigned long)est->rx_bd_errors; + nst->rx_fifo_errors = (unsigned long)(est->rx_bd_overrun + + est->rx_fifo_overrun + + est->rx_overrun); + 
nst->rx_frame_errors = (unsigned long)(est->rx_bd_alignment_error + + est->rx_alignment_error); + nst->rx_crc_errors = (unsigned long)(est->rx_bd_bad_fcs + + est->rx_bad_fcs); + nst->rx_length_errors = (unsigned long)(est->rx_bd_runt_packet + + est->rx_bd_short_event + + est->rx_bd_packet_too_long + + est->rx_bd_out_of_range + + est->rx_bd_in_range + + est->rx_runt_packet + + est->rx_short_event + + est->rx_packet_too_long + + est->rx_out_of_range + + est->rx_in_range); + + nst->tx_errors = (unsigned long)(est->tx_bd_errors + est->tx_errors); + nst->tx_fifo_errors = (unsigned long)(est->tx_bd_underrun + + est->tx_underrun); + nst->tx_carrier_errors = (unsigned long)est->tx_bd_carrier_loss; + nst->collisions = (unsigned long)(est->tx_bd_excessive_deferral + + est->tx_bd_excessive_collisions + + est->tx_bd_late_collision + + est->tx_bd_multple_collisions); + local_irq_enable(); + return nst; } -static void emac_link_timer(unsigned long data) +static void emac_remove(struct ocp_device *ocpdev) { - struct ocp_enet_private *fep = (struct ocp_enet_private *)data; - int link; + struct ocp_enet_private *dev = ocp_get_drvdata(ocpdev); - if (fep->going_away) - return; + DBG("%d: remove" NL, dev->def->index); - spin_lock_irq(&fep->lock); + ocp_set_drvdata(ocpdev, 0); + unregister_netdev(dev->ndev); - link = fep->phy_mii.def->ops->poll_link(&fep->phy_mii); - LINK_DEBUG(("%s: poll_link: %d\n", fep->ndev->name, link)); + tah_fini(dev->tah_dev); + rgmii_fini(dev->rgmii_dev, dev->rgmii_input); + zmii_fini(dev->zmii_dev, dev->zmii_input); - if (link == netif_carrier_ok(fep->ndev)) { - if (!link && fep->want_autoneg && (++fep->timer_ticks) > 10) - emac_start_link(fep, NULL); - goto out; - } - printk(KERN_INFO "%s: Link is %s\n", fep->ndev->name, - link ? "Up" : "Down"); - if (link) { - netif_carrier_on(fep->ndev); - /* Chip needs a full reset on config change. 
That sucks, so I - * should ultimately move that to some tasklet to limit - * latency peaks caused by this code - */ - emac_reset_configure(fep); - if (fep->opened) - emac_kick(fep); - } else { - fep->timer_ticks = 0; - netif_carrier_off(fep->ndev); - } - out: - mod_timer(&fep->link_timer, jiffies + HZ); - spin_unlock_irq(&fep->lock); + emac_dbg_register(dev->def->index, 0); + + mal_unregister_commac(dev->mal, &dev->commac); + iounmap((void *)dev->emacp); + kfree(dev->ndev); } -static void emac_set_multicast_list(struct net_device *dev) -{ - struct ocp_enet_private *fep = dev->priv; +static struct mal_commac_ops emac_commac_ops = { + .poll_tx = &emac_poll_tx, + .poll_rx = &emac_poll_rx, + .peek_rx = &emac_peek_rx, + .rxde = &emac_rxde, +}; - spin_lock_irq(&fep->lock); - __emac_set_multicast_list(dev); - spin_unlock_irq(&fep->lock); -} +static struct mal_commac_ops emac_commac_sg_ops = { + .poll_tx = &emac_poll_tx, + .poll_rx = &emac_poll_rx, + .peek_rx = &emac_peek_rx_sg, + .rxde = &emac_rxde, +}; -static int emac_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +/* Ethtool support */ +static int emac_ethtool_get_settings(struct net_device *ndev, + struct ethtool_cmd *cmd) { - struct ocp_enet_private *fep = ndev->priv; + struct ocp_enet_private *dev = ndev->priv; - cmd->supported = fep->phy_mii.def->features; + cmd->supported = dev->phy.features; cmd->port = PORT_MII; - cmd->transceiver = XCVR_EXTERNAL; - cmd->phy_address = fep->mii_phy_addr; - spin_lock_irq(&fep->lock); - cmd->autoneg = fep->want_autoneg; - cmd->speed = fep->phy_mii.speed; - cmd->duplex = fep->phy_mii.duplex; - spin_unlock_irq(&fep->lock); + cmd->phy_address = dev->phy.address; + cmd->transceiver = + dev->phy.address >= 0 ? 
XCVR_EXTERNAL : XCVR_INTERNAL; + + local_bh_disable(); + cmd->advertising = dev->phy.advertising; + cmd->autoneg = dev->phy.autoneg; + cmd->speed = dev->phy.speed; + cmd->duplex = dev->phy.duplex; + local_bh_enable(); + return 0; } -static int emac_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +static int emac_ethtool_set_settings(struct net_device *ndev, + struct ethtool_cmd *cmd) { - struct ocp_enet_private *fep = ndev->priv; - unsigned long features = fep->phy_mii.def->features; + struct ocp_enet_private *dev = ndev->priv; + u32 f = dev->phy.features; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; + DBG("%d: set_settings(%d, %d, %d, 0x%08x)" NL, dev->def->index, + cmd->autoneg, cmd->speed, cmd->duplex, cmd->advertising); + /* Basic sanity checks */ + if (dev->phy.address < 0) + return -EOPNOTSUPP; if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) return -EINVAL; if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) return -EINVAL; if (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE) + + if (cmd->autoneg == AUTONEG_DISABLE) { switch (cmd->speed) { case SPEED_10: - if (cmd->duplex == DUPLEX_HALF && - (features & SUPPORTED_10baseT_Half) == 0) + if (cmd->duplex == DUPLEX_HALF + && !(f & SUPPORTED_10baseT_Half)) return -EINVAL; - if (cmd->duplex == DUPLEX_FULL && - (features & SUPPORTED_10baseT_Full) == 0) + if (cmd->duplex == DUPLEX_FULL + && !(f & SUPPORTED_10baseT_Full)) return -EINVAL; break; case SPEED_100: - if (cmd->duplex == DUPLEX_HALF && - (features & SUPPORTED_100baseT_Half) == 0) + if (cmd->duplex == DUPLEX_HALF + && !(f & SUPPORTED_100baseT_Half)) return -EINVAL; - if (cmd->duplex == DUPLEX_FULL && - (features & SUPPORTED_100baseT_Full) == 0) + if (cmd->duplex == DUPLEX_FULL + && !(f & SUPPORTED_100baseT_Full)) return -EINVAL; break; case SPEED_1000: - if (cmd->duplex == DUPLEX_HALF && - (features & SUPPORTED_1000baseT_Half) == 0) + if 
(cmd->duplex == DUPLEX_HALF + && !(f & SUPPORTED_1000baseT_Half)) return -EINVAL; - if (cmd->duplex == DUPLEX_FULL && - (features & SUPPORTED_1000baseT_Full) == 0) + if (cmd->duplex == DUPLEX_FULL + && !(f & SUPPORTED_1000baseT_Full)) return -EINVAL; break; default: return -EINVAL; - } else if ((features & SUPPORTED_Autoneg) == 0) - return -EINVAL; - spin_lock_irq(&fep->lock); - emac_start_link(fep, cmd); - spin_unlock_irq(&fep->lock); + } + + local_bh_disable(); + dev->phy.def->ops->setup_forced(&dev->phy, cmd->speed, + cmd->duplex); + + } else { + if (!(f & SUPPORTED_Autoneg)) + return -EINVAL; + + local_bh_disable(); + dev->phy.def->ops->setup_aneg(&dev->phy, + (cmd->advertising & f) | + (dev->phy.advertising & + (ADVERTISED_Pause | + ADVERTISED_Asym_Pause))); + } + emac_force_link_update(dev); + local_bh_enable(); + return 0; } -static void -emac_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *info) +static void emac_ethtool_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *rp) { - struct ocp_enet_private *fep = ndev->priv; + rp->rx_max_pending = rp->rx_pending = NUM_RX_BUFF; + rp->tx_max_pending = rp->tx_pending = NUM_TX_BUFF; +} - strcpy(info->driver, DRV_NAME); - strcpy(info->version, DRV_VERSION); - info->fw_version[0] = '\0'; - sprintf(info->bus_info, "IBM EMAC %d", fep->ocpdev->def->index); - info->regdump_len = 0; +static void emac_ethtool_get_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pp) +{ + struct ocp_enet_private *dev = ndev->priv; + + local_bh_disable(); + if ((dev->phy.features & SUPPORTED_Autoneg) && + (dev->phy.advertising & (ADVERTISED_Pause | ADVERTISED_Asym_Pause))) + pp->autoneg = 1; + + if (dev->phy.duplex == DUPLEX_FULL) { + if (dev->phy.pause) + pp->rx_pause = pp->tx_pause = 1; + else if (dev->phy.asym_pause) + pp->tx_pause = 1; + } + local_bh_enable(); } -static int emac_nway_reset(struct net_device *ndev) +static u32 emac_ethtool_get_rx_csum(struct net_device *ndev) { - struct 
ocp_enet_private *fep = ndev->priv; + struct ocp_enet_private *dev = ndev->priv; + return dev->tah_dev != 0; +} - if (!fep->want_autoneg) - return -EINVAL; - spin_lock_irq(&fep->lock); - emac_start_link(fep, NULL); - spin_unlock_irq(&fep->lock); - return 0; +static int emac_get_regs_len(struct ocp_enet_private *dev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + EMAC_ETHTOOL_REGS_SIZE; } -static u32 emac_get_link(struct net_device *ndev) +static int emac_ethtool_get_regs_len(struct net_device *ndev) { - return netif_carrier_ok(ndev); + struct ocp_enet_private *dev = ndev->priv; + return sizeof(struct emac_ethtool_regs_hdr) + + emac_get_regs_len(dev) + mal_get_regs_len(dev->mal) + + zmii_get_regs_len(dev->zmii_dev) + + rgmii_get_regs_len(dev->rgmii_dev) + + tah_get_regs_len(dev->tah_dev); } -static struct ethtool_ops emac_ethtool_ops = { - .get_settings = emac_get_settings, - .set_settings = emac_set_settings, - .get_drvinfo = emac_get_drvinfo, - .nway_reset = emac_nway_reset, - .get_link = emac_get_link -}; +static void *emac_dump_regs(struct ocp_enet_private *dev, void *buf) +{ + struct emac_ethtool_regs_subhdr *hdr = buf; -static int emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) + hdr->version = EMAC_ETHTOOL_REGS_VER; + hdr->index = dev->def->index; + memcpy_fromio(hdr + 1, dev->emacp, EMAC_ETHTOOL_REGS_SIZE); + return ((void *)(hdr + 1) + EMAC_ETHTOOL_REGS_SIZE); +} + +static void emac_ethtool_get_regs(struct net_device *ndev, + struct ethtool_regs *regs, void *buf) { - struct ocp_enet_private *fep = dev->priv; - uint16_t *data = (uint16_t *) & rq->ifr_ifru; + struct ocp_enet_private *dev = ndev->priv; + struct emac_ethtool_regs_hdr *hdr = buf; - switch (cmd) { - case SIOCGMIIPHY: - data[0] = fep->mii_phy_addr; - /* Fall through */ - case SIOCGMIIREG: - data[3] = emac_phy_read(dev, fep->mii_phy_addr, data[1]); - return 0; - case SIOCSMIIREG: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; + hdr->components = 0; + buf = hdr + 1; - 
emac_phy_write(dev, fep->mii_phy_addr, data[1], data[2]); - return 0; - default: - return -EOPNOTSUPP; + local_irq_disable(); + buf = mal_dump_regs(dev->mal, buf); + buf = emac_dump_regs(dev, buf); + if (dev->zmii_dev) { + hdr->components |= EMAC_ETHTOOL_REGS_ZMII; + buf = zmii_dump_regs(dev->zmii_dev, buf); + } + if (dev->rgmii_dev) { + hdr->components |= EMAC_ETHTOOL_REGS_RGMII; + buf = rgmii_dump_regs(dev->rgmii_dev, buf); + } + if (dev->tah_dev) { + hdr->components |= EMAC_ETHTOOL_REGS_TAH; + buf = tah_dump_regs(dev->tah_dev, buf); } + local_irq_enable(); } -static int emac_open(struct net_device *dev) +static int emac_ethtool_nway_reset(struct net_device *ndev) { - struct ocp_enet_private *fep = dev->priv; - int rc; + struct ocp_enet_private *dev = ndev->priv; + int res = 0; - spin_lock_irq(&fep->lock); + DBG("%d: nway_reset" NL, dev->def->index); - fep->opened = 1; - netif_carrier_off(dev); + if (dev->phy.address < 0) + return -EOPNOTSUPP; - /* Reset & configure the chip */ - emac_reset_configure(fep); + local_bh_disable(); + if (!dev->phy.autoneg) { + res = -EINVAL; + goto out; + } - spin_unlock_irq(&fep->lock); + dev->phy.def->ops->setup_aneg(&dev->phy, dev->phy.advertising); + emac_force_link_update(dev); - /* Request our interrupt lines */ - rc = request_irq(dev->irq, emac_mac_irq, 0, "IBM EMAC MAC", dev); - if (rc != 0) { - printk("dev->irq %d failed\n", dev->irq); - goto bail; - } - /* Kick the chip rx & tx channels into life */ - spin_lock_irq(&fep->lock); - emac_kick(fep); - spin_unlock_irq(&fep->lock); + out: + local_bh_enable(); + return res; +} + +static int emac_ethtool_get_stats_count(struct net_device *ndev) +{ + return EMAC_ETHTOOL_STATS_COUNT; +} - netif_start_queue(dev); - bail: - return rc; +static void emac_ethtool_get_strings(struct net_device *ndev, u32 stringset, + u8 * buf) +{ + if (stringset == ETH_SS_STATS) + memcpy(buf, &emac_stats_keys, sizeof(emac_stats_keys)); } -static int emac_close(struct net_device *dev) +static void 
emac_ethtool_get_ethtool_stats(struct net_device *ndev, + struct ethtool_stats *estats, + u64 * tmp_stats) { - struct ocp_enet_private *fep = dev->priv; - emac_t *emacp = fep->emacp; + struct ocp_enet_private *dev = ndev->priv; + local_irq_disable(); + memcpy(tmp_stats, &dev->stats, sizeof(dev->stats)); + tmp_stats += sizeof(dev->stats) / sizeof(u64); + memcpy(tmp_stats, &dev->estats, sizeof(dev->estats)); + local_irq_enable(); +} - /* XXX Stop IRQ emitting here */ - spin_lock_irq(&fep->lock); - fep->opened = 0; - mal_disable_tx_channels(fep->mal, fep->commac.tx_chan_mask); - mal_disable_rx_channels(fep->mal, fep->commac.rx_chan_mask); - netif_carrier_off(dev); - netif_stop_queue(dev); +static void emac_ethtool_get_drvinfo(struct net_device *ndev, + struct ethtool_drvinfo *info) +{ + struct ocp_enet_private *dev = ndev->priv; - /* - * Check for a link, some PHYs don't provide a clock if - * no link is present. Some EMACs will not come out of - * soft reset without a PHY clock present. - */ - if (fep->phy_mii.def->ops->poll_link(&fep->phy_mii)) { - out_be32(&emacp->em0mr0, EMAC_M0_SRST); - udelay(10); + strcpy(info->driver, "ibm_emac"); + strcpy(info->version, DRV_VERSION); + info->fw_version[0] = '\0'; + sprintf(info->bus_info, "PPC 4xx EMAC %d", dev->def->index); + info->n_stats = emac_ethtool_get_stats_count(ndev); + info->regdump_len = emac_ethtool_get_regs_len(ndev); +} - if (emacp->em0mr0 & EMAC_M0_SRST) { - /*not sure what to do here hopefully it clears before another open */ - printk(KERN_ERR - "%s: Phy SoftReset didn't clear, no link?\n", - dev->name); - } - } +static struct ethtool_ops emac_ethtool_ops = { + .get_settings = emac_ethtool_get_settings, + .set_settings = emac_ethtool_set_settings, + .get_drvinfo = emac_ethtool_get_drvinfo, - /* Free the irq's */ - free_irq(dev->irq, dev); + .get_regs_len = emac_ethtool_get_regs_len, + .get_regs = emac_ethtool_get_regs, - spin_unlock_irq(&fep->lock); + .nway_reset = emac_ethtool_nway_reset, - return 0; -} + 
.get_ringparam = emac_ethtool_get_ringparam, + .get_pauseparam = emac_ethtool_get_pauseparam, -static void emac_remove(struct ocp_device *ocpdev) -{ - struct net_device *dev = ocp_get_drvdata(ocpdev); - struct ocp_enet_private *ep = dev->priv; + .get_rx_csum = emac_ethtool_get_rx_csum, - /* FIXME: locking, races, ... */ - ep->going_away = 1; - ocp_set_drvdata(ocpdev, NULL); - if (ep->rgmii_dev) - emac_close_rgmii(ep->rgmii_dev); - if (ep->zmii_dev) - emac_close_zmii(ep->zmii_dev); - - unregister_netdev(dev); - del_timer_sync(&ep->link_timer); - mal_unregister_commac(ep->mal, &ep->commac); - iounmap((void *)ep->emacp); - kfree(dev); -} - -struct mal_commac_ops emac_commac_ops = { - .txeob = &emac_txeob_dev, - .txde = &emac_txde_dev, - .rxeob = &emac_rxeob_dev, - .rxde = &emac_rxde_dev, + .get_strings = emac_ethtool_get_strings, + .get_stats_count = emac_ethtool_get_stats_count, + .get_ethtool_stats = emac_ethtool_get_ethtool_stats, + + .get_link = ethtool_op_get_link, + .get_tx_csum = ethtool_op_get_tx_csum, + .get_sg = ethtool_op_get_sg, }; -#ifdef CONFIG_NET_POLL_CONTROLLER -static void emac_netpoll(struct net_device *ndev) +static int emac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) { - emac_rxeob_dev((void *)ndev, 0); - emac_txeob_dev((void *)ndev, 0); + struct ocp_enet_private *dev = ndev->priv; + uint16_t *data = (uint16_t *) & rq->ifr_ifru; + + DBG("%d: ioctl %08x" NL, dev->def->index, cmd); + + if (dev->phy.address < 0) + return -EOPNOTSUPP; + + switch (cmd) { + case SIOCGMIIPHY: + case SIOCDEVPRIVATE: + data[0] = dev->phy.address; + /* Fall through */ + case SIOCGMIIREG: + case SIOCDEVPRIVATE + 1: + data[3] = emac_mdio_read(ndev, dev->phy.address, data[1]); + return 0; + + case SIOCSMIIREG: + case SIOCDEVPRIVATE + 2: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + emac_mdio_write(ndev, dev->phy.address, data[1], data[2]); + return 0; + default: + return -EOPNOTSUPP; + } } -#endif -static int emac_init_device(struct ocp_device *ocpdev, struct 
ibm_ocp_mal *mal) +static int __init emac_probe(struct ocp_device *ocpdev) { - int deferred_init = 0; - int rc = 0, i; + struct ocp_func_emac_data *emacdata = ocpdev->def->additions; struct net_device *ndev; - struct ocp_enet_private *ep; - struct ocp_func_emac_data *emacdata; - int commac_reg = 0; - u32 phy_map; + struct ocp_device *maldev; + struct ocp_enet_private *dev; + int err, i; + + DBG("%d: probe" NL, ocpdev->def->index); - emacdata = (struct ocp_func_emac_data *)ocpdev->def->additions; if (!emacdata) { printk(KERN_ERR "emac%d: Missing additional data!\n", ocpdev->def->index); @@ -1738,304 +1936,312 @@ static int emac_init_device(struct ocp_d /* Allocate our net_device structure */ ndev = alloc_etherdev(sizeof(struct ocp_enet_private)); - if (ndev == NULL) { - printk(KERN_ERR - "emac%d: Could not allocate ethernet device.\n", + if (!ndev) { + printk(KERN_ERR "emac%d: could not allocate ethernet device!\n", ocpdev->def->index); return -ENOMEM; } - ep = ndev->priv; - ep->ndev = ndev; - ep->ocpdev = ocpdev; - ndev->irq = ocpdev->def->irq; - ep->wol_irq = emacdata->wol_irq; - if (emacdata->mdio_idx >= 0) { - if (emacdata->mdio_idx == ocpdev->def->index) { - /* Set the common MDIO net_device */ - mdio_ndev = ndev; - deferred_init = 1; + dev = ndev->priv; + dev->ndev = ndev; + dev->ldev = &ocpdev->dev; + dev->def = ocpdev->def; + SET_MODULE_OWNER(ndev); + + /* Find MAL device we are connected to */ + maldev = + ocp_find_device(OCP_VENDOR_IBM, OCP_FUNC_MAL, emacdata->mal_idx); + if (!maldev) { + printk(KERN_ERR "emac%d: unknown mal%d device!\n", + dev->def->index, emacdata->mal_idx); + err = -ENODEV; + goto out; + } + dev->mal = ocp_get_drvdata(maldev); + if (!dev->mal) { + printk(KERN_ERR "emac%d: mal%d hasn't been initialized yet!\n", + dev->def->index, emacdata->mal_idx); + err = -ENODEV; + goto out; + } + + /* Register with MAL */ + dev->commac.ops = &emac_commac_ops; + dev->commac.dev = dev; + dev->commac.tx_chan_mask = MAL_CHAN_MASK(emacdata->mal_tx_chan); 
+ dev->commac.rx_chan_mask = MAL_CHAN_MASK(emacdata->mal_rx_chan); + err = mal_register_commac(dev->mal, &dev->commac); + if (err) { + printk(KERN_ERR "emac%d: failed to register with mal%d!\n", + dev->def->index, emacdata->mal_idx); + goto out; + } + dev->rx_skb_size = emac_rx_skb_size(ndev->mtu); + dev->rx_sync_size = emac_rx_sync_size(ndev->mtu); + + /* Get pointers to BD rings */ + dev->tx_desc = + dev->mal->bd_virt + mal_tx_bd_offset(dev->mal, + emacdata->mal_tx_chan); + dev->rx_desc = + dev->mal->bd_virt + mal_rx_bd_offset(dev->mal, + emacdata->mal_rx_chan); + + DBG("%d: tx_desc %p" NL, ocpdev->def->index, dev->tx_desc); + DBG("%d: rx_desc %p" NL, ocpdev->def->index, dev->rx_desc); + + /* Clean rings */ + memset(dev->tx_desc, 0, NUM_TX_BUFF * sizeof(struct mal_descriptor)); + memset(dev->rx_desc, 0, NUM_RX_BUFF * sizeof(struct mal_descriptor)); + + /* If we depend on another EMAC for MDIO, check whether it was probed already */ + if (emacdata->mdio_idx >= 0 && emacdata->mdio_idx != ocpdev->def->index) { + struct ocp_device *mdiodev = + ocp_find_device(OCP_VENDOR_IBM, OCP_FUNC_EMAC, + emacdata->mdio_idx); + if (!mdiodev) { + printk(KERN_ERR "emac%d: unknown emac%d device!\n", + dev->def->index, emacdata->mdio_idx); + err = -ENODEV; + goto out2; } - ep->mdio_dev = mdio_ndev; - } else { - ep->mdio_dev = ndev; + dev->mdio_dev = ocp_get_drvdata(mdiodev); + if (!dev->mdio_dev) { + printk(KERN_ERR + "emac%d: emac%d hasn't been initialized yet!\n", + dev->def->index, emacdata->mdio_idx); + err = -ENODEV; + goto out2; + } + } + + /* Attach to ZMII, if needed */ + if ((err = zmii_attach(dev)) != 0) + goto out2; + + /* Attach to RGMII, if needed */ + if ((err = rgmii_attach(dev)) != 0) + goto out3; + + /* Attach to TAH, if needed */ + if ((err = tah_attach(dev)) != 0) + goto out4; + + /* Map EMAC regs */ + dev->emacp = + (struct emac_regs *)ioremap(dev->def->paddr, + sizeof(struct emac_regs)); + if (!dev->emacp) { + printk(KERN_ERR "emac%d: could not ioremap device 
registers!\n", + dev->def->index); + err = -ENOMEM; + goto out5; } - ocp_set_drvdata(ocpdev, ndev); + /* Fill in MAC address */ + for (i = 0; i < 6; ++i) + ndev->dev_addr[i] = emacdata->mac_addr[i]; + + /* Set some link defaults before we can find out real parameters */ + dev->phy.speed = SPEED_100; + dev->phy.duplex = DUPLEX_FULL; + dev->phy.autoneg = AUTONEG_DISABLE; + dev->phy.pause = dev->phy.asym_pause = 0; + init_timer(&dev->link_timer); + dev->link_timer.function = emac_link_timer; + dev->link_timer.data = (unsigned long)dev; + + /* Find PHY if any */ + dev->phy.dev = ndev; + dev->phy.mode = emacdata->phy_mode; + if (emacdata->phy_map != 0xffffffff) { + u32 phy_map = emacdata->phy_map | busy_phy_map; + u32 adv; + + DBG("%d: PHY maps %08x %08x" NL, dev->def->index, + emacdata->phy_map, busy_phy_map); - spin_lock_init(&ep->lock); + EMAC_RX_CLK_TX(dev->def->index); - /* Fill out MAL informations and register commac */ - ep->mal = mal; - ep->mal_tx_chan = emacdata->mal_tx_chan; - ep->mal_rx_chan = emacdata->mal_rx_chan; - ep->commac.ops = &emac_commac_ops; - ep->commac.dev = ndev; - ep->commac.tx_chan_mask = MAL_CHAN_MASK(ep->mal_tx_chan); - ep->commac.rx_chan_mask = MAL_CHAN_MASK(ep->mal_rx_chan); - rc = mal_register_commac(ep->mal, &ep->commac); - if (rc != 0) - goto bail; - commac_reg = 1; - - /* Map our MMIOs */ - ep->emacp = (emac_t *) ioremap(ocpdev->def->paddr, sizeof(emac_t)); - - /* Check if we need to attach to a ZMII */ - if (emacdata->zmii_idx >= 0) { - ep->zmii_input = emacdata->zmii_mux; - ep->zmii_dev = - ocp_find_device(OCP_ANY_ID, OCP_FUNC_ZMII, - emacdata->zmii_idx); - if (ep->zmii_dev == NULL) - printk(KERN_WARNING - "emac%d: ZMII %d requested but not found !\n", - ocpdev->def->index, emacdata->zmii_idx); - else if ((rc = - emac_init_zmii(ep->zmii_dev, ep->zmii_input, - emacdata->phy_mode)) != 0) - goto bail; - } - - /* Check if we need to attach to a RGMII */ - if (emacdata->rgmii_idx >= 0) { - ep->rgmii_input = emacdata->rgmii_mux; - 
ep->rgmii_dev = - ocp_find_device(OCP_ANY_ID, OCP_FUNC_RGMII, - emacdata->rgmii_idx); - if (ep->rgmii_dev == NULL) - printk(KERN_WARNING - "emac%d: RGMII %d requested but not found !\n", - ocpdev->def->index, emacdata->rgmii_idx); - else if ((rc = - emac_init_rgmii(ep->rgmii_dev, ep->rgmii_input, - emacdata->phy_mode)) != 0) - goto bail; - } - - /* Check if we need to attach to a TAH */ - if (emacdata->tah_idx >= 0) { - ep->tah_dev = - ocp_find_device(OCP_ANY_ID, OCP_FUNC_TAH, - emacdata->tah_idx); - if (ep->tah_dev == NULL) - printk(KERN_WARNING - "emac%d: TAH %d requested but not found !\n", - ocpdev->def->index, emacdata->tah_idx); - else if ((rc = emac_init_tah(ep)) != 0) - goto bail; - } - - if (deferred_init) { - if (!list_empty(&emac_init_list)) { - struct list_head *entry; - struct emac_def_dev *ddev; - - list_for_each(entry, &emac_init_list) { - ddev = - list_entry(entry, struct emac_def_dev, - link); - emac_init_device(ddev->ocpdev, ddev->mal); - } + dev->phy.mdio_read = emac_mdio_read; + dev->phy.mdio_write = emac_mdio_write; + + /* Configure EMAC with defaults so we can at least use MDIO + * This is needed mostly for 440GX + */ + if (emac_phy_gpcs(dev->phy.mode)) { + /* XXX + * Make GPCS PHY address equal to EMAC index. + * We probably should take into account busy_phy_map + * and/or phy_map here. 
+ */ + dev->phy.address = dev->def->index; } - } - /* Init link monitoring timer */ - init_timer(&ep->link_timer); - ep->link_timer.function = emac_link_timer; - ep->link_timer.data = (unsigned long)ep; - ep->timer_ticks = 0; - - /* Fill up the mii_phy structure */ - ep->phy_mii.dev = ndev; - ep->phy_mii.mdio_read = emac_phy_read; - ep->phy_mii.mdio_write = emac_phy_write; - ep->phy_mii.mode = emacdata->phy_mode; - - /* Find PHY */ - phy_map = emacdata->phy_map | busy_phy_map; - for (i = 0; i <= 0x1f; i++, phy_map >>= 1) { - if ((phy_map & 0x1) == 0) { - int val = emac_phy_read(ndev, i, MII_BMCR); - if (val != 0xffff && val != -1) - break; + emac_configure(dev); + + for (i = 0; i < 0x20; phy_map >>= 1, ++i) + if (!(phy_map & 1)) { + int r; + busy_phy_map |= 1 << i; + + /* Quick check if there is a PHY at the address */ + r = emac_mdio_read(dev->ndev, i, MII_BMCR); + if (r == 0xffff || r < 0) + continue; + if (!mii_phy_probe(&dev->phy, i)) + break; + } + if (i == 0x20) { + printk(KERN_WARNING "emac%d: can't find PHY!\n", + dev->def->index); + goto out6; } - } - if (i == 0x20) { - printk(KERN_WARNING "emac%d: Can't find PHY.\n", - ocpdev->def->index); - rc = -ENODEV; - goto bail; - } - busy_phy_map |= 1 << i; - ep->mii_phy_addr = i; - rc = mii_phy_probe(&ep->phy_mii, i); - if (rc) { - printk(KERN_WARNING "emac%d: Failed to probe PHY type.\n", - ocpdev->def->index); - rc = -ENODEV; - goto bail; - } - - /* Disable any PHY features not supported by the platform */ - ep->phy_mii.def->features &= ~emacdata->phy_feat_exc; - - /* Setup initial PHY config & startup aneg */ - if (ep->phy_mii.def->ops->init) - ep->phy_mii.def->ops->init(&ep->phy_mii); - netif_carrier_off(ndev); - if (ep->phy_mii.def->features & SUPPORTED_Autoneg) - ep->want_autoneg = 1; - else { - ep->want_autoneg = 0; + + /* Init PHY */ + if (dev->phy.def->ops->init) + dev->phy.def->ops->init(&dev->phy); - /* Select highest supported speed/duplex */ - if (ep->phy_mii.def->features & SUPPORTED_1000baseT_Full) 
{ - ep->phy_mii.speed = SPEED_1000; - ep->phy_mii.duplex = DUPLEX_FULL; - } else if (ep->phy_mii.def->features & - SUPPORTED_1000baseT_Half) { - ep->phy_mii.speed = SPEED_1000; - ep->phy_mii.duplex = DUPLEX_HALF; - } else if (ep->phy_mii.def->features & - SUPPORTED_100baseT_Full) { - ep->phy_mii.speed = SPEED_100; - ep->phy_mii.duplex = DUPLEX_FULL; - } else if (ep->phy_mii.def->features & - SUPPORTED_100baseT_Half) { - ep->phy_mii.speed = SPEED_100; - ep->phy_mii.duplex = DUPLEX_HALF; - } else if (ep->phy_mii.def->features & - SUPPORTED_10baseT_Full) { - ep->phy_mii.speed = SPEED_10; - ep->phy_mii.duplex = DUPLEX_FULL; + /* Disable any PHY features not supported by the platform */ + dev->phy.def->features &= ~emacdata->phy_feat_exc; + + /* Setup initial link parameters */ + if (dev->phy.features & SUPPORTED_Autoneg) { + adv = dev->phy.features; +#if !defined(CONFIG_40x) + adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; +#endif + /* Restart autonegotiation */ + dev->phy.def->ops->setup_aneg(&dev->phy, adv); } else { - ep->phy_mii.speed = SPEED_10; - ep->phy_mii.duplex = DUPLEX_HALF; + u32 f = dev->phy.def->features; + int speed = SPEED_10, fd = DUPLEX_HALF; + + /* Select highest supported speed/duplex */ + if (f & SUPPORTED_1000baseT_Full) { + speed = SPEED_1000; + fd = DUPLEX_FULL; + } else if (f & SUPPORTED_1000baseT_Half) + speed = SPEED_1000; + else if (f & SUPPORTED_100baseT_Full) { + speed = SPEED_100; + fd = DUPLEX_FULL; + } else if (f & SUPPORTED_100baseT_Half) + speed = SPEED_100; + else if (f & SUPPORTED_10baseT_Full) + fd = DUPLEX_FULL; + + /* Force link parameters */ + dev->phy.def->ops->setup_forced(&dev->phy, speed, fd); } - } - emac_start_link(ep, NULL); + } else { + emac_reset(dev); - /* read the MAC Address */ - for (i = 0; i < 6; i++) - ndev->dev_addr[i] = emacdata->mac_addr[i]; + /* PHY-less configuration. 
+ * XXX I probably should move these settings to emacdata + */ + dev->phy.address = -1; + dev->phy.features = SUPPORTED_100baseT_Full | SUPPORTED_MII; + dev->phy.pause = 1; + } /* Fill in the driver function table */ ndev->open = &emac_open; - ndev->hard_start_xmit = &emac_start_xmit; + if (dev->tah_dev) { + ndev->hard_start_xmit = &emac_start_xmit_sg; + ndev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; + } else + ndev->hard_start_xmit = &emac_start_xmit; + ndev->tx_timeout = &emac_full_tx_reset; + ndev->watchdog_timeo = 5 * HZ; ndev->stop = &emac_close; ndev->get_stats = &emac_stats; - if (emacdata->jumbo) - ndev->change_mtu = &emac_change_mtu; - ndev->set_mac_address = &emac_set_mac_address; ndev->set_multicast_list = &emac_set_multicast_list; ndev->do_ioctl = &emac_ioctl; + if (emac_phy_supports_gige(emacdata->phy_mode)) { + ndev->change_mtu = &emac_change_mtu; + dev->commac.ops = &emac_commac_sg_ops; + } SET_ETHTOOL_OPS(ndev, &emac_ethtool_ops); - if (emacdata->tah_idx >= 0) - ndev->features = NETIF_F_IP_CSUM | NETIF_F_SG; -#ifdef CONFIG_NET_POLL_CONTROLLER - ndev->poll_controller = emac_netpoll; -#endif - SET_MODULE_OWNER(ndev); + netif_carrier_off(ndev); + netif_stop_queue(ndev); + + err = register_netdev(ndev); + if (err) { + printk(KERN_ERR "emac%d: failed to register net device (%d)!\n", + dev->def->index, err); + goto out6; + } - rc = register_netdev(ndev); - if (rc != 0) - goto bail; + ocp_set_drvdata(ocpdev, dev); - printk("%s: IBM emac, MAC %02x:%02x:%02x:%02x:%02x:%02x\n", - ndev->name, + printk("%s: emac%d, MAC %02x:%02x:%02x:%02x:%02x:%02x\n", + ndev->name, dev->def->index, ndev->dev_addr[0], ndev->dev_addr[1], ndev->dev_addr[2], ndev->dev_addr[3], ndev->dev_addr[4], ndev->dev_addr[5]); - printk(KERN_INFO "%s: Found %s PHY (0x%02x)\n", - ndev->name, ep->phy_mii.def->name, ep->mii_phy_addr); - - bail: - if (rc && commac_reg) - mal_unregister_commac(ep->mal, &ep->commac); - if (rc && ndev) - kfree(ndev); - - return rc; -} - -static int emac_probe(struct 
ocp_device *ocpdev) -{ - struct ocp_device *maldev; - struct ibm_ocp_mal *mal; - struct ocp_func_emac_data *emacdata; - - emacdata = (struct ocp_func_emac_data *)ocpdev->def->additions; - if (emacdata == NULL) { - printk(KERN_ERR "emac%d: Missing additional datas !\n", - ocpdev->def->index); - return -ENODEV; - } - /* Get the MAL device */ - maldev = ocp_find_device(OCP_ANY_ID, OCP_FUNC_MAL, emacdata->mal_idx); - if (maldev == NULL) { - printk("No maldev\n"); - return -ENODEV; - } - /* - * Get MAL driver data, it must be here due to link order. - * When the driver is modularized, symbol dependencies will - * ensure the MAL driver is already present if built as a - * module. - */ - mal = (struct ibm_ocp_mal *)ocp_get_drvdata(maldev); - if (mal == NULL) { - printk("No maldrv\n"); - return -ENODEV; - } + if (dev->phy.address >= 0) + printk("%s: found %s PHY (0x%02x)\n", ndev->name, + dev->phy.def->name, dev->phy.address); - /* If we depend on another EMAC for MDIO, wait for it to show up */ - if (emacdata->mdio_idx >= 0 && - (emacdata->mdio_idx != ocpdev->def->index) && !mdio_ndev) { - struct emac_def_dev *ddev; - /* Add this index to the deferred init table */ - ddev = kmalloc(sizeof(struct emac_def_dev), GFP_KERNEL); - ddev->ocpdev = ocpdev; - ddev->mal = mal; - list_add_tail(&ddev->link, &emac_init_list); - } else { - emac_init_device(ocpdev, mal); - } + emac_dbg_register(dev->def->index, dev); return 0; + out6: + iounmap((void *)dev->emacp); + out5: + tah_fini(dev->tah_dev); + out4: + rgmii_fini(dev->rgmii_dev, dev->rgmii_input); + out3: + zmii_fini(dev->zmii_dev, dev->zmii_input); + out2: + mal_unregister_commac(dev->mal, &dev->commac); + out: + kfree(ndev); + return err; } -/* Structure for a device driver */ static struct ocp_device_id emac_ids[] = { - {.vendor = OCP_ANY_ID,.function = OCP_FUNC_EMAC}, - {.vendor = OCP_VENDOR_INVALID} + { .vendor = OCP_VENDOR_IBM, .function = OCP_FUNC_EMAC }, + { .vendor = OCP_VENDOR_INVALID} }; static struct ocp_driver 
emac_driver = { .name = "emac", .id_table = emac_ids, - .probe = emac_probe, .remove = emac_remove, }; static int __init emac_init(void) { - printk(KERN_INFO DRV_NAME ": " DRV_DESC ", version " DRV_VERSION "\n"); - printk(KERN_INFO "Maintained by " DRV_AUTHOR "\n"); + printk(KERN_INFO DRV_DESC ", version " DRV_VERSION "\n"); + + DBG(": init" NL); - if (skb_res > 2) { - printk(KERN_WARNING "Invalid skb_res: %d, cropping to 2\n", - skb_res); - skb_res = 2; + if (mal_init()) + return -ENODEV; + + EMAC_CLK_INTERNAL; + if (ocp_register_driver(&emac_driver)) { + EMAC_CLK_EXTERNAL; + ocp_unregister_driver(&emac_driver); + mal_exit(); + return -ENODEV; } + EMAC_CLK_EXTERNAL; - return ocp_register_driver(&emac_driver); + emac_init_debug(); + return 0; } static void __exit emac_exit(void) { + DBG(": exit" NL); ocp_unregister_driver(&emac_driver); + mal_exit(); + emac_fini_debug(); } module_init(emac_init); diff -puN drivers/net/ibm_emac/ibm_emac_core.h~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_core.h --- devel/drivers/net/ibm_emac/ibm_emac_core.h~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_core.h 2005-10-28 17:44:03.000000000 -0700 @@ -1,146 +1,221 @@ /* - * ibm_emac_core.h + * drivers/net/ibm_emac/ibm_emac_core.h * - * Ethernet driver for the built in ethernet on the IBM 405 PowerPC - * processor. + * Driver for PowerPC 4xx on-chip ethernet controller. * - * Armin Kuster akuster at mvista.com - * Sept, 2001 + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin or * - * Orignial driver - * Johnnie Peters - * jpeters at mvista.com - * - * Copyright 2000 MontaVista Softare Inc. + * Based on original work by + * Armin Kuster + * Johnnie Peters + * Copyright 2000, 2001 MontaVista Softare Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. + * */ +#ifndef __IBM_EMAC_CORE_H_ +#define __IBM_EMAC_CORE_H_ -#ifndef _IBM_EMAC_CORE_H_ -#define _IBM_EMAC_CORE_H_ - +#include #include +#include #include -#include /* For phys_addr_t */ #include "ibm_emac.h" #include "ibm_emac_phy.h" -#include "ibm_emac_rgmii.h" #include "ibm_emac_zmii.h" +#include "ibm_emac_rgmii.h" #include "ibm_emac_mal.h" #include "ibm_emac_tah.h" -#ifndef CONFIG_IBM_EMAC_TXB -#define NUM_TX_BUFF 64 -#define NUM_RX_BUFF 64 -#else -#define NUM_TX_BUFF CONFIG_IBM_EMAC_TXB -#define NUM_RX_BUFF CONFIG_IBM_EMAC_RXB -#endif +#define NUM_TX_BUFF CONFIG_IBM_EMAC_TXB +#define NUM_RX_BUFF CONFIG_IBM_EMAC_RXB -/* This does 16 byte alignment, exactly what we need. - * The packet length includes FCS, but we don't want to - * include that when passing upstream as it messes up - * bridging applications. - */ -#ifndef CONFIG_IBM_EMAC_SKBRES -#define SKB_RES 2 -#else -#define SKB_RES CONFIG_IBM_EMAC_SKBRES +/* Simple sanity check */ +#if NUM_TX_BUFF > 256 || NUM_RX_BUFF > 256 +#error Invalid number of buffer descriptors (greater than 256) #endif -/* Note about alignement. alloc_skb() returns a cache line - * aligned buffer. However, dev_alloc_skb() will add 16 more - * bytes and "reserve" them, so our buffer will actually end - * on a half cache line. What we do is to use directly - * alloc_skb, allocate 16 more bytes to match the total amount - * allocated by dev_alloc_skb(), but we don't reserve. 
+// XXX +#define EMAC_MIN_MTU 46 +#define EMAC_MAX_MTU 9000 + +/* Maximum L2 header length (VLAN tagged, no FCS) */ +#define EMAC_MTU_OVERHEAD (6 * 2 + 2 + 4) + +/* RX BD size for the given MTU */ +static inline int emac_rx_size(int mtu) +{ + if (mtu > ETH_DATA_LEN) + return MAL_MAX_RX_SIZE; + else + return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD); +} + +#define EMAC_DMA_ALIGN(x) ALIGN((x), dma_get_cache_alignment()) + +#define EMAC_RX_SKB_HEADROOM \ + EMAC_DMA_ALIGN(CONFIG_IBM_EMAC_RX_SKB_HEADROOM) + +/* Size of RX skb for the given MTU */ +static inline int emac_rx_skb_size(int mtu) +{ + int size = max(mtu + EMAC_MTU_OVERHEAD, emac_rx_size(mtu)); + return EMAC_DMA_ALIGN(size + 2) + EMAC_RX_SKB_HEADROOM; +} + +/* RX DMA sync size */ +static inline int emac_rx_sync_size(int mtu) +{ + return EMAC_DMA_ALIGN(emac_rx_size(mtu) + 2); +} + +/* Driver statistcs is split into two parts to make it more cache friendly: + * - normal statistics (packet count, etc) + * - error statistics + * + * When statistics is requested by ethtool, these parts are concatenated, + * normal one goes first. + * + * Please, keep these structures in sync with emac_stats_keys. */ -#define MAX_NUM_BUF_DESC 255 -#define DESC_BUF_SIZE 4080 /* max 4096-16 */ -#define DESC_BUF_SIZE_REG (DESC_BUF_SIZE / 16) - -/* Transmitter timeout. 
*/ -#define TX_TIMEOUT (2*HZ) - -/* MDIO latency delay */ -#define MDIO_DELAY 250 - -/* Power managment shift registers */ -#define IBM_CPM_EMMII 0 /* Shift value for MII */ -#define IBM_CPM_EMRX 1 /* Shift value for recv */ -#define IBM_CPM_EMTX 2 /* Shift value for MAC */ -#define IBM_CPM_EMAC(x) (((x)>>IBM_CPM_EMMII) | ((x)>>IBM_CPM_EMRX) | ((x)>>IBM_CPM_EMTX)) - -#define ENET_HEADER_SIZE 14 -#define ENET_FCS_SIZE 4 -#define ENET_DEF_MTU_SIZE 1500 -#define ENET_DEF_BUF_SIZE (ENET_DEF_MTU_SIZE + ENET_HEADER_SIZE + ENET_FCS_SIZE) -#define EMAC_MIN_FRAME 64 -#define EMAC_MAX_FRAME 9018 -#define EMAC_MIN_MTU (EMAC_MIN_FRAME - ENET_HEADER_SIZE - ENET_FCS_SIZE) -#define EMAC_MAX_MTU (EMAC_MAX_FRAME - ENET_HEADER_SIZE - ENET_FCS_SIZE) - -#ifdef CONFIG_IBM_EMAC_ERRMSG -void emac_serr_dump_0(struct net_device *dev); -void emac_serr_dump_1(struct net_device *dev); -void emac_err_dump(struct net_device *dev, int em0isr); -void emac_phy_dump(struct net_device *); -void emac_desc_dump(struct net_device *); -void emac_mac_dump(struct net_device *); -void emac_mal_dump(struct net_device *); -#else -#define emac_serr_dump_0(dev) do { } while (0) -#define emac_serr_dump_1(dev) do { } while (0) -#define emac_err_dump(dev,x) do { } while (0) -#define emac_phy_dump(dev) do { } while (0) -#define emac_desc_dump(dev) do { } while (0) -#define emac_mac_dump(dev) do { } while (0) -#define emac_mal_dump(dev) do { } while (0) -#endif + +/* Normal TX/RX Statistics */ +struct ibm_emac_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 rx_packets_csum; + u64 tx_packets_csum; +}; + +/* Error statistics */ +struct ibm_emac_error_stats { + u64 tx_undo; + + /* Software RX Errors */ + u64 rx_dropped_stack; + u64 rx_dropped_oom; + u64 rx_dropped_error; + u64 rx_dropped_resize; + u64 rx_dropped_mtu; + u64 rx_stopped; + /* BD reported RX errors */ + u64 rx_bd_errors; + u64 rx_bd_overrun; + u64 rx_bd_bad_packet; + u64 rx_bd_runt_packet; + u64 rx_bd_short_event; + u64 
rx_bd_alignment_error; + u64 rx_bd_bad_fcs; + u64 rx_bd_packet_too_long; + u64 rx_bd_out_of_range; + u64 rx_bd_in_range; + /* EMAC IRQ reported RX errors */ + u64 rx_parity; + u64 rx_fifo_overrun; + u64 rx_overrun; + u64 rx_bad_packet; + u64 rx_runt_packet; + u64 rx_short_event; + u64 rx_alignment_error; + u64 rx_bad_fcs; + u64 rx_packet_too_long; + u64 rx_out_of_range; + u64 rx_in_range; + + /* Software TX Errors */ + u64 tx_dropped; + /* BD reported TX errors */ + u64 tx_bd_errors; + u64 tx_bd_bad_fcs; + u64 tx_bd_carrier_loss; + u64 tx_bd_excessive_deferral; + u64 tx_bd_excessive_collisions; + u64 tx_bd_late_collision; + u64 tx_bd_multple_collisions; + u64 tx_bd_single_collision; + u64 tx_bd_underrun; + u64 tx_bd_sqe; + /* EMAC IRQ reported TX errors */ + u64 tx_parity; + u64 tx_underrun; + u64 tx_sqe; + u64 tx_errors; +}; + +#define EMAC_ETHTOOL_STATS_COUNT ((sizeof(struct ibm_emac_stats) + \ + sizeof(struct ibm_emac_error_stats)) \ + / sizeof(u64)) struct ocp_enet_private { - struct sk_buff *tx_skb[NUM_TX_BUFF]; - struct sk_buff *rx_skb[NUM_RX_BUFF]; - struct mal_descriptor *tx_desc; - struct mal_descriptor *rx_desc; - struct mal_descriptor *rx_dirty; - struct net_device_stats stats; - int tx_cnt; - int rx_slot; - int dirty_rx; - int tx_slot; - int ack_slot; - int rx_buffer_size; - - struct mii_phy phy_mii; - int mii_phy_addr; - int want_autoneg; - int timer_ticks; - struct timer_list link_timer; - struct net_device *mdio_dev; - - struct ocp_device *rgmii_dev; - int rgmii_input; - - struct ocp_device *zmii_dev; - int zmii_input; - - struct ibm_ocp_mal *mal; - int mal_tx_chan, mal_rx_chan; - struct mal_commac commac; - - struct ocp_device *tah_dev; - - int opened; - int going_away; - int wol_irq; - emac_t *emacp; - struct ocp_device *ocpdev; - struct net_device *ndev; - spinlock_t lock; + struct net_device *ndev; /* 0 */ + struct emac_regs *emacp; + + struct mal_descriptor *tx_desc; + int tx_cnt; + int tx_slot; + int ack_slot; + + struct mal_descriptor 
*rx_desc; + int rx_slot; + struct sk_buff *rx_sg_skb; /* 1 */ + int rx_skb_size; + int rx_sync_size; + + struct ibm_emac_stats stats; + struct ocp_device *tah_dev; + + struct ibm_ocp_mal *mal; + struct mal_commac commac; + + struct sk_buff *tx_skb[NUM_TX_BUFF]; + struct sk_buff *rx_skb[NUM_RX_BUFF]; + + struct ocp_device *zmii_dev; + int zmii_input; + struct ocp_enet_private *mdio_dev; + struct ocp_device *rgmii_dev; + int rgmii_input; + + struct ocp_def *def; + + struct mii_phy phy; + struct timer_list link_timer; + int reset_failed; + + struct ibm_emac_error_stats estats; + struct net_device_stats nstats; + + struct device* ldev; }; -#endif /* _IBM_EMAC_CORE_H_ */ + +/* Ethtool get_regs complex data. + * We want to get not just EMAC registers, but also MAL, ZMII, RGMII, TAH + * when available. + * + * Returned BLOB consists of the ibm_emac_ethtool_regs_hdr, + * MAL registers, EMAC registers and optional ZMII, RGMII, TAH registers. + * Each register component is preceded with emac_ethtool_regs_subhdr. 
+ * Order of the optional headers follows their relative bit posititions + * in emac_ethtool_regs_hdr.components + */ +#define EMAC_ETHTOOL_REGS_ZMII 0x00000001 +#define EMAC_ETHTOOL_REGS_RGMII 0x00000002 +#define EMAC_ETHTOOL_REGS_TAH 0x00000004 + +struct emac_ethtool_regs_hdr { + u32 components; +}; + +struct emac_ethtool_regs_subhdr { + u32 version; + u32 index; +}; + +#endif /* __IBM_EMAC_CORE_H_ */ diff -puN drivers/net/ibm_emac/ibm_emac_debug.c~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_debug.c --- devel/drivers/net/ibm_emac/ibm_emac_debug.c~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_debug.c 2005-10-28 17:44:03.000000000 -0700 @@ -1,224 +1,213 @@ /* - * ibm_ocp_debug.c + * drivers/net/ibm_emac/ibm_emac_debug.c * - * This has all the debug routines that where in *_enet.c + * Driver for PowerPC 4xx on-chip ethernet controller, debug print routines. * - * Armin Kuster akuster at mvista.com - * April , 2002 - * - * Copyright 2002 MontaVista Softare Inc. + * Copyright (c) 2004, 2005 Zultys Technologies + * Eugene Surovegin or * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. 
+ * */ - #include +#include +#include #include #include +#include #include -#include "ibm_ocp_mal.h" -#include "ibm_ocp_zmii.h" -#include "ibm_ocp_enet.h" - -extern int emac_phy_read(struct net_device *dev, int mii_id, int reg); - -void emac_phy_dump(struct net_device *dev) -{ - struct ocp_enet_private *fep = dev->priv; - unsigned long i; - uint data; - - printk(KERN_DEBUG " Prepare for Phy dump....\n"); - for (i = 0; i < 0x1A; i++) { - data = emac_phy_read(dev, fep->mii_phy_addr, i); - printk(KERN_DEBUG "Phy reg 0x%lx ==> %4x\n", i, data); - if (i == 0x07) - i = 0x0f; + +#include "ibm_emac_core.h" + +static void emac_desc_dump(int idx, struct ocp_enet_private *p) +{ + int i; + printk("** EMAC%d TX BDs **\n" + " tx_cnt = %d tx_slot = %d ack_slot = %d\n", + idx, p->tx_cnt, p->tx_slot, p->ack_slot); + for (i = 0; i < NUM_TX_BUFF / 2; ++i) + printk + ("bd[%2d] 0x%08x %c 0x%04x %4u - bd[%2d] 0x%08x %c 0x%04x %4u\n", + i, p->tx_desc[i].data_ptr, p->tx_skb[i] ? 'V' : ' ', + p->tx_desc[i].ctrl, p->tx_desc[i].data_len, + NUM_TX_BUFF / 2 + i, + p->tx_desc[NUM_TX_BUFF / 2 + i].data_ptr, + p->tx_skb[NUM_TX_BUFF / 2 + i] ? 'V' : ' ', + p->tx_desc[NUM_TX_BUFF / 2 + i].ctrl, + p->tx_desc[NUM_TX_BUFF / 2 + i].data_len); + + printk("** EMAC%d RX BDs **\n" + " rx_slot = %d rx_stopped = %d rx_skb_size = %d rx_sync_size = %d\n" + " rx_sg_skb = 0x%p\n", + idx, p->rx_slot, p->commac.rx_stopped, p->rx_skb_size, + p->rx_sync_size, p->rx_sg_skb); + for (i = 0; i < NUM_RX_BUFF / 2; ++i) + printk + ("bd[%2d] 0x%08x %c 0x%04x %4u - bd[%2d] 0x%08x %c 0x%04x %4u\n", + i, p->rx_desc[i].data_ptr, p->rx_skb[i] ? 'V' : ' ', + p->rx_desc[i].ctrl, p->rx_desc[i].data_len, + NUM_RX_BUFF / 2 + i, + p->rx_desc[NUM_RX_BUFF / 2 + i].data_ptr, + p->rx_skb[NUM_RX_BUFF / 2 + i] ? 
'V' : ' ', + p->rx_desc[NUM_RX_BUFF / 2 + i].ctrl, + p->rx_desc[NUM_RX_BUFF / 2 + i].data_len); +} + +static void emac_mac_dump(int idx, struct ocp_enet_private *dev) +{ + struct emac_regs *p = dev->emacp; + + printk("** EMAC%d registers **\n" + "MR0 = 0x%08x MR1 = 0x%08x TMR0 = 0x%08x TMR1 = 0x%08x\n" + "RMR = 0x%08x ISR = 0x%08x ISER = 0x%08x\n" + "IAR = %04x%08x VTPID = 0x%04x VTCI = 0x%04x\n" + "IAHT: 0x%04x 0x%04x 0x%04x 0x%04x " + "GAHT: 0x%04x 0x%04x 0x%04x 0x%04x\n" + "LSA = %04x%08x IPGVR = 0x%04x\n" + "STACR = 0x%08x TRTR = 0x%08x RWMR = 0x%08x\n" + "OCTX = 0x%08x OCRX = 0x%08x IPCR = 0x%08x\n", + idx, in_be32(&p->mr0), in_be32(&p->mr1), + in_be32(&p->tmr0), in_be32(&p->tmr1), + in_be32(&p->rmr), in_be32(&p->isr), in_be32(&p->iser), + in_be32(&p->iahr), in_be32(&p->ialr), in_be32(&p->vtpid), + in_be32(&p->vtci), + in_be32(&p->iaht1), in_be32(&p->iaht2), in_be32(&p->iaht3), + in_be32(&p->iaht4), + in_be32(&p->gaht1), in_be32(&p->gaht2), in_be32(&p->gaht3), + in_be32(&p->gaht4), + in_be32(&p->lsah), in_be32(&p->lsal), in_be32(&p->ipgvr), + in_be32(&p->stacr), in_be32(&p->trtr), in_be32(&p->rwmr), + in_be32(&p->octx), in_be32(&p->ocrx), in_be32(&p->ipcr) + ); + + emac_desc_dump(idx, dev); +} + +static void emac_mal_dump(struct ibm_ocp_mal *mal) +{ + struct ocp_func_mal_data *maldata = mal->def->additions; + int i; + + printk("** MAL%d Registers **\n" + "CFG = 0x%08x ESR = 0x%08x IER = 0x%08x\n" + "TX|CASR = 0x%08x CARR = 0x%08x EOBISR = 0x%08x DEIR = 0x%08x\n" + "RX|CASR = 0x%08x CARR = 0x%08x EOBISR = 0x%08x DEIR = 0x%08x\n", + mal->def->index, + get_mal_dcrn(mal, MAL_CFG), get_mal_dcrn(mal, MAL_ESR), + get_mal_dcrn(mal, MAL_IER), + get_mal_dcrn(mal, MAL_TXCASR), get_mal_dcrn(mal, MAL_TXCARR), + get_mal_dcrn(mal, MAL_TXEOBISR), get_mal_dcrn(mal, MAL_TXDEIR), + get_mal_dcrn(mal, MAL_RXCASR), get_mal_dcrn(mal, MAL_RXCARR), + get_mal_dcrn(mal, MAL_RXEOBISR), get_mal_dcrn(mal, MAL_RXDEIR) + ); + + printk("TX|"); + for (i = 0; i < maldata->num_tx_chans; ++i) { + 
if (i && !(i % 4)) + printk("\n "); + printk("CTP%d = 0x%08x ", i, get_mal_dcrn(mal, MAL_TXCTPR(i))); + } + printk("\nRX|"); + for (i = 0; i < maldata->num_rx_chans; ++i) { + if (i && !(i % 4)) + printk("\n "); + printk("CTP%d = 0x%08x ", i, get_mal_dcrn(mal, MAL_RXCTPR(i))); } + printk("\n "); + for (i = 0; i < maldata->num_rx_chans; ++i) { + u32 r = get_mal_dcrn(mal, MAL_RCBS(i)); + if (i && !(i % 3)) + printk("\n "); + printk("RCBS%d = 0x%08x (%d) ", i, r, r * 16); + } + printk("\n"); } -void emac_desc_dump(struct net_device *dev) +static struct ocp_enet_private *__emacs[4]; +static struct ibm_ocp_mal *__mals[1]; + +void emac_dbg_register(int idx, struct ocp_enet_private *dev) { - struct ocp_enet_private *fep = dev->priv; - int curr_slot; + unsigned long flags; - printk(KERN_DEBUG - "dumping the receive descriptors: current slot is %d\n", - fep->rx_slot); - for (curr_slot = 0; curr_slot < NUM_RX_BUFF; curr_slot++) { - printk(KERN_DEBUG - "Desc %02d: status 0x%04x, length %3d, addr 0x%x\n", - curr_slot, fep->rx_desc[curr_slot].ctrl, - fep->rx_desc[curr_slot].data_len, - (unsigned int)fep->rx_desc[curr_slot].data_ptr); + if (idx >= sizeof(__emacs) / sizeof(__emacs[0])) { + printk(KERN_WARNING + "invalid index %d when registering EMAC for debugging\n", + idx); + return; } + + local_irq_save(flags); + __emacs[idx] = dev; + local_irq_restore(flags); } -void emac_mac_dump(struct net_device *dev) +void mal_dbg_register(int idx, struct ibm_ocp_mal *mal) { - struct ocp_enet_private *fep = dev->priv; - volatile emac_t *emacp = fep->emacp; + unsigned long flags; - printk(KERN_DEBUG "EMAC DEBUG ********** \n"); - printk(KERN_DEBUG "EMAC_M0 ==> 0x%x\n", in_be32(&emacp->em0mr0)); - printk(KERN_DEBUG "EMAC_M1 ==> 0x%x\n", in_be32(&emacp->em0mr1)); - printk(KERN_DEBUG "EMAC_TXM0==> 0x%x\n", in_be32(&emacp->em0tmr0)); - printk(KERN_DEBUG "EMAC_TXM1==> 0x%x\n", in_be32(&emacp->em0tmr1)); - printk(KERN_DEBUG "EMAC_RXM ==> 0x%x\n", in_be32(&emacp->em0rmr)); - printk(KERN_DEBUG 
"EMAC_ISR ==> 0x%x\n", in_be32(&emacp->em0isr)); - printk(KERN_DEBUG "EMAC_IER ==> 0x%x\n", in_be32(&emacp->em0iser)); - printk(KERN_DEBUG "EMAC_IAH ==> 0x%x\n", in_be32(&emacp->em0iahr)); - printk(KERN_DEBUG "EMAC_IAL ==> 0x%x\n", in_be32(&emacp->em0ialr)); - printk(KERN_DEBUG "EMAC_VLAN_TPID_REG ==> 0x%x\n", - in_be32(&emacp->em0vtpid)); -} - -void emac_mal_dump(struct net_device *dev) -{ - struct ibm_ocp_mal *mal = ((struct ocp_enet_private *)dev->priv)->mal; - - printk(KERN_DEBUG " MAL DEBUG ********** \n"); - printk(KERN_DEBUG " MCR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALCR)); - printk(KERN_DEBUG " ESR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALESR)); - printk(KERN_DEBUG " IER ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALIER)); -#ifdef CONFIG_40x - printk(KERN_DEBUG " DBR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALDBR)); -#endif /* CONFIG_40x */ - printk(KERN_DEBUG " TXCASR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALTXCASR)); - printk(KERN_DEBUG " TXCARR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALTXCARR)); - printk(KERN_DEBUG " TXEOBISR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALTXEOBISR)); - printk(KERN_DEBUG " TXDEIR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALTXDEIR)); - printk(KERN_DEBUG " RXCASR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALRXCASR)); - printk(KERN_DEBUG " RXCARR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALRXCARR)); - printk(KERN_DEBUG " RXEOBISR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALRXEOBISR)); - printk(KERN_DEBUG " RXDEIR ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALRXDEIR)); - printk(KERN_DEBUG " TXCTP0R ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALTXCTP0R)); - printk(KERN_DEBUG " TXCTP1R ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALTXCTP1R)); - printk(KERN_DEBUG " TXCTP2R ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALTXCTP2R)); - printk(KERN_DEBUG " TXCTP3R ==> 
0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALTXCTP3R)); - printk(KERN_DEBUG " RXCTP0R ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALRXCTP0R)); - printk(KERN_DEBUG " RXCTP1R ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALRXCTP1R)); - printk(KERN_DEBUG " RCBS0 ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALRCBS0)); - printk(KERN_DEBUG " RCBS1 ==> 0x%x\n", - (unsigned int)get_mal_dcrn(mal, DCRN_MALRCBS1)); -} - -void emac_serr_dump_0(struct net_device *dev) -{ - struct ibm_ocp_mal *mal = ((struct ocp_enet_private *)dev->priv)->mal; - unsigned long int mal_error, plb_error, plb_addr; - - mal_error = get_mal_dcrn(mal, DCRN_MALESR); - printk(KERN_DEBUG "ppc405_eth_serr: %s channel %ld \n", - (mal_error & 0x40000000) ? "Receive" : - "Transmit", (mal_error & 0x3e000000) >> 25); - printk(KERN_DEBUG " ----- latched error -----\n"); - if (mal_error & MALESR_DE) - printk(KERN_DEBUG " DE: descriptor error\n"); - if (mal_error & MALESR_OEN) - printk(KERN_DEBUG " ONE: OPB non-fullword error\n"); - if (mal_error & MALESR_OTE) - printk(KERN_DEBUG " OTE: OPB timeout error\n"); - if (mal_error & MALESR_OSE) - printk(KERN_DEBUG " OSE: OPB slave error\n"); - - if (mal_error & MALESR_PEIN) { - plb_error = mfdcr(DCRN_PLB0_BESR); - printk(KERN_DEBUG - " PEIN: PLB error, PLB0_BESR is 0x%x\n", - (unsigned int)plb_error); - plb_addr = mfdcr(DCRN_PLB0_BEAR); - printk(KERN_DEBUG - " PEIN: PLB error, PLB0_BEAR is 0x%x\n", - (unsigned int)plb_addr); + if (idx >= sizeof(__mals) / sizeof(__mals[0])) { + printk(KERN_WARNING + "invalid index %d when registering MAL for debugging\n", + idx); + return; } + + local_irq_save(flags); + __mals[idx] = mal; + local_irq_restore(flags); +} + +void emac_dbg_dump_all(void) +{ + unsigned int i; + unsigned long flags; + + local_irq_save(flags); + + for (i = 0; i < sizeof(__mals) / sizeof(__mals[0]); ++i) + if (__mals[i]) + emac_mal_dump(__mals[i]); + + for (i = 0; i < sizeof(__emacs) / sizeof(__emacs[0]); ++i) + if (__emacs[i]) + 
emac_mac_dump(i, __emacs[i]); + + local_irq_restore(flags); +} + +#if defined(CONFIG_MAGIC_SYSRQ) +static void emac_sysrq_handler(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + emac_dbg_dump_all(); } -void emac_serr_dump_1(struct net_device *dev) +static struct sysrq_key_op emac_sysrq_op = { + .handler = emac_sysrq_handler, + .help_msg = "emaC", + .action_msg = "Show EMAC(s) status", +}; + +int __init emac_init_debug(void) { - struct ibm_ocp_mal *mal = ((struct ocp_enet_private *)dev->priv)->mal; - int mal_error = get_mal_dcrn(mal, DCRN_MALESR); + return register_sysrq_key('c', &emac_sysrq_op); +} - printk(KERN_DEBUG " ----- cumulative errors -----\n"); - if (mal_error & MALESR_DEI) - printk(KERN_DEBUG " DEI: descriptor error interrupt\n"); - if (mal_error & MALESR_ONEI) - printk(KERN_DEBUG " OPB non-fullword error interrupt\n"); - if (mal_error & MALESR_OTEI) - printk(KERN_DEBUG " OTEI: timeout error interrupt\n"); - if (mal_error & MALESR_OSEI) - printk(KERN_DEBUG " OSEI: slave error interrupt\n"); - if (mal_error & MALESR_PBEI) - printk(KERN_DEBUG " PBEI: PLB bus error interrupt\n"); -} - -void emac_err_dump(struct net_device *dev, int em0isr) -{ - printk(KERN_DEBUG "%s: on-chip ethernet error:\n", dev->name); - - if (em0isr & EMAC_ISR_OVR) - printk(KERN_DEBUG " OVR: overrun\n"); - if (em0isr & EMAC_ISR_PP) - printk(KERN_DEBUG " PP: control pause packet\n"); - if (em0isr & EMAC_ISR_BP) - printk(KERN_DEBUG " BP: packet error\n"); - if (em0isr & EMAC_ISR_RP) - printk(KERN_DEBUG " RP: runt packet\n"); - if (em0isr & EMAC_ISR_SE) - printk(KERN_DEBUG " SE: short event\n"); - if (em0isr & EMAC_ISR_ALE) - printk(KERN_DEBUG " ALE: odd number of nibbles in packet\n"); - if (em0isr & EMAC_ISR_BFCS) - printk(KERN_DEBUG " BFCS: bad FCS\n"); - if (em0isr & EMAC_ISR_PTLE) - printk(KERN_DEBUG " PTLE: oversized packet\n"); - if (em0isr & EMAC_ISR_ORE) - printk(KERN_DEBUG - " ORE: packet length field > max allowed LLC\n"); - if (em0isr & EMAC_ISR_IRE) - 
printk(KERN_DEBUG " IRE: In Range error\n"); - if (em0isr & EMAC_ISR_DBDM) - printk(KERN_DEBUG " DBDM: xmit error or SQE\n"); - if (em0isr & EMAC_ISR_DB0) - printk(KERN_DEBUG " DB0: xmit error or SQE on TX channel 0\n"); - if (em0isr & EMAC_ISR_SE0) - printk(KERN_DEBUG - " SE0: Signal Quality Error test failure from TX channel 0\n"); - if (em0isr & EMAC_ISR_TE0) - printk(KERN_DEBUG " TE0: xmit channel 0 aborted\n"); - if (em0isr & EMAC_ISR_DB1) - printk(KERN_DEBUG " DB1: xmit error or SQE on TX channel \n"); - if (em0isr & EMAC_ISR_SE1) - printk(KERN_DEBUG - " SE1: Signal Quality Error test failure from TX channel 1\n"); - if (em0isr & EMAC_ISR_TE1) - printk(KERN_DEBUG " TE1: xmit channel 1 aborted\n"); - if (em0isr & EMAC_ISR_MOS) - printk(KERN_DEBUG " MOS\n"); - if (em0isr & EMAC_ISR_MOF) - printk(KERN_DEBUG " MOF\n"); +void __exit emac_fini_debug(void) +{ + unregister_sysrq_key('c', &emac_sysrq_op); +} - emac_mac_dump(dev); - emac_mal_dump(dev); +#else +int __init emac_init_debug(void) +{ + return 0; +} +void __exit emac_fini_debug(void) +{ } +#endif /* CONFIG_MAGIC_SYSRQ */ diff -puN /dev/null drivers/net/ibm_emac/ibm_emac_debug.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_debug.h 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,63 @@ +/* + * drivers/net/ibm_emac/ibm_ocp_debug.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, debug print routines. + * + * Copyright (c) 2004, 2005 Zultys Technologies + * Eugene Surovegin or + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ +#ifndef __IBM_EMAC_DEBUG_H_ +#define __IBM_EMAC_DEBUG_H_ + +#include +#include +#include "ibm_emac_core.h" +#include "ibm_emac_mal.h" + +#if defined(CONFIG_IBM_EMAC_DEBUG) +void emac_dbg_register(int idx, struct ocp_enet_private *dev); +void mal_dbg_register(int idx, struct ibm_ocp_mal *mal); +int emac_init_debug(void) __init; +void emac_fini_debug(void) __exit; +void emac_dbg_dump_all(void); +# define DBG_LEVEL 1 +#else +# define emac_dbg_register(x,y) ((void)0) +# define mal_dbg_register(x,y) ((void)0) +# define emac_init_debug() ((void)0) +# define emac_fini_debug() ((void)0) +# define emac_dbg_dump_all() ((void)0) +# define DBG_LEVEL 0 +#endif + +#if DBG_LEVEL > 0 +# define DBG(f,x...) printk("emac" f, ##x) +# define MAL_DBG(f,x...) printk("mal" f, ##x) +# define ZMII_DBG(f,x...) printk("zmii" f, ##x) +# define RGMII_DBG(f,x...) printk("rgmii" f, ##x) +# define NL "\n" +#else +# define DBG(f,x...) ((void)0) +# define MAL_DBG(f,x...) ((void)0) +# define ZMII_DBG(f,x...) ((void)0) +# define RGMII_DBG(f,x...) ((void)0) +#endif +#if DBG_LEVEL > 1 +# define DBG2(f,x...) DBG(f, ##x) +# define MAL_DBG2(f,x...) MAL_DBG(f, ##x) +# define ZMII_DBG2(f,x...) ZMII_DBG(f, ##x) +# define RGMII_DBG2(f,x...) RGMII_DBG(f, ##x) +#else +# define DBG2(f,x...) ((void)0) +# define MAL_DBG2(f,x...) ((void)0) +# define ZMII_DBG2(f,x...) ((void)0) +# define RGMII_DBG2(f,x...) 
((void)0) +#endif + +#endif /* __IBM_EMAC_DEBUG_H_ */ diff -puN drivers/net/ibm_emac/ibm_emac.h~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac.h --- devel/drivers/net/ibm_emac/ibm_emac.h~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac.h 2005-10-28 17:44:03.000000000 -0700 @@ -1,110 +1,142 @@ /* - * ibm_emac.h + * drivers/net/ibm_emac/ibm_emac.h * + * Register definitions for PowerPC 4xx on-chip ethernet contoller * - * Armin Kuster akuster at mvista.com - * June, 2002 + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin or * - * Copyright 2002 MontaVista Softare Inc. + * Based on original work by + * Matt Porter + * Armin Kuster + * Copyright 2002-2004 MontaVista Software Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. + * */ +#ifndef __IBM_EMAC_H_ +#define __IBM_EMAC_H_ + +#include +#include + +/* This is a simple check to prevent use of this driver on non-tested SoCs */ +#if !defined(CONFIG_405GP) && !defined(CONFIG_405GPR) && !defined(CONFIG_405EP) && \ + !defined(CONFIG_440GP) && !defined(CONFIG_440GX) && !defined(CONFIG_440SP) && \ + !defined(CONFIG_440EP) && !defined(CONFIG_NP405H) +#error "Unknown SoC. 
Please, check chip user manual and make sure EMAC defines are OK" +#endif + +/* EMAC registers Write Access rules */ +struct emac_regs { + u32 mr0; /* special */ + u32 mr1; /* Reset */ + u32 tmr0; /* special */ + u32 tmr1; /* special */ + u32 rmr; /* Reset */ + u32 isr; /* Always */ + u32 iser; /* Reset */ + u32 iahr; /* Reset, R, T */ + u32 ialr; /* Reset, R, T */ + u32 vtpid; /* Reset, R, T */ + u32 vtci; /* Reset, R, T */ + u32 ptr; /* Reset, T */ + u32 iaht1; /* Reset, R */ + u32 iaht2; /* Reset, R */ + u32 iaht3; /* Reset, R */ + u32 iaht4; /* Reset, R */ + u32 gaht1; /* Reset, R */ + u32 gaht2; /* Reset, R */ + u32 gaht3; /* Reset, R */ + u32 gaht4; /* Reset, R */ + u32 lsah; + u32 lsal; + u32 ipgvr; /* Reset, T */ + u32 stacr; /* special */ + u32 trtr; /* special */ + u32 rwmr; /* Reset */ + u32 octx; + u32 ocrx; + u32 ipcr; +}; + +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_ETHTOOL_REGS_VER 0 +#define EMAC_ETHTOOL_REGS_SIZE (sizeof(struct emac_regs) - sizeof(u32)) +#else +#define EMAC_ETHTOOL_REGS_VER 1 +#define EMAC_ETHTOOL_REGS_SIZE sizeof(struct emac_regs) +#endif + +/* EMACx_MR0 */ +#define EMAC_MR0_RXI 0x80000000 +#define EMAC_MR0_TXI 0x40000000 +#define EMAC_MR0_SRST 0x20000000 +#define EMAC_MR0_TXE 0x10000000 +#define EMAC_MR0_RXE 0x08000000 +#define EMAC_MR0_WKE 0x04000000 + +/* EMACx_MR1 */ +#define EMAC_MR1_FDE 0x80000000 +#define EMAC_MR1_ILE 0x40000000 +#define EMAC_MR1_VLE 0x20000000 +#define EMAC_MR1_EIFC 0x10000000 +#define EMAC_MR1_APP 0x08000000 +#define EMAC_MR1_IST 0x01000000 + +#define EMAC_MR1_MF_MASK 0x00c00000 +#define EMAC_MR1_MF_10 0x00000000 +#define EMAC_MR1_MF_100 0x00400000 +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_MR1_MF_1000 0x00000000 +#define EMAC_MR1_MF_1000GPCS 0x00000000 +#define EMAC_MR1_MF_IPPA(id) 0x00000000 +#else +#define EMAC_MR1_MF_1000 0x00800000 +#define EMAC_MR1_MF_1000GPCS 0x00c00000 +#define EMAC_MR1_MF_IPPA(id) (((id) & 0x1f) << 6) +#endif + +#define EMAC_TX_FIFO_SIZE 2048 + +#if 
!defined(CONFIG_IBM_EMAC4) +#define EMAC_MR1_RFS_4K 0x00300000 +#define EMAC_MR1_RFS_16K 0x00000000 +#define EMAC_RX_FIFO_SIZE(gige) 4096 +#define EMAC_MR1_TFS_2K 0x00080000 +#define EMAC_MR1_TR0_MULT 0x00008000 +#define EMAC_MR1_JPSM 0x00000000 +#define EMAC_MR1_BASE(opb) (EMAC_MR1_TFS_2K | EMAC_MR1_TR0_MULT) +#else +#define EMAC_MR1_RFS_4K 0x00180000 +#define EMAC_MR1_RFS_16K 0x00280000 +#define EMAC_RX_FIFO_SIZE(gige) ((gige) ? 16384 : 4096) +#define EMAC_MR1_TFS_2K 0x00020000 +#define EMAC_MR1_TR 0x00008000 +#define EMAC_MR1_MWSW_001 0x00001000 +#define EMAC_MR1_JPSM 0x00000800 +#define EMAC_MR1_OBCI_MASK 0x00000038 +#define EMAC_MR1_OBCI_50 0x00000000 +#define EMAC_MR1_OBCI_66 0x00000008 +#define EMAC_MR1_OBCI_83 0x00000010 +#define EMAC_MR1_OBCI_100 0x00000018 +#define EMAC_MR1_OBCI_100P 0x00000020 +#define EMAC_MR1_OBCI(freq) ((freq) <= 50 ? EMAC_MR1_OBCI_50 : \ + (freq) <= 66 ? EMAC_MR1_OBCI_66 : \ + (freq) <= 83 ? EMAC_MR1_OBCI_83 : \ + (freq) <= 100 ? EMAC_MR1_OBCI_100 : EMAC_MR1_OBCI_100P) +#define EMAC_MR1_BASE(opb) (EMAC_MR1_TFS_2K | EMAC_MR1_TR | \ + EMAC_MR1_MWSW_001 | EMAC_MR1_OBCI(opb)) +#endif -#ifndef _IBM_EMAC_H_ -#define _IBM_EMAC_H_ -/* General defines needed for the driver */ - -/* Emac */ -typedef struct emac_regs { - u32 em0mr0; - u32 em0mr1; - u32 em0tmr0; - u32 em0tmr1; - u32 em0rmr; - u32 em0isr; - u32 em0iser; - u32 em0iahr; - u32 em0ialr; - u32 em0vtpid; - u32 em0vtci; - u32 em0ptr; - u32 em0iaht1; - u32 em0iaht2; - u32 em0iaht3; - u32 em0iaht4; - u32 em0gaht1; - u32 em0gaht2; - u32 em0gaht3; - u32 em0gaht4; - u32 em0lsah; - u32 em0lsal; - u32 em0ipgvr; - u32 em0stacr; - u32 em0trtr; - u32 em0rwmr; -} emac_t; - -/* MODE REG 0 */ -#define EMAC_M0_RXI 0x80000000 -#define EMAC_M0_TXI 0x40000000 -#define EMAC_M0_SRST 0x20000000 -#define EMAC_M0_TXE 0x10000000 -#define EMAC_M0_RXE 0x08000000 -#define EMAC_M0_WKE 0x04000000 - -/* MODE Reg 1 */ -#define EMAC_M1_FDE 0x80000000 -#define EMAC_M1_ILE 0x40000000 -#define EMAC_M1_VLE 0x20000000 
-#define EMAC_M1_EIFC 0x10000000 -#define EMAC_M1_APP 0x08000000 -#define EMAC_M1_AEMI 0x02000000 -#define EMAC_M1_IST 0x01000000 -#define EMAC_M1_MF_1000GPCS 0x00c00000 /* Internal GPCS */ -#define EMAC_M1_MF_1000MBPS 0x00800000 /* External GPCS */ -#define EMAC_M1_MF_100MBPS 0x00400000 -#define EMAC_M1_RFS_16K 0x00280000 /* 000 for 512 byte */ -#define EMAC_M1_TR 0x00008000 -#ifdef CONFIG_IBM_EMAC4 -#define EMAC_M1_RFS_8K 0x00200000 -#define EMAC_M1_RFS_4K 0x00180000 -#define EMAC_M1_RFS_2K 0x00100000 -#define EMAC_M1_RFS_1K 0x00080000 -#define EMAC_M1_TX_FIFO_16K 0x00050000 /* 0's for 512 byte */ -#define EMAC_M1_TX_FIFO_8K 0x00040000 -#define EMAC_M1_TX_FIFO_4K 0x00030000 -#define EMAC_M1_TX_FIFO_2K 0x00020000 -#define EMAC_M1_TX_FIFO_1K 0x00010000 -#define EMAC_M1_TX_TR 0x00008000 -#define EMAC_M1_TX_MWSW 0x00001000 /* 0 wait for status */ -#define EMAC_M1_JUMBO_ENABLE 0x00000800 /* Upt to 9Kr status */ -#define EMAC_M1_OPB_CLK_66 0x00000008 /* 66Mhz */ -#define EMAC_M1_OPB_CLK_83 0x00000010 /* 83Mhz */ -#define EMAC_M1_OPB_CLK_100 0x00000018 /* 100Mhz */ -#define EMAC_M1_OPB_CLK_100P 0x00000020 /* 100Mhz+ */ -#else /* CONFIG_IBM_EMAC4 */ -#define EMAC_M1_RFS_4K 0x00300000 /* ~4k for 512 byte */ -#define EMAC_M1_RFS_2K 0x00200000 -#define EMAC_M1_RFS_1K 0x00100000 -#define EMAC_M1_TX_FIFO_2K 0x00080000 /* 0's for 512 byte */ -#define EMAC_M1_TX_FIFO_1K 0x00040000 -#define EMAC_M1_TR0_DEPEND 0x00010000 /* 0'x for single packet */ -#define EMAC_M1_TR1_DEPEND 0x00004000 -#define EMAC_M1_TR1_MULTI 0x00002000 -#define EMAC_M1_JUMBO_ENABLE 0x00001000 -#endif /* CONFIG_IBM_EMAC4 */ -#define EMAC_M1_BASE (EMAC_M1_TX_FIFO_2K | \ - EMAC_M1_APP | \ - EMAC_M1_TR | EMAC_M1_VLE) - -/* Transmit Mode Register 0 */ -#define EMAC_TMR0_GNP0 0x80000000 -#define EMAC_TMR0_GNP1 0x40000000 -#define EMAC_TMR0_GNPD 0x20000000 -#define EMAC_TMR0_FC 0x10000000 +/* EMACx_TMR0 */ +#define EMAC_TMR0_GNP 0x80000000 +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_TMR0_DEFAULT 0x00000000 +#else 
#define EMAC_TMR0_TFAE_2_32 0x00000001 #define EMAC_TMR0_TFAE_4_64 0x00000002 #define EMAC_TMR0_TFAE_8_128 0x00000003 @@ -112,14 +144,36 @@ typedef struct emac_regs { #define EMAC_TMR0_TFAE_32_512 0x00000005 #define EMAC_TMR0_TFAE_64_1024 0x00000006 #define EMAC_TMR0_TFAE_128_2048 0x00000007 +#define EMAC_TMR0_DEFAULT EMAC_TMR0_TFAE_2_32 +#endif +#define EMAC_TMR0_XMIT (EMAC_TMR0_GNP | EMAC_TMR0_DEFAULT) + +/* EMACx_TMR1 */ + +/* IBM manuals are not very clear here. + * This is my interpretation of how things are. --ebs + */ +#if defined(CONFIG_40x) +#define EMAC_FIFO_ENTRY_SIZE 8 +#define EMAC_MAL_BURST_SIZE (16 * 4) +#else +#define EMAC_FIFO_ENTRY_SIZE 16 +#define EMAC_MAL_BURST_SIZE (64 * 4) +#endif + +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_TMR1(l,h) (((l) << 27) | (((h) & 0xff) << 16)) +#else +#define EMAC_TMR1(l,h) (((l) << 27) | (((h) & 0x3ff) << 14)) +#endif -/* Receive Mode Register */ +/* EMACx_RMR */ #define EMAC_RMR_SP 0x80000000 #define EMAC_RMR_SFCS 0x40000000 -#define EMAC_RMR_ARRP 0x20000000 -#define EMAC_RMR_ARP 0x10000000 -#define EMAC_RMR_AROP 0x08000000 -#define EMAC_RMR_ARPI 0x04000000 +#define EMAC_RMR_RRP 0x20000000 +#define EMAC_RMR_RFP 0x10000000 +#define EMAC_RMR_ROP 0x08000000 +#define EMAC_RMR_RPIR 0x04000000 #define EMAC_RMR_PPP 0x02000000 #define EMAC_RMR_PME 0x01000000 #define EMAC_RMR_PMME 0x00800000 @@ -127,6 +181,9 @@ typedef struct emac_regs { #define EMAC_RMR_MIAE 0x00200000 #define EMAC_RMR_BAE 0x00100000 #define EMAC_RMR_MAE 0x00080000 +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_RMR_BASE 0x00000000 +#else #define EMAC_RMR_RFAF_2_32 0x00000001 #define EMAC_RMR_RFAF_4_64 0x00000002 #define EMAC_RMR_RFAF_8_128 0x00000003 @@ -134,9 +191,21 @@ typedef struct emac_regs { #define EMAC_RMR_RFAF_32_512 0x00000005 #define EMAC_RMR_RFAF_64_1024 0x00000006 #define EMAC_RMR_RFAF_128_2048 0x00000007 -#define EMAC_RMR_BASE (EMAC_RMR_IAE | EMAC_RMR_BAE) +#define EMAC_RMR_BASE EMAC_RMR_RFAF_128_2048 +#endif -/* Interrupt Status & enable 
Regs */ +/* EMACx_ISR & EMACx_ISER */ +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_ISR_TXPE 0x00000000 +#define EMAC_ISR_RXPE 0x00000000 +#define EMAC_ISR_TXUE 0x00000000 +#define EMAC_ISR_RXOE 0x00000000 +#else +#define EMAC_ISR_TXPE 0x20000000 +#define EMAC_ISR_RXPE 0x10000000 +#define EMAC_ISR_TXUE 0x08000000 +#define EMAC_ISR_RXOE 0x04000000 +#endif #define EMAC_ISR_OVR 0x02000000 #define EMAC_ISR_PP 0x01000000 #define EMAC_ISR_BP 0x00800000 @@ -147,53 +216,62 @@ typedef struct emac_regs { #define EMAC_ISR_PTLE 0x00040000 #define EMAC_ISR_ORE 0x00020000 #define EMAC_ISR_IRE 0x00010000 -#define EMAC_ISR_DBDM 0x00000200 -#define EMAC_ISR_DB0 0x00000100 -#define EMAC_ISR_SE0 0x00000080 -#define EMAC_ISR_TE0 0x00000040 -#define EMAC_ISR_DB1 0x00000020 -#define EMAC_ISR_SE1 0x00000010 -#define EMAC_ISR_TE1 0x00000008 +#define EMAC_ISR_SQE 0x00000080 +#define EMAC_ISR_TE 0x00000040 #define EMAC_ISR_MOS 0x00000002 #define EMAC_ISR_MOF 0x00000001 -/* STA CONTROL REG */ +/* EMACx_STACR */ +#define EMAC_STACR_PHYD_MASK 0xffff +#define EMAC_STACR_PHYD_SHIFT 16 #define EMAC_STACR_OC 0x00008000 #define EMAC_STACR_PHYE 0x00004000 -#define EMAC_STACR_WRITE 0x00002000 -#define EMAC_STACR_READ 0x00001000 -#define EMAC_STACR_CLK_83MHZ 0x00000800 /* 0's for 50Mhz */ -#define EMAC_STACR_CLK_66MHZ 0x00000400 -#define EMAC_STACR_CLK_100MHZ 0x00000C00 - -/* Transmit Request Threshold Register */ -#define EMAC_TRTR_1600 0x18000000 /* 0's for 64 Bytes */ -#define EMAC_TRTR_1024 0x0f000000 -#define EMAC_TRTR_512 0x07000000 -#define EMAC_TRTR_256 0x03000000 -#define EMAC_TRTR_192 0x10000000 -#define EMAC_TRTR_128 0x01000000 +#define EMAC_STACR_STAC_MASK 0x00003000 +#define EMAC_STACR_STAC_READ 0x00001000 +#define EMAC_STACR_STAC_WRITE 0x00002000 +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_STACR_OPBC_MASK 0x00000C00 +#define EMAC_STACR_OPBC_50 0x00000000 +#define EMAC_STACR_OPBC_66 0x00000400 +#define EMAC_STACR_OPBC_83 0x00000800 +#define EMAC_STACR_OPBC_100 0x00000C00 +#define 
EMAC_STACR_OPBC(freq) ((freq) <= 50 ? EMAC_STACR_OPBC_50 : \ + (freq) <= 66 ? EMAC_STACR_OPBC_66 : \ + (freq) <= 83 ? EMAC_STACR_OPBC_83 : EMAC_STACR_OPBC_100) +#define EMAC_STACR_BASE(opb) EMAC_STACR_OPBC(opb) +#else +#define EMAC_STACR_BASE(opb) 0x00000000 +#endif +#define EMAC_STACR_PCDA_MASK 0x1f +#define EMAC_STACR_PCDA_SHIFT 5 +#define EMAC_STACR_PRA_MASK 0x1f + +/* EMACx_TRTR */ +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_TRTR_SHIFT 27 +#else +#define EMAC_TRTR_SHIFT 24 +#endif +#define EMAC_TRTR(size) ((((size) >> 6) - 1) << EMAC_TRTR_SHIFT) + +/* EMACx_RWMR */ +#if !defined(CONFIG_IBM_EMAC4) +#define EMAC_RWMR(l,h) (((l) << 23) | ( ((h) & 0x1ff) << 7)) +#else +#define EMAC_RWMR(l,h) (((l) << 22) | ( ((h) & 0x3ff) << 6)) +#endif +/* EMAC specific TX descriptor control fields (write access) */ #define EMAC_TX_CTRL_GFCS 0x0200 #define EMAC_TX_CTRL_GP 0x0100 #define EMAC_TX_CTRL_ISA 0x0080 #define EMAC_TX_CTRL_RSA 0x0040 #define EMAC_TX_CTRL_IVT 0x0020 #define EMAC_TX_CTRL_RVT 0x0010 -#define EMAC_TX_CTRL_TAH_CSUM 0x000e /* TAH only */ -#define EMAC_TX_CTRL_TAH_SEG4 0x000a /* TAH only */ -#define EMAC_TX_CTRL_TAH_SEG3 0x0008 /* TAH only */ -#define EMAC_TX_CTRL_TAH_SEG2 0x0006 /* TAH only */ -#define EMAC_TX_CTRL_TAH_SEG1 0x0004 /* TAH only */ -#define EMAC_TX_CTRL_TAH_SEG0 0x0002 /* TAH only */ -#define EMAC_TX_CTRL_TAH_DIS 0x0000 /* TAH only */ - -#define EMAC_TX_CTRL_DFLT ( \ - MAL_TX_CTRL_INTR | EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP ) +#define EMAC_TX_CTRL_TAH_CSUM 0x000e -/* madmal transmit status / Control bits */ +/* EMAC specific TX descriptor status fields (read access) */ #define EMAC_TX_ST_BFCS 0x0200 -#define EMAC_TX_ST_BPP 0x0100 #define EMAC_TX_ST_LCS 0x0080 #define EMAC_TX_ST_ED 0x0040 #define EMAC_TX_ST_EC 0x0020 @@ -202,8 +280,16 @@ typedef struct emac_regs { #define EMAC_TX_ST_SC 0x0004 #define EMAC_TX_ST_UR 0x0002 #define EMAC_TX_ST_SQE 0x0001 +#if !defined(CONFIG_IBM_EMAC_TAH) +#define EMAC_IS_BAD_TX(v) ((v) & (EMAC_TX_ST_LCS | 
EMAC_TX_ST_ED | \ + EMAC_TX_ST_EC | EMAC_TX_ST_LC | \ + EMAC_TX_ST_MC | EMAC_TX_ST_UR)) +#else +#define EMAC_IS_BAD_TX(v) ((v) & (EMAC_TX_ST_LCS | EMAC_TX_ST_ED | \ + EMAC_TX_ST_EC | EMAC_TX_ST_LC)) +#endif -/* madmal receive status / Control bits */ +/* EMAC specific RX descriptor status fields (read access) */ #define EMAC_RX_ST_OE 0x0200 #define EMAC_RX_ST_PP 0x0100 #define EMAC_RX_ST_BP 0x0080 @@ -214,54 +300,10 @@ typedef struct emac_regs { #define EMAC_RX_ST_PTL 0x0004 #define EMAC_RX_ST_ORE 0x0002 #define EMAC_RX_ST_IRE 0x0001 -#define EMAC_BAD_RX_PACKET 0x02ff -#define EMAC_CSUM_VER_ERROR 0x0003 - -/* identify a bad rx packet dependent on emac features */ -#ifdef CONFIG_IBM_EMAC4 -#define EMAC_IS_BAD_RX_PACKET(desc) \ - (((desc & (EMAC_BAD_RX_PACKET & ~EMAC_CSUM_VER_ERROR)) || \ - ((desc & EMAC_CSUM_VER_ERROR) == EMAC_RX_ST_ORE) || \ - ((desc & EMAC_CSUM_VER_ERROR) == EMAC_RX_ST_IRE))) -#else -#define EMAC_IS_BAD_RX_PACKET(desc) \ - (desc & EMAC_BAD_RX_PACKET) -#endif - -/* SoC implementation specific EMAC register defaults */ -#if defined(CONFIG_440GP) -#define EMAC_RWMR_DEFAULT 0x80009000 -#define EMAC_TMR0_DEFAULT 0x00000000 -#define EMAC_TMR1_DEFAULT 0xf8640000 -#elif defined(CONFIG_440GX) -#define EMAC_RWMR_DEFAULT 0x1000a200 -#define EMAC_TMR0_DEFAULT EMAC_TMR0_TFAE_2_32 -#define EMAC_TMR1_DEFAULT 0xa00f0000 -#elif defined(CONFIG_440SP) -#define EMAC_RWMR_DEFAULT 0x08002000 -#define EMAC_TMR0_DEFAULT EMAC_TMR0_TFAE_128_2048 -#define EMAC_TMR1_DEFAULT 0xf8200000 -#else -#define EMAC_RWMR_DEFAULT 0x0f002000 -#define EMAC_TMR0_DEFAULT 0x00000000 -#define EMAC_TMR1_DEFAULT 0x380f0000 -#endif /* CONFIG_440GP */ - -/* Revision specific EMAC register defaults */ -#ifdef CONFIG_IBM_EMAC4 -#define EMAC_M1_DEFAULT (EMAC_M1_BASE | \ - EMAC_M1_OPB_CLK_83 | \ - EMAC_M1_TX_MWSW) -#define EMAC_RMR_DEFAULT (EMAC_RMR_BASE | \ - EMAC_RMR_RFAF_128_2048) -#define EMAC_TMR0_XMIT (EMAC_TMR0_GNP0 | \ - EMAC_TMR0_DEFAULT) -#define EMAC_TRTR_DEFAULT EMAC_TRTR_1024 -#else /* 
!CONFIG_IBM_EMAC4 */ -#define EMAC_M1_DEFAULT EMAC_M1_BASE -#define EMAC_RMR_DEFAULT EMAC_RMR_BASE -#define EMAC_TMR0_XMIT EMAC_TMR0_GNP0 -#define EMAC_TRTR_DEFAULT EMAC_TRTR_1600 -#endif /* CONFIG_IBM_EMAC4 */ - -#endif +#define EMAC_RX_TAH_BAD_CSUM 0x0003 +#define EMAC_BAD_RX_MASK (EMAC_RX_ST_OE | EMAC_RX_ST_BP | \ + EMAC_RX_ST_RP | EMAC_RX_ST_SE | \ + EMAC_RX_ST_AE | EMAC_RX_ST_BFCS | \ + EMAC_RX_ST_PTL | EMAC_RX_ST_ORE | \ + EMAC_RX_ST_IRE ) +#endif /* __IBM_EMAC_H_ */ diff -puN drivers/net/ibm_emac/ibm_emac_mal.c~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_mal.c --- devel/drivers/net/ibm_emac/ibm_emac_mal.c~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_mal.c 2005-10-28 17:44:03.000000000 -0700 @@ -1,436 +1,565 @@ /* - * ibm_ocp_mal.c + * drivers/net/ibm_emac/ibm_emac_mal.c * - * Armin Kuster akuster at mvista.com - * Juen, 2002 + * Memory Access Layer (MAL) support * - * Copyright 2002 MontaVista Softare Inc. + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin or + * + * Based on original work by + * Benjamin Herrenschmidt , + * David Gibson , + * + * Armin Kuster + * Copyright 2002 MontaVista Softare Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. + * */ - #include #include #include #include #include #include +#include #include -#include -#include #include +#include "ibm_emac_core.h" #include "ibm_emac_mal.h" +#include "ibm_emac_debug.h" -// Locking: Should we share a lock with the client ? The client could provide -// a lock pointer (optionally) in the commac structure... I don't think this is -// really necessary though - -/* This lock protects the commac list. 
On today UP implementations, it's - * really only used as IRQ protection in mal_{register,unregister}_commac() - */ -static DEFINE_RWLOCK(mal_list_lock); - -int mal_register_commac(struct ibm_ocp_mal *mal, struct mal_commac *commac) +int __init mal_register_commac(struct ibm_ocp_mal *mal, + struct mal_commac *commac) { unsigned long flags; + local_irq_save(flags); - write_lock_irqsave(&mal_list_lock, flags); + MAL_DBG("%d: reg(%08x, %08x)" NL, mal->def->index, + commac->tx_chan_mask, commac->rx_chan_mask); - /* Don't let multiple commacs claim the same channel */ + /* Don't let multiple commacs claim the same channel(s) */ if ((mal->tx_chan_mask & commac->tx_chan_mask) || (mal->rx_chan_mask & commac->rx_chan_mask)) { - write_unlock_irqrestore(&mal_list_lock, flags); + local_irq_restore(flags); + printk(KERN_WARNING "mal%d: COMMAC channels conflict!\n", + mal->def->index); return -EBUSY; } mal->tx_chan_mask |= commac->tx_chan_mask; mal->rx_chan_mask |= commac->rx_chan_mask; + list_add(&commac->list, &mal->list); - list_add(&commac->list, &mal->commac); - - write_unlock_irqrestore(&mal_list_lock, flags); - + local_irq_restore(flags); return 0; } -int mal_unregister_commac(struct ibm_ocp_mal *mal, struct mal_commac *commac) +void __exit mal_unregister_commac(struct ibm_ocp_mal *mal, + struct mal_commac *commac) { unsigned long flags; + local_irq_save(flags); - write_lock_irqsave(&mal_list_lock, flags); + MAL_DBG("%d: unreg(%08x, %08x)" NL, mal->def->index, + commac->tx_chan_mask, commac->rx_chan_mask); mal->tx_chan_mask &= ~commac->tx_chan_mask; mal->rx_chan_mask &= ~commac->rx_chan_mask; - list_del_init(&commac->list); - write_unlock_irqrestore(&mal_list_lock, flags); - - return 0; + local_irq_restore(flags); } int mal_set_rcbs(struct ibm_ocp_mal *mal, int channel, unsigned long size) { - switch (channel) { - case 0: - set_mal_dcrn(mal, DCRN_MALRCBS0, size); - break; -#ifdef DCRN_MALRCBS1 - case 1: - set_mal_dcrn(mal, DCRN_MALRCBS1, size); - break; -#endif -#ifdef 
DCRN_MALRCBS2 - case 2: - set_mal_dcrn(mal, DCRN_MALRCBS2, size); - break; -#endif -#ifdef DCRN_MALRCBS3 - case 3: - set_mal_dcrn(mal, DCRN_MALRCBS3, size); - break; -#endif - default: + struct ocp_func_mal_data *maldata = mal->def->additions; + BUG_ON(channel < 0 || channel >= maldata->num_rx_chans || + size > MAL_MAX_RX_SIZE); + + MAL_DBG("%d: set_rbcs(%d, %lu)" NL, mal->def->index, channel, size); + + if (size & 0xf) { + printk(KERN_WARNING + "mal%d: incorrect RX size %lu for the channel %d\n", + mal->def->index, size, channel); return -EINVAL; } + set_mal_dcrn(mal, MAL_RCBS(channel), size >> 4); return 0; } -static irqreturn_t mal_serr(int irq, void *dev_instance, struct pt_regs *regs) +int mal_tx_bd_offset(struct ibm_ocp_mal *mal, int channel) { - struct ibm_ocp_mal *mal = dev_instance; - unsigned long mal_error; + struct ocp_func_mal_data *maldata = mal->def->additions; + BUG_ON(channel < 0 || channel >= maldata->num_tx_chans); + return channel * NUM_TX_BUFF; +} - /* - * This SERR applies to one of the devices on the MAL, here we charge - * it against the first EMAC registered for the MAL. 
- */ +int mal_rx_bd_offset(struct ibm_ocp_mal *mal, int channel) +{ + struct ocp_func_mal_data *maldata = mal->def->additions; + BUG_ON(channel < 0 || channel >= maldata->num_rx_chans); + return maldata->num_tx_chans * NUM_TX_BUFF + channel * NUM_RX_BUFF; +} - mal_error = get_mal_dcrn(mal, DCRN_MALESR); +void mal_enable_tx_channel(struct ibm_ocp_mal *mal, int channel) +{ + local_bh_disable(); + MAL_DBG("%d: enable_tx(%d)" NL, mal->def->index, channel); + set_mal_dcrn(mal, MAL_TXCASR, + get_mal_dcrn(mal, MAL_TXCASR) | MAL_CHAN_MASK(channel)); + local_bh_enable(); +} - printk(KERN_ERR "%s: System Error (MALESR=%lx)\n", - "MAL" /* FIXME: get the name right */ , mal_error); +void mal_disable_tx_channel(struct ibm_ocp_mal *mal, int channel) +{ + set_mal_dcrn(mal, MAL_TXCARR, MAL_CHAN_MASK(channel)); + MAL_DBG("%d: disable_tx(%d)" NL, mal->def->index, channel); +} - /* FIXME: decipher error */ - /* DIXME: distribute to commacs, if possible */ +void mal_enable_rx_channel(struct ibm_ocp_mal *mal, int channel) +{ + local_bh_disable(); + MAL_DBG("%d: enable_rx(%d)" NL, mal->def->index, channel); + set_mal_dcrn(mal, MAL_RXCASR, + get_mal_dcrn(mal, MAL_RXCASR) | MAL_CHAN_MASK(channel)); + local_bh_enable(); +} - /* Clear the error status register */ - set_mal_dcrn(mal, DCRN_MALESR, mal_error); +void mal_disable_rx_channel(struct ibm_ocp_mal *mal, int channel) +{ + set_mal_dcrn(mal, MAL_RXCARR, MAL_CHAN_MASK(channel)); + MAL_DBG("%d: disable_rx(%d)" NL, mal->def->index, channel); +} - return IRQ_HANDLED; +void mal_poll_add(struct ibm_ocp_mal *mal, struct mal_commac *commac) +{ + local_bh_disable(); + MAL_DBG("%d: poll_add(%p)" NL, mal->def->index, commac); + list_add_tail(&commac->poll_list, &mal->poll_list); + local_bh_enable(); } -static irqreturn_t mal_txeob(int irq, void *dev_instance, struct pt_regs *regs) +void mal_poll_del(struct ibm_ocp_mal *mal, struct mal_commac *commac) +{ + local_bh_disable(); + MAL_DBG("%d: poll_del(%p)" NL, mal->def->index, commac); + 
list_del(&commac->poll_list); + local_bh_enable(); +} + +/* synchronized by mal_poll() */ +static inline void mal_enable_eob_irq(struct ibm_ocp_mal *mal) +{ + MAL_DBG2("%d: enable_irq" NL, mal->def->index); + set_mal_dcrn(mal, MAL_CFG, get_mal_dcrn(mal, MAL_CFG) | MAL_CFG_EOPIE); +} + +/* synchronized by __LINK_STATE_RX_SCHED bit in ndev->state */ +static inline void mal_disable_eob_irq(struct ibm_ocp_mal *mal) +{ + set_mal_dcrn(mal, MAL_CFG, get_mal_dcrn(mal, MAL_CFG) & ~MAL_CFG_EOPIE); + MAL_DBG2("%d: disable_irq" NL, mal->def->index); +} + +static irqreturn_t mal_serr(int irq, void *dev_instance, struct pt_regs *regs) { struct ibm_ocp_mal *mal = dev_instance; - struct list_head *l; - unsigned long isr; + u32 esr = get_mal_dcrn(mal, MAL_ESR); - isr = get_mal_dcrn(mal, DCRN_MALTXEOBISR); - set_mal_dcrn(mal, DCRN_MALTXEOBISR, isr); + /* Clear the error status register */ + set_mal_dcrn(mal, MAL_ESR, esr); - read_lock(&mal_list_lock); - list_for_each(l, &mal->commac) { - struct mal_commac *mc = list_entry(l, struct mal_commac, list); + MAL_DBG("%d: SERR %08x" NL, mal->def->index, esr); + + if (esr & MAL_ESR_EVB) { + if (esr & MAL_ESR_DE) { + /* We ignore Descriptor error, + * TXDE or RXDE interrupt will be generated anyway. + */ + return IRQ_HANDLED; + } - if (isr & mc->tx_chan_mask) { - mc->ops->txeob(mc->dev, isr & mc->tx_chan_mask); + if (esr & MAL_ESR_PEIN) { + /* PLB error, it's probably buggy hardware or + * incorrect physical address in BD (i.e. 
bug) + */ + if (net_ratelimit()) + printk(KERN_ERR + "mal%d: system error, PLB (ESR = 0x%08x)\n", + mal->def->index, esr); + return IRQ_HANDLED; } + + /* OPB error, it's probably buggy hardware or incorrect EBC setup */ + if (net_ratelimit()) + printk(KERN_ERR + "mal%d: system error, OPB (ESR = 0x%08x)\n", + mal->def->index, esr); } - read_unlock(&mal_list_lock); + return IRQ_HANDLED; +} + +static inline void mal_schedule_poll(struct ibm_ocp_mal *mal) +{ + if (likely(netif_rx_schedule_prep(&mal->poll_dev))) { + MAL_DBG2("%d: schedule_poll" NL, mal->def->index); + mal_disable_eob_irq(mal); + __netif_rx_schedule(&mal->poll_dev); + } else + MAL_DBG2("%d: already in poll" NL, mal->def->index); +} +static irqreturn_t mal_txeob(int irq, void *dev_instance, struct pt_regs *regs) +{ + struct ibm_ocp_mal *mal = dev_instance; + u32 r = get_mal_dcrn(mal, MAL_TXEOBISR); + MAL_DBG2("%d: txeob %08x" NL, mal->def->index, r); + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_TXEOBISR, r); return IRQ_HANDLED; } static irqreturn_t mal_rxeob(int irq, void *dev_instance, struct pt_regs *regs) { struct ibm_ocp_mal *mal = dev_instance; - struct list_head *l; - unsigned long isr; + u32 r = get_mal_dcrn(mal, MAL_RXEOBISR); + MAL_DBG2("%d: rxeob %08x" NL, mal->def->index, r); + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_RXEOBISR, r); + return IRQ_HANDLED; +} - isr = get_mal_dcrn(mal, DCRN_MALRXEOBISR); - set_mal_dcrn(mal, DCRN_MALRXEOBISR, isr); +static irqreturn_t mal_txde(int irq, void *dev_instance, struct pt_regs *regs) +{ + struct ibm_ocp_mal *mal = dev_instance; + u32 deir = get_mal_dcrn(mal, MAL_TXDEIR); + set_mal_dcrn(mal, MAL_TXDEIR, deir); - read_lock(&mal_list_lock); - list_for_each(l, &mal->commac) { - struct mal_commac *mc = list_entry(l, struct mal_commac, list); + MAL_DBG("%d: txde %08x" NL, mal->def->index, deir); - if (isr & mc->rx_chan_mask) { - mc->ops->rxeob(mc->dev, isr & mc->rx_chan_mask); - } - } - read_unlock(&mal_list_lock); + if (net_ratelimit()) + 
printk(KERN_ERR + "mal%d: TX descriptor error (TXDEIR = 0x%08x)\n", + mal->def->index, deir); return IRQ_HANDLED; } -static irqreturn_t mal_txde(int irq, void *dev_instance, struct pt_regs *regs) +static irqreturn_t mal_rxde(int irq, void *dev_instance, struct pt_regs *regs) { struct ibm_ocp_mal *mal = dev_instance; struct list_head *l; - unsigned long deir; - - deir = get_mal_dcrn(mal, DCRN_MALTXDEIR); + u32 deir = get_mal_dcrn(mal, MAL_RXDEIR); - /* FIXME: print which MAL correctly */ - printk(KERN_WARNING "%s: Tx descriptor error (MALTXDEIR=%lx)\n", - "MAL", deir); + MAL_DBG("%d: rxde %08x" NL, mal->def->index, deir); - read_lock(&mal_list_lock); - list_for_each(l, &mal->commac) { + list_for_each(l, &mal->list) { struct mal_commac *mc = list_entry(l, struct mal_commac, list); - - if (deir & mc->tx_chan_mask) { - mc->ops->txde(mc->dev, deir & mc->tx_chan_mask); + if (deir & mc->rx_chan_mask) { + mc->rx_stopped = 1; + mc->ops->rxde(mc->dev); } } - read_unlock(&mal_list_lock); + + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_RXDEIR, deir); return IRQ_HANDLED; } -/* - * This interrupt should be very rare at best. This occurs when - * the hardware has a problem with the receive descriptors. The manual - * states that it occurs when the hardware cannot the receive descriptor - * empty bit is not set. The recovery mechanism will be to - * traverse through the descriptors, handle any that are marked to be - * handled and reinitialize each along the way. At that point the driver - * will be restarted. 
- */ -static irqreturn_t mal_rxde(int irq, void *dev_instance, struct pt_regs *regs) +static int mal_poll(struct net_device *ndev, int *budget) { - struct ibm_ocp_mal *mal = dev_instance; + struct ibm_ocp_mal *mal = ndev->priv; struct list_head *l; - unsigned long deir; + int rx_work_limit = min(ndev->quota, *budget), received = 0, done; - deir = get_mal_dcrn(mal, DCRN_MALRXDEIR); + MAL_DBG2("%d: poll(%d) %d ->" NL, mal->def->index, *budget, + rx_work_limit); + again: + /* Process TX skbs */ + list_for_each(l, &mal->poll_list) { + struct mal_commac *mc = + list_entry(l, struct mal_commac, poll_list); + mc->ops->poll_tx(mc->dev); + } - /* - * This really is needed. This case encountered in stress testing. + /* Process RX skbs. + * We _might_ need something more smart here to enforce polling fairness. */ - if (deir == 0) - return IRQ_HANDLED; - - /* FIXME: print which MAL correctly */ - printk(KERN_WARNING "%s: Rx descriptor error (MALRXDEIR=%lx)\n", - "MAL", deir); - - read_lock(&mal_list_lock); - list_for_each(l, &mal->commac) { - struct mal_commac *mc = list_entry(l, struct mal_commac, list); + list_for_each(l, &mal->poll_list) { + struct mal_commac *mc = + list_entry(l, struct mal_commac, poll_list); + int n = mc->ops->poll_rx(mc->dev, rx_work_limit); + if (n) { + received += n; + rx_work_limit -= n; + if (rx_work_limit <= 0) { + done = 0; + goto more_work; // XXX What if this is the last one ? 
+ } + } + } - if (deir & mc->rx_chan_mask) { - mc->ops->rxde(mc->dev, deir & mc->rx_chan_mask); + /* We need to disable IRQs to protect from RXDE IRQ here */ + local_irq_disable(); + __netif_rx_complete(ndev); + mal_enable_eob_irq(mal); + local_irq_enable(); + + done = 1; + + /* Check for "rotting" packet(s) */ + list_for_each(l, &mal->poll_list) { + struct mal_commac *mc = + list_entry(l, struct mal_commac, poll_list); + if (unlikely(mc->ops->peek_rx(mc->dev) || mc->rx_stopped)) { + MAL_DBG2("%d: rotting packet" NL, mal->def->index); + if (netif_rx_reschedule(ndev, received)) + mal_disable_eob_irq(mal); + else + MAL_DBG2("%d: already in poll list" NL, + mal->def->index); + + if (rx_work_limit > 0) + goto again; + else + goto more_work; } + mc->ops->poll_tx(mc->dev); } - read_unlock(&mal_list_lock); - return IRQ_HANDLED; + more_work: + ndev->quota -= received; + *budget -= received; + + MAL_DBG2("%d: poll() %d <- %d" NL, mal->def->index, *budget, + done ? 0 : 1); + return done ? 0 : 1; +} + +static void mal_reset(struct ibm_ocp_mal *mal) +{ + int n = 10; + MAL_DBG("%d: reset" NL, mal->def->index); + + set_mal_dcrn(mal, MAL_CFG, MAL_CFG_SR); + + /* Wait for reset to complete (1 system clock) */ + while ((get_mal_dcrn(mal, MAL_CFG) & MAL_CFG_SR) && n) + --n; + + if (unlikely(!n)) + printk(KERN_ERR "mal%d: reset timeout\n", mal->def->index); +} + +int mal_get_regs_len(struct ibm_ocp_mal *mal) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct ibm_mal_regs); +} + +void *mal_dump_regs(struct ibm_ocp_mal *mal, void *buf) +{ + struct emac_ethtool_regs_subhdr *hdr = buf; + struct ibm_mal_regs *regs = (struct ibm_mal_regs *)(hdr + 1); + struct ocp_func_mal_data *maldata = mal->def->additions; + int i; + + hdr->version = MAL_VERSION; + hdr->index = mal->def->index; + + regs->tx_count = maldata->num_tx_chans; + regs->rx_count = maldata->num_rx_chans; + + regs->cfg = get_mal_dcrn(mal, MAL_CFG); + regs->esr = get_mal_dcrn(mal, MAL_ESR); + regs->ier = 
get_mal_dcrn(mal, MAL_IER); + regs->tx_casr = get_mal_dcrn(mal, MAL_TXCASR); + regs->tx_carr = get_mal_dcrn(mal, MAL_TXCARR); + regs->tx_eobisr = get_mal_dcrn(mal, MAL_TXEOBISR); + regs->tx_deir = get_mal_dcrn(mal, MAL_TXDEIR); + regs->rx_casr = get_mal_dcrn(mal, MAL_RXCASR); + regs->rx_carr = get_mal_dcrn(mal, MAL_RXCARR); + regs->rx_eobisr = get_mal_dcrn(mal, MAL_RXEOBISR); + regs->rx_deir = get_mal_dcrn(mal, MAL_RXDEIR); + + for (i = 0; i < regs->tx_count; ++i) + regs->tx_ctpr[i] = get_mal_dcrn(mal, MAL_TXCTPR(i)); + + for (i = 0; i < regs->rx_count; ++i) { + regs->rx_ctpr[i] = get_mal_dcrn(mal, MAL_RXCTPR(i)); + regs->rcbs[i] = get_mal_dcrn(mal, MAL_RCBS(i)); + } + return regs + 1; } static int __init mal_probe(struct ocp_device *ocpdev) { - struct ibm_ocp_mal *mal = NULL; + struct ibm_ocp_mal *mal; struct ocp_func_mal_data *maldata; - int err = 0; + int err = 0, i, bd_size; - maldata = (struct ocp_func_mal_data *)ocpdev->def->additions; + MAL_DBG("%d: probe" NL, ocpdev->def->index); + + maldata = ocpdev->def->additions; if (maldata == NULL) { - printk(KERN_ERR "mal%d: Missing additional datas !\n", + printk(KERN_ERR "mal%d: missing additional data!\n", ocpdev->def->index); return -ENODEV; } - mal = kmalloc(sizeof(struct ibm_ocp_mal), GFP_KERNEL); - if (mal == NULL) { + mal = kzalloc(sizeof(struct ibm_ocp_mal), GFP_KERNEL); + if (!mal) { printk(KERN_ERR - "mal%d: Out of memory allocating MAL structure !\n", + "mal%d: out of memory allocating MAL structure!\n", ocpdev->def->index); return -ENOMEM; } - memset(mal, 0, sizeof(*mal)); - - switch (ocpdev->def->index) { - case 0: - mal->dcrbase = DCRN_MAL_BASE; - break; -#ifdef DCRN_MAL1_BASE - case 1: - mal->dcrbase = DCRN_MAL1_BASE; - break; -#endif - default: - BUG(); - } - - /**************************/ + mal->dcrbase = maldata->dcr_base; + mal->def = ocpdev->def; - INIT_LIST_HEAD(&mal->commac); + INIT_LIST_HEAD(&mal->poll_list); + set_bit(__LINK_STATE_START, &mal->poll_dev.state); + mal->poll_dev.weight = 
CONFIG_IBM_EMAC_POLL_WEIGHT; + mal->poll_dev.poll = mal_poll; + mal->poll_dev.priv = mal; + atomic_set(&mal->poll_dev.refcnt, 1); - set_mal_dcrn(mal, DCRN_MALRXCARR, 0xFFFFFFFF); - set_mal_dcrn(mal, DCRN_MALTXCARR, 0xFFFFFFFF); + INIT_LIST_HEAD(&mal->list); - set_mal_dcrn(mal, DCRN_MALCR, MALCR_MMSR); /* 384 */ - /* FIXME: Add delay */ + /* Load power-on reset defaults */ + mal_reset(mal); /* Set the MAL configuration register */ - set_mal_dcrn(mal, DCRN_MALCR, - MALCR_PLBB | MALCR_OPBBL | MALCR_LEA | - MALCR_PLBLT_DEFAULT); - - /* It would be nice to allocate buffers separately for each - * channel, but we can't because the channels share the upper - * 13 bits of address lines. Each channels buffer must also - * be 4k aligned, so we allocate 4k for each channel. This is - * inefficient FIXME: do better, if possible */ - mal->tx_virt_addr = dma_alloc_coherent(&ocpdev->dev, - MAL_DT_ALIGN * - maldata->num_tx_chans, - &mal->tx_phys_addr, GFP_KERNEL); - if (mal->tx_virt_addr == NULL) { + set_mal_dcrn(mal, MAL_CFG, MAL_CFG_DEFAULT | MAL_CFG_PLBB | + MAL_CFG_OPBBL | MAL_CFG_LEA); + + mal_enable_eob_irq(mal); + + /* Allocate space for BD rings */ + BUG_ON(maldata->num_tx_chans <= 0 || maldata->num_tx_chans > 32); + BUG_ON(maldata->num_rx_chans <= 0 || maldata->num_rx_chans > 32); + bd_size = sizeof(struct mal_descriptor) * + (NUM_TX_BUFF * maldata->num_tx_chans + + NUM_RX_BUFF * maldata->num_rx_chans); + mal->bd_virt = + dma_alloc_coherent(&ocpdev->dev, bd_size, &mal->bd_dma, GFP_KERNEL); + + if (!mal->bd_virt) { printk(KERN_ERR - "mal%d: Out of memory allocating MAL descriptors !\n", - ocpdev->def->index); + "mal%d: out of memory allocating RX/TX descriptors!\n", + mal->def->index); err = -ENOMEM; goto fail; } + memset(mal->bd_virt, 0, bd_size); - /* God, oh, god, I hate DCRs */ - set_mal_dcrn(mal, DCRN_MALTXCTP0R, mal->tx_phys_addr); -#ifdef DCRN_MALTXCTP1R - if (maldata->num_tx_chans > 1) - set_mal_dcrn(mal, DCRN_MALTXCTP1R, - mal->tx_phys_addr + MAL_DT_ALIGN); 
-#endif /* DCRN_MALTXCTP1R */ -#ifdef DCRN_MALTXCTP2R - if (maldata->num_tx_chans > 2) - set_mal_dcrn(mal, DCRN_MALTXCTP2R, - mal->tx_phys_addr + 2 * MAL_DT_ALIGN); -#endif /* DCRN_MALTXCTP2R */ -#ifdef DCRN_MALTXCTP3R - if (maldata->num_tx_chans > 3) - set_mal_dcrn(mal, DCRN_MALTXCTP3R, - mal->tx_phys_addr + 3 * MAL_DT_ALIGN); -#endif /* DCRN_MALTXCTP3R */ -#ifdef DCRN_MALTXCTP4R - if (maldata->num_tx_chans > 4) - set_mal_dcrn(mal, DCRN_MALTXCTP4R, - mal->tx_phys_addr + 4 * MAL_DT_ALIGN); -#endif /* DCRN_MALTXCTP4R */ -#ifdef DCRN_MALTXCTP5R - if (maldata->num_tx_chans > 5) - set_mal_dcrn(mal, DCRN_MALTXCTP5R, - mal->tx_phys_addr + 5 * MAL_DT_ALIGN); -#endif /* DCRN_MALTXCTP5R */ -#ifdef DCRN_MALTXCTP6R - if (maldata->num_tx_chans > 6) - set_mal_dcrn(mal, DCRN_MALTXCTP6R, - mal->tx_phys_addr + 6 * MAL_DT_ALIGN); -#endif /* DCRN_MALTXCTP6R */ -#ifdef DCRN_MALTXCTP7R - if (maldata->num_tx_chans > 7) - set_mal_dcrn(mal, DCRN_MALTXCTP7R, - mal->tx_phys_addr + 7 * MAL_DT_ALIGN); -#endif /* DCRN_MALTXCTP7R */ - - mal->rx_virt_addr = dma_alloc_coherent(&ocpdev->dev, - MAL_DT_ALIGN * - maldata->num_rx_chans, - &mal->rx_phys_addr, GFP_KERNEL); - - set_mal_dcrn(mal, DCRN_MALRXCTP0R, mal->rx_phys_addr); -#ifdef DCRN_MALRXCTP1R - if (maldata->num_rx_chans > 1) - set_mal_dcrn(mal, DCRN_MALRXCTP1R, - mal->rx_phys_addr + MAL_DT_ALIGN); -#endif /* DCRN_MALRXCTP1R */ -#ifdef DCRN_MALRXCTP2R - if (maldata->num_rx_chans > 2) - set_mal_dcrn(mal, DCRN_MALRXCTP2R, - mal->rx_phys_addr + 2 * MAL_DT_ALIGN); -#endif /* DCRN_MALRXCTP2R */ -#ifdef DCRN_MALRXCTP3R - if (maldata->num_rx_chans > 3) - set_mal_dcrn(mal, DCRN_MALRXCTP3R, - mal->rx_phys_addr + 3 * MAL_DT_ALIGN); -#endif /* DCRN_MALRXCTP3R */ + for (i = 0; i < maldata->num_tx_chans; ++i) + set_mal_dcrn(mal, MAL_TXCTPR(i), mal->bd_dma + + sizeof(struct mal_descriptor) * + mal_tx_bd_offset(mal, i)); + + for (i = 0; i < maldata->num_rx_chans; ++i) + set_mal_dcrn(mal, MAL_RXCTPR(i), mal->bd_dma + + sizeof(struct mal_descriptor) * + 
mal_rx_bd_offset(mal, i)); err = request_irq(maldata->serr_irq, mal_serr, 0, "MAL SERR", mal); if (err) - goto fail; - err = request_irq(maldata->txde_irq, mal_txde, 0, "MAL TX DE ", mal); + goto fail2; + err = request_irq(maldata->txde_irq, mal_txde, 0, "MAL TX DE", mal); if (err) - goto fail; + goto fail3; err = request_irq(maldata->txeob_irq, mal_txeob, 0, "MAL TX EOB", mal); if (err) - goto fail; + goto fail4; err = request_irq(maldata->rxde_irq, mal_rxde, 0, "MAL RX DE", mal); if (err) - goto fail; + goto fail5; err = request_irq(maldata->rxeob_irq, mal_rxeob, 0, "MAL RX EOB", mal); if (err) - goto fail; + goto fail6; - set_mal_dcrn(mal, DCRN_MALIER, - MALIER_DE | MALIER_NE | MALIER_TE | - MALIER_OPBE | MALIER_PLBE); + /* Enable all MAL SERR interrupt sources */ + set_mal_dcrn(mal, MAL_IER, MAL_IER_EVENTS); - /* Advertise me to the rest of the world */ + /* Advertise this instance to the rest of the world */ ocp_set_drvdata(ocpdev, mal); - printk(KERN_INFO "mal%d: Initialized, %d tx channels, %d rx channels\n", - ocpdev->def->index, maldata->num_tx_chans, - maldata->num_rx_chans); + mal_dbg_register(mal->def->index, mal); + printk(KERN_INFO "mal%d: initialized, %d TX channels, %d RX channels\n", + mal->def->index, maldata->num_tx_chans, maldata->num_rx_chans); return 0; + fail6: + free_irq(maldata->rxde_irq, mal); + fail5: + free_irq(maldata->txeob_irq, mal); + fail4: + free_irq(maldata->txde_irq, mal); + fail3: + free_irq(maldata->serr_irq, mal); + fail2: + dma_free_coherent(&ocpdev->dev, bd_size, mal->bd_virt, mal->bd_dma); fail: - /* FIXME: dispose requested IRQs ! 
*/ - if (err && mal) - kfree(mal); + kfree(mal); return err; } static void __exit mal_remove(struct ocp_device *ocpdev) { struct ibm_ocp_mal *mal = ocp_get_drvdata(ocpdev); - struct ocp_func_mal_data *maldata = ocpdev->def->additions; + struct ocp_func_mal_data *maldata = mal->def->additions; - BUG_ON(!maldata); + MAL_DBG("%d: remove" NL, mal->def->index); + + /* Syncronize with scheduled polling, + stolen from net/core/dev.c:dev_close() + */ + clear_bit(__LINK_STATE_START, &mal->poll_dev.state); + netif_poll_disable(&mal->poll_dev); + + if (!list_empty(&mal->list)) { + /* This is *very* bad */ + printk(KERN_EMERG + "mal%d: commac list is not empty on remove!\n", + mal->def->index); + } ocp_set_drvdata(ocpdev, NULL); - /* FIXME: shut down the MAL, deal with dependency with emac */ free_irq(maldata->serr_irq, mal); free_irq(maldata->txde_irq, mal); free_irq(maldata->txeob_irq, mal); free_irq(maldata->rxde_irq, mal); free_irq(maldata->rxeob_irq, mal); - if (mal->tx_virt_addr) - dma_free_coherent(&ocpdev->dev, - MAL_DT_ALIGN * maldata->num_tx_chans, - mal->tx_virt_addr, mal->tx_phys_addr); - - if (mal->rx_virt_addr) - dma_free_coherent(&ocpdev->dev, - MAL_DT_ALIGN * maldata->num_rx_chans, - mal->rx_virt_addr, mal->rx_phys_addr); + mal_reset(mal); + + mal_dbg_register(mal->def->index, NULL); + + dma_free_coherent(&ocpdev->dev, + sizeof(struct mal_descriptor) * + (NUM_TX_BUFF * maldata->num_tx_chans + + NUM_RX_BUFF * maldata->num_rx_chans), mal->bd_virt, + mal->bd_dma); kfree(mal); } /* Structure for a device driver */ static struct ocp_device_id mal_ids[] = { - {.vendor = OCP_ANY_ID,.function = OCP_FUNC_MAL}, - {.vendor = OCP_VENDOR_INVALID} + { .vendor = OCP_VENDOR_IBM, .function = OCP_FUNC_MAL }, + { .vendor = OCP_VENDOR_INVALID} }; static struct ocp_driver mal_driver = { @@ -441,23 +570,14 @@ static struct ocp_driver mal_driver = { .remove = mal_remove, }; -static int __init init_mals(void) +int __init mal_init(void) { - int rc; - - rc = 
ocp_register_driver(&mal_driver); - if (rc < 0) { - ocp_unregister_driver(&mal_driver); - return -ENODEV; - } - - return 0; + MAL_DBG(": init" NL); + return ocp_register_driver(&mal_driver); } -static void __exit exit_mals(void) +void __exit mal_exit(void) { + MAL_DBG(": exit" NL); ocp_unregister_driver(&mal_driver); } - -module_init(init_mals); -module_exit(exit_mals); diff -puN drivers/net/ibm_emac/ibm_emac_mal.h~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_mal.h --- devel/drivers/net/ibm_emac/ibm_emac_mal.h~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_mal.h 2005-10-28 17:44:03.000000000 -0700 @@ -1,131 +1,267 @@ -#ifndef _IBM_EMAC_MAL_H -#define _IBM_EMAC_MAL_H +/* + * drivers/net/ibm_emac/ibm_emac_mal.h + * + * Memory Access Layer (MAL) support + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin or + * + * Based on original work by + * Armin Kuster + * Copyright 2002 MontaVista Softare Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef __IBM_EMAC_MAL_H_ +#define __IBM_EMAC_MAL_H_ +#include +#include #include +#include -#define MAL_DT_ALIGN (4096) /* Alignment for each channel's descriptor table */ +#include -#define MAL_CHAN_MASK(chan) (0x80000000 >> (chan)) +/* + * These MAL "versions" probably aren't the real versions IBM uses for these + * MAL cores, I assigned them just to make #ifdefs in this file nicer and + * reflect the fact that 40x and 44x have slightly different MALs. 
--ebs + */ +#if defined(CONFIG_405GP) || defined(CONFIG_405GPR) || defined(CONFIG_405EP) || \ + defined(CONFIG_440EP) || defined(CONFIG_NP405H) +#define MAL_VERSION 1 +#elif defined(CONFIG_440GP) || defined(CONFIG_440GX) || defined(CONFIG_440SP) +#define MAL_VERSION 2 +#else +#error "Unknown SoC, please check chip manual and choose MAL 'version'" +#endif + +/* MALx DCR registers */ +#define MAL_CFG 0x00 +#define MAL_CFG_SR 0x80000000 +#define MAL_CFG_PLBB 0x00004000 +#define MAL_CFG_OPBBL 0x00000080 +#define MAL_CFG_EOPIE 0x00000004 +#define MAL_CFG_LEA 0x00000002 +#define MAL_CFG_SD 0x00000001 +#if MAL_VERSION == 1 +#define MAL_CFG_PLBP_MASK 0x00c00000 +#define MAL_CFG_PLBP_10 0x00800000 +#define MAL_CFG_GA 0x00200000 +#define MAL_CFG_OA 0x00100000 +#define MAL_CFG_PLBLE 0x00080000 +#define MAL_CFG_PLBT_MASK 0x00078000 +#define MAL_CFG_DEFAULT (MAL_CFG_PLBP_10 | MAL_CFG_PLBT_MASK) +#elif MAL_VERSION == 2 +#define MAL_CFG_RPP_MASK 0x00c00000 +#define MAL_CFG_RPP_10 0x00800000 +#define MAL_CFG_RMBS_MASK 0x00300000 +#define MAL_CFG_WPP_MASK 0x000c0000 +#define MAL_CFG_WPP_10 0x00080000 +#define MAL_CFG_WMBS_MASK 0x00030000 +#define MAL_CFG_PLBLE 0x00008000 +#define MAL_CFG_DEFAULT (MAL_CFG_RMBS_MASK | MAL_CFG_WMBS_MASK | \ + MAL_CFG_RPP_10 | MAL_CFG_WPP_10) +#else +#error "Unknown MAL version" +#endif + +#define MAL_ESR 0x01 +#define MAL_ESR_EVB 0x80000000 +#define MAL_ESR_CIDT 0x40000000 +#define MAL_ESR_CID_MASK 0x3e000000 +#define MAL_ESR_CID_SHIFT 25 +#define MAL_ESR_DE 0x00100000 +#define MAL_ESR_OTE 0x00040000 +#define MAL_ESR_OSE 0x00020000 +#define MAL_ESR_PEIN 0x00010000 +#define MAL_ESR_DEI 0x00000010 +#define MAL_ESR_OTEI 0x00000004 +#define MAL_ESR_OSEI 0x00000002 +#define MAL_ESR_PBEI 0x00000001 +#if MAL_VERSION == 1 +#define MAL_ESR_ONE 0x00080000 +#define MAL_ESR_ONEI 0x00000008 +#elif MAL_VERSION == 2 +#define MAL_ESR_PTE 0x00800000 +#define MAL_ESR_PRE 0x00400000 +#define MAL_ESR_PWE 0x00200000 +#define MAL_ESR_PTEI 0x00000080 +#define MAL_ESR_PREI 
0x00000040 +#define MAL_ESR_PWEI 0x00000020 +#else +#error "Unknown MAL version" +#endif + +#define MAL_IER 0x02 +#define MAL_IER_DE 0x00000010 +#define MAL_IER_OTE 0x00000004 +#define MAL_IER_OE 0x00000002 +#define MAL_IER_PE 0x00000001 +#if MAL_VERSION == 1 +#define MAL_IER_NWE 0x00000008 +#define MAL_IER_SOC_EVENTS MAL_IER_NWE +#elif MAL_VERSION == 2 +#define MAL_IER_PT 0x00000080 +#define MAL_IER_PRE 0x00000040 +#define MAL_IER_PWE 0x00000020 +#define MAL_IER_SOC_EVENTS (MAL_IER_PT | MAL_IER_PRE | MAL_IER_PWE) +#else +#error "Unknown MAL version" +#endif +#define MAL_IER_EVENTS (MAL_IER_SOC_EVENTS | MAL_IER_OTE | \ + MAL_IER_OTE | MAL_IER_OE | MAL_IER_PE) + +#define MAL_TXCASR 0x04 +#define MAL_TXCARR 0x05 +#define MAL_TXEOBISR 0x06 +#define MAL_TXDEIR 0x07 +#define MAL_RXCASR 0x10 +#define MAL_RXCARR 0x11 +#define MAL_RXEOBISR 0x12 +#define MAL_RXDEIR 0x13 +#define MAL_TXCTPR(n) ((n) + 0x20) +#define MAL_RXCTPR(n) ((n) + 0x40) +#define MAL_RCBS(n) ((n) + 0x60) + +/* In reality MAL can handle TX buffers up to 4095 bytes long, + * but this isn't a good round number :) --ebs + */ +#define MAL_MAX_TX_SIZE 4080 +#define MAL_MAX_RX_SIZE 4080 + +static inline int mal_rx_size(int len) +{ + len = (len + 0xf) & ~0xf; + return len > MAL_MAX_RX_SIZE ? MAL_MAX_RX_SIZE : len; +} + +static inline int mal_tx_chunks(int len) +{ + return (len + MAL_MAX_TX_SIZE - 1) / MAL_MAX_TX_SIZE; +} + +#define MAL_CHAN_MASK(n) (0x80000000 >> (n)) /* MAL Buffer Descriptor structure */ struct mal_descriptor { - unsigned short ctrl; /* MAL / Commac status control bits */ - short data_len; /* Max length is 4K-1 (12 bits) */ - unsigned char *data_ptr; /* pointer to actual data buffer */ -} __attribute__ ((packed)); + u16 ctrl; /* MAL / Commac status control bits */ + u16 data_len; /* Max length is 4K-1 (12 bits) */ + u32 data_ptr; /* pointer to actual data buffer */ +}; /* the following defines are for the MadMAL status and control registers. 
*/ /* MADMAL transmit and receive status/control bits */ -#define MAL_RX_CTRL_EMPTY 0x8000 -#define MAL_RX_CTRL_WRAP 0x4000 -#define MAL_RX_CTRL_CM 0x2000 -#define MAL_RX_CTRL_LAST 0x1000 -#define MAL_RX_CTRL_FIRST 0x0800 -#define MAL_RX_CTRL_INTR 0x0400 - -#define MAL_TX_CTRL_READY 0x8000 -#define MAL_TX_CTRL_WRAP 0x4000 -#define MAL_TX_CTRL_CM 0x2000 -#define MAL_TX_CTRL_LAST 0x1000 -#define MAL_TX_CTRL_INTR 0x0400 +#define MAL_RX_CTRL_EMPTY 0x8000 +#define MAL_RX_CTRL_WRAP 0x4000 +#define MAL_RX_CTRL_CM 0x2000 +#define MAL_RX_CTRL_LAST 0x1000 +#define MAL_RX_CTRL_FIRST 0x0800 +#define MAL_RX_CTRL_INTR 0x0400 +#define MAL_RX_CTRL_SINGLE (MAL_RX_CTRL_LAST | MAL_RX_CTRL_FIRST) +#define MAL_IS_SINGLE_RX(ctrl) (((ctrl) & MAL_RX_CTRL_SINGLE) == MAL_RX_CTRL_SINGLE) + +#define MAL_TX_CTRL_READY 0x8000 +#define MAL_TX_CTRL_WRAP 0x4000 +#define MAL_TX_CTRL_CM 0x2000 +#define MAL_TX_CTRL_LAST 0x1000 +#define MAL_TX_CTRL_INTR 0x0400 struct mal_commac_ops { - void (*txeob) (void *dev, u32 chanmask); - void (*txde) (void *dev, u32 chanmask); - void (*rxeob) (void *dev, u32 chanmask); - void (*rxde) (void *dev, u32 chanmask); + void (*poll_tx) (void *dev); + int (*poll_rx) (void *dev, int budget); + int (*peek_rx) (void *dev); + void (*rxde) (void *dev); }; struct mal_commac { - struct mal_commac_ops *ops; - void *dev; - u32 tx_chan_mask, rx_chan_mask; - struct list_head list; + struct mal_commac_ops *ops; + void *dev; + struct list_head poll_list; + int rx_stopped; + + u32 tx_chan_mask; + u32 rx_chan_mask; + struct list_head list; }; struct ibm_ocp_mal { - int dcrbase; + int dcrbase; - struct list_head commac; - u32 tx_chan_mask, rx_chan_mask; + struct list_head poll_list; + struct net_device poll_dev; - dma_addr_t tx_phys_addr; - struct mal_descriptor *tx_virt_addr; + struct list_head list; + u32 tx_chan_mask; + u32 rx_chan_mask; - dma_addr_t rx_phys_addr; - struct mal_descriptor *rx_virt_addr; -}; + dma_addr_t bd_dma; + struct mal_descriptor *bd_virt; -#define 
GET_MAL_STANZA(base,dcrn) \ - case base: \ - x = mfdcr(dcrn(base)); \ - break; - -#define SET_MAL_STANZA(base,dcrn, val) \ - case base: \ - mtdcr(dcrn(base), (val)); \ - break; - -#define GET_MAL0_STANZA(dcrn) GET_MAL_STANZA(DCRN_MAL_BASE,dcrn) -#define SET_MAL0_STANZA(dcrn,val) SET_MAL_STANZA(DCRN_MAL_BASE,dcrn,val) - -#ifdef DCRN_MAL1_BASE -#define GET_MAL1_STANZA(dcrn) GET_MAL_STANZA(DCRN_MAL1_BASE,dcrn) -#define SET_MAL1_STANZA(dcrn,val) SET_MAL_STANZA(DCRN_MAL1_BASE,dcrn,val) -#else /* ! DCRN_MAL1_BASE */ -#define GET_MAL1_STANZA(dcrn) -#define SET_MAL1_STANZA(dcrn,val) -#endif - -#define get_mal_dcrn(mal, dcrn) ({ \ - u32 x; \ - switch ((mal)->dcrbase) { \ - GET_MAL0_STANZA(dcrn) \ - GET_MAL1_STANZA(dcrn) \ - default: \ - x = 0; \ - BUG(); \ - } \ -x; }) - -#define set_mal_dcrn(mal, dcrn, val) do { \ - switch ((mal)->dcrbase) { \ - SET_MAL0_STANZA(dcrn,val) \ - SET_MAL1_STANZA(dcrn,val) \ - default: \ - BUG(); \ - } } while (0) - -static inline void mal_enable_tx_channels(struct ibm_ocp_mal *mal, u32 chanmask) -{ - set_mal_dcrn(mal, DCRN_MALTXCASR, - get_mal_dcrn(mal, DCRN_MALTXCASR) | chanmask); -} - -static inline void mal_disable_tx_channels(struct ibm_ocp_mal *mal, - u32 chanmask) -{ - set_mal_dcrn(mal, DCRN_MALTXCARR, chanmask); -} + struct ocp_def *def; +}; -static inline void mal_enable_rx_channels(struct ibm_ocp_mal *mal, u32 chanmask) +static inline u32 get_mal_dcrn(struct ibm_ocp_mal *mal, int reg) { - set_mal_dcrn(mal, DCRN_MALRXCASR, - get_mal_dcrn(mal, DCRN_MALRXCASR) | chanmask); + return mfdcr(mal->dcrbase + reg); } -static inline void mal_disable_rx_channels(struct ibm_ocp_mal *mal, - u32 chanmask) +static inline void set_mal_dcrn(struct ibm_ocp_mal *mal, int reg, u32 val) { - set_mal_dcrn(mal, DCRN_MALRXCARR, chanmask); + mtdcr(mal->dcrbase + reg, val); } -extern int mal_register_commac(struct ibm_ocp_mal *mal, - struct mal_commac *commac); -extern int mal_unregister_commac(struct ibm_ocp_mal *mal, - struct mal_commac *commac); +/* Register 
MAL devices */ +int mal_init(void) __init; +void mal_exit(void) __exit; + +int mal_register_commac(struct ibm_ocp_mal *mal, + struct mal_commac *commac) __init; +void mal_unregister_commac(struct ibm_ocp_mal *mal, + struct mal_commac *commac) __exit; +int mal_set_rcbs(struct ibm_ocp_mal *mal, int channel, unsigned long size); + +/* Returns BD ring offset for a particular channel + (in 'struct mal_descriptor' elements) +*/ +int mal_tx_bd_offset(struct ibm_ocp_mal *mal, int channel); +int mal_rx_bd_offset(struct ibm_ocp_mal *mal, int channel); + +void mal_enable_tx_channel(struct ibm_ocp_mal *mal, int channel); +void mal_disable_tx_channel(struct ibm_ocp_mal *mal, int channel); +void mal_enable_rx_channel(struct ibm_ocp_mal *mal, int channel); +void mal_disable_rx_channel(struct ibm_ocp_mal *mal, int channel); + +/* Add/remove EMAC to/from MAL polling list */ +void mal_poll_add(struct ibm_ocp_mal *mal, struct mal_commac *commac); +void mal_poll_del(struct ibm_ocp_mal *mal, struct mal_commac *commac); + +/* Ethtool MAL registers */ +struct ibm_mal_regs { + u32 tx_count; + u32 rx_count; + + u32 cfg; + u32 esr; + u32 ier; + u32 tx_casr; + u32 tx_carr; + u32 tx_eobisr; + u32 tx_deir; + u32 rx_casr; + u32 rx_carr; + u32 rx_eobisr; + u32 rx_deir; + u32 tx_ctpr[32]; + u32 rx_ctpr[32]; + u32 rcbs[32]; +}; -extern int mal_set_rcbs(struct ibm_ocp_mal *mal, int channel, - unsigned long size); +int mal_get_regs_len(struct ibm_ocp_mal *mal); +void *mal_dump_regs(struct ibm_ocp_mal *mal, void *buf); -#endif /* _IBM_EMAC_MAL_H */ +#endif /* __IBM_EMAC_MAL_H_ */ diff -puN drivers/net/ibm_emac/ibm_emac_phy.c~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_phy.c --- devel/drivers/net/ibm_emac/ibm_emac_phy.c~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_phy.c 2005-10-28 17:44:03.000000000 -0700 @@ -1,96 +1,80 @@ /* - * ibm_ocp_phy.c + * 
drivers/net/ibm_emac/ibm_emac_phy.c * - * PHY drivers for the ibm ocp ethernet driver. Borrowed - * from sungem_phy.c, though I only kept the generic MII + * Driver for PowerPC 4xx on-chip ethernet controller, PHY support. + * Borrowed from sungem_phy.c, though I only kept the generic MII * driver for now. * * This file should be shared with other drivers or eventually * merged as the "low level" part of miilib * * (c) 2003, Benjamin Herrenscmidt (benh at kernel.crashing.org) + * (c) 2004-2005, Eugene Surovegin * */ - #include - #include - #include -#include #include #include -#include #include #include #include +#include + #include "ibm_emac_phy.h" -static int reset_one_mii_phy(struct mii_phy *phy, int phy_id) +static inline int phy_read(struct mii_phy *phy, int reg) +{ + return phy->mdio_read(phy->dev, phy->address, reg); +} + +static inline void phy_write(struct mii_phy *phy, int reg, int val) { - u16 val; + phy->mdio_write(phy->dev, phy->address, reg, val); +} + +int mii_reset_phy(struct mii_phy *phy) +{ + int val; int limit = 10000; - val = __phy_read(phy, phy_id, MII_BMCR); + val = phy_read(phy, MII_BMCR); val &= ~BMCR_ISOLATE; val |= BMCR_RESET; - __phy_write(phy, phy_id, MII_BMCR, val); + phy_write(phy, MII_BMCR, val); - udelay(100); + udelay(300); while (limit--) { - val = __phy_read(phy, phy_id, MII_BMCR); - if ((val & BMCR_RESET) == 0) + val = phy_read(phy, MII_BMCR); + if (val >= 0 && (val & BMCR_RESET) == 0) break; udelay(10); } if ((val & BMCR_ISOLATE) && limit > 0) - __phy_write(phy, phy_id, MII_BMCR, val & ~BMCR_ISOLATE); - - return (limit <= 0); -} - -static int cis8201_init(struct mii_phy *phy) -{ - u16 epcr; - - epcr = phy_read(phy, MII_CIS8201_EPCR); - epcr &= ~EPCR_MODE_MASK; - - switch (phy->mode) { - case PHY_MODE_TBI: - epcr |= EPCR_TBI_MODE; - break; - case PHY_MODE_RTBI: - epcr |= EPCR_RTBI_MODE; - break; - case PHY_MODE_GMII: - epcr |= EPCR_GMII_MODE; - break; - case PHY_MODE_RGMII: - default: - epcr |= EPCR_RGMII_MODE; - } + 
phy_write(phy, MII_BMCR, val & ~BMCR_ISOLATE); - phy_write(phy, MII_CIS8201_EPCR, epcr); - - return 0; + return limit <= 0; } static int genmii_setup_aneg(struct mii_phy *phy, u32 advertise) { - u16 ctl, adv; + int ctl, adv; - phy->autoneg = 1; + phy->autoneg = AUTONEG_ENABLE; phy->speed = SPEED_10; phy->duplex = DUPLEX_HALF; - phy->pause = 0; + phy->pause = phy->asym_pause = 0; phy->advertising = advertise; /* Setup standard advertise */ adv = phy_read(phy, MII_ADVERTISE); - adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4); + if (adv < 0) + return adv; + adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | + ADVERTISE_PAUSE_ASYM); if (advertise & ADVERTISED_10baseT_Half) adv |= ADVERTISE_10HALF; if (advertise & ADVERTISED_10baseT_Full) @@ -99,8 +83,25 @@ static int genmii_setup_aneg(struct mii_ adv |= ADVERTISE_100HALF; if (advertise & ADVERTISED_100baseT_Full) adv |= ADVERTISE_100FULL; + if (advertise & ADVERTISED_Pause) + adv |= ADVERTISE_PAUSE_CAP; + if (advertise & ADVERTISED_Asym_Pause) + adv |= ADVERTISE_PAUSE_ASYM; phy_write(phy, MII_ADVERTISE, adv); + if (phy->features & + (SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half)) { + adv = phy_read(phy, MII_CTRL1000); + if (adv < 0) + return adv; + adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); + if (advertise & ADVERTISED_1000baseT_Full) + adv |= ADVERTISE_1000FULL; + if (advertise & ADVERTISED_1000baseT_Half) + adv |= ADVERTISE_1000HALF; + phy_write(phy, MII_CTRL1000, adv); + } + /* Start/Restart aneg */ ctl = phy_read(phy, MII_BMCR); ctl |= (BMCR_ANENABLE | BMCR_ANRESTART); @@ -111,14 +112,16 @@ static int genmii_setup_aneg(struct mii_ static int genmii_setup_forced(struct mii_phy *phy, int speed, int fd) { - u16 ctl; + int ctl; - phy->autoneg = 0; + phy->autoneg = AUTONEG_DISABLE; phy->speed = speed; phy->duplex = fd; - phy->pause = 0; + phy->pause = phy->asym_pause = 0; ctl = phy_read(phy, MII_BMCR); + if (ctl < 0) + return ctl; ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 | BMCR_ANENABLE); /* 
First reset the PHY */ @@ -132,6 +135,8 @@ static int genmii_setup_forced(struct mi ctl |= BMCR_SPEED100; break; case SPEED_1000: + ctl |= BMCR_SPEED1000; + break; default: return -EINVAL; } @@ -144,112 +149,143 @@ static int genmii_setup_forced(struct mi static int genmii_poll_link(struct mii_phy *phy) { - u16 status; + int status; - (void)phy_read(phy, MII_BMSR); + /* Clear latched value with dummy read */ + phy_read(phy, MII_BMSR); status = phy_read(phy, MII_BMSR); - if ((status & BMSR_LSTATUS) == 0) + if (status < 0 || (status & BMSR_LSTATUS) == 0) return 0; - if (phy->autoneg && !(status & BMSR_ANEGCOMPLETE)) + if (phy->autoneg == AUTONEG_ENABLE && !(status & BMSR_ANEGCOMPLETE)) return 0; return 1; } -#define MII_CIS8201_ACSR 0x1c -#define ACSR_DUPLEX_STATUS 0x0020 -#define ACSR_SPEED_1000BASET 0x0010 -#define ACSR_SPEED_100BASET 0x0008 - -static int cis8201_read_link(struct mii_phy *phy) +static int genmii_read_link(struct mii_phy *phy) { - u16 acsr; + if (phy->autoneg == AUTONEG_ENABLE) { + int glpa = 0; + int lpa = phy_read(phy, MII_LPA) & phy_read(phy, MII_ADVERTISE); + if (lpa < 0) + return lpa; + + if (phy->features & + (SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half)) { + int adv = phy_read(phy, MII_CTRL1000); + glpa = phy_read(phy, MII_STAT1000); - if (phy->autoneg) { - acsr = phy_read(phy, MII_CIS8201_ACSR); + if (glpa < 0 || adv < 0) + return adv; - if (acsr & ACSR_DUPLEX_STATUS) + glpa &= adv << 2; + } + + phy->speed = SPEED_10; + phy->duplex = DUPLEX_HALF; + phy->pause = phy->asym_pause = 0; + + if (glpa & (LPA_1000FULL | LPA_1000HALF)) { + phy->speed = SPEED_1000; + if (glpa & LPA_1000FULL) + phy->duplex = DUPLEX_FULL; + } else if (lpa & (LPA_100FULL | LPA_100HALF)) { + phy->speed = SPEED_100; + if (lpa & LPA_100FULL) + phy->duplex = DUPLEX_FULL; + } else if (lpa & LPA_10FULL) + phy->duplex = DUPLEX_FULL; + + if (phy->duplex == DUPLEX_FULL) { + phy->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; + phy->asym_pause = lpa & LPA_PAUSE_ASYM ? 
1 : 0; + } + } else { + int bmcr = phy_read(phy, MII_BMCR); + if (bmcr < 0) + return bmcr; + + if (bmcr & BMCR_FULLDPLX) phy->duplex = DUPLEX_FULL; else phy->duplex = DUPLEX_HALF; - if (acsr & ACSR_SPEED_1000BASET) { + if (bmcr & BMCR_SPEED1000) phy->speed = SPEED_1000; - } else if (acsr & ACSR_SPEED_100BASET) + else if (bmcr & BMCR_SPEED100) phy->speed = SPEED_100; else phy->speed = SPEED_10; - phy->pause = 0; - } - /* On non-aneg, we assume what we put in BMCR is the speed, - * though magic-aneg shouldn't prevent this case from occurring - */ + phy->pause = phy->asym_pause = 0; + } return 0; } -static int genmii_read_link(struct mii_phy *phy) +/* Generic implementation for most 10/100/1000 PHYs */ +static struct mii_phy_ops generic_phy_ops = { + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def genmii_phy_def = { + .phy_id = 0x00000000, + .phy_id_mask = 0x00000000, + .name = "Generic MII", + .ops = &generic_phy_ops +}; + +/* CIS8201 */ +#define MII_CIS8201_EPCR 0x17 +#define EPCR_MODE_MASK 0x3000 +#define EPCR_GMII_MODE 0x0000 +#define EPCR_RGMII_MODE 0x1000 +#define EPCR_TBI_MODE 0x2000 +#define EPCR_RTBI_MODE 0x3000 + +static int cis8201_init(struct mii_phy *phy) { - u16 lpa; + int epcr; - if (phy->autoneg) { - lpa = phy_read(phy, MII_LPA) & phy_read(phy, MII_ADVERTISE); + epcr = phy_read(phy, MII_CIS8201_EPCR); + if (epcr < 0) + return epcr; - phy->speed = SPEED_10; - phy->duplex = DUPLEX_HALF; - phy->pause = 0; + epcr &= ~EPCR_MODE_MASK; - if (lpa & (LPA_100FULL | LPA_100HALF)) { - phy->speed = SPEED_100; - if (lpa & LPA_100FULL) - phy->duplex = DUPLEX_FULL; - } else if (lpa & LPA_10FULL) - phy->duplex = DUPLEX_FULL; + switch (phy->mode) { + case PHY_MODE_TBI: + epcr |= EPCR_TBI_MODE; + break; + case PHY_MODE_RTBI: + epcr |= EPCR_RTBI_MODE; + break; + case PHY_MODE_GMII: + epcr |= EPCR_GMII_MODE; + break; + case PHY_MODE_RGMII: + default: + 
epcr |= EPCR_RGMII_MODE; } - /* On non-aneg, we assume what we put in BMCR is the speed, - * though magic-aneg shouldn't prevent this case from occurring - */ + + phy_write(phy, MII_CIS8201_EPCR, epcr); return 0; } -#define MII_BASIC_FEATURES (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \ - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \ - SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII) -#define MII_GBIT_FEATURES (MII_BASIC_FEATURES | \ - SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full) - -/* CIS8201 phy ops */ static struct mii_phy_ops cis8201_phy_ops = { - init:cis8201_init, - setup_aneg:genmii_setup_aneg, - setup_forced:genmii_setup_forced, - poll_link:genmii_poll_link, - read_link:cis8201_read_link -}; - -/* Generic implementation for most 10/100 PHYs */ -static struct mii_phy_ops generic_phy_ops = { - setup_aneg:genmii_setup_aneg, - setup_forced:genmii_setup_forced, - poll_link:genmii_poll_link, - read_link:genmii_read_link + .init = cis8201_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link }; static struct mii_phy_def cis8201_phy_def = { - phy_id:0x000fc410, - phy_id_mask:0x000ffff0, - name:"CIS8201 Gigabit Ethernet", - features:MII_GBIT_FEATURES, - magic_aneg:0, - ops:&cis8201_phy_ops -}; - -static struct mii_phy_def genmii_phy_def = { - phy_id:0x00000000, - phy_id_mask:0x00000000, - name:"Generic MII", - features:MII_BASIC_FEATURES, - magic_aneg:0, - ops:&generic_phy_ops + .phy_id = 0x000fc410, + .phy_id_mask = 0x000ffff0, + .name = "CIS8201 Gigabit Ethernet", + .ops = &cis8201_phy_ops }; static struct mii_phy_def *mii_phy_table[] = { @@ -258,39 +294,60 @@ static struct mii_phy_def *mii_phy_table NULL }; -int mii_phy_probe(struct mii_phy *phy, int mii_id) +int mii_phy_probe(struct mii_phy *phy, int address) { - int rc; - u32 id; struct mii_phy_def *def; int i; + u32 id; - phy->autoneg = 0; + phy->autoneg = AUTONEG_DISABLE; phy->advertising = 0; - 
phy->mii_id = mii_id; - phy->speed = 0; - phy->duplex = 0; - phy->pause = 0; - - /* Take PHY out of isloate mode and reset it. */ - rc = reset_one_mii_phy(phy, mii_id); - if (rc) + phy->address = address; + phy->speed = SPEED_10; + phy->duplex = DUPLEX_HALF; + phy->pause = phy->asym_pause = 0; + + /* Take PHY out of isolate mode and reset it. */ + if (mii_reset_phy(phy)) return -ENODEV; /* Read ID and find matching entry */ - id = (phy_read(phy, MII_PHYSID1) << 16 | phy_read(phy, MII_PHYSID2)) - & 0xfffffff0; + id = (phy_read(phy, MII_PHYSID1) << 16) | phy_read(phy, MII_PHYSID2); for (i = 0; (def = mii_phy_table[i]) != NULL; i++) if ((id & def->phy_id_mask) == def->phy_id) break; /* Should never be NULL (we have a generic entry), but... */ - if (def == NULL) + if (!def) return -ENODEV; phy->def = def; + /* Determine PHY features if needed */ + phy->features = def->features; + if (!phy->features) { + u16 bmsr = phy_read(phy, MII_BMSR); + if (bmsr & BMSR_ANEGCAPABLE) + phy->features |= SUPPORTED_Autoneg; + if (bmsr & BMSR_10HALF) + phy->features |= SUPPORTED_10baseT_Half; + if (bmsr & BMSR_10FULL) + phy->features |= SUPPORTED_10baseT_Full; + if (bmsr & BMSR_100HALF) + phy->features |= SUPPORTED_100baseT_Half; + if (bmsr & BMSR_100FULL) + phy->features |= SUPPORTED_100baseT_Full; + if (bmsr & BMSR_ESTATEN) { + u16 esr = phy_read(phy, MII_ESTATUS); + if (esr & ESTATUS_1000_TFULL) + phy->features |= SUPPORTED_1000baseT_Full; + if (esr & ESTATUS_1000_THALF) + phy->features |= SUPPORTED_1000baseT_Half; + } + phy->features |= SUPPORTED_MII; + } + /* Setup default advertising */ - phy->advertising = def->features; + phy->advertising = phy->features; return 0; } diff -puN drivers/net/ibm_emac/ibm_emac_phy.h~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_phy.h --- devel/drivers/net/ibm_emac/ibm_emac_phy.h~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_phy.h 
2005-10-28 17:44:03.000000000 -0700 @@ -1,65 +1,25 @@ - /* - * ibm_emac_phy.h - * + * drivers/net/ibm_emac/ibm_emac_phy.h * - * Benjamin Herrenschmidt - * February 2003 - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. + * Driver for PowerPC 4xx on-chip ethernet controller, PHY support * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * Benjamin Herrenschmidt + * February 2003 * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. + * Minor additions by Eugene Surovegin , 2004 * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. * * This file basically duplicates sungem_phy.{c,h} with different PHYs * supported. 
I'm looking into merging that in a single mii layer more * flexible than mii.c */ -#ifndef _IBM_EMAC_PHY_H_ -#define _IBM_EMAC_PHY_H_ - -/* - * PHY mode settings - * Used for multi-mode capable PHYs - */ -#define PHY_MODE_NA 0 -#define PHY_MODE_MII 1 -#define PHY_MODE_RMII 2 -#define PHY_MODE_SMII 3 -#define PHY_MODE_RGMII 4 -#define PHY_MODE_TBI 5 -#define PHY_MODE_GMII 6 -#define PHY_MODE_RTBI 7 -#define PHY_MODE_SGMII 8 - -/* - * PHY specific registers/values - */ - -/* CIS8201 */ -#define MII_CIS8201_EPCR 0x17 -#define EPCR_MODE_MASK 0x3000 -#define EPCR_GMII_MODE 0x0000 -#define EPCR_RGMII_MODE 0x1000 -#define EPCR_TBI_MODE 0x2000 -#define EPCR_RTBI_MODE 0x3000 +#ifndef _IBM_OCP_PHY_H_ +#define _IBM_OCP_PHY_H_ struct mii_phy; @@ -77,7 +37,8 @@ struct mii_phy_ops { struct mii_phy_def { u32 phy_id; /* Concatenated ID1 << 16 | ID2 */ u32 phy_id_mask; /* Significant bits */ - u32 features; /* Ethtool SUPPORTED_* defines */ + u32 features; /* Ethtool SUPPORTED_* defines or + 0 for autodetect */ int magic_aneg; /* Autoneg does all speed test for us */ const char *name; const struct mii_phy_ops *ops; @@ -86,8 +47,11 @@ struct mii_phy_def { /* An instance of a PHY, partially borrowed from mii_if_info */ struct mii_phy { struct mii_phy_def *def; - int advertising; - int mii_id; + u32 advertising; /* Ethtool ADVERTISED_* defines */ + u32 features; /* Copied from mii_phy_def.features + or determined automaticaly */ + int address; /* PHY address */ + int mode; /* PHY mode */ /* 1: autoneg enabled, 0: disabled */ int autoneg; @@ -98,40 +62,19 @@ struct mii_phy { int speed; int duplex; int pause; - - /* PHY mode - if needed */ - int mode; + int asym_pause; /* Provided by host chip */ struct net_device *dev; - int (*mdio_read) (struct net_device * dev, int mii_id, int reg); - void (*mdio_write) (struct net_device * dev, int mii_id, int reg, + int (*mdio_read) (struct net_device * dev, int addr, int reg); + void (*mdio_write) (struct net_device * dev, int addr, int reg, int 
val); }; /* Pass in a struct mii_phy with dev, mdio_read and mdio_write * filled, the remaining fields will be filled on return */ -extern int mii_phy_probe(struct mii_phy *phy, int mii_id); - -static inline int __phy_read(struct mii_phy *phy, int id, int reg) -{ - return phy->mdio_read(phy->dev, id, reg); -} - -static inline void __phy_write(struct mii_phy *phy, int id, int reg, int val) -{ - phy->mdio_write(phy->dev, id, reg, val); -} - -static inline int phy_read(struct mii_phy *phy, int reg) -{ - return phy->mdio_read(phy->dev, phy->mii_id, reg); -} - -static inline void phy_write(struct mii_phy *phy, int reg, int val) -{ - phy->mdio_write(phy->dev, phy->mii_id, reg, val); -} +int mii_phy_probe(struct mii_phy *phy, int address); +int mii_reset_phy(struct mii_phy *phy); -#endif /* _IBM_EMAC_PHY_H_ */ +#endif /* _IBM_OCP_PHY_H_ */ diff -puN /dev/null drivers/net/ibm_emac/ibm_emac_rgmii.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_rgmii.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,201 @@ +/* + * drivers/net/ibm_emac/ibm_emac_rgmii.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, RGMII bridge support. + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin or + * + * Based on original work by + * Matt Porter + * Copyright 2004 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ +#include +#include +#include +#include + +#include "ibm_emac_core.h" +#include "ibm_emac_debug.h" + +/* RGMIIx_FER */ +#define RGMII_FER_MASK(idx) (0x7 << ((idx) * 4)) +#define RGMII_FER_RTBI(idx) (0x4 << ((idx) * 4)) +#define RGMII_FER_RGMII(idx) (0x5 << ((idx) * 4)) +#define RGMII_FER_TBI(idx) (0x6 << ((idx) * 4)) +#define RGMII_FER_GMII(idx) (0x7 << ((idx) * 4)) + +/* RGMIIx_SSR */ +#define RGMII_SSR_MASK(idx) (0x7 << ((idx) * 8)) +#define RGMII_SSR_100(idx) (0x2 << ((idx) * 8)) +#define RGMII_SSR_1000(idx) (0x4 << ((idx) * 8)) + +/* RGMII bridge supports only GMII/TBI and RGMII/RTBI PHYs */ +static inline int rgmii_valid_mode(int phy_mode) +{ + return phy_mode == PHY_MODE_GMII || + phy_mode == PHY_MODE_RGMII || + phy_mode == PHY_MODE_TBI || + phy_mode == PHY_MODE_RTBI; +} + +static inline const char *rgmii_mode_name(int mode) +{ + switch (mode) { + case PHY_MODE_RGMII: + return "RGMII"; + case PHY_MODE_TBI: + return "TBI"; + case PHY_MODE_GMII: + return "GMII"; + case PHY_MODE_RTBI: + return "RTBI"; + default: + BUG(); + } +} + +static inline u32 rgmii_mode_mask(int mode, int input) +{ + switch (mode) { + case PHY_MODE_RGMII: + return RGMII_FER_RGMII(input); + case PHY_MODE_TBI: + return RGMII_FER_TBI(input); + case PHY_MODE_GMII: + return RGMII_FER_GMII(input); + case PHY_MODE_RTBI: + return RGMII_FER_RTBI(input); + default: + BUG(); + } +} + +static int __init rgmii_init(struct ocp_device *ocpdev, int input, int mode) +{ + struct ibm_ocp_rgmii *dev = ocp_get_drvdata(ocpdev); + struct rgmii_regs *p; + + RGMII_DBG("%d: init(%d, %d)" NL, ocpdev->def->index, input, mode); + + if (!dev) { + dev = kzalloc(sizeof(struct ibm_ocp_rgmii), GFP_KERNEL); + if (!dev) { + printk(KERN_ERR + "rgmii%d: couldn't allocate device structure!\n", + ocpdev->def->index); + return -ENOMEM; + } + + p = (struct rgmii_regs *)ioremap(ocpdev->def->paddr, + sizeof(struct rgmii_regs)); + if (!p) { + printk(KERN_ERR + "rgmii%d: could not ioremap device registers!\n", + 
ocpdev->def->index); + kfree(dev); + return -ENOMEM; + } + + dev->base = p; + ocp_set_drvdata(ocpdev, dev); + + /* Disable all inputs by default */ + out_be32(&p->fer, 0); + } else + p = dev->base; + + /* Enable this input */ + out_be32(&p->fer, in_be32(&p->fer) | rgmii_mode_mask(mode, input)); + + printk(KERN_NOTICE "rgmii%d: input %d in %s mode\n", + ocpdev->def->index, input, rgmii_mode_name(mode)); + + ++dev->users; + return 0; +} + +int __init rgmii_attach(void *emac) +{ + struct ocp_enet_private *dev = emac; + struct ocp_func_emac_data *emacdata = dev->def->additions; + + /* Check if we need to attach to a RGMII */ + if (emacdata->rgmii_idx >= 0 && rgmii_valid_mode(emacdata->phy_mode)) { + dev->rgmii_input = emacdata->rgmii_mux; + dev->rgmii_dev = + ocp_find_device(OCP_VENDOR_IBM, OCP_FUNC_RGMII, + emacdata->rgmii_idx); + if (!dev->rgmii_dev) { + printk(KERN_ERR "emac%d: unknown rgmii%d!\n", + dev->def->index, emacdata->rgmii_idx); + return -ENODEV; + } + if (rgmii_init + (dev->rgmii_dev, dev->rgmii_input, emacdata->phy_mode)) { + printk(KERN_ERR + "emac%d: rgmii%d initialization failed!\n", + dev->def->index, emacdata->rgmii_idx); + return -ENODEV; + } + } + return 0; +} + +void rgmii_set_speed(struct ocp_device *ocpdev, int input, int speed) +{ + struct ibm_ocp_rgmii *dev = ocp_get_drvdata(ocpdev); + u32 ssr = in_be32(&dev->base->ssr) & ~RGMII_SSR_MASK(input); + + RGMII_DBG("%d: speed(%d, %d)" NL, ocpdev->def->index, input, speed); + + if (speed == SPEED_1000) + ssr |= RGMII_SSR_1000(input); + else if (speed == SPEED_100) + ssr |= RGMII_SSR_100(input); + + out_be32(&dev->base->ssr, ssr); +} + +void __exit __rgmii_fini(struct ocp_device *ocpdev, int input) +{ + struct ibm_ocp_rgmii *dev = ocp_get_drvdata(ocpdev); + BUG_ON(!dev || dev->users == 0); + + RGMII_DBG("%d: fini(%d)" NL, ocpdev->def->index, input); + + /* Disable this input */ + out_be32(&dev->base->fer, + in_be32(&dev->base->fer) & ~RGMII_FER_MASK(input)); + + if (!--dev->users) { + /* Free 
everything if this is the last user */ + ocp_set_drvdata(ocpdev, NULL); + iounmap((void *)dev->base); + kfree(dev); + } +} + +int __rgmii_get_regs_len(struct ocp_device *ocpdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct rgmii_regs); +} + +void *rgmii_dump_regs(struct ocp_device *ocpdev, void *buf) +{ + struct ibm_ocp_rgmii *dev = ocp_get_drvdata(ocpdev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct rgmii_regs *regs = (struct rgmii_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = ocpdev->def->index; + memcpy_fromio(regs, dev->base, sizeof(struct rgmii_regs)); + return regs + 1; +} diff -puN drivers/net/ibm_emac/ibm_emac_rgmii.h~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_rgmii.h --- devel/drivers/net/ibm_emac/ibm_emac_rgmii.h~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_rgmii.h 2005-10-28 17:44:03.000000000 -0700 @@ -1,5 +1,7 @@ /* - * Defines for the IBM RGMII bridge + * drivers/net/ibm_emac/ibm_emac_rgmii.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, RGMII bridge support. * * Based on ocp_zmii.h/ibm_emac_zmii.h * Armin Kuster akuster at mvista.com @@ -7,6 +9,9 @@ * Copyright 2004 MontaVista Software, Inc. * Matt Porter * + * Copyright (c) 2004, 2005 Zultys Technologies. 
+ * Eugene Surovegin or + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your @@ -19,47 +24,42 @@ #include /* RGMII bridge */ -typedef struct rgmii_regs { +struct rgmii_regs { u32 fer; /* Function enable register */ u32 ssr; /* Speed select register */ -} rgmii_t; - -#define RGMII_INPUTS 4 +}; /* RGMII device */ struct ibm_ocp_rgmii { struct rgmii_regs *base; - int mode[RGMII_INPUTS]; int users; /* number of EMACs using this RGMII bridge */ }; -/* Fuctional Enable Reg */ -#define RGMII_FER_MASK(x) (0x00000007 << (4*x)) -#define RGMII_RTBI 0x00000004 -#define RGMII_RGMII 0x00000005 -#define RGMII_TBI 0x00000006 -#define RGMII_GMII 0x00000007 - -/* Speed Selection reg */ - -#define RGMII_SP2_100 0x00000002 -#define RGMII_SP2_1000 0x00000004 -#define RGMII_SP3_100 0x00000200 -#define RGMII_SP3_1000 0x00000400 - -#define RGMII_MII2_SPDMASK 0x00000007 -#define RGMII_MII3_SPDMASK 0x00000700 - -#define RGMII_MII2_100MB RGMII_SP2_100 & ~RGMII_SP2_1000 -#define RGMII_MII2_1000MB RGMII_SP2_1000 & ~RGMII_SP2_100 -#define RGMII_MII2_10MB ~(RGMII_SP2_100 | RGMII_SP2_1000) -#define RGMII_MII3_100MB RGMII_SP3_100 & ~RGMII_SP3_1000 -#define RGMII_MII3_1000MB RGMII_SP3_1000 & ~RGMII_SP3_100 -#define RGMII_MII3_10MB ~(RGMII_SP3_100 | RGMII_SP3_1000) - -#define RTBI 0 -#define RGMII 1 -#define TBI 2 -#define GMII 3 +#ifdef CONFIG_IBM_EMAC_RGMII +int rgmii_attach(void *emac) __init; + +void __rgmii_fini(struct ocp_device *ocpdev, int input) __exit; +static inline void rgmii_fini(struct ocp_device *ocpdev, int input) +{ + if (ocpdev) + __rgmii_fini(ocpdev, input); +} + +void rgmii_set_speed(struct ocp_device *ocpdev, int input, int speed); + +int __rgmii_get_regs_len(struct ocp_device *ocpdev); +static inline int rgmii_get_regs_len(struct ocp_device *ocpdev) +{ + return ocpdev ? 
__rgmii_get_regs_len(ocpdev) : 0; +} + +void *rgmii_dump_regs(struct ocp_device *ocpdev, void *buf); +#else +# define rgmii_attach(x) 0 +# define rgmii_fini(x,y) ((void)0) +# define rgmii_set_speed(x,y,z) ((void)0) +# define rgmii_get_regs_len(x) 0 +# define rgmii_dump_regs(x,buf) (buf) +#endif /* !CONFIG_IBM_EMAC_RGMII */ #endif /* _IBM_EMAC_RGMII_H_ */ diff -puN /dev/null drivers/net/ibm_emac/ibm_emac_tah.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_tah.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,111 @@ +/* + * drivers/net/ibm_emac/ibm_emac_tah.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, TAH support. + * + * Copyright 2004 MontaVista Software, Inc. + * Matt Porter + * + * Copyright (c) 2005 Eugene Surovegin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ +#include +#include + +#include "ibm_emac_core.h" + +static int __init tah_init(struct ocp_device *ocpdev) +{ + struct tah_regs *p; + + if (ocp_get_drvdata(ocpdev)) { + printk(KERN_ERR "tah%d: already in use!\n", ocpdev->def->index); + return -EBUSY; + } + + /* Initialize TAH and enable IPv4 checksum verification, no TSO yet */ + p = (struct tah_regs *)ioremap(ocpdev->def->paddr, sizeof(*p)); + if (!p) { + printk(KERN_ERR "tah%d: could not ioremap device registers!\n", + ocpdev->def->index); + return -ENOMEM; + } + ocp_set_drvdata(ocpdev, p); + __tah_reset(ocpdev); + + return 0; +} + +int __init tah_attach(void *emac) +{ + struct ocp_enet_private *dev = emac; + struct ocp_func_emac_data *emacdata = dev->def->additions; + + /* Check if we need to attach to a TAH */ + if (emacdata->tah_idx >= 0) { + dev->tah_dev = ocp_find_device(OCP_ANY_ID, OCP_FUNC_TAH, + emacdata->tah_idx); + if (!dev->tah_dev) { + printk(KERN_ERR "emac%d: unknown tah%d!\n", + dev->def->index, emacdata->tah_idx); + return -ENODEV; + } + if (tah_init(dev->tah_dev)) { + printk(KERN_ERR + "emac%d: tah%d initialization failed!\n", + dev->def->index, emacdata->tah_idx); + return -ENODEV; + } + } + return 0; +} + +void __exit __tah_fini(struct ocp_device *ocpdev) +{ + struct tah_regs *p = ocp_get_drvdata(ocpdev); + BUG_ON(!p); + ocp_set_drvdata(ocpdev, NULL); + iounmap((void *)p); +} + +void __tah_reset(struct ocp_device *ocpdev) +{ + struct tah_regs *p = ocp_get_drvdata(ocpdev); + int n; + + /* Reset TAH */ + out_be32(&p->mr, TAH_MR_SR); + n = 100; + while ((in_be32(&p->mr) & TAH_MR_SR) && n) + --n; + + if (unlikely(!n)) + printk(KERN_ERR "tah%d: reset timeout\n", ocpdev->def->index); + + /* 10KB TAH TX FIFO accomodates the max MTU of 9000 */ + out_be32(&p->mr, + TAH_MR_CVR | TAH_MR_ST_768 | TAH_MR_TFS_10KB | TAH_MR_DTFP | + TAH_MR_DIG); +} + +int __tah_get_regs_len(struct ocp_device *ocpdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct tah_regs); +} + +void 
*tah_dump_regs(struct ocp_device *ocpdev, void *buf) +{ + struct tah_regs *dev = ocp_get_drvdata(ocpdev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct tah_regs *regs = (struct tah_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = ocpdev->def->index; + memcpy_fromio(regs, dev, sizeof(struct tah_regs)); + return regs + 1; +} diff -puN drivers/net/ibm_emac/ibm_emac_tah.h~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_tah.h --- devel/drivers/net/ibm_emac/ibm_emac_tah.h~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_tah.h 2005-10-28 17:44:03.000000000 -0700 @@ -1,9 +1,13 @@ /* - * Defines for the IBM TAH + * drivers/net/ibm_emac/ibm_emac_tah.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, TAH support. * * Copyright 2004 MontaVista Software, Inc. * Matt Porter * + * Copyright (c) 2005 Eugene Surovegin + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your @@ -13,36 +17,72 @@ #ifndef _IBM_EMAC_TAH_H #define _IBM_EMAC_TAH_H +#include +#include +#include + /* TAH */ -typedef struct tah_regs { - u32 tah_revid; +struct tah_regs { + u32 revid; u32 pad[3]; - u32 tah_mr; - u32 tah_ssr0; - u32 tah_ssr1; - u32 tah_ssr2; - u32 tah_ssr3; - u32 tah_ssr4; - u32 tah_ssr5; - u32 tah_tsr; -} tah_t; + u32 mr; + u32 ssr0; + u32 ssr1; + u32 ssr2; + u32 ssr3; + u32 ssr4; + u32 ssr5; + u32 tsr; +}; /* TAH engine */ -#define TAH_MR_CVR 0x80000000 -#define TAH_MR_SR 0x40000000 -#define TAH_MR_ST_256 0x01000000 -#define TAH_MR_ST_512 0x02000000 -#define TAH_MR_ST_768 0x03000000 -#define TAH_MR_ST_1024 0x04000000 -#define TAH_MR_ST_1280 0x05000000 -#define TAH_MR_ST_1536 0x06000000 -#define TAH_MR_TFS_16KB 0x00000000 -#define TAH_MR_TFS_2KB 0x00200000 -#define TAH_MR_TFS_4KB 0x00400000 
-#define TAH_MR_TFS_6KB 0x00600000 -#define TAH_MR_TFS_8KB 0x00800000 -#define TAH_MR_TFS_10KB 0x00a00000 -#define TAH_MR_DTFP 0x00100000 -#define TAH_MR_DIG 0x00080000 +#define TAH_MR_CVR 0x80000000 +#define TAH_MR_SR 0x40000000 +#define TAH_MR_ST_256 0x01000000 +#define TAH_MR_ST_512 0x02000000 +#define TAH_MR_ST_768 0x03000000 +#define TAH_MR_ST_1024 0x04000000 +#define TAH_MR_ST_1280 0x05000000 +#define TAH_MR_ST_1536 0x06000000 +#define TAH_MR_TFS_16KB 0x00000000 +#define TAH_MR_TFS_2KB 0x00200000 +#define TAH_MR_TFS_4KB 0x00400000 +#define TAH_MR_TFS_6KB 0x00600000 +#define TAH_MR_TFS_8KB 0x00800000 +#define TAH_MR_TFS_10KB 0x00a00000 +#define TAH_MR_DTFP 0x00100000 +#define TAH_MR_DIG 0x00080000 + +#ifdef CONFIG_IBM_EMAC_TAH +int tah_attach(void *emac) __init; + +void __tah_fini(struct ocp_device *ocpdev) __exit; +static inline void tah_fini(struct ocp_device *ocpdev) +{ + if (ocpdev) + __tah_fini(ocpdev); +} + +void __tah_reset(struct ocp_device *ocpdev); +static inline void tah_reset(struct ocp_device *ocpdev) +{ + if (ocpdev) + __tah_reset(ocpdev); +} + +int __tah_get_regs_len(struct ocp_device *ocpdev); +static inline int tah_get_regs_len(struct ocp_device *ocpdev) +{ + return ocpdev ? __tah_get_regs_len(ocpdev) : 0; +} + +void *tah_dump_regs(struct ocp_device *ocpdev, void *buf); +#else +# define tah_attach(x) 0 +# define tah_fini(x) ((void)0) +# define tah_reset(x) ((void)0) +# define tah_get_regs_len(x) 0 +# define tah_dump_regs(x,buf) (buf) +#endif /* !CONFIG_IBM_EMAC_TAH */ #endif /* _IBM_EMAC_TAH_H */ diff -puN /dev/null drivers/net/ibm_emac/ibm_emac_zmii.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_zmii.c 2005-10-28 17:44:03.000000000 -0700 @@ -0,0 +1,255 @@ +/* + * drivers/net/ibm_emac/ibm_emac_zmii.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, ZMII bridge support. + * + * Copyright (c) 2004, 2005 Zultys Technologies. 
+ * Eugene Surovegin or + * + * Based on original work by + * Armin Kuster + * Copyright 2001 MontaVista Softare Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include + +#include "ibm_emac_core.h" +#include "ibm_emac_debug.h" + +/* ZMIIx_FER */ +#define ZMII_FER_MDI(idx) (0x80000000 >> ((idx) * 4)) +#define ZMII_FER_MDI_ALL (ZMII_FER_MDI(0) | ZMII_FER_MDI(1) | \ + ZMII_FER_MDI(2) | ZMII_FER_MDI(3)) + +#define ZMII_FER_SMII(idx) (0x40000000 >> ((idx) * 4)) +#define ZMII_FER_RMII(idx) (0x20000000 >> ((idx) * 4)) +#define ZMII_FER_MII(idx) (0x10000000 >> ((idx) * 4)) + +/* ZMIIx_SSR */ +#define ZMII_SSR_SCI(idx) (0x40000000 >> ((idx) * 4)) +#define ZMII_SSR_FSS(idx) (0x20000000 >> ((idx) * 4)) +#define ZMII_SSR_SP(idx) (0x10000000 >> ((idx) * 4)) + +/* ZMII only supports MII, RMII and SMII + * we also support autodetection for backward compatibility + */ +static inline int zmii_valid_mode(int mode) +{ + return mode == PHY_MODE_MII || + mode == PHY_MODE_RMII || + mode == PHY_MODE_SMII || + mode == PHY_MODE_NA; +} + +static inline const char *zmii_mode_name(int mode) +{ + switch (mode) { + case PHY_MODE_MII: + return "MII"; + case PHY_MODE_RMII: + return "RMII"; + case PHY_MODE_SMII: + return "SMII"; + default: + BUG(); + } +} + +static inline u32 zmii_mode_mask(int mode, int input) +{ + switch (mode) { + case PHY_MODE_MII: + return ZMII_FER_MII(input); + case PHY_MODE_RMII: + return ZMII_FER_RMII(input); + case PHY_MODE_SMII: + return ZMII_FER_SMII(input); + default: + return 0; + } +} + +static int __init zmii_init(struct ocp_device *ocpdev, int input, int *mode) +{ + struct ibm_ocp_zmii *dev = ocp_get_drvdata(ocpdev); + struct zmii_regs *p; + + ZMII_DBG("%d: init(%d, %d)" NL, ocpdev->def->index, input, *mode); + 
+ if (!dev) { + dev = kzalloc(sizeof(struct ibm_ocp_zmii), GFP_KERNEL); + if (!dev) { + printk(KERN_ERR + "zmii%d: couldn't allocate device structure!\n", + ocpdev->def->index); + return -ENOMEM; + } + dev->mode = PHY_MODE_NA; + + p = (struct zmii_regs *)ioremap(ocpdev->def->paddr, + sizeof(struct zmii_regs)); + if (!p) { + printk(KERN_ERR + "zmii%d: could not ioremap device registers!\n", + ocpdev->def->index); + kfree(dev); + return -ENOMEM; + } + dev->base = p; + ocp_set_drvdata(ocpdev, dev); + + /* We may need FER value for autodetection later */ + dev->fer_save = in_be32(&p->fer); + + /* Disable all inputs by default */ + out_be32(&p->fer, 0); + } else + p = dev->base; + + if (!zmii_valid_mode(*mode)) { + /* Probably an EMAC connected to RGMII, + * but it still may need ZMII for MDIO + */ + goto out; + } + + /* Autodetect ZMII mode if not specified. + * This is only for backward compatibility with the old driver. + * Please, always specify PHY mode in your board port to avoid + * any surprises. + */ + if (dev->mode == PHY_MODE_NA) { + if (*mode == PHY_MODE_NA) { + u32 r = dev->fer_save; + + ZMII_DBG("%d: autodetecting mode, FER = 0x%08x" NL, + ocpdev->def->index, r); + + if (r & (ZMII_FER_MII(0) | ZMII_FER_MII(1))) + dev->mode = PHY_MODE_MII; + else if (r & (ZMII_FER_RMII(0) | ZMII_FER_RMII(1))) + dev->mode = PHY_MODE_RMII; + else + dev->mode = PHY_MODE_SMII; + } else + dev->mode = *mode; + + printk(KERN_NOTICE "zmii%d: bridge in %s mode\n", + ocpdev->def->index, zmii_mode_name(dev->mode)); + } else { + /* All inputs must use the same mode */ + if (*mode != PHY_MODE_NA && *mode != dev->mode) { + printk(KERN_ERR + "zmii%d: invalid mode %d specified for input %d\n", + ocpdev->def->index, *mode, input); + return -EINVAL; + } + } + + /* Report back correct PHY mode, + * it may be used during PHY initialization. 
+ */ + *mode = dev->mode; + + /* Enable this input */ + out_be32(&p->fer, in_be32(&p->fer) | zmii_mode_mask(dev->mode, input)); + out: + ++dev->users; + return 0; +} + +int __init zmii_attach(void *emac) +{ + struct ocp_enet_private *dev = emac; + struct ocp_func_emac_data *emacdata = dev->def->additions; + + if (emacdata->zmii_idx >= 0) { + dev->zmii_input = emacdata->zmii_mux; + dev->zmii_dev = + ocp_find_device(OCP_VENDOR_IBM, OCP_FUNC_ZMII, + emacdata->zmii_idx); + if (!dev->zmii_dev) { + printk(KERN_ERR "emac%d: unknown zmii%d!\n", + dev->def->index, emacdata->zmii_idx); + return -ENODEV; + } + if (zmii_init + (dev->zmii_dev, dev->zmii_input, &emacdata->phy_mode)) { + printk(KERN_ERR + "emac%d: zmii%d initialization failed!\n", + dev->def->index, emacdata->zmii_idx); + return -ENODEV; + } + } + return 0; +} + +void __zmii_enable_mdio(struct ocp_device *ocpdev, int input) +{ + struct ibm_ocp_zmii *dev = ocp_get_drvdata(ocpdev); + u32 fer = in_be32(&dev->base->fer) & ~ZMII_FER_MDI_ALL; + + ZMII_DBG2("%d: mdio(%d)" NL, ocpdev->def->index, input); + + out_be32(&dev->base->fer, fer | ZMII_FER_MDI(input)); +} + +void __zmii_set_speed(struct ocp_device *ocpdev, int input, int speed) +{ + struct ibm_ocp_zmii *dev = ocp_get_drvdata(ocpdev); + u32 ssr = in_be32(&dev->base->ssr); + + ZMII_DBG("%d: speed(%d, %d)" NL, ocpdev->def->index, input, speed); + + if (speed == SPEED_100) + ssr |= ZMII_SSR_SP(input); + else + ssr &= ~ZMII_SSR_SP(input); + + out_be32(&dev->base->ssr, ssr); +} + +void __exit __zmii_fini(struct ocp_device *ocpdev, int input) +{ + struct ibm_ocp_zmii *dev = ocp_get_drvdata(ocpdev); + BUG_ON(!dev || dev->users == 0); + + ZMII_DBG("%d: fini(%d)" NL, ocpdev->def->index, input); + + /* Disable this input */ + out_be32(&dev->base->fer, + in_be32(&dev->base->fer) & ~zmii_mode_mask(dev->mode, input)); + + if (!--dev->users) { + /* Free everything if this is the last user */ + ocp_set_drvdata(ocpdev, NULL); + iounmap((void *)dev->base); + kfree(dev); + } +} + 
+int __zmii_get_regs_len(struct ocp_device *ocpdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct zmii_regs); +} + +void *zmii_dump_regs(struct ocp_device *ocpdev, void *buf) +{ + struct ibm_ocp_zmii *dev = ocp_get_drvdata(ocpdev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct zmii_regs *regs = (struct zmii_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = ocpdev->def->index; + memcpy_fromio(regs, dev->base, sizeof(struct zmii_regs)); + return regs + 1; +} diff -puN drivers/net/ibm_emac/ibm_emac_zmii.h~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/ibm_emac_zmii.h --- devel/drivers/net/ibm_emac/ibm_emac_zmii.h~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/ibm_emac_zmii.h 2005-10-28 17:44:03.000000000 -0700 @@ -1,23 +1,27 @@ /* - * ocp_zmii.h + * drivers/net/ibm_emac/ibm_emac_zmii.h * - * Defines for the IBM ZMII bridge + * Driver for PowerPC 4xx on-chip ethernet controller, ZMII bridge support. * - * Armin Kuster akuster at mvista.com - * Dec, 2001 + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin or * - * Copyright 2001 MontaVista Softare Inc. + * Based on original work by + * Armin Kuster + * Copyright 2001 MontaVista Softare Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. 
+ * */ - #ifndef _IBM_EMAC_ZMII_H_ #define _IBM_EMAC_ZMII_H_ #include +#include +#include /* ZMII bridge registers */ struct zmii_regs { @@ -26,68 +30,54 @@ struct zmii_regs { u32 smiirs; /* SMII status reg */ }; -#define ZMII_INPUTS 4 - /* ZMII device */ struct ibm_ocp_zmii { struct zmii_regs *base; - int mode[ZMII_INPUTS]; + int mode; /* subset of PHY_MODE_XXXX */ int users; /* number of EMACs using this ZMII bridge */ + u32 fer_save; /* FER value left by firmware */ }; -/* Fuctional Enable Reg */ - -#define ZMII_FER_MASK(x) (0xf0000000 >> (4*x)) +#ifdef CONFIG_IBM_EMAC_ZMII +int zmii_attach(void *emac) __init; -#define ZMII_MDI0 0x80000000 -#define ZMII_SMII0 0x40000000 -#define ZMII_RMII0 0x20000000 -#define ZMII_MII0 0x10000000 -#define ZMII_MDI1 0x08000000 -#define ZMII_SMII1 0x04000000 -#define ZMII_RMII1 0x02000000 -#define ZMII_MII1 0x01000000 -#define ZMII_MDI2 0x00800000 -#define ZMII_SMII2 0x00400000 -#define ZMII_RMII2 0x00200000 -#define ZMII_MII2 0x00100000 -#define ZMII_MDI3 0x00080000 -#define ZMII_SMII3 0x00040000 -#define ZMII_RMII3 0x00020000 -#define ZMII_MII3 0x00010000 - -/* Speed Selection reg */ - -#define ZMII_SCI0 0x40000000 -#define ZMII_FSS0 0x20000000 -#define ZMII_SP0 0x10000000 -#define ZMII_SCI1 0x04000000 -#define ZMII_FSS1 0x02000000 -#define ZMII_SP1 0x01000000 -#define ZMII_SCI2 0x00400000 -#define ZMII_FSS2 0x00200000 -#define ZMII_SP2 0x00100000 -#define ZMII_SCI3 0x00040000 -#define ZMII_FSS3 0x00020000 -#define ZMII_SP3 0x00010000 - -#define ZMII_MII0_100MB ZMII_SP0 -#define ZMII_MII0_10MB ~ZMII_SP0 -#define ZMII_MII1_100MB ZMII_SP1 -#define ZMII_MII1_10MB ~ZMII_SP1 -#define ZMII_MII2_100MB ZMII_SP2 -#define ZMII_MII2_10MB ~ZMII_SP2 -#define ZMII_MII3_100MB ZMII_SP3 -#define ZMII_MII3_10MB ~ZMII_SP3 - -/* SMII Status reg */ - -#define ZMII_STS0 0xFF000000 /* EMAC0 smii status mask */ -#define ZMII_STS1 0x00FF0000 /* EMAC1 smii status mask */ - -#define SMII 0 -#define RMII 1 -#define MII 2 -#define MDI 3 +void 
__zmii_fini(struct ocp_device *ocpdev, int input) __exit; +static inline void zmii_fini(struct ocp_device *ocpdev, int input) +{ + if (ocpdev) + __zmii_fini(ocpdev, input); +} + +void __zmii_enable_mdio(struct ocp_device *ocpdev, int input); +static inline void zmii_enable_mdio(struct ocp_device *ocpdev, int input) +{ + if (ocpdev) + __zmii_enable_mdio(ocpdev, input); +} + +void __zmii_set_speed(struct ocp_device *ocpdev, int input, int speed); +static inline void zmii_set_speed(struct ocp_device *ocpdev, int input, + int speed) +{ + if (ocpdev) + __zmii_set_speed(ocpdev, input, speed); +} + +int __zmii_get_regs_len(struct ocp_device *ocpdev); +static inline int zmii_get_regs_len(struct ocp_device *ocpdev) +{ + return ocpdev ? __zmii_get_regs_len(ocpdev) : 0; +} + +void *zmii_dump_regs(struct ocp_device *ocpdev, void *buf); + +#else +# define zmii_attach(x) 0 +# define zmii_fini(x,y) ((void)0) +# define zmii_enable_mdio(x,y) ((void)0) +# define zmii_set_speed(x,y,z) ((void)0) +# define zmii_get_regs_len(x) 0 +# define zmii_dump_regs(x,buf) (buf) +#endif /* !CONFIG_IBM_EMAC_ZMII */ #endif /* _IBM_EMAC_ZMII_H_ */ diff -puN drivers/net/ibm_emac/Makefile~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/ibm_emac/Makefile --- devel/drivers/net/ibm_emac/Makefile~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/ibm_emac/Makefile 2005-10-28 17:44:03.000000000 -0700 @@ -1,12 +1,11 @@ # -# Makefile for the IBM PPC4xx EMAC controllers +# Makefile for the PowerPC 4xx on-chip ethernet driver # obj-$(CONFIG_IBM_EMAC) += ibm_emac.o ibm_emac-objs := ibm_emac_mal.o ibm_emac_core.o ibm_emac_phy.o - -# Only need this if you want to see additional debug messages -ifeq ($(CONFIG_IBM_EMAC_ERRMSG), y) -ibm_emac-objs += ibm_emac_debug.o -endif +ibm_emac-$(CONFIG_IBM_EMAC_ZMII) += ibm_emac_zmii.o +ibm_emac-$(CONFIG_IBM_EMAC_RGMII) += ibm_emac_rgmii.o +ibm_emac-$(CONFIG_IBM_EMAC_TAH) += ibm_emac_tah.o 
+ibm_emac-$(CONFIG_IBM_EMAC_DEBUG) += ibm_emac_debug.o diff -puN drivers/net/Kconfig~new-powerpc-4xx-on-chip-ethernet-controller-driver drivers/net/Kconfig --- devel/drivers/net/Kconfig~new-powerpc-4xx-on-chip-ethernet-controller-driver 2005-10-28 17:44:03.000000000 -0700 +++ devel-akpm/drivers/net/Kconfig 2005-10-28 17:44:03.000000000 -0700 @@ -1163,38 +1163,74 @@ config IBMVETH be called ibmveth. config IBM_EMAC - bool "IBM PPC4xx EMAC driver support" + tristate "PowerPC 4xx on-chip Ethernet support" depends on 4xx - select CRC32 - ---help--- - This driver supports the IBM PPC4xx EMAC family of on-chip - Ethernet controllers. - -config IBM_EMAC_ERRMSG - bool "Verbose error messages" - depends on IBM_EMAC && BROKEN + help + This driver supports the PowerPC 4xx EMAC family of on-chip + Ethernet controllers. config IBM_EMAC_RXB int "Number of receive buffers" depends on IBM_EMAC - default "128" if IBM_EMAC4 - default "64" + default "128" config IBM_EMAC_TXB int "Number of transmit buffers" depends on IBM_EMAC - default "128" if IBM_EMAC4 - default "8" + default "64" + +config IBM_EMAC_POLL_WEIGHT + int "MAL NAPI polling weight" + depends on IBM_EMAC + default "32" -config IBM_EMAC_FGAP - int "Frame gap" +config IBM_EMAC_RX_COPY_THRESHOLD + int "RX skb copy threshold (bytes)" depends on IBM_EMAC - default "8" + default "256" -config IBM_EMAC_SKBRES - int "Skb reserve amount" +config IBM_EMAC_RX_SKB_HEADROOM + int "Additional RX skb headroom (bytes)" depends on IBM_EMAC default "0" + help + Additional receive skb headroom. Note, that driver + will always reserve at least 2 bytes to make IP header + aligned, so usualy there is no need to add any additional + headroom. + + If unsure, set to 0. 
+ +config IBM_EMAC_PHY_RX_CLK_FIX + bool "PHY Rx clock workaround" + depends on IBM_EMAC && (405EP || 440GX || 440EP) + help + Enable this if EMAC attached to a PHY which doesn't generate + RX clock if there is no link, if this is the case, you will + see "TX disable timeout" or "RX disable timeout" in the system + log. + + If unsure, say N. + +config IBM_EMAC_DEBUG + bool "Debugging" + depends on IBM_EMAC + default n + +config IBM_EMAC_ZMII + bool + depends on IBM_EMAC && (NP405H || NP405L || 44x) + default y + +config IBM_EMAC_RGMII + bool + depends on IBM_EMAC && 440GX + default y + +config IBM_EMAC_TAH + bool + depends on IBM_EMAC && 440GX + default y config NET_PCI bool "EISA, VLB, PCI and on board controllers" _ From ebs at ebshome.net Sat Oct 29 11:24:13 2005 From: ebs at ebshome.net (Eugene Surovegin) Date: Fri, 28 Oct 2005 18:24:13 -0700 Subject: [patch 13/43] Add MAINTAINER entry for the new PowerPC 4xx on-chip ethernet controller driver In-Reply-To: <200510290046.j9T0ks7c030026@shell0.pdx.osdl.net> References: <200510290046.j9T0ks7c030026@shell0.pdx.osdl.net> Message-ID: <20051029012413.GA1371@gate.ebshome.net> On Fri, Oct 28, 2005 at 05:46:24PM -0700, Andrew Morton wrote: > > From: Eugene Surovegin > > Add MAINTAINER entry for the new PPC4xx EMAC driver Paul, drop this and the previous patch. They will go in through netdev tree and aren't affected by the ppc/ppc64 merge. -- Eugene From paulus at samba.org Sat Oct 29 15:45:58 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 29 Oct 2005 15:45:58 +1000 Subject: Patches for 2.6.15 In-Reply-To: <20051028174029.24d4dbb9.akpm@osdl.org> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <20051028103041.B15268@cox.net> <20051028130246.459f1e9a.akpm@osdl.org> <17250.49231.248045.41302@cargo.ozlabs.ibm.com> <20051028174029.24d4dbb9.akpm@osdl.org> Message-ID: <17251.3222.175214.996295@cargo.ozlabs.ibm.com> Andrew Morton writes: > I'll mail them all over. Got them, thanks. 
I'm going to drop 06, 12, 13, 18, 19, 20, 21, and 37 for now. The rest I have committed to the powerpc-merge tree. The reasons for dropping them are: 06 - need to discuss the specifics of the patch with Marcelo 12, 13 - going through netdev tree 18 - it seems there is still ongoing discussion about whether this is the best approach 19, 20, 21 - BenH's SMU stuff, which he told me he would rebase on top of the merge tree 37 - I want the patch done a little differently Regards, Paul. From paulus at samba.org Sat Oct 29 15:41:59 2005 From: paulus at samba.org (Paul Mackerras) Date: Sat, 29 Oct 2005 15:41:59 +1000 Subject: [patch 38/43] Various powerpc 32bit ppc64 build fixes In-Reply-To: <200510290047.j9T0lNNJ030117@shell0.pdx.osdl.net> References: <200510290047.j9T0lNNJ030117@shell0.pdx.osdl.net> Message-ID: <17251.2983.358202.164993@cargo.ozlabs.ibm.com> Sven Luther writes: > +ifeq ($(CONFIG_6xx),y) > obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ > pmac_feature.o pmac_pci.o pmac_sleep.o \ > pmac_low_i2c.o pmac_cache.o > +else > +obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ > + pmac_feature.o pmac_pci.o pmac_sleep.o \ > + pmac_low_i2c.o > +endif This is a bit gross. How about: pmac-$(CONFIG_6xx) := pmac_cache.o obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ pmac_feature.o pmac_pci.o pmac_sleep.o \ pmac_low_i2c.o $(pmac-y) Care to send a revised patch? Paul. 
From akpm at osdl.org Sat Oct 29 15:59:17 2005 From: akpm at osdl.org (Andrew Morton) Date: Fri, 28 Oct 2005 22:59:17 -0700 Subject: Patches for 2.6.15 In-Reply-To: <17251.3222.175214.996295@cargo.ozlabs.ibm.com> References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> <20051028103041.B15268@cox.net> <20051028130246.459f1e9a.akpm@osdl.org> <17250.49231.248045.41302@cargo.ozlabs.ibm.com> <20051028174029.24d4dbb9.akpm@osdl.org> <17251.3222.175214.996295@cargo.ozlabs.ibm.com> Message-ID: <20051028225917.2873a74e.akpm@osdl.org> Paul Mackerras wrote: > > Andrew Morton writes: > > > I'll mail them all over. > > Got them, thanks. I'm going to drop 06, 12, 13, 18, 19, 20, 21, and > 37 for now. The rest I have committed to the powerpc-merge tree. > > The reasons for dropping them are: > > 06 - need to discuss the specifics of the patch with Marcelo > 12, 13 - going through netdev tree > 18 - it seems there is still ongoing discussion about whether this is > the best approach > 19, 20, 21 - BenH's SMU stuff, which he told me he would rebase on > top of the merge tree > 37 - I want the patch done a little differently OK, well I'll probably drop all those patches as well - hopefully all the originators know what's going on and nothing will fall through a crack. 
From sven.luther at wanadoo.fr Sat Oct 29 16:27:24 2005 From: sven.luther at wanadoo.fr (Sven Luther) Date: Sat, 29 Oct 2005 08:27:24 +0200 Subject: [patch 38/43] Various powerpc 32bit ppc64 build fixes In-Reply-To: <17251.2983.358202.164993@cargo.ozlabs.ibm.com> References: <200510290047.j9T0lNNJ030117@shell0.pdx.osdl.net> <17251.2983.358202.164993@cargo.ozlabs.ibm.com> Message-ID: <20051029062724.GA1893@localhost.localdomain> On Sat, Oct 29, 2005 at 03:41:59PM +1000, Paul Mackerras wrote: > Sven Luther writes: > > > +ifeq ($(CONFIG_6xx),y) > > obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ > > pmac_feature.o pmac_pci.o pmac_sleep.o \ > > pmac_low_i2c.o pmac_cache.o > > +else > > +obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ > > + pmac_feature.o pmac_pci.o pmac_sleep.o \ > > + pmac_low_i2c.o > > +endif > > This is a bit gross. How about: Well, I sent to linuxppc-dev for comments too :) > pmac-$(CONFIG_6xx) := pmac_cache.o > obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ > pmac_feature.o pmac_pci.o pmac_sleep.o \ > pmac_low_i2c.o $(pmac-y) > > Care to send a revised patch? Sure, I will do so early next week. Friendly, Sven Luther From sfr at canb.auug.org.au Sat Oct 29 18:07:55 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Sat, 29 Oct 2005 18:07:55 +1000 Subject: Patches for 2.6.15 In-Reply-To: References: <17250.8725.358204.62510@cargo.ozlabs.ibm.com> Message-ID: <20051029180755.22de749b.sfr@canb.auug.org.au> On Fri, 28 Oct 2005 13:49:26 -0500 Kumar Gala wrote: > > Can you merge this in: > > http://patchwork.ozlabs.org/linuxppc/patch?id=2931 I can't find any use of mpc85xx_show_cpuinfo. Is there something subtle here? -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ -------------- next part -------------- A non-text attachment was scrubbed...
Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051029/3dc4663e/attachment.pgp From galak at gate.crashing.org Sat Oct 29 22:55:32 2005 From: galak at gate.crashing.org (galak at gate.crashing.org) Date: Sat, 29 Oct 2005 07:55:32 -0500 (CDT) Subject: Patches for 2.6.15 In-Reply-To: <20051029180755.22de749b.sfr@canb.auug.org.au> Message-ID: On Sat, 29 Oct 2005, Stephen Rothwell wrote: > On Fri, 28 Oct 2005 13:49:26 -0500 Kumar Gala wrote: > > > > Can you merge this in: > > > > http://patchwork.ozlabs.org/linuxppc/patch?id=2931 > > I can't find any use of mpc85xx_show_cpuinfo. Is there something subtle here? No, you are correct. This was a bogus declaration before. I will kill it and resend the patch. - kumar From olh at suse.de Mon Oct 31 08:39:00 2005 From: olh at suse.de (Olaf Hering) Date: Sun, 30 Oct 2005 22:39:00 +0100 Subject: [PATCH] ppc64: add MODALIAS= for vio bus Message-ID: <20051030213900.GA22510@suse.de> A non-broken udev would also autoload the drivers for devices on the pseries vio bus, like ibmveth, ibmvscsic and hvcs.
This is similar to pci, usb and ieee1394: /lib/modules/`uname -r`/modules.alias alias vio:TvscsiSIBM,v-scsi* ibmvscsic alias vio:TnetworkSIBM,l-lan* ibmveth alias vio:Tserial-serverShvterm2* hvcs /events/debug.00004.pci.add.1394:MODALIAS='pci:v00001014d00000188sv00000000sd00000000bc06sc04i0f' /events/debug.00005.pci.add.1509:MODALIAS='pci:v00008086d00001229sv00001014sd000001FFbc02sc00i00' /events/debug.00026.vio.add.1519:MODALIAS='vio:TserialShvterm1' /events/debug.00027.vio.add.1446:MODALIAS='vio:TvscsiSIBM,v-scsi' /events/debug.00028.vio.add.1451:MODALIAS='vio:TnetworkSIBM,l-lan' modprobe -v vio:TnetworkSIBM,l-lan insmod /lib/modules/2.6.14-20051030_vio-ppc64/kernel/drivers/net/ibmveth.ko Signed-off-by: Olaf Hering arch/ppc64/kernel/vio.c | 27 +++++++++++++++++++++++++++ 1 files changed, 27 insertions(+) Index: linux-2.6.14-olh/arch/ppc64/kernel/vio.c =================================================================== --- linux-2.6.14-olh.orig/arch/ppc64/kernel/vio.c +++ linux-2.6.14-olh/arch/ppc64/kernel/vio.c @@ -21,6 +21,7 @@ #include #include #include +#include static const struct vio_device_id *vio_match_device( const struct vio_device_id *, const struct vio_dev *); @@ -255,7 +256,33 @@ static int vio_bus_match(struct device * return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); } +static int pseries_vio_hotplug (struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + char *cp; + int length; + + if (!num_envp) + return -ENOMEM; + + if (!vio_dev->dev.platform_data) + return -ENODEV; + cp = (char *)get_property(vio_dev->dev.platform_data, "compatible", &length); + if (!cp) + return -ENODEV; + + envp[0] = buffer; + length = scnprintf (buffer, buffer_size, "MODALIAS=vio:T%sS%s", + vio_dev->type, cp); + if (buffer_size - length <= 0) + return -ENOMEM; + envp[1] = NULL; + return 0; +} + struct bus_type vio_bus_type = { .name = "vio", + .hotplug = pseries_vio_hotplug, 
.match = vio_bus_match, }; -- short story of a lazy sysadmin: alias appserv=wotan From sfr at canb.auug.org.au Mon Oct 31 13:48:14 2005 From: sfr at canb.auug.org.au (Stephen Rothwell) Date: Mon, 31 Oct 2005 13:48:14 +1100 Subject: [PATCH] ppc64: add MODALIAS= for vio bus In-Reply-To: <20051030213900.GA22510@suse.de> References: <20051030213900.GA22510@suse.de> Message-ID: <20051031134814.42940751.sfr@canb.auug.org.au> Hi Olaf, This patch breaks legacy iSeries, i.e. it won't link (iSeries has no get_property()). It may be easier to redo this patch against Paulus' merge tree. A couple of trivial comments: On Sun, 30 Oct 2005 22:39:00 +0100 Olaf Hering wrote: > > +static int pseries_vio_hotplug (struct device *dev, char **envp, int num_envp, ^ No space here, please. > + length = scnprintf (buffer, buffer_size, "MODALIAS=vio:T%sS%s", ^ No space here either, please. -- Cheers, Stephen Rothwell sfr at canb.auug.org.au http://www.canb.auug.org.au/~sfr/ -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 189 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20051031/1fbfc94d/attachment.pgp From paulus at samba.org Mon Oct 31 15:23:21 2005 From: paulus at samba.org (Paul Mackerras) Date: Mon, 31 Oct 2005 15:23:21 +1100 Subject: please pull the powerpc-merge.git tree Message-ID: <17253.39993.502458.390760@cargo.ozlabs.ibm.com> Linus, Please do a pull from: git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc-merge.git to get a ppc/ppc64/powerpc update. We are now a considerable way through the ppc/ppc64 -> powerpc merge. It is now possible to build kernels for powermac, pSeries, iSeries and maple with ARCH=powerpc, and for powermac, both 32-bit and 64-bit build and run. I have not yet put in the patch that makes it default to ARCH=powerpc if uname -m gives ppc or ppc64, though.
Andrew sent me all of the patches he had queued that affected ppc and ppc64, and I have merged in most of them (the rest I am discussing with their authors). Diffstat and shortlog follow. Thanks, Paul. arch/powerpc/Kconfig | 900 +++++++ arch/powerpc/Kconfig.debug | 128 + arch/powerpc/Makefile | 222 ++ arch/powerpc/kernel/Makefile | 56 arch/powerpc/kernel/asm-offsets.c | 273 ++ arch/powerpc/kernel/binfmt_elf32.c | 3 arch/powerpc/kernel/btext.c | 853 +++++++ arch/powerpc/kernel/cputable.c | 797 +++--- arch/powerpc/kernel/entry_32.S | 1000 ++++++++ arch/powerpc/kernel/entry_64.S | 47 arch/powerpc/kernel/fpu.S | 144 + arch/powerpc/kernel/head_32.S | 1381 +++++++++++ arch/powerpc/kernel/head_44x.S | 782 ++++++ arch/powerpc/kernel/head_4xx.S | 1022 ++++++++ arch/powerpc/kernel/head_64.S | 1957 +++++++++++++++ arch/powerpc/kernel/head_8xx.S | 860 +++++++ arch/powerpc/kernel/head_fsl_booke.S | 1063 ++++++++ arch/powerpc/kernel/idle_6xx.S | 233 ++ arch/powerpc/kernel/idle_power4.S | 9 arch/powerpc/kernel/init_task.c | 0 arch/powerpc/kernel/lparmap.c | 0 arch/powerpc/kernel/misc_32.S | 1037 ++++++++ arch/powerpc/kernel/misc_64.S | 880 +++++++ arch/powerpc/kernel/of_device.c | 4 arch/powerpc/kernel/pmc.c | 30 arch/powerpc/kernel/ppc_ksyms.c | 273 ++ arch/powerpc/kernel/process.c | 486 +++- arch/powerpc/kernel/prom.c | 2170 +++++++++++++++++ arch/powerpc/kernel/prom_init.c | 2109 +++++++++++++++++ arch/powerpc/kernel/ptrace.c | 170 + arch/powerpc/kernel/ptrace32.c | 9 arch/powerpc/kernel/rtas.c | 254 +- arch/powerpc/kernel/semaphore.c | 135 + arch/powerpc/kernel/setup-common.c | 410 +++ arch/powerpc/kernel/setup_32.c | 372 +++ arch/powerpc/kernel/setup_64.c | 352 --- arch/powerpc/kernel/signal_32.c | 993 +++++--- arch/powerpc/kernel/sys_ppc32.c | 320 --- arch/powerpc/kernel/syscalls.c | 358 +++ arch/powerpc/kernel/systbl.S | 321 +++ arch/powerpc/kernel/time.c | 570 +++- arch/powerpc/kernel/traps.c | 1101 +++++++++ arch/powerpc/kernel/vecemu.c | 0 arch/powerpc/kernel/vector.S | 
71 - arch/powerpc/kernel/vio.c | 14 arch/powerpc/kernel/vmlinux.lds.S | 279 ++ arch/powerpc/lib/Makefile | 19 arch/powerpc/lib/checksum_32.S | 225 ++ arch/powerpc/lib/checksum_64.S | 0 arch/powerpc/lib/copy_32.S | 543 ++++ arch/powerpc/lib/copypage_64.S | 0 arch/powerpc/lib/copyuser_64.S | 0 arch/powerpc/lib/div64.S | 59 arch/powerpc/lib/e2a.c | 0 arch/powerpc/lib/locks.c | 5 arch/powerpc/lib/mem_64.S | 119 + arch/powerpc/lib/memcpy_64.S | 0 arch/powerpc/lib/rheap.c | 693 +++++ arch/powerpc/lib/sstep.c | 17 arch/powerpc/lib/strcase.c | 23 arch/powerpc/lib/string.S | 198 ++ arch/powerpc/lib/usercopy_64.c | 0 arch/powerpc/mm/44x_mmu.c | 120 + arch/powerpc/mm/4xx_mmu.c | 141 + arch/powerpc/mm/Makefile | 21 arch/powerpc/mm/fault.c | 104 + arch/powerpc/mm/fsl_booke_mmu.c | 237 ++ arch/powerpc/mm/hash_low_32.S | 618 +++++ arch/powerpc/mm/hash_low_64.S | 2 arch/powerpc/mm/hash_native_64.c | 13 arch/powerpc/mm/hash_utils_64.c | 61 arch/powerpc/mm/hugetlbpage.c | 0 arch/powerpc/mm/imalloc.c | 0 arch/powerpc/mm/init_32.c | 254 ++ arch/powerpc/mm/init_64.c | 223 ++ arch/powerpc/mm/lmb.c | 105 - arch/powerpc/mm/mem.c | 564 ++++ arch/powerpc/mm/mmap.c | 0 arch/powerpc/mm/mmu_context_32.c | 86 + arch/powerpc/mm/mmu_context_64.c | 63 arch/powerpc/mm/mmu_decl.h | 87 + arch/powerpc/mm/numa.c | 2 arch/powerpc/mm/pgtable_32.c | 467 ++++ arch/powerpc/mm/pgtable_64.c | 347 +++ arch/powerpc/mm/ppc_mmu_32.c | 285 ++ arch/powerpc/mm/slb.c | 0 arch/powerpc/mm/slb_low.S | 0 arch/powerpc/mm/stab.c | 0 arch/powerpc/mm/tlb_32.c | 183 + arch/powerpc/mm/tlb_64.c | 23 arch/powerpc/oprofile/Kconfig | 0 arch/powerpc/oprofile/Makefile | 4 arch/powerpc/oprofile/common.c | 84 + arch/powerpc/oprofile/op_model_fsl_booke.c | 7 arch/powerpc/oprofile/op_model_power4.c | 2 arch/powerpc/oprofile/op_model_rs64.c | 2 arch/powerpc/platforms/4xx/Kconfig | 280 ++ arch/powerpc/platforms/4xx/Makefile | 1 arch/powerpc/platforms/85xx/Kconfig | 86 + arch/powerpc/platforms/85xx/Makefile | 1 
arch/powerpc/platforms/8xx/Kconfig | 352 +++ arch/powerpc/platforms/Makefile | 13 arch/powerpc/platforms/apus/Kconfig | 130 + arch/powerpc/platforms/chrp/Makefile | 4 arch/powerpc/platforms/chrp/chrp.h | 12 arch/powerpc/platforms/chrp/nvram.c | 84 + arch/powerpc/platforms/chrp/pci.c | 310 ++ arch/powerpc/platforms/chrp/pegasos_eth.c | 213 ++ arch/powerpc/platforms/chrp/setup.c | 522 ++++ arch/powerpc/platforms/chrp/smp.c | 122 + arch/powerpc/platforms/chrp/time.c | 188 + arch/powerpc/platforms/embedded6xx/Kconfig | 318 ++ arch/powerpc/platforms/iseries/Kconfig | 31 arch/powerpc/platforms/iseries/Makefile | 9 arch/powerpc/platforms/iseries/call_hpt.h | 7 arch/powerpc/platforms/iseries/call_pci.h | 290 ++ arch/powerpc/platforms/iseries/call_sm.h | 7 arch/powerpc/platforms/iseries/htab.c | 47 arch/powerpc/platforms/iseries/hvcall.S | 22 arch/powerpc/platforms/iseries/hvlog.c | 1 arch/powerpc/platforms/iseries/hvlpconfig.c | 1 arch/powerpc/platforms/iseries/iommu.c | 42 arch/powerpc/platforms/iseries/ipl_parms.h | 7 arch/powerpc/platforms/iseries/irq.c | 17 arch/powerpc/platforms/iseries/irq.h | 8 arch/powerpc/platforms/iseries/ksyms.c | 27 arch/powerpc/platforms/iseries/lpardata.c | 28 arch/powerpc/platforms/iseries/lpevents.c | 77 + arch/powerpc/platforms/iseries/main_store.h | 7 arch/powerpc/platforms/iseries/mf.c | 98 + arch/powerpc/platforms/iseries/misc.S | 55 arch/powerpc/platforms/iseries/pci.c | 173 + arch/powerpc/platforms/iseries/pci.h | 63 arch/powerpc/platforms/iseries/proc.c | 15 arch/powerpc/platforms/iseries/processor_vpd.h | 7 arch/powerpc/platforms/iseries/release_data.h | 7 arch/powerpc/platforms/iseries/setup.c | 501 ++-- arch/powerpc/platforms/iseries/setup.h | 4 arch/powerpc/platforms/iseries/smp.c | 46 arch/powerpc/platforms/iseries/spcomm_area.h | 7 arch/powerpc/platforms/iseries/vio.c | 1 arch/powerpc/platforms/iseries/viopath.c | 3 arch/powerpc/platforms/iseries/vpd_areas.h | 7 arch/powerpc/platforms/iseries/vpdinfo.c | 21 
arch/powerpc/platforms/maple/Makefile | 1 arch/powerpc/platforms/maple/maple.h | 12 arch/powerpc/platforms/maple/pci.c | 7 arch/powerpc/platforms/maple/setup.c | 13 arch/powerpc/platforms/maple/time.c | 9 arch/powerpc/platforms/powermac/Makefile | 8 arch/powerpc/platforms/powermac/backlight.c | 202 ++ arch/powerpc/platforms/powermac/cache.S | 359 +++ arch/powerpc/platforms/powermac/cpufreq.c | 726 ++++++ arch/powerpc/platforms/powermac/feature.c | 3063 ++++++++++++++++++++++++ arch/powerpc/platforms/powermac/low_i2c.c | 0 arch/powerpc/platforms/powermac/nvram.c | 282 ++ arch/powerpc/platforms/powermac/pci.c | 1170 +++++++++ arch/powerpc/platforms/powermac/pic.c | 678 +++++ arch/powerpc/platforms/powermac/pic.h | 11 arch/powerpc/platforms/powermac/pmac.h | 51 arch/powerpc/platforms/powermac/setup.c | 794 ++++++ arch/powerpc/platforms/powermac/sleep.S | 396 +++ arch/powerpc/platforms/powermac/smp.c | 865 +++++++ arch/powerpc/platforms/powermac/time.c | 360 +++ arch/powerpc/platforms/prep/Kconfig | 22 arch/powerpc/platforms/pseries/Kconfig | 42 arch/powerpc/platforms/pseries/Makefile | 5 arch/powerpc/platforms/pseries/hvCall.S | 0 arch/powerpc/platforms/pseries/iommu.c | 28 arch/powerpc/platforms/pseries/lpar.c | 5 arch/powerpc/platforms/pseries/nvram.c | 0 arch/powerpc/platforms/pseries/pci.c | 3 arch/powerpc/platforms/pseries/ras.c | 11 arch/powerpc/platforms/pseries/reconfig.c | 0 arch/powerpc/platforms/pseries/rtas-fw.c | 138 + arch/powerpc/platforms/pseries/rtas-fw.h | 3 arch/powerpc/platforms/pseries/setup.c | 57 arch/powerpc/platforms/pseries/smp.c | 52 arch/powerpc/platforms/pseries/vio.c | 1 arch/powerpc/platforms/pseries/xics.c | 30 arch/powerpc/platforms/pseries/xics.h | 10 arch/powerpc/sysdev/Makefile | 7 arch/powerpc/sysdev/dcr.S | 0 arch/powerpc/sysdev/grackle.c | 64 + arch/powerpc/sysdev/i8259.c | 65 - arch/powerpc/sysdev/indirect_pci.c | 0 arch/powerpc/sysdev/mpic.c | 53 arch/powerpc/sysdev/u3_iommu.c | 50 arch/powerpc/xmon/Makefile | 11 
arch/powerpc/xmon/ansidecl.h | 0 arch/powerpc/xmon/nonstdio.h | 0 arch/powerpc/xmon/ppc-dis.c | 0 arch/powerpc/xmon/ppc-opc.c | 0 arch/powerpc/xmon/ppc.h | 0 arch/powerpc/xmon/setjmp.S | 135 + arch/powerpc/xmon/start_32.c | 624 +++++ arch/powerpc/xmon/start_64.c | 0 arch/powerpc/xmon/start_8xx.c | 287 ++ arch/powerpc/xmon/subr_prf.c | 11 arch/powerpc/xmon/xmon.c | 395 ++- arch/ppc/8xx_io/commproc.c | 20 arch/ppc/Kconfig | 40 arch/ppc/Makefile | 14 arch/ppc/boot/of1275/claim.c | 1 arch/ppc/boot/openfirmware/chrpmain.c | 2 arch/ppc/boot/openfirmware/coffmain.c | 2 arch/ppc/kernel/Makefile | 27 arch/ppc/kernel/align.c | 4 arch/ppc/kernel/asm-offsets.c | 3 arch/ppc/kernel/cpu_setup_6xx.S | 6 arch/ppc/kernel/cpu_setup_power4.S | 6 arch/ppc/kernel/entry.S | 12 arch/ppc/kernel/fpu.S | 133 - arch/ppc/kernel/head.S | 100 - arch/ppc/kernel/head_44x.S | 32 arch/ppc/kernel/head_4xx.S | 68 - arch/ppc/kernel/head_8xx.S | 42 arch/ppc/kernel/head_booke.h | 4 arch/ppc/kernel/head_fsl_booke.S | 47 arch/ppc/kernel/idle.c | 3 arch/ppc/kernel/irq.c | 1 arch/ppc/kernel/l2cr.S | 2 arch/ppc/kernel/misc.S | 235 -- arch/ppc/kernel/pci.c | 33 arch/ppc/kernel/perfmon.c | 96 - arch/ppc/kernel/perfmon_fsl_booke.c | 2 arch/ppc/kernel/ppc_ksyms.c | 34 arch/ppc/kernel/process.c | 142 + arch/ppc/kernel/setup.c | 39 arch/ppc/kernel/signal.c | 771 ------ arch/ppc/kernel/smp.c | 22 arch/ppc/kernel/syscalls.c | 268 -- arch/ppc/kernel/time.c | 9 arch/ppc/kernel/traps.c | 42 arch/ppc/kernel/vector.S | 217 -- arch/ppc/kernel/vmlinux.lds.S | 26 arch/ppc/lib/string.S | 24 arch/ppc/math-emu/sfp-machine.h | 2 arch/ppc/mm/init.c | 23 arch/ppc/oprofile/Makefile | 14 arch/ppc/oprofile/common.c | 161 - arch/ppc/oprofile/op_impl.h | 45 arch/ppc/platforms/4xx/bamboo.c | 14 arch/ppc/platforms/4xx/ebony.c | 15 arch/ppc/platforms/4xx/luan.c | 13 arch/ppc/platforms/4xx/ocotea.c | 31 arch/ppc/platforms/83xx/mpc834x_sys.h | 1 arch/ppc/platforms/85xx/mpc8540_ads.c | 30 arch/ppc/platforms/85xx/mpc8560_ads.c | 25 
arch/ppc/platforms/85xx/mpc85xx_ads_common.h | 1 arch/ppc/platforms/85xx/mpc85xx_cds_common.c | 39 arch/ppc/platforms/85xx/sbc8560.c | 22 arch/ppc/platforms/85xx/stx_gp3.c | 21 arch/ppc/platforms/85xx/stx_gp3.h | 1 arch/ppc/platforms/Makefile | 3 arch/ppc/platforms/chestnut.c | 1 arch/ppc/platforms/chrp_nvram.c | 83 + arch/ppc/platforms/chrp_pci.c | 10 arch/ppc/platforms/chrp_pegasos_eth.c | 124 + arch/ppc/platforms/chrp_setup.c | 33 arch/ppc/platforms/chrp_smp.c | 3 arch/ppc/platforms/chrp_time.c | 8 arch/ppc/platforms/ev64360.c | 1 arch/ppc/platforms/fads.h | 2 arch/ppc/platforms/gemini_setup.c | 4 arch/ppc/platforms/hdpu.c | 4 arch/ppc/platforms/katana.c | 3 arch/ppc/platforms/lite5200.c | 1 arch/ppc/platforms/lopec.c | 17 arch/ppc/platforms/mpc885ads.h | 2 arch/ppc/platforms/mvme5100.c | 6 arch/ppc/platforms/pal4_setup.c | 1 arch/ppc/platforms/pmac_backlight.c | 16 arch/ppc/platforms/pmac_cpufreq.c | 36 arch/ppc/platforms/pmac_feature.c | 176 + arch/ppc/platforms/pmac_nvram.c | 42 arch/ppc/platforms/pmac_pci.c | 28 arch/ppc/platforms/pmac_pic.c | 27 arch/ppc/platforms/pmac_setup.c | 19 arch/ppc/platforms/pmac_sleep.S | 4 arch/ppc/platforms/pmac_smp.c | 11 arch/ppc/platforms/pmac_time.c | 8 arch/ppc/platforms/pplus.c | 17 arch/ppc/platforms/prep_pci.c | 64 - arch/ppc/platforms/prep_setup.c | 70 - arch/ppc/platforms/radstone_ppc7d.c | 15 arch/ppc/platforms/residual.c | 2 arch/ppc/platforms/sandpoint.c | 21 arch/ppc/syslib/Makefile | 57 arch/ppc/syslib/btext.c | 6 arch/ppc/syslib/gt64260_pic.c | 1 arch/ppc/syslib/ibm440gx_common.c | 6 arch/ppc/syslib/ibm44x_common.c | 37 arch/ppc/syslib/ibm44x_common.h | 3 arch/ppc/syslib/m8260_setup.c | 4 arch/ppc/syslib/m82xx_pci.c | 4 arch/ppc/syslib/m8xx_setup.c | 48 arch/ppc/syslib/m8xx_wdt.c | 14 arch/ppc/syslib/mpc52xx_pci.c | 3 arch/ppc/syslib/mpc83xx_devices.c | 1 arch/ppc/syslib/mpc85xx_devices.c | 17 arch/ppc/syslib/mpc85xx_sys.c | 44 arch/ppc/syslib/mpc8xx_sys.c | 4 arch/ppc/syslib/mv64360_pic.c | 1 
arch/ppc/syslib/mv64x60.c | 2 arch/ppc/syslib/mv64x60_dbg.c | 1 arch/ppc/syslib/of_device.c | 278 -- arch/ppc/syslib/open_pic.c | 3 arch/ppc/syslib/open_pic2.c | 1 arch/ppc/syslib/ppc403_pic.c | 1 arch/ppc/syslib/ppc4xx_pic.c | 1 arch/ppc/syslib/ppc4xx_setup.c | 2 arch/ppc/syslib/ppc83xx_setup.c | 1 arch/ppc/syslib/ppc85xx_setup.c | 1 arch/ppc/syslib/ppc8xx_pic.c | 17 arch/ppc/syslib/ppc_sys.c | 3 arch/ppc/syslib/pq2_devices.c | 1 arch/ppc/syslib/prep_nvram.c | 13 arch/ppc/syslib/prom.c | 18 arch/ppc/syslib/xilinx_pic.c | 1 arch/ppc/xmon/start.c | 3 arch/ppc/xmon/xmon.c | 9 arch/ppc64/Kconfig | 33 arch/ppc64/Makefile | 18 arch/ppc64/boot/Makefile | 67 - arch/ppc64/boot/crt0.S | 53 arch/ppc64/boot/install.sh | 2 arch/ppc64/boot/main.c | 268 +- arch/ppc64/boot/string.S | 4 arch/ppc64/boot/string.h | 1 arch/ppc64/boot/zImage.lds | 64 - arch/ppc64/boot/zlib.c | 2195 ----------------- arch/ppc64/boot/zlib.h | 432 --- arch/ppc64/defconfig | 4 arch/ppc64/kernel/HvLpEvent.c | 88 - arch/ppc64/kernel/Makefile | 77 - arch/ppc64/kernel/align.c | 4 arch/ppc64/kernel/asm-offsets.c | 3 arch/ppc64/kernel/bpa_iommu.c | 2 arch/ppc64/kernel/bpa_setup.c | 7 arch/ppc64/kernel/btext.c | 42 arch/ppc64/kernel/cputable.c | 308 -- arch/ppc64/kernel/eeh.c | 2 arch/ppc64/kernel/head.S | 290 +- arch/ppc64/kernel/i8259.c | 177 - arch/ppc64/kernel/i8259.h | 17 arch/ppc64/kernel/idle.c | 8 arch/ppc64/kernel/kprobes.c | 1 arch/ppc64/kernel/misc.S | 662 ----- arch/ppc64/kernel/pci.c | 46 arch/ppc64/kernel/pci_direct_iommu.c | 3 arch/ppc64/kernel/pci_dn.c | 3 arch/ppc64/kernel/pci_iommu.c | 21 arch/ppc64/kernel/pmac.h | 31 arch/ppc64/kernel/pmac_feature.c | 767 ------ arch/ppc64/kernel/pmac_pci.c | 793 ------ arch/ppc64/kernel/pmac_setup.c | 525 ---- arch/ppc64/kernel/pmac_smp.c | 330 --- arch/ppc64/kernel/pmac_time.c | 195 -- arch/ppc64/kernel/ppc_ksyms.c | 20 arch/ppc64/kernel/prom.c | 7 arch/ppc64/kernel/prom_init.c | 1 arch/ppc64/kernel/ptrace.c | 363 --- arch/ppc64/kernel/rtas-proc.c | 1 
arch/ppc64/kernel/rtas_pci.c | 9 arch/ppc64/kernel/rtc.c | 48 arch/ppc64/kernel/signal.c | 2 arch/ppc64/kernel/smp.c | 40 arch/ppc64/kernel/syscalls.c | 263 -- arch/ppc64/kernel/traps.c | 568 ---- arch/ppc64/kernel/vdso64/sigtramp.S | 1 arch/ppc64/kernel/vecemu.c | 346 --- arch/ppc64/kernel/vmlinux.lds.S | 17 arch/ppc64/lib/Makefile | 15 arch/ppc64/lib/strcase.c | 31 arch/ppc64/lib/string.S | 106 - arch/ppc64/mm/Makefile | 11 arch/ppc64/mm/init.c | 950 ------- arch/ppc64/oprofile/Kconfig | 23 arch/ppc64/xmon/Makefile | 5 arch/ppc64/xmon/setjmp.S | 73 - drivers/block/viodasd.c | 9 drivers/cdrom/viocd.c | 9 drivers/char/hvc_vio.c | 2 drivers/char/hvcs.c | 5 drivers/char/mem.c | 4 drivers/char/viotape.c | 9 drivers/ide/ppc/pmac.c | 80 - drivers/macintosh/ans-lcd.c | 10 drivers/macintosh/apm_emu.c | 8 drivers/macintosh/macio_sysfs.c | 26 drivers/macintosh/mediabay.c | 56 drivers/macintosh/smu.c | 4 drivers/macintosh/via-cuda.c | 1 drivers/macintosh/via-pmu.c | 142 + drivers/macintosh/via-pmu68k.c | 15 drivers/net/bmac.c | 1 drivers/net/ibmveth.c | 14 drivers/net/iseries_veth.c | 18 drivers/net/mace.c | 1 drivers/net/mv643xx_eth.c | 3 drivers/pcmcia/Kconfig | 10 drivers/pcmcia/Makefile | 1 drivers/pcmcia/m8xx_pcmcia.c | 1290 ++++++++++ drivers/scsi/ibmvscsi/ibmvscsi.c | 9 drivers/video/fbmem.c | 2 fs/proc/proc_misc.c | 8 include/asm-powerpc/a.out.h | 36 include/asm-powerpc/atomic.h | 45 include/asm-powerpc/auxvec.h | 8 include/asm-powerpc/backlight.h | 9 include/asm-powerpc/bug.h | 81 + include/asm-powerpc/byteorder.h | 11 include/asm-powerpc/checksum.h | 47 include/asm-powerpc/cputable.h | 427 +++ include/asm-powerpc/dbdma.h | 0 include/asm-powerpc/dma.h | 91 - include/asm-powerpc/elf.h | 96 - include/asm-powerpc/firmware.h | 10 include/asm-powerpc/grackle.h | 7 include/asm-powerpc/hardirq.h | 16 include/asm-powerpc/heathrow.h | 0 include/asm-powerpc/hw_irq.h | 114 + include/asm-powerpc/i8259.h | 12 include/asm-powerpc/iommu.h | 41 include/asm-powerpc/irq.h | 168 + 
include/asm-powerpc/kdebug.h | 11 include/asm-powerpc/keylargo.h | 0 include/asm-powerpc/kmap_types.h | 33 include/asm-powerpc/kprobes.h | 7 include/asm-powerpc/lmb.h | 2 include/asm-powerpc/machdep.h | 284 ++ include/asm-powerpc/macio.h | 0 include/asm-powerpc/mediabay.h | 0 include/asm-powerpc/mpic.h | 14 include/asm-powerpc/of_device.h | 7 include/asm-powerpc/ohare.h | 0 include/asm-powerpc/oprofile_impl.h | 24 include/asm-powerpc/pSeries_reconfig.h | 0 include/asm-powerpc/parport.h | 6 include/asm-powerpc/pmac_feature.h | 0 include/asm-powerpc/pmac_low_i2c.h | 0 include/asm-powerpc/pmc.h | 46 include/asm-powerpc/posix_types.h | 40 include/asm-powerpc/ppc-pci.h | 6 include/asm-powerpc/ppc_asm.h | 511 ++++ include/asm-powerpc/processor.h | 281 ++ include/asm-powerpc/prom.h | 219 ++ include/asm-powerpc/reg.h | 297 ++ include/asm-powerpc/rtas.h | 8 include/asm-powerpc/rtc.h | 78 + include/asm-powerpc/rwsem.h | 18 include/asm-powerpc/scatterlist.h | 45 include/asm-powerpc/seccomp.h | 11 include/asm-powerpc/sections.h | 20 include/asm-powerpc/semaphore.h | 6 include/asm-powerpc/smu.h | 0 include/asm-powerpc/spinlock_types.h | 4 include/asm-powerpc/sstep.h | 4 include/asm-powerpc/statfs.h | 60 include/asm-powerpc/synch.h | 51 include/asm-powerpc/system.h | 363 +++ include/asm-powerpc/thread_info.h | 59 include/asm-powerpc/time.h | 226 ++ include/asm-powerpc/types.h | 37 include/asm-powerpc/uninorth.h | 0 include/asm-powerpc/unistd.h | 91 - include/asm-powerpc/vga.h | 22 include/asm-powerpc/vio.h | 22 include/asm-powerpc/xmon.h | 12 include/asm-ppc/a.out.h | 26 include/asm-ppc/auxvec.h | 14 include/asm-ppc/bug.h | 58 include/asm-ppc/byteorder.h | 76 - include/asm-ppc/cache.h | 13 include/asm-ppc/checksum.h | 107 - include/asm-ppc/cpm2.h | 3 include/asm-ppc/cputable.h | 129 - include/asm-ppc/elf.h | 151 - include/asm-ppc/hw_irq.h | 74 - include/asm-ppc/i8259.h | 11 include/asm-ppc/io.h | 11 include/asm-ppc/kmap_types.h | 25 include/asm-ppc/machdep.h | 4 
include/asm-ppc/mmu_context.h | 6 include/asm-ppc/mpc8260.h | 4 include/asm-ppc/mpc85xx.h | 3 include/asm-ppc/mpc8xx.h | 4 include/asm-ppc/open_pic.h | 3 include/asm-ppc/page.h | 18 include/asm-ppc/parport.h | 18 include/asm-ppc/pci-bridge.h | 5 include/asm-ppc/pci.h | 6 include/asm-ppc/perfmon.h | 22 include/asm-ppc/pgtable.h | 2 include/asm-ppc/posix_types.h | 111 - include/asm-ppc/ppc_asm.h | 350 --- include/asm-ppc/processor.h | 201 -- include/asm-ppc/ptrace.h | 2 include/asm-ppc/rwsem.h | 177 - include/asm-ppc/scatterlist.h | 25 include/asm-ppc/seccomp.h | 10 include/asm-ppc/sections.h | 33 include/asm-ppc/semaphore.h | 108 - include/asm-ppc/smp.h | 28 include/asm-ppc/spinlock.h | 8 include/asm-ppc/spinlock_types.h | 20 include/asm-ppc/statfs.h | 8 include/asm-ppc/system.h | 28 include/asm-ppc/thread_info.h | 107 - include/asm-ppc/types.h | 69 - include/asm-ppc/xmon.h | 17 include/asm-ppc64/a.out.h | 39 include/asm-ppc64/abs_addr.h | 7 include/asm-ppc64/atomic.h | 197 -- include/asm-ppc64/bitops.h | 2 include/asm-ppc64/bootinfo.h | 70 - include/asm-ppc64/btext.h | 1 include/asm-ppc64/bug.h | 69 - include/asm-ppc64/cputable.h | 167 - include/asm-ppc64/dart.h | 59 include/asm-ppc64/dbdma.h | 2 include/asm-ppc64/dma.h | 329 --- include/asm-ppc64/futex.h | 2 include/asm-ppc64/hardirq.h | 27 include/asm-ppc64/hw_irq.h | 104 - include/asm-ppc64/iSeries/HvCallPci.h | 533 ---- include/asm-ppc64/iSeries/iSeries_irq.h | 8 include/asm-ppc64/iSeries/iSeries_pci.h | 88 - include/asm-ppc64/io.h | 2 include/asm-ppc64/irq.h | 120 - include/asm-ppc64/keylargo.h | 2 include/asm-ppc64/kmap_types.h | 23 include/asm-ppc64/machdep.h | 185 - include/asm-ppc64/macio.h | 2 include/asm-ppc64/memory.h | 61 include/asm-ppc64/mmu.h | 7 include/asm-ppc64/of_device.h | 2 include/asm-ppc64/page.h | 8 include/asm-ppc64/pci-bridge.h | 22 include/asm-ppc64/pci.h | 2 include/asm-ppc64/pgtable.h | 2 include/asm-ppc64/pmac_feature.h | 2 include/asm-ppc64/pmac_low_i2c.h | 2 include/asm-ppc64/pmc.h 
| 31 include/asm-ppc64/ppc32.h | 14 include/asm-ppc64/ppc_asm.h | 242 -- include/asm-ppc64/processor.h | 558 ---- include/asm-ppc64/prom.h | 4 include/asm-ppc64/scatterlist.h | 31 include/asm-ppc64/sections.h | 29 include/asm-ppc64/smp.h | 1 include/asm-ppc64/statfs.h | 61 include/asm-ppc64/system.h | 20 include/asm-ppc64/tce.h | 64 + include/asm-ppc64/time.h | 124 - include/asm-ppc64/tlbflush.h | 7 include/asm-ppc64/udbg.h | 3 include/asm-ppc64/uninorth.h | 2 include/asm-ppc64/unistd.h | 486 ---- include/asm-ppc64/vga.h | 50 include/linux/fsl_devices.h | 13 include/linux/zutil.h | 1 kernel/irq/handle.c | 6 lib/zlib_inflate/inflate.c | 1 565 files changed, 50073 insertions(+), 22537 deletions(-) create mode 100644 arch/powerpc/Kconfig create mode 100644 arch/powerpc/Kconfig.debug create mode 100644 arch/powerpc/Makefile create mode 100644 arch/powerpc/kernel/Makefile create mode 100644 arch/powerpc/kernel/asm-offsets.c rename arch/{ppc64/kernel/binfmt_elf32.c => powerpc/kernel/binfmt_elf32.c} (93%) create mode 100644 arch/powerpc/kernel/btext.c rename arch/{ppc/kernel/cputable.c => powerpc/kernel/cputable.c} (55%) create mode 100644 arch/powerpc/kernel/entry_32.S rename arch/{ppc64/kernel/entry.S => powerpc/kernel/entry_64.S} (96%) create mode 100644 arch/powerpc/kernel/fpu.S create mode 100644 arch/powerpc/kernel/head_32.S create mode 100644 arch/powerpc/kernel/head_44x.S create mode 100644 arch/powerpc/kernel/head_4xx.S create mode 100644 arch/powerpc/kernel/head_64.S create mode 100644 arch/powerpc/kernel/head_8xx.S create mode 100644 arch/powerpc/kernel/head_fsl_booke.S create mode 100644 arch/powerpc/kernel/idle_6xx.S rename arch/{ppc64/kernel/idle_power4.S => powerpc/kernel/idle_power4.S} (92%) rename arch/{ppc64/kernel/init_task.c => powerpc/kernel/init_task.c} (100%) rename arch/{ppc64/kernel/lparmap.c => powerpc/kernel/lparmap.c} (100%) create mode 100644 arch/powerpc/kernel/misc_32.S create mode 100644 arch/powerpc/kernel/misc_64.S rename 
arch/{ppc64/kernel/of_device.c => powerpc/kernel/of_device.c} (98%) rename arch/{ppc64/kernel/pmc.c => powerpc/kernel/pmc.c} (76%) create mode 100644 arch/powerpc/kernel/ppc_ksyms.c rename arch/{ppc64/kernel/process.c => powerpc/kernel/process.c} (58%) create mode 100644 arch/powerpc/kernel/prom.c create mode 100644 arch/powerpc/kernel/prom_init.c rename arch/{ppc/kernel/ptrace.c => powerpc/kernel/ptrace.c} (82%) rename arch/{ppc64/kernel/ptrace32.c => powerpc/kernel/ptrace32.c} (97%) rename arch/{ppc64/kernel/rtas.c => powerpc/kernel/rtas.c} (64%) create mode 100644 arch/powerpc/kernel/semaphore.c create mode 100644 arch/powerpc/kernel/setup-common.c create mode 100644 arch/powerpc/kernel/setup_32.c rename arch/{ppc64/kernel/setup.c => powerpc/kernel/setup_64.c} (70%) rename arch/{ppc64/kernel/signal32.c => powerpc/kernel/signal_32.c} (59%) rename arch/{ppc64/kernel/sys_ppc32.c => powerpc/kernel/sys_ppc32.c} (77%) create mode 100644 arch/powerpc/kernel/syscalls.c create mode 100644 arch/powerpc/kernel/systbl.S rename arch/{ppc64/kernel/time.c => powerpc/kernel/time.c} (61%) create mode 100644 arch/powerpc/kernel/traps.c rename arch/{ppc/kernel/vecemu.c => powerpc/kernel/vecemu.c} (100%) rename arch/{ppc64/kernel/vector.S => powerpc/kernel/vector.S} (63%) rename arch/{ppc64/kernel/vio.c => powerpc/kernel/vio.c} (100%) create mode 100644 arch/powerpc/kernel/vmlinux.lds.S create mode 100644 arch/powerpc/lib/Makefile create mode 100644 arch/powerpc/lib/checksum_32.S rename arch/{ppc64/lib/checksum.S => powerpc/lib/checksum_64.S} (100%) create mode 100644 arch/powerpc/lib/copy_32.S rename arch/{ppc64/lib/copypage.S => powerpc/lib/copypage_64.S} (100%) rename arch/{ppc64/lib/copyuser.S => powerpc/lib/copyuser_64.S} (100%) create mode 100644 arch/powerpc/lib/div64.S rename arch/{ppc64/lib/e2a.c => powerpc/lib/e2a.c} (100%) rename arch/{ppc64/lib/locks.c => powerpc/lib/locks.c} (98%) create mode 100644 arch/powerpc/lib/mem_64.S rename arch/{ppc64/lib/memcpy.S => 
powerpc/lib/memcpy_64.S} (100%) create mode 100644 arch/powerpc/lib/rheap.c rename arch/{ppc64/lib/sstep.c => powerpc/lib/sstep.c} (96%) create mode 100644 arch/powerpc/lib/strcase.c create mode 100644 arch/powerpc/lib/string.S rename arch/{ppc64/lib/usercopy.c => powerpc/lib/usercopy_64.c} (100%) create mode 100644 arch/powerpc/mm/44x_mmu.c create mode 100644 arch/powerpc/mm/4xx_mmu.c create mode 100644 arch/powerpc/mm/Makefile rename arch/{ppc64/mm/fault.c => powerpc/mm/fault.c} (68%) create mode 100644 arch/powerpc/mm/fsl_booke_mmu.c create mode 100644 arch/powerpc/mm/hash_low_32.S rename arch/{ppc64/mm/hash_low.S => powerpc/mm/hash_low_64.S} (100%) rename arch/{ppc64/mm/hash_native.c => powerpc/mm/hash_native_64.c} (97%) rename arch/{ppc64/mm/hash_utils.c => powerpc/mm/hash_utils_64.c} (90%) rename arch/{ppc64/mm/hugetlbpage.c => powerpc/mm/hugetlbpage.c} (100%) rename arch/{ppc64/mm/imalloc.c => powerpc/mm/imalloc.c} (100%) create mode 100644 arch/powerpc/mm/init_32.c create mode 100644 arch/powerpc/mm/init_64.c rename arch/{ppc64/kernel/lmb.c => powerpc/mm/lmb.c} (77%) create mode 100644 arch/powerpc/mm/mem.c rename arch/{ppc64/mm/mmap.c => powerpc/mm/mmap.c} (100%) create mode 100644 arch/powerpc/mm/mmu_context_32.c create mode 100644 arch/powerpc/mm/mmu_context_64.c create mode 100644 arch/powerpc/mm/mmu_decl.h rename arch/{ppc64/mm/numa.c => powerpc/mm/numa.c} (100%) create mode 100644 arch/powerpc/mm/pgtable_32.c create mode 100644 arch/powerpc/mm/pgtable_64.c create mode 100644 arch/powerpc/mm/ppc_mmu_32.c rename arch/{ppc64/mm/slb.c => powerpc/mm/slb.c} (100%) rename arch/{ppc64/mm/slb_low.S => powerpc/mm/slb_low.S} (100%) rename arch/{ppc64/mm/stab.c => powerpc/mm/stab.c} (100%) create mode 100644 arch/powerpc/mm/tlb_32.c rename arch/{ppc64/mm/tlb.c => powerpc/mm/tlb_64.c} (88%) rename arch/{ppc/oprofile/Kconfig => powerpc/oprofile/Kconfig} (100%) rename arch/{ppc64/oprofile/Makefile => powerpc/oprofile/Makefile} (63%) rename 
arch/{ppc64/oprofile/common.c => powerpc/oprofile/common.c} (54%) rename arch/{ppc/oprofile/op_model_fsl_booke.c => powerpc/oprofile/op_model_fsl_booke.c} (99%) rename arch/{ppc64/oprofile/op_model_power4.c => powerpc/oprofile/op_model_power4.c} (100%) rename arch/{ppc64/oprofile/op_model_rs64.c => powerpc/oprofile/op_model_rs64.c} (99%) create mode 100644 arch/powerpc/platforms/4xx/Kconfig create mode 100644 arch/powerpc/platforms/4xx/Makefile create mode 100644 arch/powerpc/platforms/85xx/Kconfig create mode 100644 arch/powerpc/platforms/85xx/Makefile create mode 100644 arch/powerpc/platforms/8xx/Kconfig create mode 100644 arch/powerpc/platforms/Makefile create mode 100644 arch/powerpc/platforms/apus/Kconfig create mode 100644 arch/powerpc/platforms/chrp/Makefile create mode 100644 arch/powerpc/platforms/chrp/chrp.h create mode 100644 arch/powerpc/platforms/chrp/nvram.c create mode 100644 arch/powerpc/platforms/chrp/pci.c create mode 100644 arch/powerpc/platforms/chrp/pegasos_eth.c create mode 100644 arch/powerpc/platforms/chrp/setup.c create mode 100644 arch/powerpc/platforms/chrp/smp.c create mode 100644 arch/powerpc/platforms/chrp/time.c create mode 100644 arch/powerpc/platforms/embedded6xx/Kconfig create mode 100644 arch/powerpc/platforms/iseries/Kconfig create mode 100644 arch/powerpc/platforms/iseries/Makefile rename include/asm-ppc64/iSeries/HvCallHpt.h => arch/powerpc/platforms/iseries/call_hpt.h (94%) create mode 100644 arch/powerpc/platforms/iseries/call_pci.h rename include/asm-ppc64/iSeries/HvCallSm.h => arch/powerpc/platforms/iseries/call_sm.h (89%) rename arch/{ppc64/kernel/iSeries_htab.c => powerpc/platforms/iseries/htab.c} (96%) rename arch/{ppc64/kernel/hvCall.S => powerpc/platforms/iseries/hvcall.S} (82%) rename arch/{ppc64/kernel/HvCall.c => powerpc/platforms/iseries/hvlog.c} (98%) rename arch/{ppc64/kernel/HvLpConfig.c => powerpc/platforms/iseries/hvlpconfig.c} (95%) rename arch/{ppc64/kernel/iSeries_iommu.c => 
powerpc/platforms/iseries/iommu.c} (86%) rename include/asm-ppc64/iSeries/ItIplParmsReal.h => arch/powerpc/platforms/iseries/ipl_parms.h (93%) rename arch/{ppc64/kernel/iSeries_irq.c => powerpc/platforms/iseries/irq.c} (97%) create mode 100644 arch/powerpc/platforms/iseries/irq.h create mode 100644 arch/powerpc/platforms/iseries/ksyms.c rename arch/{ppc64/kernel/LparData.c => powerpc/platforms/iseries/lpardata.c} (94%) rename arch/{ppc64/kernel/ItLpQueue.c => powerpc/platforms/iseries/lpevents.c} (74%) rename include/asm-ppc64/iSeries/IoHriMainStore.h => arch/powerpc/platforms/iseries/main_store.h (97%) rename arch/{ppc64/kernel/mf.c => powerpc/platforms/iseries/mf.c} (97%) create mode 100644 arch/powerpc/platforms/iseries/misc.S rename arch/{ppc64/kernel/iSeries_pci.c => powerpc/platforms/iseries/pci.c} (86%) create mode 100644 arch/powerpc/platforms/iseries/pci.h rename arch/{ppc64/kernel/iSeries_proc.c => powerpc/platforms/iseries/proc.c} (94%) rename include/asm-ppc64/iSeries/IoHriProcessorVpd.h => arch/powerpc/platforms/iseries/processor_vpd.h (94%) rename include/asm-ppc64/iSeries/HvReleaseData.h => arch/powerpc/platforms/iseries/release_data.h (92%) rename arch/{ppc64/kernel/iSeries_setup.c => powerpc/platforms/iseries/setup.c} (53%) rename arch/{ppc64/kernel/iSeries_setup.h => powerpc/platforms/iseries/setup.h} (86%) rename arch/{ppc64/kernel/iSeries_smp.c => powerpc/platforms/iseries/smp.c} (63%) rename include/asm-ppc64/iSeries/ItSpCommArea.h => arch/powerpc/platforms/iseries/spcomm_area.h (87%) rename arch/{ppc64/kernel/iSeries_vio.c => powerpc/platforms/iseries/vio.c} (100%) rename arch/{ppc64/kernel/viopath.c => powerpc/platforms/iseries/viopath.c} (100%) rename include/asm-ppc64/iSeries/ItVpdAreas.h => arch/powerpc/platforms/iseries/vpd_areas.h (95%) rename arch/{ppc64/kernel/iSeries_VpdInfo.c => powerpc/platforms/iseries/vpdinfo.c} (92%) create mode 100644 arch/powerpc/platforms/maple/Makefile create mode 100644 arch/powerpc/platforms/maple/maple.h 
rename arch/{ppc64/kernel/maple_pci.c => powerpc/platforms/maple/pci.c} (99%) rename arch/{ppc64/kernel/maple_setup.c => powerpc/platforms/maple/setup.c} (93%) rename arch/{ppc64/kernel/maple_time.c => powerpc/platforms/maple/time.c} (95%) create mode 100644 arch/powerpc/platforms/powermac/Makefile create mode 100644 arch/powerpc/platforms/powermac/backlight.c create mode 100644 arch/powerpc/platforms/powermac/cache.S create mode 100644 arch/powerpc/platforms/powermac/cpufreq.c create mode 100644 arch/powerpc/platforms/powermac/feature.c rename arch/{ppc64/kernel/pmac_low_i2c.c => powerpc/platforms/powermac/low_i2c.c} (100%) rename arch/{ppc64/kernel/pmac_nvram.c => powerpc/platforms/powermac/nvram.c} (51%) create mode 100644 arch/powerpc/platforms/powermac/pci.c create mode 100644 arch/powerpc/platforms/powermac/pic.c create mode 100644 arch/powerpc/platforms/powermac/pic.h create mode 100644 arch/powerpc/platforms/powermac/pmac.h create mode 100644 arch/powerpc/platforms/powermac/setup.c create mode 100644 arch/powerpc/platforms/powermac/sleep.S create mode 100644 arch/powerpc/platforms/powermac/smp.c create mode 100644 arch/powerpc/platforms/powermac/time.c create mode 100644 arch/powerpc/platforms/prep/Kconfig create mode 100644 arch/powerpc/platforms/pseries/Kconfig create mode 100644 arch/powerpc/platforms/pseries/Makefile rename arch/{ppc64/kernel/pSeries_hvCall.S => powerpc/platforms/pseries/hvCall.S} (100%) rename arch/{ppc64/kernel/pSeries_iommu.c => powerpc/platforms/pseries/iommu.c} (97%) rename arch/{ppc64/kernel/pSeries_lpar.c => powerpc/platforms/pseries/lpar.c} (99%) rename arch/{ppc64/kernel/pSeries_nvram.c => powerpc/platforms/pseries/nvram.c} (100%) rename arch/{ppc64/kernel/pSeries_pci.c => powerpc/platforms/pseries/pci.c} (99%) rename arch/{ppc64/kernel/ras.c => powerpc/platforms/pseries/ras.c} (99%) rename arch/{ppc64/kernel/pSeries_reconfig.c => powerpc/platforms/pseries/reconfig.c} (100%) create mode 100644 
arch/powerpc/platforms/pseries/rtas-fw.c create mode 100644 arch/powerpc/platforms/pseries/rtas-fw.h rename arch/{ppc64/kernel/pSeries_setup.c => powerpc/platforms/pseries/setup.c} (93%) rename arch/{ppc64/kernel/pSeries_smp.c => powerpc/platforms/pseries/smp.c} (91%) rename arch/{ppc64/kernel/pSeries_vio.c => powerpc/platforms/pseries/vio.c} (100%) rename arch/{ppc64/kernel/xics.c => powerpc/platforms/pseries/xics.c} (98%) rename include/asm-ppc64/xics.h => arch/powerpc/platforms/pseries/xics.h (85%) create mode 100644 arch/powerpc/sysdev/Makefile rename arch/{ppc/syslib/dcr.S => powerpc/sysdev/dcr.S} (100%) create mode 100644 arch/powerpc/sysdev/grackle.c rename arch/{ppc/syslib/i8259.c => powerpc/sysdev/i8259.c} (78%) rename arch/{ppc/syslib/indirect_pci.c => powerpc/sysdev/indirect_pci.c} (100%) rename arch/{ppc64/kernel/mpic.c => powerpc/sysdev/mpic.c} (97%) rename arch/{ppc64/kernel/u3_iommu.c => powerpc/sysdev/u3_iommu.c} (89%) create mode 100644 arch/powerpc/xmon/Makefile rename arch/{ppc64/xmon/ansidecl.h => powerpc/xmon/ansidecl.h} (100%) rename arch/{ppc64/xmon/nonstdio.h => powerpc/xmon/nonstdio.h} (100%) rename arch/{ppc64/xmon/ppc-dis.c => powerpc/xmon/ppc-dis.c} (100%) rename arch/{ppc64/xmon/ppc-opc.c => powerpc/xmon/ppc-opc.c} (100%) rename arch/{ppc64/xmon/ppc.h => powerpc/xmon/ppc.h} (100%) create mode 100644 arch/powerpc/xmon/setjmp.S create mode 100644 arch/powerpc/xmon/start_32.c rename arch/{ppc64/xmon/start.c => powerpc/xmon/start_64.c} (100%) create mode 100644 arch/powerpc/xmon/start_8xx.c rename arch/{ppc64/xmon/subr_prf.c => powerpc/xmon/subr_prf.c} (92%) rename arch/{ppc64/xmon/xmon.c => powerpc/xmon/xmon.c} (87%) delete mode 100644 arch/ppc/kernel/fpu.S delete mode 100644 arch/ppc/kernel/perfmon.c delete mode 100644 arch/ppc/kernel/signal.c delete mode 100644 arch/ppc/kernel/syscalls.c delete mode 100644 arch/ppc/kernel/vector.S delete mode 100644 arch/ppc/oprofile/Makefile delete mode 100644 arch/ppc/oprofile/common.c delete mode 
100644 arch/ppc/oprofile/op_impl.h create mode 100644 arch/ppc/platforms/chrp_nvram.c delete mode 100644 arch/ppc/syslib/of_device.c delete mode 100644 arch/ppc64/boot/zlib.c delete mode 100644 arch/ppc64/boot/zlib.h delete mode 100644 arch/ppc64/kernel/HvLpEvent.c delete mode 100644 arch/ppc64/kernel/cputable.c delete mode 100644 arch/ppc64/kernel/i8259.c delete mode 100644 arch/ppc64/kernel/i8259.h delete mode 100644 arch/ppc64/kernel/pmac.h delete mode 100644 arch/ppc64/kernel/pmac_feature.c delete mode 100644 arch/ppc64/kernel/pmac_pci.c delete mode 100644 arch/ppc64/kernel/pmac_setup.c delete mode 100644 arch/ppc64/kernel/pmac_smp.c delete mode 100644 arch/ppc64/kernel/pmac_time.c delete mode 100644 arch/ppc64/kernel/ptrace.c delete mode 100644 arch/ppc64/kernel/syscalls.c delete mode 100644 arch/ppc64/kernel/traps.c delete mode 100644 arch/ppc64/kernel/vecemu.c delete mode 100644 arch/ppc64/lib/strcase.c delete mode 100644 arch/ppc64/mm/Makefile delete mode 100644 arch/ppc64/mm/init.c delete mode 100644 arch/ppc64/oprofile/Kconfig delete mode 100644 arch/ppc64/xmon/Makefile delete mode 100644 arch/ppc64/xmon/setjmp.S create mode 100644 drivers/pcmcia/m8xx_pcmcia.c create mode 100644 include/asm-powerpc/a.out.h rename include/{asm-ppc/atomic.h => asm-powerpc/atomic.h} (76%) rename include/{asm-ppc64/auxvec.h => asm-powerpc/auxvec.h} (73%) rename include/{asm-ppc/backlight.h => asm-powerpc/backlight.h} (68%) create mode 100644 include/asm-powerpc/bug.h rename include/{asm-ppc64/byteorder.h => asm-powerpc/byteorder.h} (90%) rename include/{asm-ppc64/checksum.h => asm-powerpc/checksum.h} (78%) create mode 100644 include/asm-powerpc/cputable.h rename include/{asm-ppc/dbdma.h => asm-powerpc/dbdma.h} (100%) rename include/{asm-ppc/dma.h => asm-powerpc/dma.h} (90%) rename include/{asm-ppc64/elf.h => asm-powerpc/elf.h} (88%) rename include/{asm-ppc64/firmware.h => asm-powerpc/firmware.h} (94%) create mode 100644 include/asm-powerpc/grackle.h rename 
include/{asm-ppc/hardirq.h => asm-powerpc/hardirq.h} (61%) rename include/{asm-ppc/heathrow.h => asm-powerpc/heathrow.h} (100%) create mode 100644 include/asm-powerpc/hw_irq.h create mode 100644 include/asm-powerpc/i8259.h rename include/{asm-ppc64/iommu.h => asm-powerpc/iommu.h} (69%) rename include/{asm-ppc/irq.h => asm-powerpc/irq.h} (76%) rename include/{asm-ppc64/kdebug.h => asm-powerpc/kdebug.h} (80%) rename include/{asm-ppc/keylargo.h => asm-powerpc/keylargo.h} (100%) create mode 100644 include/asm-powerpc/kmap_types.h rename include/{asm-ppc64/kprobes.h => asm-powerpc/kprobes.h} (93%) rename include/{asm-ppc64/lmb.h => asm-powerpc/lmb.h} (100%) create mode 100644 include/asm-powerpc/machdep.h rename include/{asm-ppc/macio.h => asm-powerpc/macio.h} (100%) rename include/{asm-ppc/mediabay.h => asm-powerpc/mediabay.h} (100%) rename arch/ppc64/kernel/mpic.h => include/asm-powerpc/mpic.h (96%) rename include/{asm-ppc/of_device.h => asm-powerpc/of_device.h} (92%) rename include/{asm-ppc/ohare.h => asm-powerpc/ohare.h} (100%) rename include/{asm-ppc64/oprofile_impl.h => asm-powerpc/oprofile_impl.h} (73%) rename include/{asm-ppc64/pSeries_reconfig.h => asm-powerpc/pSeries_reconfig.h} (100%) rename include/{asm-ppc64/parport.h => asm-powerpc/parport.h} (84%) rename include/{asm-ppc/pmac_feature.h => asm-powerpc/pmac_feature.h} (100%) rename include/{asm-ppc/pmac_low_i2c.h => asm-powerpc/pmac_low_i2c.h} (100%) create mode 100644 include/asm-powerpc/pmc.h rename include/{asm-ppc64/posix_types.h => asm-powerpc/posix_types.h} (84%) rename arch/ppc64/kernel/pci.h => include/asm-powerpc/ppc-pci.h (90%) create mode 100644 include/asm-powerpc/ppc_asm.h create mode 100644 include/asm-powerpc/processor.h create mode 100644 include/asm-powerpc/prom.h rename include/{asm-ppc/reg.h => asm-powerpc/reg.h} (70%) rename include/{asm-ppc64/rtas.h => asm-powerpc/rtas.h} (98%) create mode 100644 include/asm-powerpc/rtc.h rename include/{asm-ppc64/rwsem.h => asm-powerpc/rwsem.h} (89%) 
create mode 100644 include/asm-powerpc/scatterlist.h rename include/{asm-ppc64/seccomp.h => asm-powerpc/seccomp.h} (59%) create mode 100644 include/asm-powerpc/sections.h rename include/{asm-ppc64/semaphore.h => asm-powerpc/semaphore.h} (94%) rename include/{asm-ppc64/smu.h => asm-powerpc/smu.h} (100%) rename include/{asm-ppc64/spinlock_types.h => asm-powerpc/spinlock_types.h} (79%) rename include/{asm-ppc64/sstep.h => asm-powerpc/sstep.h} (85%) create mode 100644 include/asm-powerpc/statfs.h create mode 100644 include/asm-powerpc/synch.h create mode 100644 include/asm-powerpc/system.h rename include/{asm-ppc64/thread_info.h => asm-powerpc/thread_info.h} (69%) create mode 100644 include/asm-powerpc/time.h rename include/{asm-ppc64/types.h => asm-powerpc/types.h} (63%) rename include/{asm-ppc/uninorth.h => asm-powerpc/uninorth.h} (100%) rename include/{asm-ppc/unistd.h => asm-powerpc/unistd.h} (89%) rename include/{asm-ppc/vga.h => asm-powerpc/vga.h} (60%) rename include/{asm-ppc64/vio.h => asm-powerpc/vio.h} (92%) create mode 100644 include/asm-powerpc/xmon.h delete mode 100644 include/asm-ppc/a.out.h delete mode 100644 include/asm-ppc/auxvec.h delete mode 100644 include/asm-ppc/bug.h delete mode 100644 include/asm-ppc/byteorder.h delete mode 100644 include/asm-ppc/checksum.h delete mode 100644 include/asm-ppc/cputable.h delete mode 100644 include/asm-ppc/elf.h delete mode 100644 include/asm-ppc/hw_irq.h delete mode 100644 include/asm-ppc/i8259.h delete mode 100644 include/asm-ppc/kmap_types.h delete mode 100644 include/asm-ppc/parport.h delete mode 100644 include/asm-ppc/perfmon.h delete mode 100644 include/asm-ppc/posix_types.h delete mode 100644 include/asm-ppc/ppc_asm.h delete mode 100644 include/asm-ppc/processor.h delete mode 100644 include/asm-ppc/rwsem.h delete mode 100644 include/asm-ppc/scatterlist.h delete mode 100644 include/asm-ppc/seccomp.h delete mode 100644 include/asm-ppc/sections.h delete mode 100644 include/asm-ppc/semaphore.h delete mode 100644 
include/asm-ppc/spinlock_types.h delete mode 100644 include/asm-ppc/statfs.h delete mode 100644 include/asm-ppc/thread_info.h delete mode 100644 include/asm-ppc/types.h delete mode 100644 include/asm-ppc/xmon.h delete mode 100644 include/asm-ppc64/a.out.h delete mode 100644 include/asm-ppc64/atomic.h delete mode 100644 include/asm-ppc64/bootinfo.h delete mode 100644 include/asm-ppc64/bug.h delete mode 100644 include/asm-ppc64/cputable.h create mode 100644 include/asm-ppc64/dart.h delete mode 100644 include/asm-ppc64/dbdma.h delete mode 100644 include/asm-ppc64/dma.h delete mode 100644 include/asm-ppc64/hardirq.h delete mode 100644 include/asm-ppc64/hw_irq.h delete mode 100644 include/asm-ppc64/iSeries/HvCallPci.h delete mode 100644 include/asm-ppc64/iSeries/iSeries_irq.h delete mode 100644 include/asm-ppc64/iSeries/iSeries_pci.h delete mode 100644 include/asm-ppc64/irq.h delete mode 100644 include/asm-ppc64/keylargo.h delete mode 100644 include/asm-ppc64/kmap_types.h delete mode 100644 include/asm-ppc64/machdep.h delete mode 100644 include/asm-ppc64/macio.h delete mode 100644 include/asm-ppc64/memory.h delete mode 100644 include/asm-ppc64/of_device.h delete mode 100644 include/asm-ppc64/pmac_feature.h delete mode 100644 include/asm-ppc64/pmac_low_i2c.h delete mode 100644 include/asm-ppc64/pmc.h delete mode 100644 include/asm-ppc64/ppc_asm.h delete mode 100644 include/asm-ppc64/processor.h delete mode 100644 include/asm-ppc64/scatterlist.h delete mode 100644 include/asm-ppc64/sections.h delete mode 100644 include/asm-ppc64/statfs.h create mode 100644 include/asm-ppc64/tce.h delete mode 100644 include/asm-ppc64/time.h delete mode 100644 include/asm-ppc64/uninorth.h delete mode 100644 include/asm-ppc64/unistd.h delete mode 100644 include/asm-ppc64/vga.h Andy Fleming: ppc32: 85xx PHY Platform Update Andy Whitcroft: ppc64 memory model depends on NUMA Becky Bruce: powerpc: Merge bug.h powerpc: Merge elf.h powerpc: merge atomic.h, memory.h powerpc: merge semaphore.h 
powerpc: merge byteorder.h powerpc: Merge types.h powerpc: Fix types.h ppc: Fix m82xx_pci build Benjamin Herrenschmidt: ppc64: Store virtual address in TLB flush batches David Gibson: powerpc: Fix use of LOADBASE in merge tree powerpc: Another maple merge tree fix powerpc: Merge ppc64 pmc.[ch] with ppc32 perfmon.[ch] Fix broken initialization of conswitchp for ARCH=ppc64 powerpc: Merge thread_info.h powerpc: Fix handling of fpscr on 64-bit powerpc: Purge bootinfo.h powerpc: Merge parport.h powerpc: Don't use kmalloc() for kernel stacks powerpc: Remove dregs of bootinfo.h powerpc: Move xics.[ch] into platforms/pseries jdl at freescale.com: powerpc: Merge asm-ppc*/posix_types.h Jon Loeliger: powerpc: Merge kmap_types.h powerpc: Remove sections use from ppc powerpc: Remove sections use from ppc64 and drivers powerpc: Remove section free() and linker script bits powerpc: Merge simplified sections.h into asm-powerpc powerpc: Merge asm-ppc*/dma.h powerpc: Merge asm-ppc*/seccomp.h, drop TIF_32BIT check powerpc: Merge asm-ppc*/rwsem.h Kumar Gala: powerpc: Merged ppc_asm.h ppc32: Removed non-inlined versions of local_irq* functions ppc32: Allow user to individual select CHRP/PMAC/PREP config ppc32: Allow user to individual select CHRP/PMAC/PREP config powerpc: unified signature of timer_interrupt() between ppc32/ppc64 powerpc: merge include/asm-ppc*/auxvec.h into include/asm-powerpc/auxvec.h powerpc: merge include/asm-ppc*/spinlock_types.h into include/asm-powerpc/spinlock_types.h powerpc: merge include/asm-ppc*/statfs.h into include/asm-powerpc/statfs.h powerpc: Fix compiling of ppc32 powerpc: merged hw_irq.h powerpc: Fix building of power3 config on ppc32 powerpc: merged asm/cputable.h ppc32: make cur_cpu_spec a single pointer instead of an array powerpc: replace use of _GLOBAL with .globl powerpc: Some more fixes to allow building for a Book-E processor ppc32: replace use of _GLOBAL with .globl for ppc32 powerpc: Make sure we have an RTC before trying to adjust it 
powerpc: some prom.c cleanups powerpc: merge include/asm-ppc*/checksum.h into include/asm-powerpc/checksum.h powerpc: Add support for Book-E timer config to generic_calibrate_decr powerpc: Some minor cleanups to setup_32.c powerpc: Fix warning related to do_dabr powerpc: Moved dcr support to arch/powerpc powerpc: only build idle_6xx for 6xx Lee Nicks: ppc: prevent GCC 4 from generating AltiVec instructions in kernel linuxppc at jdl.com: powerpc: Revised merge asm-ppc*/hardirq.h powerpc: Merge asm-ppc*/vga.h Marcelo Tosatti: ppc32 8xx: use io accessor macros instead of direct memory reference MPC8xx PCMCIA driver ppc32: #ifdef out ALTIVEC specific code in __switch_to Matt Porter: ppc32: Cleanup AMCC PPC44x eval board U-Boot support Michael Ellerman: ppc64 iSeries: Move iSeries ppc_md functions into a machdep_calls struct ppc46 iSeries: Make some generic irq code compile for iSeries ppc64 iSeries: Update create_pte_mapping to replace iSeries_bolt_kernel() ppc64 iSeries: Make stab_initialize() work on iSeries ppc64 iSeries: Make smp_release_cpus() callable on iSeries ppc64 iSeries: Create a fake flat device tree on iSeries ppc64 iSeries: Call early_setup() on iSeries ppc64 iSeries: Move memory setup into iSeries device tree ppc64 iSeries: Move setup of systemcfg->platform into iSeries device tree ppc64 iSeries: Define /cpus in iSeries device tree powerpc: Don't blow away load_addr in start_thread powerpc: Fix mmap returning 64 bit addresses powerpc: Remove duplicate definition of set_tb() powerpc: Remove trailing \n" in HMT macros powerpc: Move firmware.h into include/asm-powerpc powerpc: Move ras.c into arch/powerpc/platforms/pseries Nicolas DET: chrp_pegasos_eth: Added Marvell Discovery II SRAM support mv643xx_eth_showsram: Added information message when using the SRAM Olaf Hering: ppc32: update xmon help text ppc32: nvram driver for chrp Add modalias for pmac network drivers ppc64 boot: remove include from lib/zlib_inflate/inflate.c ppc64 boot: remove include from 
include/linux/zutil.h ppc64 boot: missing include for size_t ppc64 boot: remove zlib ppc64 boot: remove need for imagesize.c ppc64 boot: move gunzip function before use ppc64 boot: bootfiles depend on linker script ppc64 boot: cleanup linker script ppc64 boot: use memset to clear bss ppc64 boot: fix typo in asm comments ppc64 boot: remove global initializers ppc64 boot: make the zImage relocateable ppc64 boot: proof that reloc works ppc64 boot: print firmware provided stackpointer ppc64: AC Power handling broken for desktops ppc64: compile nls_cp437 and nls_iso8859_1 into the kernel in defconfig ppc64: reenable make install with defconfig ppc64: change name of target file during make install ppc64: remove duplicate local variable in set_preferred_console Olof Johansson: ppc64: Updated Olof iommu updates 1/3 ppc64: Updated Olof iommu updates 2/3 ppc64: Updated Olof misc updates 3/3 Paul Mackerras: Revert "ppc32: Allow user to individual select CHRP/PMAC/PREP config" Merge from Linus' tree. powerpc: Merge enough to start building in arch/powerpc. Merge refs/heads/devtree from rsync://oak/kernels/iseries/work/.git ppc64: Fix typo in iommu cleanups that broke pmac build. powerpc: Fix building in the old arch's boot directory for now powerpc: Fixes to get the merged kernel to boot on powermac. Don't call a NULL ack function in the generic IRQ code. Merge Stephen Rothwell's patches Merge by hand from Linus' tree. powerpc: Get merged kernel to compile and run on 32-bit SMP powermac. Merge rsync://ozlabs.org/sfr-git/for-paulus/ ppc32: export a few more things where they are defined ppc32: remove obsolete klock_info definition powerpc: Define 32/64 bit asm macros and use them in fpu.S powerpc: Merge in the ppc64 version of the prom code. powerpc: Merge lmb.c and make MM initialization use it. powerpc: Remove 64-bit cpu support from ppc32. 
powerpc: Merge of_device.c and of_device.h powerpc: Use the merged of_device.c with ARCH=powerpc powerpc: Merge traps.c a bit more powerpc: Define a _sdata symbol powerpc: Fix idle.c compile warning powerpc: Use SPRN_xxx rather than xxx for SPR numbers powerpc: Make some #includes explicit. powerpc: Merged processor.h. ppc64: Use the merged lmb routines powerpc: Rename files to have consistent _32/_64 suffixes powerpc: Merge arch/ppc64/mm to arch/powerpc/mm powerpc: move pSeries files to arch/powerpc/platforms/pseries ppc64: Use SPRN_ prefix for special purpose register names powerpc: rename powermac files to remove pmac_ prefix powerpc: Merge Kconfig.debug powerpc: Use reg.h instead of processor.h when we just want reg names powerpc: Use arch/powerpc/mm and arch/powerpc/lib for 64-bit powerpc: make process.c suitable for both 32-bit and 64-bit powerpc: Introduce entry_{32,64}.S, misc_{32,64}.S, systbl.S powerpc: Make prom_init.c suitable for both 32-bit and 64-bit powerpc: Reduce the 32/64-bit differences in traps.c powerpc: Reduce the 32/64-bit diffs in vmlinux.lds.S powerpc: Fix bug caused by negation of 64-bit reloc_offset value powerpc: move lparmap.c to arch/powerpc/kernel powerpc: Get 64-bit configs to compile with ARCH=powerpc powerpc: Get iseries to compile with ARCH=powerpc powerpc: Merge asm/irq.h powerpc: Merge asm/unistd.h powerpc: Fix off-by-one error in prom_init.c powerpc: Fix compilation for 32-bit configs powerpc: Start merging 64-bit support into powermac files powerpc: Make building the boot image work for both 32-bit and 64-bit powerpc: Merged asm/backlight.h powerpc: Merged asm/i8259.h powerpc: Remove 83xx from arch/powerpc/platforms/Makefile for now powerpc: Merged asm/xmon.h powerpc: Remove xmon.h include from arch/powerpc/platforms/powermac/pic.c powerpc: Remove debug messages from setup_64.c ppc: Adapt to asm-powerpc/irq.h irq_canonicalize changes ppc: Various minor compile fixes ppc64: Use merged versions of init_task.c and process.c. 
ppc64: compile fix - define execve in misc.S Merge from Linus' tree powerpc: Move default hash table size calculation to hash_utils_64.c powerpc: Bring in some changes made to arch/ppc and include/asm-ppc64 ppc64: Remove duplicate versions of some headers powerpc: Initialize btext subsystem later, after prom_init powerpc: Merge syscalls.c and sys_ppc32.c. ppc: Use the merged of_device.c from arch/powerpc/kernel ppc: Fix various compile errors resulting from ptrace.c merge powerpc: Clear the BSS at the start of early_init with ARCH=ppc powerpc: Make CONFIG_PROC_DEVICETREE independent of CONFIG_PPC_OF powerpc: Fix various compile errors with ARCH=ppc, ppc64 and powerpc Merge rsync://oak/kernels/iseries/work/ ppc: Minor smp changes for consistency with ppc64 powerpc: Merge machdep.h ppc: declare smp_ops in asm/smp.h, since platform setup code needs it powerpc: Eliminate a compile warning in signal_32.c powerpc: Move ptrace32.c from arch/ppc64 to arch/powerpc ppc64: Minor compilation fixes powerpc: Merge time.c and asm/time.h. powerpc: Fix a branch-too-far link error for 32-bit targets powerpc: Fix a corner case in __div64_32 ppc64: Change ppc_md.get_cpuinfo to ppc_md.show_cpuinfo powerpc: Move smp_mpic_message_pass into mpic.c powerpc: Fix places where ppc_md.show_[per]cpuinfo was treated as int powerpc: Merge various powermac-related header files. ppc: rename pci_assign_all_busses to pci_assign_all_buses ppc64: Move init_boot_text call and conswitchp init into setup_arch powerpc: Move some calculations from xxx_calibrate_decr to time_init ppc64: Use the merged mpic.c powerpc/ppc/ppc64: Various compile fixes. powerpc: Fix some bugs in the new merged time code powerpc: Move agp_special_page export to where it is defined ppc64: Fix delivery of RT signals to 32-bit processes. 
ppc64/powerpc: Fix time initialization on SMP systems ppc64: Add a `primary' argument to pci_process_bridge_OF_ranges ppc64: Rearrange btext initialization for consistency with ppc32 ppc64: Simplify secondary CPU startup on powermacs powerpc: Make set_rtc_time() return error code from lower-level function powerpc: Merge in 64-bit powermac support. ppc64: Use arch/powerpc/platforms/powermac for powermac build. Merge changes from linux-2.6 by hand powerpc: Fix time code for 601 processors powerpc: Run on old powermacs. powerpc: Make coff boot wrapper load the kernel at 8M ppc: Use the indirect_pci.c from arch/powerpc/sysdev powerpc: Merge i8259.c into arch/powerpc/sysdev powerpc: Merge rtas.c into arch/powerpc/kernel powerpc: Pull common bits of setup_{32,64}.c into setup-common.c powerpc: Pull out MPC106 (grackle) initialization code into its own file powerpc: Fix interrupt-tree parsing powerpc: Don't limit pmac_get_rtc_time to return only positive values powerpc: Merge 32-bit CHRP support. powerpc: Fixes to get the Longtrail CHRP a bit further power: Update the multiple inclusion protection symbol on machdep.h powerpc32: Limit memory to lowmem if !CONFIG_HIGHMEM. 
powerpc: Fix incorrect timer register addresses in mpic.c powerpc: 32-bit powermac needs the mpc106 code powerpc: Remove common stuff from setup_64.c powerpc: undeprecate the old OF device tree accessors for now ppc64: remove arch/ppc64/kernel/setup.c powerpc: remove duplicate screen_info from setup_32.c powerpc: 32-bit CHRP SMP fixes powerpc: Introduce toreal/fromreal assembly macros powerpc: Fix new-world powermac detection powerpc: Move U3 IOMMU driver to arch/powerpc/sysdev ppc64: Include arch/powerpc/kernel/setup-common.o ppc64: Use the correct prototypes for i8259 functions Merge in v2.6.14 by hand Merge git://oak/home/sfr/kernels/iseries/work/ powerpc: Rename asm offset TRAP to _TRAP for 32-bit powerpc: Make single-stepping emulation (mostly) usable on 32-bit powerpc: Merge xmon powerpc: Add -mno-altivec for ARCH=powerpc builds powerpc: Merge maple support code to arch/powerpc/platforms/maple powerpc: 32-bit needs cur_cpu_spec exported too powerpc: import a gfp_t fix to arch/powerpc/mm/pgtable_32.c powerpc: Remove T command from xmon help text since it no longer exists powerpc: Fix time setting bug on 32-bit powerpc: Fix bug arising from having multiple memory_limit variables ppc: remove duplicate export of cur_cpu_spec Merge ../linux-2.6 by hand powerpc: apply recent changes to merged code powerpc: import a fix from arch/ppc/mm/pgtable.c Roland Dreier: ppc: make phys_mem_access_prot() work with pfns instead of addresses scwhab at suse.de: Add modalias to macio sysfs attributes Stephen Rothwell: powerpc: Move arch/ppc*/kernel/vecemu.c to arch/powerpc powerpc: Merge include/asm-ppc*/a.out.h into include/asm-powerpc powerpc: Move arch/ppc*/oprofile/Kconfig to arch/powerpc Merge arch/ppc*/oprofile/Makefile into arch/powerpc/oprofile powerpc: rename op_ppc{32,64}_model to op_powerpc_model powerpc: merge oprofile headers powerpc: merge the rest of arch/ppc*/oprofile powerpc: reduce oprofile/common.c differences ppc64 iSeries: Don't create linux,boot-cpu powerpc: 
clean up after powermac build merge ppc64 g5: merge tree build fix powerpc: Create arch/powerpc/platforms/iseries powerpc: Move HvLpConfig.c to powerpc arch powerpc: Move LparData.c to powerpc platforms powerpc: move iSeries_setup.[ch] and mf.c into platforms/iseries powerpc: move ItLpQueue.c to powerpc/platforms/iseries powerpc: move hvCall.s to powerpc/platforms/iseries powerpc: Merge HvLpEvent.c into lpevents.c powerpc: move iSeries_proc.c to powerpc/platforms/iseries powerpc: Move iSeries_htab.c to powerpc/platforms/iseries powerpc: Move iSeries_iommu.c to powerpc/platforms/iseries powerpc: Move iSeries_pci.c to powerpc/platform/iseries powerpc: Move iSeries_irq.c to powerpc/platorms/iseries powerpc: Move iSeries_VpdInfo.c to powerpc/platforms/iseries powerpc: Move iSeries_vio.c to powerpc/platforms/iseries powerpc: Move iSeries_smp.c to powerpc/platforms/iseries powerpc: Move viopath.c to powerpc/platforms/iseries powerpc: Make powerpc pmac 32 bit build again powerpc: move more iSeries code ppc64 iSeries: use device_node instead of iSeries_Device_node ppc64 iseries: move some iSeries include files powerpc: merge asm-offsets.c powerpc: remove old vector.S files powerpc: more cleanup of powerpc/kernel powerpc: Move lparmap.c to powerpc/platforms powerpc: merge idle_power4.S and trapc.s ppc64: simplify the build a little powerpc: make iSeries build powerpc: make iSeries boot powerpc: pci_dn's should point to their device_node's powerpc: make iSeries boot again powerpc: consolidate cputable.c powerpc: create 32 bit LOADADDR macro powerpc: make 64 bit binaries work ppc64: merge binfmt_elf32.c ppc64: fix arch/ppc64/kernel/Makefile ppc64: use powerpc of_device.c powerpc: merge ptrace.c powerpc: fix uname -m powerpc: move iSeries/iSeries_pci.h to platforms/iseries powerpc: remove ISERIES_[SUB]BUS macros powerpc: eliminate DsaAddr from pci_dn powerpc: move iSeries/HvCallHpt.h to platforms/iseries/call_hpt.h powerpc: move iSeries/HvCallPci.h to 
platforms/iseries/call_pci.h powerpc: fix 32bit LOADADDR macro ppc32: use L1_CACHE_SHIFT/L1_CACHE_BYTES powerpc: merge ppc signal.c and ppc64 signal32.c powerpc: change sys32_ to compat_sys_ powerpc: Merge arch/ppc64/kernel/vio.c into arch/powerpc/kernel/vio.c powerpc: Move include/asm-ppc64/vio.h to include/asm-powerpc/vio.h powerpc: don't duplicate name between vio_driver and device_driver powerpc: iseries: Fix a bogus comment powerpc: set the driver.owner field for all vio drivers powerpc: Add a shutdown member to vio_driver ppc64: make dma_addr_t 64 bits powerpc: merge scatterlist.h ppc64: use the merged syscall table ppc64: use checksum_64.S from powerpc ppc64: use e2a.c from powerpc/lib ppc64: use copypage_64.S from powerpc/lib ppc64: use copyuser_64.S from powerpc/lib ppc64: use lockc.c from powerpc/lib ppc64: use memcpy_64.S from powerpc/lib ppc64: use sstep.c from powerpc/lib ppc64: user strcase.c from powerpc/lib ppc64: use usercopy_64.c from powerpc/lib ppc64: use mem_64.S from powerpc/lib Vitaly Bordug: ppc32: ppc_sys fixes for 8xx and 82xx From david at gibson.dropbear.id.au Mon Oct 31 17:48:23 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Mon, 31 Oct 2005 17:48:23 +1100 Subject: powerpc: Merge bitops.h Message-ID: <20051031064823.GD6622@localhost.localdomain> Paulus, here is the patch to merge bitops.h and bitops.c. I booted this on Power5, but only against an older version of the merge tree. I've built it for ARCH=ppc, ARCH=ppc64 and ARCH=powerpc, but not booted. As such, it probably needs more testing before being merged. However, I thought I'd send it out so there's a chance for some more eyes to find problems. Notable changes: - We use LARXL/STCXL macros to generate the right (32 or 64 bit) instructions, similar to LDL/STL from ppc_asm.h, used in fpu.S - ppc32 previously used a full "sync" barrier at the end of test_and_*_bit(), whereas ppc64 used an "isync". The merged version uses "isync", since I believe that's sufficient. 
- The ppc64 versions of the minix_*() bitmap functions have changed semantics. Previously on ppc64, these functions were big-endian (that is bit 0 was the LSB in the first 64-bit, big-endian word). On ppc32 (and x86, for that matter) they were little-endian. As far as I can tell, the big-endian usage was simply wrong - I guess no-one ever tried to use minixfs on ppc64. - On ppc32 find_next_bit() and find_next_zero_bit() are no longer inline (they were already out-of-line on ppc64). - For ppc64, sched_find_first_bit() has moved from mmu_context.h to the merged bitops. What it was doing in mmu_context.h in the first place, I have no idea. - The fls() function is now implemented using the cntlzw instruction on ppc64, instead of generic_fls(), as it already was on ppc32. Signed-off-by: David Gibson Index: working-2.6/include/asm-ppc/bitops.h =================================================================== --- working-2.6.orig/include/asm-ppc/bitops.h 2005-10-25 11:59:59.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,460 +0,0 @@ -/* - * bitops.h: Bit string operations on the ppc - */ - -#ifdef __KERNEL__ -#ifndef _PPC_BITOPS_H -#define _PPC_BITOPS_H - -#include -#include -#include -#include - -/* - * The test_and_*_bit operations are taken to imply a memory barrier - * on SMP systems. - */ -#ifdef CONFIG_SMP -#define SMP_WMB "eieio\n" -#define SMP_MB "\nsync" -#else -#define SMP_WMB -#define SMP_MB -#endif /* CONFIG_SMP */ - -static __inline__ void set_bit(int nr, volatile unsigned long * addr) -{ - unsigned long old; - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - __asm__ __volatile__("\n\ -1: lwarx %0,0,%3 \n\ - or %0,%0,%2 \n" - PPC405_ERR77(0,%3) -" stwcx. 
%0,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc" ); -} - -/* - * non-atomic version - */ -static __inline__ void __set_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - *p |= mask; -} - -/* - * clear_bit doesn't imply a memory barrier - */ -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() smp_mb() - -static __inline__ void clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long old; - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - __asm__ __volatile__("\n\ -1: lwarx %0,0,%3 \n\ - andc %0,%0,%2 \n" - PPC405_ERR77(0,%3) -" stwcx. %0,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); -} - -/* - * non-atomic version - */ -static __inline__ void __clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - *p &= ~mask; -} - -static __inline__ void change_bit(int nr, volatile unsigned long *addr) -{ - unsigned long old; - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - __asm__ __volatile__("\n\ -1: lwarx %0,0,%3 \n\ - xor %0,%0,%2 \n" - PPC405_ERR77(0,%3) -" stwcx. %0,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); -} - -/* - * non-atomic version - */ -static __inline__ void __change_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - *p ^= mask; -} - -/* - * test_and_*_bit do imply a memory barrier (?) 
- */ -static __inline__ int test_and_set_bit(int nr, volatile unsigned long *addr) -{ - unsigned int old, t; - unsigned int mask = 1 << (nr & 0x1f); - volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%4 \n\ - or %1,%0,%3 \n" - PPC405_ERR77(0,%4) -" stwcx. %1,0,%4 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=&r" (t), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc", "memory"); - - return (old & mask) != 0; -} - -/* - * non-atomic version - */ -static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - unsigned long old = *p; - - *p = old | mask; - return (old & mask) != 0; -} - -static __inline__ int test_and_clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned int old, t; - unsigned int mask = 1 << (nr & 0x1f); - volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%4 \n\ - andc %1,%0,%3 \n" - PPC405_ERR77(0,%4) -" stwcx. %1,0,%4 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=&r" (t), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc", "memory"); - - return (old & mask) != 0; -} - -/* - * non-atomic version - */ -static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - unsigned long old = *p; - - *p = old & ~mask; - return (old & mask) != 0; -} - -static __inline__ int test_and_change_bit(int nr, volatile unsigned long *addr) -{ - unsigned int old, t; - unsigned int mask = 1 << (nr & 0x1f); - volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%4 \n\ - xor %1,%0,%3 \n" - PPC405_ERR77(0,%4) -" stwcx. 
%1,0,%4 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=&r" (t), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc", "memory"); - - return (old & mask) != 0; -} - -/* - * non-atomic version - */ -static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - unsigned long old = *p; - - *p = old ^ mask; - return (old & mask) != 0; -} - -static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr) -{ - return ((addr[nr >> 5] >> (nr & 0x1f)) & 1) != 0; -} - -/* Return the bit position of the most significant 1 bit in a word */ -static __inline__ int __ilog2(unsigned long x) -{ - int lz; - - asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x)); - return 31 - lz; -} - -static __inline__ int ffz(unsigned long x) -{ - if ((x = ~x) == 0) - return 32; - return __ilog2(x & -x); -} - -static inline int __ffs(unsigned long x) -{ - return __ilog2(x & -x); -} - -/* - * ffs: find first bit set. This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). - */ -static __inline__ int ffs(int x) -{ - return __ilog2(x & -x) + 1; -} - -/* - * fls: find last (most-significant) bit set. - * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. - */ -static __inline__ int fls(unsigned int x) -{ - int lz; - - asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x)); - return 32 - lz; -} - -/* - * hweightN: returns the hamming weight (i.e. the number - * of bits set) of a N-bit word - */ - -#define hweight32(x) generic_hweight32(x) -#define hweight16(x) generic_hweight16(x) -#define hweight8(x) generic_hweight8(x) - -/* - * Find the first bit set in a 140-bit bitmap. - * The first 100 bits are unlikely to be set. 
- */ -static inline int sched_find_first_bit(const unsigned long *b) -{ - if (unlikely(b[0])) - return __ffs(b[0]); - if (unlikely(b[1])) - return __ffs(b[1]) + 32; - if (unlikely(b[2])) - return __ffs(b[2]) + 64; - if (b[3]) - return __ffs(b[3]) + 96; - return __ffs(b[4]) + 128; -} - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -static __inline__ unsigned long find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset) -{ - unsigned int *p = ((unsigned int *) addr) + (offset >> 5); - unsigned int result = offset & ~31UL; - unsigned int tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 31UL; - if (offset) { - tmp = *p++; - tmp &= ~0UL << offset; - if (size < 32) - goto found_first; - if (tmp) - goto found_middle; - size -= 32; - result += 32; - } - while (size >= 32) { - if ((tmp = *p++) != 0) - goto found_middle; - result += 32; - size -= 32; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= ~0UL >> (32 - size); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); -} - -/** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit-number of the first set bit, not the number of the byte - * containing a bit. - */ -#define find_first_bit(addr, size) \ - find_next_bit((addr), (size), 0) - -/* - * This implementation of find_{first,next}_zero_bit was stolen from - * Linus' asm-alpha/bitops.h. 
- */ -#define find_first_zero_bit(addr, size) \ - find_next_zero_bit((addr), (size), 0) - -static __inline__ unsigned long find_next_zero_bit(const unsigned long *addr, - unsigned long size, unsigned long offset) -{ - unsigned int * p = ((unsigned int *) addr) + (offset >> 5); - unsigned int result = offset & ~31UL; - unsigned int tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 31UL; - if (offset) { - tmp = *p++; - tmp |= ~0UL >> (32-offset); - if (size < 32) - goto found_first; - if (tmp != ~0U) - goto found_middle; - size -= 32; - result += 32; - } - while (size >= 32) { - if ((tmp = *p++) != ~0U) - goto found_middle; - result += 32; - size -= 32; - } - if (!size) - return result; - tmp = *p; -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + ffz(tmp); -} - - -#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr)) -#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr)) -#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr)) -#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr)) - -static __inline__ int ext2_test_bit(int nr, __const__ void * addr) -{ - __const__ unsigned char *ADDR = (__const__ unsigned char *) addr; - - return (ADDR[nr >> 3] >> (nr & 7)) & 1; -} - -/* - * This implementation of ext2_find_{first,next}_zero_bit was stolen from - * Linus' asm-alpha/bitops.h and modified for a big-endian machine. 
- */ - -#define ext2_find_first_zero_bit(addr, size) \ - ext2_find_next_zero_bit((addr), (size), 0) - -static __inline__ unsigned long ext2_find_next_zero_bit(const void *addr, - unsigned long size, unsigned long offset) -{ - unsigned int *p = ((unsigned int *) addr) + (offset >> 5); - unsigned int result = offset & ~31UL; - unsigned int tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 31UL; - if (offset) { - tmp = cpu_to_le32p(p++); - tmp |= ~0UL >> (32-offset); - if (size < 32) - goto found_first; - if (tmp != ~0U) - goto found_middle; - size -= 32; - result += 32; - } - while (size >= 32) { - if ((tmp = cpu_to_le32p(p++)) != ~0U) - goto found_middle; - result += 32; - size -= 32; - } - if (!size) - return result; - tmp = cpu_to_le32p(p); -found_first: - tmp |= ~0U << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + ffz(tmp); -} - -/* Bitmap functions for the minix filesystem. */ -#define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr) -#define minix_set_bit(nr,addr) ((void)ext2_set_bit(nr,addr)) -#define minix_test_and_clear_bit(nr,addr) ext2_clear_bit(nr,addr) -#define minix_test_bit(nr,addr) ext2_test_bit(nr,addr) -#define minix_find_first_zero_bit(addr,size) ext2_find_first_zero_bit(addr,size) - -#endif /* _PPC_BITOPS_H */ -#endif /* __KERNEL__ */ Index: working-2.6/include/asm-ppc64/bitops.h =================================================================== --- working-2.6.orig/include/asm-ppc64/bitops.h 2005-10-31 15:20:22.000000000 +1100 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,360 +0,0 @@ -/* - * PowerPC64 atomic bit operations. - * Dave Engebretsen, Todd Inglett, Don Reed, Pat McCarthy, Peter Bergner, - * Anton Blanchard - * - * Originally taken from the 32b PPC code. Modified to use 64b values for - * the various counters & memory references. - * - * Bitops are odd when viewed on big-endian systems. 
They were designed - * on little endian so the size of the bitset doesn't matter (low order bytes - * come first) as long as the bit in question is valid. - * - * Bits are "tested" often using the C expression (val & (1< - -/* - * clear_bit doesn't imply a memory barrier - */ -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() smp_mb() - -static __inline__ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr) -{ - return (1UL & (addr[nr >> 6] >> (nr & 63))); -} - -static __inline__ void set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long old; - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - __asm__ __volatile__( -"1: ldarx %0,0,%3 # set_bit\n\ - or %0,%0,%2\n\ - stdcx. %0,0,%3\n\ - bne- 1b" - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); -} - -static __inline__ void clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long old; - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - __asm__ __volatile__( -"1: ldarx %0,0,%3 # clear_bit\n\ - andc %0,%0,%2\n\ - stdcx. %0,0,%3\n\ - bne- 1b" - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); -} - -static __inline__ void change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long old; - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - __asm__ __volatile__( -"1: ldarx %0,0,%3 # change_bit\n\ - xor %0,%0,%2\n\ - stdcx. 
%0,0,%3\n\ - bne- 1b" - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); -} - -static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long old, t; - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: ldarx %0,0,%3 # test_and_set_bit\n\ - or %1,%0,%2 \n\ - stdcx. %1,0,%3 \n\ - bne- 1b" - ISYNC_ON_SMP - : "=&r" (old), "=&r" (t) - : "r" (mask), "r" (p) - : "cc", "memory"); - - return (old & mask) != 0; -} - -static __inline__ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long old, t; - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: ldarx %0,0,%3 # test_and_clear_bit\n\ - andc %1,%0,%2\n\ - stdcx. %1,0,%3\n\ - bne- 1b" - ISYNC_ON_SMP - : "=&r" (old), "=&r" (t) - : "r" (mask), "r" (p) - : "cc", "memory"); - - return (old & mask) != 0; -} - -static __inline__ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long old, t; - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: ldarx %0,0,%3 # test_and_change_bit\n\ - xor %1,%0,%2\n\ - stdcx. %1,0,%3\n\ - bne- 1b" - ISYNC_ON_SMP - : "=&r" (old), "=&r" (t) - : "r" (mask), "r" (p) - : "cc", "memory"); - - return (old & mask) != 0; -} - -static __inline__ void set_bits(unsigned long mask, unsigned long *addr) -{ - unsigned long old; - - __asm__ __volatile__( -"1: ldarx %0,0,%3 # set_bit\n\ - or %0,%0,%2\n\ - stdcx. 
%0,0,%3\n\ - bne- 1b" - : "=&r" (old), "=m" (*addr) - : "r" (mask), "r" (addr), "m" (*addr) - : "cc"); -} - -/* - * non-atomic versions - */ -static __inline__ void __set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - *p |= mask; -} - -static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - *p &= ~mask; -} - -static __inline__ void __change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - - *p ^= mask; -} - -static __inline__ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - unsigned long old = *p; - - *p = old | mask; - return (old & mask) != 0; -} - -static __inline__ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - unsigned long old = *p; - - *p = old & ~mask; - return (old & mask) != 0; -} - -static __inline__ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long mask = 1UL << (nr & 0x3f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 6); - unsigned long old = *p; - - *p = old ^ mask; - return (old & mask) != 0; -} - -/* - * Return the zero-based bit position (from RIGHT TO LEFT, 63 -> 0) of the - * most significant (left-most) 1-bit in a double word. - */ -static __inline__ int __ilog2(unsigned long x) -{ - int lz; - - asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x)); - return 63 - lz; -} - -/* - * Determines the bit position of the least significant (rightmost) 0 bit - * in the specified double word. 
The returned bit position will be zero-based, - * starting from the right side (63 - 0). - */ -static __inline__ unsigned long ffz(unsigned long x) -{ - /* no zero exists anywhere in the 8 byte area. */ - if ((x = ~x) == 0) - return 64; - - /* - * Calculate the bit position of the least signficant '1' bit in x - * (since x has been changed this will actually be the least signficant - * '0' bit in * the original x). Note: (x & -x) gives us a mask that - * is the least significant * (RIGHT-most) 1-bit of the value in x. - */ - return __ilog2(x & -x); -} - -static __inline__ int __ffs(unsigned long x) -{ - return __ilog2(x & -x); -} - -/* - * ffs: find first bit set. This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). - */ -static __inline__ int ffs(int x) -{ - unsigned long i = (unsigned long)x; - return __ilog2(i & -i) + 1; -} - -/* - * fls: find last (most-significant) bit set. - * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. - */ -#define fls(x) generic_fls(x) - -/* - * hweightN: returns the hamming weight (i.e. 
the number - * of bits set) of a N-bit word - */ -#define hweight64(x) generic_hweight64(x) -#define hweight32(x) generic_hweight32(x) -#define hweight16(x) generic_hweight16(x) -#define hweight8(x) generic_hweight8(x) - -extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset); -#define find_first_zero_bit(addr, size) \ - find_next_zero_bit((addr), (size), 0) - -extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); -#define find_first_bit(addr, size) \ - find_next_bit((addr), (size), 0) - -extern unsigned long find_next_zero_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset); -#define find_first_zero_le_bit(addr, size) \ - find_next_zero_le_bit((addr), (size), 0) - -static __inline__ int test_le_bit(unsigned long nr, __const__ unsigned long * addr) -{ - __const__ unsigned char *ADDR = (__const__ unsigned char *) addr; - return (ADDR[nr >> 3] >> (nr & 7)) & 1; -} - -#define test_and_clear_le_bit(nr, addr) \ - test_and_clear_bit((nr) ^ 0x38, (addr)) -#define test_and_set_le_bit(nr, addr) \ - test_and_set_bit((nr) ^ 0x38, (addr)) - -/* - * non-atomic versions - */ - -#define __set_le_bit(nr, addr) \ - __set_bit((nr) ^ 0x38, (addr)) -#define __clear_le_bit(nr, addr) \ - __clear_bit((nr) ^ 0x38, (addr)) -#define __test_and_clear_le_bit(nr, addr) \ - __test_and_clear_bit((nr) ^ 0x38, (addr)) -#define __test_and_set_le_bit(nr, addr) \ - __test_and_set_bit((nr) ^ 0x38, (addr)) - -#define ext2_set_bit(nr,addr) \ - __test_and_set_le_bit((nr), (unsigned long*)addr) -#define ext2_clear_bit(nr, addr) \ - __test_and_clear_le_bit((nr), (unsigned long*)addr) - -#define ext2_set_bit_atomic(lock, nr, addr) \ - test_and_set_le_bit((nr), (unsigned long*)addr) -#define ext2_clear_bit_atomic(lock, nr, addr) \ - test_and_clear_le_bit((nr), (unsigned long*)addr) - - -#define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr) -#define 
ext2_find_first_zero_bit(addr, size) \ - find_first_zero_le_bit((unsigned long*)addr, size) -#define ext2_find_next_zero_bit(addr, size, off) \ - find_next_zero_le_bit((unsigned long*)addr, size, off) - -#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr) -#define minix_set_bit(nr,addr) set_bit(nr,addr) -#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr) -#define minix_test_bit(nr,addr) test_bit(nr,addr) -#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) - -#endif /* __KERNEL__ */ -#endif /* _PPC64_BITOPS_H */ Index: working-2.6/arch/powerpc/lib/bitops.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ working-2.6/arch/powerpc/lib/bitops.c 2005-10-31 16:20:01.000000000 +1100 @@ -0,0 +1,150 @@ +#include +#include +#include +#include + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (64 - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. 
*/ +found_middle: + return result + __ffs(tmp); +} +EXPORT_SYMBOL(find_next_bit); + +/* + * This implementation of find_{first,next}_zero_bit was stolen from + * Linus' asm-alpha/bitops.h. + */ +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + ffz(tmp); +} +EXPORT_SYMBOL(find_next_zero_bit); + +static inline unsigned int ext2_ilog2(unsigned int x) +{ + int lz; + + asm("cntlzw %0,%1": "=r"(lz):"r"(x)); + return 31 - lz; +} + +static inline unsigned int ext2_ffz(unsigned int x) +{ + u32 rc; + if ((x = ~x) == 0) + return 32; + rc = ext2_ilog2(x & -x); + return rc; +} + +unsigned long find_next_zero_le_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + const unsigned int *p = ((const unsigned int *)addr) + (offset >> 5); + unsigned int result = offset & ~31; + unsigned int tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 31; + if (offset) { + tmp = cpu_to_le32p(p++); + tmp |= ~0U >> (32 - offset); /* bug or feature ? 
*/ + if (size < 32) + goto found_first; + if (tmp != ~0) + goto found_middle; + size -= 32; + result += 32; + } + while (size >= 32) { + if ((tmp = cpu_to_le32p(p++)) != ~0) + goto found_middle; + result += 32; + size -= 32; + } + if (!size) + return result; + tmp = cpu_to_le32p(p); +found_first: + tmp |= ~0 << size; + if (tmp == ~0) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + ext2_ffz(tmp); +} +EXPORT_SYMBOL(find_next_zero_le_bit); Index: working-2.6/arch/ppc64/kernel/bitops.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/bitops.c 2005-10-25 11:59:53.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,147 +0,0 @@ -/* - * These are too big to be inlined. - */ - -#include -#include -#include -#include - -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + (offset >> 6); - unsigned long result = offset & ~63UL; - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 63UL; - if (offset) { - tmp = *(p++); - tmp |= ~0UL >> (64 - offset); - if (size < 64) - goto found_first; - if (~tmp) - goto found_middle; - size -= 64; - result += 64; - } - while (size & ~63UL) { - if (~(tmp = *(p++))) - goto found_middle; - result += 64; - size -= 64; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. 
*/ -found_middle: - return result + ffz(tmp); -} - -EXPORT_SYMBOL(find_next_zero_bit); - -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + (offset >> 6); - unsigned long result = offset & ~63UL; - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 63UL; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < 64) - goto found_first; - if (tmp) - goto found_middle; - size -= 64; - result += 64; - } - while (size & ~63UL) { - if ((tmp = *(p++))) - goto found_middle; - result += 64; - size -= 64; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (64 - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); -} - -EXPORT_SYMBOL(find_next_bit); - -static inline unsigned int ext2_ilog2(unsigned int x) -{ - int lz; - - asm("cntlzw %0,%1": "=r"(lz):"r"(x)); - return 31 - lz; -} - -static inline unsigned int ext2_ffz(unsigned int x) -{ - u32 rc; - if ((x = ~x) == 0) - return 32; - rc = ext2_ilog2(x & -x); - return rc; -} - -unsigned long find_next_zero_le_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned int *p = ((const unsigned int *)addr) + (offset >> 5); - unsigned int result = offset & ~31; - unsigned int tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 31; - if (offset) { - tmp = cpu_to_le32p(p++); - tmp |= ~0U >> (32 - offset); /* bug or feature ? */ - if (size < 32) - goto found_first; - if (tmp != ~0) - goto found_middle; - size -= 32; - result += 32; - } - while (size >= 32) { - if ((tmp = cpu_to_le32p(p++)) != ~0) - goto found_middle; - result += 32; - size -= 32; - } - if (!size) - return result; - tmp = cpu_to_le32p(p); -found_first: - tmp |= ~0 << size; - if (tmp == ~0) /* Are any bits zero? */ - return result + size; /* Nope. 
*/ -found_middle: - return result + ext2_ffz(tmp); -} - -EXPORT_SYMBOL(find_next_zero_le_bit); Index: working-2.6/arch/powerpc/kernel/ppc_ksyms.c =================================================================== --- working-2.6.orig/arch/powerpc/kernel/ppc_ksyms.c 2005-10-31 15:20:57.000000000 +1100 +++ working-2.6/arch/powerpc/kernel/ppc_ksyms.c 2005-10-31 16:20:01.000000000 +1100 @@ -81,15 +81,6 @@ EXPORT_SYMBOL(ucSystemType); #endif -#if !defined(__INLINE_BITOPS) -EXPORT_SYMBOL(set_bit); -EXPORT_SYMBOL(clear_bit); -EXPORT_SYMBOL(change_bit); -EXPORT_SYMBOL(test_and_set_bit); -EXPORT_SYMBOL(test_and_clear_bit); -EXPORT_SYMBOL(test_and_change_bit); -#endif /* __INLINE_BITOPS */ - EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); Index: working-2.6/arch/ppc/kernel/bitops.c =================================================================== --- working-2.6.orig/arch/ppc/kernel/bitops.c 2005-10-25 11:59:53.000000000 +1000 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,126 +0,0 @@ -/* - * Copyright (C) 1996 Paul Mackerras. - */ - -#include -#include - -/* - * If the bitops are not inlined in bitops.h, they are defined here. - * -- paulus - */ -#if !__INLINE_BITOPS -void set_bit(int nr, volatile void * addr) -{ - unsigned long old; - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%3 \n\ - or %0,%0,%2 \n" - PPC405_ERR77(0,%3) -" stwcx. %0,0,%3 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc" ); -} - -void clear_bit(int nr, volatile void *addr) -{ - unsigned long old; - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%3 \n\ - andc %0,%0,%2 \n" - PPC405_ERR77(0,%3) -" stwcx. 
%0,0,%3 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); -} - -void change_bit(int nr, volatile void *addr) -{ - unsigned long old; - unsigned long mask = 1 << (nr & 0x1f); - unsigned long *p = ((unsigned long *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%3 \n\ - xor %0,%0,%2 \n" - PPC405_ERR77(0,%3) -" stwcx. %0,0,%3 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); -} - -int test_and_set_bit(int nr, volatile void *addr) -{ - unsigned int old, t; - unsigned int mask = 1 << (nr & 0x1f); - volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%4 \n\ - or %1,%0,%3 \n" - PPC405_ERR77(0,%4) -" stwcx. %1,0,%4 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=&r" (t), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); - - return (old & mask) != 0; -} - -int test_and_clear_bit(int nr, volatile void *addr) -{ - unsigned int old, t; - unsigned int mask = 1 << (nr & 0x1f); - volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%4 \n\ - andc %1,%0,%3 \n" - PPC405_ERR77(0,%4) -" stwcx. %1,0,%4 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=&r" (t), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); - - return (old & mask) != 0; -} - -int test_and_change_bit(int nr, volatile void *addr) -{ - unsigned int old, t; - unsigned int mask = 1 << (nr & 0x1f); - volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5); - - __asm__ __volatile__(SMP_WMB "\n\ -1: lwarx %0,0,%4 \n\ - xor %1,%0,%3 \n" - PPC405_ERR77(0,%4) -" stwcx. 
%1,0,%4 \n\ - bne 1b" - SMP_MB - : "=&r" (old), "=&r" (t), "=m" (*p) - : "r" (mask), "r" (p), "m" (*p) - : "cc"); - - return (old & mask) != 0; -} -#endif /* !__INLINE_BITOPS */ Index: working-2.6/arch/ppc64/kernel/Makefile =================================================================== --- working-2.6.orig/arch/ppc64/kernel/Makefile 2005-10-31 15:20:57.000000000 +1100 +++ working-2.6/arch/ppc64/kernel/Makefile 2005-10-31 16:20:01.000000000 +1100 @@ -13,7 +13,7 @@ obj-y += irq.o idle.o dma.o \ signal.o \ - align.o bitops.o pacaData.o \ + align.o pacaData.o \ udbg.o ioctl32.o \ rtc.o \ cpu_setup_power4.o \ Index: working-2.6/include/asm-powerpc/bitops.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ working-2.6/include/asm-powerpc/bitops.h 2005-10-31 16:20:01.000000000 +1100 @@ -0,0 +1,423 @@ +/* + * PowerPC atomic bit operations. + * + * Merged version by David Gibson <david at gibson.dropbear.id.au>. + * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don + * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard. They + * originally took it from the ppc32 code. + * + * Within a word, bits are numbered LSB first. Lots of places make + * this assumption by directly testing bits with (val & (1<<nr)). + * This can cause confusion for large (> 1 word) bitmaps on a + * big-endian system because, unlike little endian, the number of each + * bit depends on the word size.
+ * + * The bitop functions are defined to work on unsigned longs, so for a + * ppc64 system the bits end up numbered: + * |63..............0|127............64|191...........128|255...........192| + * and on ppc32: + * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224| + * + * There are a few little-endian macros used mostly for filesystem + * bitmaps; these work on similar bit array layouts, but + * byte-oriented: + * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56| + * + * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit + * number field needs to be reversed compared to the big-endian bit + * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_BITOPS_H +#define _ASM_POWERPC_BITOPS_H + +#ifdef __KERNEL__ + +#include +#include +#include + +/* + * clear_bit doesn't imply a memory barrier + */ +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) + +#ifdef CONFIG_PPC64 +#define LARXL "ldarx" +#define STCXL "stdcx." +#define CNTLZL "cntlzd" +#else +#define LARXL "lwarx" +#define STCXL "stwcx."
+#define CNTLZL "cntlzw" +#endif + +static __inline__ void set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long old; + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + __asm__ __volatile__( +"1:" LARXL " %0,0,%3 # set_bit\n" + "or %0,%0,%2\n" + PPC405_ERR77(0,%3) + STCXL " %0,0,%3\n" + "bne- 1b" + : "=&r"(old), "=m"(*p) + : "r"(mask), "r"(p), "m"(*p) + : "cc" ); +} + +static __inline__ void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long old; + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + __asm__ __volatile__( +"1:" LARXL " %0,0,%3 # set_bit\n" + "andc %0,%0,%2\n" + PPC405_ERR77(0,%3) + STCXL " %0,0,%3\n" + "bne- 1b" + : "=&r"(old), "=m"(*p) + : "r"(mask), "r"(p), "m"(*p) + : "cc" ); +} + +static __inline__ void change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long old; + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + __asm__ __volatile__( +"1:" LARXL " %0,0,%3 # set_bit\n" + "xor %0,%0,%2\n" + PPC405_ERR77(0,%3) + STCXL " %0,0,%3\n" + "bne- 1b" + : "=&r"(old), "=m"(*p) + : "r"(mask), "r"(p), "m"(*p) + : "cc" ); +} + +static __inline__ int test_and_set_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long old, t; + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + __asm__ __volatile__( + EIEIO_ON_SMP +"1:" LARXL " %0,0,%3 # test_and_set_bit\n" + "or %1,%0,%2 \n" + PPC405_ERR77(0,%3) + STCXL " %1,0,%3 \n" + "bne- 1b" + ISYNC_ON_SMP + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static __inline__ int test_and_clear_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long old, t; + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + __asm__ __volatile__( + 
EIEIO_ON_SMP +"1:" LARXL " %0,0,%3 # test_and_clear_bit\n" + "andc %1,%0,%2 \n" + PPC405_ERR77(0,%3) + STCXL " %1,0,%3 \n" + "bne- 1b" + ISYNC_ON_SMP + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static __inline__ int test_and_change_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long old, t; + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + __asm__ __volatile__( + EIEIO_ON_SMP +"1:" LARXL " %0,0,%3 # test_and_change_bit\n" + "xor %1,%0,%2 \n" + PPC405_ERR77(0,%3) + STCXL " %1,0,%3 \n" + "bne- 1b" + ISYNC_ON_SMP + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + + return (old & mask) != 0; +} + +/* Non-atomic versions */ +static __inline__ int test_bit(unsigned long nr, + __const__ volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +static __inline__ void __set_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static __inline__ void __clear_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +static __inline__ void __change_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p ^= mask; +} + +static __inline__ int __test_and_set_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +static __inline__ int __test_and_clear_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long mask = 
BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +static __inline__ int __test_and_change_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/* + * Return the zero-based bit position (LE, not IBM bit numbering) of + * the most significant 1-bit in a double word. + */ +static __inline__ int __ilog2(unsigned long x) +{ + int lz; + + asm (CNTLZL " %0,%1" : "=r" (lz) : "r" (x)); + return BITS_PER_LONG - 1 - lz; +} + +/* + * Determines the bit position of the least significant 0 bit in the + * specified double word. The returned bit position will be + * zero-based, starting from the right side (63/31 - 0). + */ +static __inline__ unsigned long ffz(unsigned long x) +{ + /* no zero exists anywhere in the 8 byte area. */ + if ((x = ~x) == 0) + return BITS_PER_LONG; + + /* + * Calculate the bit position of the least significant '1' bit in x + * (since x has been changed this will actually be the least significant + * '0' bit in the original x). Note: (x & -x) gives us a mask that + * is the least significant (RIGHT-most) 1-bit of the value in x. + */ + return __ilog2(x & -x); +} + +static __inline__ int __ffs(unsigned long x) +{ + return __ilog2(x & -x); +} + +/* + * ffs: find first bit set. This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + */ +static __inline__ int ffs(int x) +{ + unsigned long i = (unsigned long)x; + return __ilog2(i & -i) + 1; +} + +/* + * fls: find last (most-significant) bit set. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */ +static __inline__ int fls(unsigned int x) +{ + int lz; + + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return 32 - lz; +} + +/* + * hweightN: returns the hamming weight (i.e. the number + * of bits set) of a N-bit word + */ +#define hweight64(x) generic_hweight64(x) +#define hweight32(x) generic_hweight32(x) +#define hweight16(x) generic_hweight16(x) +#define hweight8(x) generic_hweight8(x) + +#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +unsigned long find_next_zero_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit-number of the first set bit, not the number of the byte + * containing a bit. + */ +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); + +/* Little-endian versions */ + +static __inline__ int test_le_bit(unsigned long nr, + __const__ unsigned long *addr) +{ + __const__ unsigned char *tmp = (__const__ unsigned char *) addr; + return (tmp[nr >> 3] >> (nr & 7)) & 1; +} + +#define __set_le_bit(nr, addr) \ + __set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define __clear_le_bit(nr, addr) \ + __clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +#define test_and_set_le_bit(nr, addr) \ + test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define test_and_clear_le_bit(nr, addr) \ + test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +#define __test_and_set_le_bit(nr, addr) \ + __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define __test_and_clear_le_bit(nr, addr) \ + __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +#define find_first_zero_le_bit(addr, size) find_next_zero_le_bit((addr), (size), 0) +unsigned long find_next_zero_le_bit(const unsigned long *addr, + unsigned long size, unsigned long 
offset); + +/* Bitmap functions for the ext2 filesystem */ + +#define ext2_set_bit(nr,addr) \ + __test_and_set_le_bit((nr), (unsigned long*)addr) +#define ext2_clear_bit(nr, addr) \ + __test_and_clear_le_bit((nr), (unsigned long*)addr) + +#define ext2_set_bit_atomic(lock, nr, addr) \ + test_and_set_le_bit((nr), (unsigned long*)addr) +#define ext2_clear_bit_atomic(lock, nr, addr) \ + test_and_clear_le_bit((nr), (unsigned long*)addr) + +#define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr) + +#define ext2_find_first_zero_bit(addr, size) \ + find_first_zero_le_bit((unsigned long*)addr, size) +#define ext2_find_next_zero_bit(addr, size, off) \ + find_next_zero_le_bit((unsigned long*)addr, size, off) + +/* Bitmap functions for the minix filesystem. */ + +#define minix_test_and_set_bit(nr,addr) \ + __test_and_set_le_bit(nr, (unsigned long *)addr) +#define minix_set_bit(nr,addr) \ + __set_le_bit(nr, (unsigned long *)addr) +#define minix_test_and_clear_bit(nr,addr) \ + __test_and_clear_le_bit(nr, (unsigned long *)addr) +#define minix_test_bit(nr,addr) \ + test_le_bit(nr, (unsigned long *)addr) + +#define minix_find_first_zero_bit(addr,size) \ + find_first_zero_le_bit((unsigned long *)addr, size) + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 140-bit bitmap where the first 100 bits are + * unlikely to be set. It's guaranteed that at least one of the 140 + * bits is cleared. 
+ */ +static inline int sched_find_first_bit(const unsigned long *b) +{ +#ifdef CONFIG_PPC64 + if (unlikely(b[0])) + return __ffs(b[0]); + if (unlikely(b[1])) + return __ffs(b[1]) + 64; + return __ffs(b[2]) + 128; +#else + if (unlikely(b[0])) + return __ffs(b[0]); + if (unlikely(b[1])) + return __ffs(b[1]) + 32; + if (unlikely(b[2])) + return __ffs(b[2]) + 64; + if (b[3]) + return __ffs(b[3]) + 96; + return __ffs(b[4]) + 128; +#endif +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_POWERPC_BITOPS_H */ Index: working-2.6/include/asm-ppc64/mmu_context.h =================================================================== --- working-2.6.orig/include/asm-ppc64/mmu_context.h 2005-10-25 11:59:59.000000000 +1000 +++ working-2.6/include/asm-ppc64/mmu_context.h 2005-10-31 16:20:01.000000000 +1100 @@ -16,21 +16,6 @@ * 2 of the License, or (at your option) any later version. */ -/* - * Every architecture must define this function. It's the fastest - * way of searching a 140-bit bitmap where the first 100 bits are - * unlikely to be set. It's guaranteed that at least one of the 140 - * bits is cleared. 
- */ -static inline int sched_find_first_bit(unsigned long *b) -{ - if (unlikely(b[0])) - return __ffs(b[0]); - if (unlikely(b[1])) - return __ffs(b[1]) + 64; - return __ffs(b[2]) + 128; -} - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } Index: working-2.6/arch/ppc/Makefile =================================================================== --- working-2.6.orig/arch/ppc/Makefile 2005-10-31 15:20:57.000000000 +1100 +++ working-2.6/arch/ppc/Makefile 2005-10-31 16:20:01.000000000 +1100 @@ -66,7 +66,8 @@ core-y += arch/ppc/kernel/ arch/powerpc/kernel/ \ arch/ppc/platforms/ \ arch/ppc/mm/ arch/ppc/lib/ \ - arch/ppc/syslib/ arch/powerpc/sysdev/ + arch/ppc/syslib/ arch/powerpc/sysdev/ \ + arch/powerpc/lib/ core-$(CONFIG_4xx) += arch/ppc/platforms/4xx/ core-$(CONFIG_83xx) += arch/ppc/platforms/83xx/ core-$(CONFIG_85xx) += arch/ppc/platforms/85xx/ Index: working-2.6/arch/powerpc/lib/Makefile =================================================================== --- working-2.6.orig/arch/powerpc/lib/Makefile 2005-10-31 15:20:57.000000000 +1100 +++ working-2.6/arch/powerpc/lib/Makefile 2005-10-31 16:23:34.000000000 +1100 @@ -3,13 +3,14 @@ # ifeq ($(CONFIG_PPC_MERGE),y) -obj-y := string.o +obj-y := string.o strcase.o +obj-$(CONFIG_PPC32) += div64.o copy_32.o checksum_32.o endif -obj-y += strcase.o -obj-$(CONFIG_PPC32) += div64.o copy_32.o checksum_32.o +obj-y += bitops.o obj-$(CONFIG_PPC64) += checksum_64.o copypage_64.o copyuser_64.o \ - memcpy_64.o usercopy_64.o mem_64.o + memcpy_64.o usercopy_64.o mem_64.o \ + strcase.o obj-$(CONFIG_PPC_ISERIES) += e2a.o obj-$(CONFIG_XMON) += sstep.o -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! 
http://www.ozlabs.org/people/dgibson From david at gibson.dropbear.id.au Mon Oct 31 17:55:21 2005 From: david at gibson.dropbear.id.au (David Gibson) Date: Mon, 31 Oct 2005 17:55:21 +1100 Subject: [PATCH] Fix Kconfig performance bug In-Reply-To: <20051021014955.GA12976@localhost.localdomain> References: <20051020032342.GA11273@localhost.localdomain> <20051021014955.GA12976@localhost.localdomain> Message-ID: <20051031065521.GE6622@localhost.localdomain> On Fri, Oct 21, 2005 at 11:49:55AM +1000, David Gibson wrote: > On Fri, Oct 21, 2005 at 02:46:30AM +0200, Roman Zippel wrote: > > Hi, > > > > On Thu, 20 Oct 2005, David Gibson wrote: > > > > > When doing its recursive dependency check, scripts/kconfig/conf uses > > > the flag SYMBOL_CHECK_DONE to avoid rechecking a symbol it has already > > > checked. However, that flag is only set at the top level, so if a > > > symbol is first encountered as a dependency of another symbol it will > > > be rechecked every time it is encountered until it's encountered at > > > the top level. > > > > You're correct, the check does too much. > > > > > Index: working-2.6/scripts/kconfig/symbol.c > > > =================================================================== > > > --- working-2.6.orig/scripts/kconfig/symbol.c 2005-10-20 12:40:45.000000000 +1000 > > > +++ working-2.6/scripts/kconfig/symbol.c 2005-10-20 12:41:43.000000000 +1000 > > > @@ -758,6 +758,8 @@ > > > out: > > > if (sym2) > > > printf(" %s", sym->name); > > > + else > > > + sym->flags |= SYMBOL_CHECK_DONE; > > > sym->flags &= ~SYMBOL_CHECK; > > > return sym2; > > > } > > > > Actually this way it becomes redundant with SYMBOL_CHECKED, could you > > merge these two flags? The above check would also probably be better: > > Ok, done. There is now only SYMBOL_CHECKED (seemed a clearer name to > me), but its semantics are like those of SYMBOL_CHECK_DONE were.
> > > if (sym2) { > > printf(" %s", sym->name); > > if (sym2 == sym) { > > printf("\n"); > > sym2 = NULL; > > } > > } > > > > So that this check will stop when it hits the start symbol and continue > > looking for more dependency problems, which is, I think, what I intended with the > > original code. > > Erm.. ok. I don't entirely understand what the intent of this is, but > applied anyway. > > > > Index: working-2.6/scripts/kconfig/zconf.y > > > =================================================================== > > > --- working-2.6.orig/scripts/kconfig/zconf.y 2005-10-20 12:40:45.000000000 +1000 > > > +++ working-2.6/scripts/kconfig/zconf.y 2005-10-20 12:41:43.000000000 +1000 > > > @@ -495,10 +495,9 @@ > > > exit(1); > > > menu_finalize(&rootmenu); > > > for_all_symbols(i, sym) { > > > +/* fprintf(stderr, "Checking %s...\n", sym->name); */ > > > > One "quilt refresh" missing? :-) > > Oops. Something like that. > > Oh.. one caveat, the diffs I have here to zconf.tab.c_shipped are > direct edits to match zconf.y - I didn't regenerate the file with > bison. I've done that to avoid getting a whole lot of irrelevant changes in > the patch because I'm using a different version of bison to that used > for the existing zconf.tab.[ch]_shipped. > > Anyway, revised version below: Now that 2.6.14 is out, do you intend to pass this patch on to Linus? -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/people/dgibson From olh at suse.de Mon Oct 31 18:38:34 2005 From: olh at suse.de (Olaf Hering) Date: Mon, 31 Oct 2005 08:38:34 +0100 Subject: [PATCH] ppc64: add MODALIAS= for vio bus In-Reply-To: <20051031134814.42940751.sfr@canb.auug.org.au> References: <20051030213900.GA22510@suse.de> <20051031134814.42940751.sfr@canb.auug.org.au> Message-ID: <20051031073834.GA4868@suse.de> On Mon, Oct 31, Stephen Rothwell wrote: > Hi Olaf, > > This patch breaks legacy iSeries i.e.
it won't link (iSeries has no get_property()). > It may be easier to redo this patch against Paulus' merge tree. Can you fix it up, why is there no get_property for iseries, yet? iseries_veth should be autoloaded in a similar way. Maybe it should just go into a CONFIG_PSERIES or whatever. > A couple of trivial comments: > > On Sun, 30 Oct 2005 22:39:00 +0100 Olaf Hering wrote: > > > > +static int pseries_vio_hotplug (struct device *dev, char **envp, int num_envp, > ^ > No space here, please. > > > + length = scnprintf (buffer, buffer_size, "MODALIAS=vio:T%sS%s", > ^ > No space here either, please. I copied it from macio_asic.c. -- short story of a lazy sysadmin: alias appserv=wotan