[RFC PATCH 2/2] WIP: PowerPC cache cleanup

Benjamin Herrenschmidt benh at kernel.crashing.org
Wed Nov 16 09:42:40 EST 2011


On Tue, 2011-11-15 at 10:22 -0500, Kyle Moffett wrote:
> [My apologies for the resend, it does not seem to have hit the MLs.
> I think my git send-email "cc-cmd" may have broken somehow, oops.]

Or the ML took a while because it's big :-) I got both.

I'll try to review this week. Probably won't get to it today, though.

Thanks for looking at this !

Cheers,
Ben.

> This badly needs breaking up, and a better changelog... oh well...
> 
> The big changes:
> 
> * The "ppc64_caches" structure is now "powerpc_caches" and is used on
>   both PPC32 and PPC64.  I hated staring at the pages and pages of
>   assembly code, so nearly all of the functions are now C with tiny
>   snippets of inline ASM in the loops.
> 
> * Lots of ugly assembly functions in arch/powerpc/kernel/misc_*.S were
>   rewritten as cleaner inline ASM in arch/powerpc/mm/cache.c
> 
> * I'm not sure that the physical address functions from those files
>   actually came out cleaner, but they are now more correct.
> 
> * I'm not 100% sure I like the new FOR_EACH_CACHELINE() macro, but it
>   sure does make a lot of the other code much cleaner.
> 
> * I have a bit of a temptation to try to merge the 32/64-bit variants
>   of copy_page() into a single C function.  A quick test seems to show
>   that I can get nearly identical output to the 64-bit ASM with very
>   little work.
> 
> 
> ---
>  arch/powerpc/include/asm/cache.h             |  155 ++++++++++++---
>  arch/powerpc/include/asm/cacheflush.h        |    3 -
>  arch/powerpc/include/asm/page.h              |    6 +
>  arch/powerpc/include/asm/page_32.h           |    4 +-
>  arch/powerpc/include/asm/page_64.h           |   17 --
>  arch/powerpc/kernel/align.c                  |    7 +-
>  arch/powerpc/kernel/asm-offsets.c            |   13 +-
>  arch/powerpc/kernel/head_32.S                |    9 +-
>  arch/powerpc/kernel/head_64.S                |    2 +-
>  arch/powerpc/kernel/misc_32.S                |  193 ------------------
>  arch/powerpc/kernel/misc_64.S                |  182 -----------------
>  arch/powerpc/kernel/ppc_ksyms.c              |    3 -
>  arch/powerpc/kernel/setup-common.c           |  103 ++++++++++
>  arch/powerpc/kernel/setup.h                  |    1 +
>  arch/powerpc/kernel/setup_32.c               |   11 +-
>  arch/powerpc/kernel/setup_64.c               |  118 +----------
>  arch/powerpc/kernel/vdso.c                   |   27 +--
>  arch/powerpc/lib/copypage_64.S               |   10 +-
>  arch/powerpc/mm/Makefile                     |    2 +-
>  arch/powerpc/mm/cache.c                      |  279 ++++++++++++++++++++++++++
>  arch/powerpc/mm/dma-noncoherent.c            |    2 +-
>  arch/powerpc/platforms/52xx/lite5200_sleep.S |    9 +-
>  arch/powerpc/platforms/powermac/pci.c        |    2 +-
>  arch/powerpc/xmon/xmon.c                     |   53 +++---
>  drivers/macintosh/smu.c                      |    8 +-
>  25 files changed, 599 insertions(+), 620 deletions(-)
>  create mode 100644 arch/powerpc/mm/cache.c
> 
> diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
> index 4b50941..b1dc08f 100644
> --- a/arch/powerpc/include/asm/cache.h
> +++ b/arch/powerpc/include/asm/cache.h
> @@ -3,47 +3,142 @@
>  
>  #ifdef __KERNEL__
>  
> -
> -/* bytes per L1 cache line */
> -#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
> -#define L1_CACHE_SHIFT		4
> -#define MAX_COPY_PREFETCH	1
> +/*
> + * Various PowerPC CPUs which are otherwise compatible have different L1
> + * cache line sizes.
> + *
> + * Unfortunately, lots of kernel code assumes that L1_CACHE_BYTES and
> + * L1_CACHE_SHIFT are compile-time constants that can be used to align
> + * data-structures to avoid false cacheline sharing, so we can't just
> + * compute them at runtime from the cputable values.
> + *
> + * So for alignment purposes, we will compute these values as safe maximums
> + * of all the CPU support compiled into the kernel.
> + */
> +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_47x)
> +# define L1_CACHE_SHIFT_MAX 7 /* 128-byte cache blocks */
>  #elif defined(CONFIG_PPC_E500MC)
> -#define L1_CACHE_SHIFT		6
> -#define MAX_COPY_PREFETCH	4
> -#elif defined(CONFIG_PPC32)
> -#define MAX_COPY_PREFETCH	4
> -#if defined(CONFIG_PPC_47x)
> -#define L1_CACHE_SHIFT		7
> +# define L1_CACHE_SHIFT_MAX 6 /* 64-byte cache blocks */
>  #else
> -#define L1_CACHE_SHIFT		5
> +# define L1_CACHE_SHIFT_MAX 5 /* 32-byte cache blocks */
>  #endif
> +#define L1_CACHE_BYTES_MAX (1 << L1_CACHE_SHIFT_MAX)
> +
> +#define L1_CACHE_SHIFT  L1_CACHE_SHIFT_MAX
> +#define L1_CACHE_BYTES  L1_CACHE_BYTES_MAX
> +#define SMP_CACHE_BYTES L1_CACHE_BYTES_MAX
> +
> +/*
> + * Unfortunately, for other purposes, we can't just use a safe maximum value
> + * because it gets used in loops when invalidating or clearing cachelines and
> + * it would be very bad to only flush/invalidate/zero/etc every 4th one.
> + *
> + * During early initialization we load these values from the device-tree and
> + * the cputable into the powerpc_caches structure, but we need to be able to
> + * clear pages before that occurs, so these need sane default values.
> + *
> + * As explained in the powerpc_caches structure definition, the defaults
> + * should be safe minimums, so that's what we compute here.
> + */
> +#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
> +# define L1_CACHE_SHIFT_MIN 4 /* 16-byte cache blocks */
> +#elif defined(CONFIG_PPC32)
> +# define L1_CACHE_SHIFT_MIN 5 /* 32-byte cache blocks */
>  #else /* CONFIG_PPC64 */
> -#define L1_CACHE_SHIFT		7
> +# define L1_CACHE_SHIFT_MIN 6 /* 64-byte cache blocks */
>  #endif
> +#define L1_CACHE_BYTES_MIN (1 << L1_CACHE_SHIFT_MIN)
>  
> -#define	L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
> +/*
> + * Apparently the 8xx and the 403GCX have tiny caches, so they never prefetch
> + * more than a single cacheline in the ASM memory copy functions.
> + *
> + * All other 32-bit CPUs prefetch 4 cachelines, and the 64-bit CPUs have
> + * their own copy routines which prefetch the entire page.
> + */
> +#ifdef PPC32
> +# if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
> +#  define MAX_COPY_PREFETCH 1
> +# else
> +#  define MAX_COPY_PREFETCH 4
> +# endif
> +#endif
>  
> -#define	SMP_CACHE_BYTES		L1_CACHE_BYTES
> +#ifndef __ASSEMBLY__
>  
> -#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
> -struct ppc64_caches {
> -	u32	dsize;			/* L1 d-cache size */
> -	u32	dline_size;		/* L1 d-cache line size	*/
> -	u32	log_dline_size;
> -	u32	dlines_per_page;
> -	u32	isize;			/* L1 i-cache size */
> -	u32	iline_size;		/* L1 i-cache line size	*/
> -	u32	log_iline_size;
> -	u32	ilines_per_page;
> -};
> +/*
> + * A handy macro to iterate over all the cachelines referring to memory from
> + * "START" through "STOP - 1", inclusive.
> + */
> +#define FOR_EACH_CACHELINE(LINE, START, STOP, CACHE)			\
> +	for (u32 linesize__ = powerpc_caches.CACHE##_block_bytes,	\
> +			(LINE) = (START) & ~(linesize__ - 1);		\
> +			(LINE) < (STOP); (LINE) += linesize__)
> +
> +/* Write out a data cache block if it is dirty */
> +static inline void dcbst(unsigned long addr)
> +{
> +	asm volatile("dcbst %y0" :: "Z"(addr) : "memory");
> +}
>  
> -extern struct ppc64_caches ppc64_caches;
> -#endif /* __powerpc64__ && ! __ASSEMBLY__ */
> +/* Invalidate a data cache block (will lose data if dirty!) */
> +static inline void dcbi(unsigned long addr)
> +{
> +	asm volatile("dcbi %y0" :: "Z"(addr) : "memory");
> +}
> +
> +/* Write out (if dirty) and invalidate a data cache block */
> +static inline void dcbf(unsigned long addr)
> +{
> +	asm volatile("dcbf %y0" :: "Z"(addr) : "memory");
> +}
> +
> +/* Populate a data cache block with zeros */
> +static inline void dcbz(unsigned long addr)
> +{
> +	asm volatile("dcbz %y0" :: "Z"(addr) : "memory");
> +}
> +
> +/* Invalidate an instruction cache block */
> +static inline void icbi(unsigned long addr)
> +{
> +	asm volatile("icbi %y0" :: "Z"(addr) : "memory");
> +}
> +
> +/*
> + * This structure contains the various PowerPC cache parameters computed
> + * shortly after the device-tree has been unflattened during boot.
> + *
> + * Prior to that they have statically initialized values from L1_CACHE_*_MIN
> + * computed above.
> + *
> + * NOTE: If the dcache/icache are separate then ucache_* should be zeroed,
> + *       otherwise dcache == icache == ucache.
> + */
> +struct powerpc_caches {
> +	/* Data cache parameters */
> +	u32 dcache_total_bytes;
> +	u32 dcache_block_bytes;
> +	u32 dcache_block_shift;
> +	u32 dcache_blocks_per_page;
> +
> +	/* Instruction cache parameters */
> +	u32 icache_total_bytes;
> +	u32 icache_block_bytes;
> +	u32 icache_block_shift;
> +	u32 icache_blocks_per_page;
> +
> +	/* Unified cache parameters (If != 0, all 3 caches must be equal) */
> +	u32 ucache_total_bytes;
> +	u32 ucache_block_bytes;
> +	u32 ucache_block_shift;
> +	u32 ucache_blocks_per_page;
> +};
> +extern struct powerpc_caches powerpc_caches;
>  
> -#if !defined(__ASSEMBLY__)
>  #define __read_mostly __attribute__((__section__(".data..read_mostly")))
> -#endif
> +
> +#endif /* not __ASSEMBLY__ */
>  
>  #endif /* __KERNEL__ */
>  #endif /* _ASM_POWERPC_CACHE_H */
> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
> index ab9e402..8646443 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -47,12 +47,9 @@ extern void __flush_dcache_icache_phys(unsigned long physaddr);
>  #endif /* CONFIG_PPC32 && !CONFIG_BOOKE */
>  
>  extern void flush_dcache_range(unsigned long start, unsigned long stop);
> -#ifdef CONFIG_PPC32
>  extern void clean_dcache_range(unsigned long start, unsigned long stop);
>  extern void invalidate_dcache_range(unsigned long start, unsigned long stop);
> -#endif /* CONFIG_PPC32 */
>  #ifdef CONFIG_PPC64
> -extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
>  extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
>  #endif
>  
> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
> index dd9c4fd..b2e24ce 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -286,11 +286,17 @@ static inline int hugepd_ok(hugepd_t hpd)
>  #endif /* CONFIG_HUGETLB_PAGE */
>  
>  struct page;
> +extern void clear_pages(void *page, int order);
>  extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
>  extern void copy_user_page(void *to, void *from, unsigned long vaddr,
>  		struct page *p);
>  extern int page_is_ram(unsigned long pfn);
>  
> +static inline void clear_page(void *page)
> +{
> +	clear_pages(page, 0);
> +}
> +
>  #ifdef CONFIG_PPC_SMLPAR
>  void arch_free_page(struct page *page, int order);
>  #define HAVE_ARCH_FREE_PAGE
> diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
> index 68d73b2..12ae694 100644
> --- a/arch/powerpc/include/asm/page_32.h
> +++ b/arch/powerpc/include/asm/page_32.h
> @@ -10,7 +10,7 @@
>  #define VM_DATA_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS32
>  
>  #ifdef CONFIG_NOT_COHERENT_CACHE
> -#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
> +#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES_MAX
>  #endif
>  
>  #ifdef CONFIG_PTE_64BIT
> @@ -37,8 +37,6 @@ typedef unsigned long pte_basic_t;
>  #endif
>  
>  struct page;
> -extern void clear_pages(void *page, int order);
> -static inline void clear_page(void *page) { clear_pages(page, 0); }
>  extern void copy_page(void *to, void *from);
>  
>  #include <asm-generic/getorder.h>
> diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
> index fb40ede..7e156f6 100644
> --- a/arch/powerpc/include/asm/page_64.h
> +++ b/arch/powerpc/include/asm/page_64.h
> @@ -42,23 +42,6 @@
>  
>  typedef unsigned long pte_basic_t;
>  
> -static __inline__ void clear_page(void *addr)
> -{
> -	unsigned long lines, line_size;
> -
> -	line_size = ppc64_caches.dline_size;
> -	lines = ppc64_caches.dlines_per_page;
> -
> -	__asm__ __volatile__(
> -	"mtctr	%1	# clear_page\n\
> -1:      dcbz	0,%0\n\
> -	add	%0,%0,%3\n\
> -	bdnz+	1b"
> -        : "=r" (addr)
> -        : "r" (lines), "0" (addr), "r" (line_size)
> -	: "ctr", "memory");
> -}
> -
>  extern void copy_page(void *to, void *from);
>  
>  /* Log 2 of page table size */
> diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
> index 8184ee9..debfb99 100644
> --- a/arch/powerpc/kernel/align.c
> +++ b/arch/powerpc/kernel/align.c
> @@ -233,14 +233,9 @@ static inline unsigned make_dsisr(unsigned instr)
>   */
>  static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
>  {
> +	int i, size = powerpc_caches.dcache_block_bytes;
>  	long __user *p;
> -	int i, size;
>  
> -#ifdef __powerpc64__
> -	size = ppc64_caches.dline_size;
> -#else
> -	size = L1_CACHE_BYTES;
> -#endif
>  	p = (long __user *) (regs->dar & -size);
>  	if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
>  		return -EFAULT;
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 7c5324f..505b25a 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -126,13 +126,14 @@ int main(void)
>  	DEFINE(TI_TASK, offsetof(struct thread_info, task));
>  	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
>  
> +	DEFINE(DCACHE_BLOCK_SHIFT,	offsetof(struct powerpc_caches, dcache_block_shift));
> +	DEFINE(DCACHE_BLOCK_BYTES,	offsetof(struct powerpc_caches, dcache_block_bytes));
> +	DEFINE(DCACHE_BLOCKS_PER_PAGE,	offsetof(struct powerpc_caches, dcache_blocks_per_page));
> +	DEFINE(ICACHE_BLOCK_SHIFT,	offsetof(struct powerpc_caches, icache_block_shift));
> +	DEFINE(ICACHE_BLOCK_BYTES,	offsetof(struct powerpc_caches, icache_block_bytes));
> +	DEFINE(ICACHE_BLOCKS_PER_PAGE,	offsetof(struct powerpc_caches, icache_blocks_per_page));
> +
>  #ifdef CONFIG_PPC64
> -	DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
> -	DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
> -	DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
> -	DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
> -	DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
> -	DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
>  	/* paca */
>  	DEFINE(PACA_SIZE, sizeof(struct paca_struct));
>  	DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token));
> diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
> index 0654dba..8abc44a 100644
> --- a/arch/powerpc/kernel/head_32.S
> +++ b/arch/powerpc/kernel/head_32.S
> @@ -786,7 +786,14 @@ relocate_kernel:
>  _ENTRY(copy_and_flush)
>  	addi	r5,r5,-4
>  	addi	r6,r6,-4
> -4:	li	r0,L1_CACHE_BYTES/4
> +4:	li	r0,L1_CACHE_BYTES_MIN/4	/* Use the smallest common	*/
> +					/* denominator cache line	*/
> +					/* size.  This results in	*/
> +					/* extra cache line flushes	*/
> +					/* but operation is correct.	*/
> +					/* Can't get cache line size	*/
> +					/* from device-tree yet		*/
> +
>  	mtctr	r0
>  3:	addi	r6,r6,4			/* copy a cache line */
>  	lwzx	r0,r6,r4
> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> index 06c7251..183d371 100644
> --- a/arch/powerpc/kernel/head_64.S
> +++ b/arch/powerpc/kernel/head_64.S
> @@ -480,7 +480,7 @@ p_end:	.llong	_end - _stext
>  _GLOBAL(copy_and_flush)
>  	addi	r5,r5,-8
>  	addi	r6,r6,-8
> -4:	li	r0,8			/* Use the smallest common	*/
> +4:	li	r0,L1_CACHE_BYTES_MIN/8	/* Use the smallest common	*/
>  					/* denominator cache line	*/
>  					/* size.  This results in	*/
>  					/* extra cache line flushes	*/
> diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
> index f7d760a..ee61600 100644
> --- a/arch/powerpc/kernel/misc_32.S
> +++ b/arch/powerpc/kernel/misc_32.S
> @@ -321,199 +321,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
>  	blr
>  
>  /*
> - * Write any modified data cache blocks out to memory
> - * and invalidate the corresponding instruction cache blocks.
> - * This is a no-op on the 601.
> - *
> - * flush_icache_range(unsigned long start, unsigned long stop)
> - */
> -_KPROBE(__flush_icache_range)
> -BEGIN_FTR_SECTION
> -	blr				/* for 601, do nothing */
> -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
> -	li	r5,L1_CACHE_BYTES-1
> -	andc	r3,r3,r5
> -	subf	r4,r3,r4
> -	add	r4,r4,r5
> -	srwi.	r4,r4,L1_CACHE_SHIFT
> -	beqlr
> -	mtctr	r4
> -	mr	r6,r3
> -1:	dcbst	0,r3
> -	addi	r3,r3,L1_CACHE_BYTES
> -	bdnz	1b
> -	sync				/* wait for dcbst's to get to ram */
> -#ifndef CONFIG_44x
> -	mtctr	r4
> -2:	icbi	0,r6
> -	addi	r6,r6,L1_CACHE_BYTES
> -	bdnz	2b
> -#else
> -	/* Flash invalidate on 44x because we are passed kmapped addresses and
> -	   this doesn't work for userspace pages due to the virtually tagged
> -	   icache.  Sigh. */
> -	iccci	0, r0
> -#endif
> -	sync				/* additional sync needed on g4 */
> -	isync
> -	blr
> -/*
> - * Write any modified data cache blocks out to memory.
> - * Does not invalidate the corresponding cache lines (especially for
> - * any corresponding instruction cache).
> - *
> - * clean_dcache_range(unsigned long start, unsigned long stop)
> - */
> -_GLOBAL(clean_dcache_range)
> -	li	r5,L1_CACHE_BYTES-1
> -	andc	r3,r3,r5
> -	subf	r4,r3,r4
> -	add	r4,r4,r5
> -	srwi.	r4,r4,L1_CACHE_SHIFT
> -	beqlr
> -	mtctr	r4
> -
> -1:	dcbst	0,r3
> -	addi	r3,r3,L1_CACHE_BYTES
> -	bdnz	1b
> -	sync				/* wait for dcbst's to get to ram */
> -	blr
> -
> -/*
> - * Write any modified data cache blocks out to memory and invalidate them.
> - * Does not invalidate the corresponding instruction cache blocks.
> - *
> - * flush_dcache_range(unsigned long start, unsigned long stop)
> - */
> -_GLOBAL(flush_dcache_range)
> -	li	r5,L1_CACHE_BYTES-1
> -	andc	r3,r3,r5
> -	subf	r4,r3,r4
> -	add	r4,r4,r5
> -	srwi.	r4,r4,L1_CACHE_SHIFT
> -	beqlr
> -	mtctr	r4
> -
> -1:	dcbf	0,r3
> -	addi	r3,r3,L1_CACHE_BYTES
> -	bdnz	1b
> -	sync				/* wait for dcbst's to get to ram */
> -	blr
> -
> -/*
> - * Like above, but invalidate the D-cache.  This is used by the 8xx
> - * to invalidate the cache so the PPC core doesn't get stale data
> - * from the CPM (no cache snooping here :-).
> - *
> - * invalidate_dcache_range(unsigned long start, unsigned long stop)
> - */
> -_GLOBAL(invalidate_dcache_range)
> -	li	r5,L1_CACHE_BYTES-1
> -	andc	r3,r3,r5
> -	subf	r4,r3,r4
> -	add	r4,r4,r5
> -	srwi.	r4,r4,L1_CACHE_SHIFT
> -	beqlr
> -	mtctr	r4
> -
> -1:	dcbi	0,r3
> -	addi	r3,r3,L1_CACHE_BYTES
> -	bdnz	1b
> -	sync				/* wait for dcbi's to get to ram */
> -	blr
> -
> -/*
> - * Flush a particular page from the data cache to RAM.
> - * Note: this is necessary because the instruction cache does *not*
> - * snoop from the data cache.
> - * This is a no-op on the 601 which has a unified cache.
> - *
> - *	void __flush_dcache_icache(void *page)
> - */
> -_GLOBAL(__flush_dcache_icache)
> -BEGIN_FTR_SECTION
> -	blr
> -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
> -	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
> -	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
> -	mtctr	r4
> -	mr	r6,r3
> -0:	dcbst	0,r3				/* Write line to ram */
> -	addi	r3,r3,L1_CACHE_BYTES
> -	bdnz	0b
> -	sync
> -#ifdef CONFIG_44x
> -	/* We don't flush the icache on 44x. Those have a virtual icache
> -	 * and we don't have access to the virtual address here (it's
> -	 * not the page vaddr but where it's mapped in user space). The
> -	 * flushing of the icache on these is handled elsewhere, when
> -	 * a change in the address space occurs, before returning to
> -	 * user space
> -	 */
> -BEGIN_MMU_FTR_SECTION
> -	blr
> -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
> -#endif /* CONFIG_44x */
> -	mtctr	r4
> -1:	icbi	0,r6
> -	addi	r6,r6,L1_CACHE_BYTES
> -	bdnz	1b
> -	sync
> -	isync
> -	blr
> -
> -#ifndef CONFIG_BOOKE
> -/*
> - * Flush a particular page from the data cache to RAM, identified
> - * by its physical address.  We turn off the MMU so we can just use
> - * the physical address (this may be a highmem page without a kernel
> - * mapping).
> - *
> - *	void __flush_dcache_icache_phys(unsigned long physaddr)
> - */
> -_GLOBAL(__flush_dcache_icache_phys)
> -BEGIN_FTR_SECTION
> -	blr					/* for 601, do nothing */
> -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
> -	mfmsr	r10
> -	rlwinm	r0,r10,0,28,26			/* clear DR */
> -	mtmsr	r0
> -	isync
> -	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
> -	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
> -	mtctr	r4
> -	mr	r6,r3
> -0:	dcbst	0,r3				/* Write line to ram */
> -	addi	r3,r3,L1_CACHE_BYTES
> -	bdnz	0b
> -	sync
> -	mtctr	r4
> -1:	icbi	0,r6
> -	addi	r6,r6,L1_CACHE_BYTES
> -	bdnz	1b
> -	sync
> -	mtmsr	r10				/* restore DR */
> -	isync
> -	blr
> -#endif /* CONFIG_BOOKE */
> -
> -/*
> - * Clear pages using the dcbz instruction, which doesn't cause any
> - * memory traffic (except to write out any cache lines which get
> - * displaced).  This only works on cacheable memory.
> - *
> - * void clear_pages(void *page, int order) ;
> - */
> -_GLOBAL(clear_pages)
> -	li	r0,PAGE_SIZE/L1_CACHE_BYTES
> -	slw	r0,r0,r4
> -	mtctr	r0
> -1:	dcbz	0,r3
> -	addi	r3,r3,L1_CACHE_BYTES
> -	bdnz	1b
> -	blr
> -
> -/*
>   * Copy a whole page.  We use the dcbz instruction on the destination
>   * to reduce memory traffic (it eliminates the unnecessary reads of
>   * the destination into cache).  This requires that the destination
> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
> index 616921e..500fd61 100644
> --- a/arch/powerpc/kernel/misc_64.S
> +++ b/arch/powerpc/kernel/misc_64.S
> @@ -53,188 +53,6 @@ _GLOBAL(call_handle_irq)
>  	mtlr	r0
>  	blr
>  
> -	.section	".toc","aw"
> -PPC64_CACHES:
> -	.tc		ppc64_caches[TC],ppc64_caches
> -	.section	".text"
> -
> -/*
> - * Write any modified data cache blocks out to memory
> - * and invalidate the corresponding instruction cache blocks.
> - *
> - * flush_icache_range(unsigned long start, unsigned long stop)
> - *
> - *   flush all bytes from start through stop-1 inclusive
> - */
> -
> -_KPROBE(__flush_icache_range)
> -
> -/*
> - * Flush the data cache to memory 
> - * 
> - * Different systems have different cache line sizes
> - * and in some cases i-cache and d-cache line sizes differ from
> - * each other.
> - */
> - 	ld	r10,PPC64_CACHES@toc(r2)
> -	lwz	r7,DCACHEL1LINESIZE(r10)/* Get cache line size */
> -	addi	r5,r7,-1
> -	andc	r6,r3,r5		/* round low to line bdy */
> -	subf	r8,r6,r4		/* compute length */
> -	add	r8,r8,r5		/* ensure we get enough */
> -	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of cache line size */
> -	srw.	r8,r8,r9		/* compute line count */
> -	beqlr				/* nothing to do? */
> -	mtctr	r8
> -1:	dcbst	0,r6
> -	add	r6,r6,r7
> -	bdnz	1b
> -	sync
> -
> -/* Now invalidate the instruction cache */
> -	
> -	lwz	r7,ICACHEL1LINESIZE(r10)	/* Get Icache line size */
> -	addi	r5,r7,-1
> -	andc	r6,r3,r5		/* round low to line bdy */
> -	subf	r8,r6,r4		/* compute length */
> -	add	r8,r8,r5
> -	lwz	r9,ICACHEL1LOGLINESIZE(r10)	/* Get log-2 of Icache line size */
> -	srw.	r8,r8,r9		/* compute line count */
> -	beqlr				/* nothing to do? */
> -	mtctr	r8
> -2:	icbi	0,r6
> -	add	r6,r6,r7
> -	bdnz	2b
> -	isync
> -	blr
> -	.previous .text
> -/*
> - * Like above, but only do the D-cache.
> - *
> - * flush_dcache_range(unsigned long start, unsigned long stop)
> - *
> - *    flush all bytes from start to stop-1 inclusive
> - */
> -_GLOBAL(flush_dcache_range)
> -
> -/*
> - * Flush the data cache to memory 
> - * 
> - * Different systems have different cache line sizes
> - */
> - 	ld	r10,PPC64_CACHES@toc(r2)
> -	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
> -	addi	r5,r7,-1
> -	andc	r6,r3,r5		/* round low to line bdy */
> -	subf	r8,r6,r4		/* compute length */
> -	add	r8,r8,r5		/* ensure we get enough */
> -	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of dcache line size */
> -	srw.	r8,r8,r9		/* compute line count */
> -	beqlr				/* nothing to do? */
> -	mtctr	r8
> -0:	dcbst	0,r6
> -	add	r6,r6,r7
> -	bdnz	0b
> -	sync
> -	blr
> -
> -/*
> - * Like above, but works on non-mapped physical addresses.
> - * Use only for non-LPAR setups ! It also assumes real mode
> - * is cacheable. Used for flushing out the DART before using
> - * it as uncacheable memory 
> - *
> - * flush_dcache_phys_range(unsigned long start, unsigned long stop)
> - *
> - *    flush all bytes from start to stop-1 inclusive
> - */
> -_GLOBAL(flush_dcache_phys_range)
> - 	ld	r10,PPC64_CACHES@toc(r2)
> -	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
> -	addi	r5,r7,-1
> -	andc	r6,r3,r5		/* round low to line bdy */
> -	subf	r8,r6,r4		/* compute length */
> -	add	r8,r8,r5		/* ensure we get enough */
> -	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of dcache line size */
> -	srw.	r8,r8,r9		/* compute line count */
> -	beqlr				/* nothing to do? */
> -	mfmsr	r5			/* Disable MMU Data Relocation */
> -	ori	r0,r5,MSR_DR
> -	xori	r0,r0,MSR_DR
> -	sync
> -	mtmsr	r0
> -	sync
> -	isync
> -	mtctr	r8
> -0:	dcbst	0,r6
> -	add	r6,r6,r7
> -	bdnz	0b
> -	sync
> -	isync
> -	mtmsr	r5			/* Re-enable MMU Data Relocation */
> -	sync
> -	isync
> -	blr
> -
> -_GLOBAL(flush_inval_dcache_range)
> - 	ld	r10,PPC64_CACHES@toc(r2)
> -	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
> -	addi	r5,r7,-1
> -	andc	r6,r3,r5		/* round low to line bdy */
> -	subf	r8,r6,r4		/* compute length */
> -	add	r8,r8,r5		/* ensure we get enough */
> -	lwz	r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */
> -	srw.	r8,r8,r9		/* compute line count */
> -	beqlr				/* nothing to do? */
> -	sync
> -	isync
> -	mtctr	r8
> -0:	dcbf	0,r6
> -	add	r6,r6,r7
> -	bdnz	0b
> -	sync
> -	isync
> -	blr
> -
> -
> -/*
> - * Flush a particular page from the data cache to RAM.
> - * Note: this is necessary because the instruction cache does *not*
> - * snoop from the data cache.
> - *
> - *	void __flush_dcache_icache(void *page)
> - */
> -_GLOBAL(__flush_dcache_icache)
> -/*
> - * Flush the data cache to memory 
> - * 
> - * Different systems have different cache line sizes
> - */
> -
> -/* Flush the dcache */
> - 	ld	r7,PPC64_CACHES@toc(r2)
> -	clrrdi	r3,r3,PAGE_SHIFT           	    /* Page align */
> -	lwz	r4,DCACHEL1LINESPERPAGE(r7)	/* Get # dcache lines per page */
> -	lwz	r5,DCACHEL1LINESIZE(r7)		/* Get dcache line size */
> -	mr	r6,r3
> -	mtctr	r4
> -0:	dcbst	0,r6
> -	add	r6,r6,r5
> -	bdnz	0b
> -	sync
> -
> -/* Now invalidate the icache */	
> -
> -	lwz	r4,ICACHEL1LINESPERPAGE(r7)	/* Get # icache lines per page */
> -	lwz	r5,ICACHEL1LINESIZE(r7)		/* Get icache line size */
> -	mtctr	r4
> -1:	icbi	0,r3
> -	add	r3,r3,r5
> -	bdnz	1b
> -	isync
> -	blr
> -
> -
>  #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
>  /*
>   * Do an IO access in real mode
> diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
> index acba8ce..ccdceb7 100644
> --- a/arch/powerpc/kernel/ppc_ksyms.c
> +++ b/arch/powerpc/kernel/ppc_ksyms.c
> @@ -53,7 +53,6 @@ extern void program_check_exception(struct pt_regs *regs);
>  extern void single_step_exception(struct pt_regs *regs);
>  extern int sys_sigreturn(struct pt_regs *regs);
>  
> -EXPORT_SYMBOL(clear_pages);
>  EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
>  EXPORT_SYMBOL(DMA_MODE_READ);
>  EXPORT_SYMBOL(DMA_MODE_WRITE);
> @@ -113,8 +112,6 @@ EXPORT_SYMBOL(giveup_spe);
>  #ifndef CONFIG_PPC64
>  EXPORT_SYMBOL(flush_instruction_cache);
>  #endif
> -EXPORT_SYMBOL(__flush_icache_range);
> -EXPORT_SYMBOL(flush_dcache_range);
>  
>  #ifdef CONFIG_SMP
>  #ifdef CONFIG_PPC32
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 77bb77d..3abfea4 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -83,6 +83,54 @@ unsigned long klimit = (unsigned long) _end;
>  char cmd_line[COMMAND_LINE_SIZE];
>  
>  /*
> + * Initialize these values to minimum safe defaults in case they need to be
> + * used early during the boot process.  While this may not seem safe, it is
> + * actually safe in practice, because all of the kernel loops that use this
> + * data operate on whole pages.
> + *
> + * The PowerPC Book III-E spec documents that the pagesize is an even
> + * multiple of the cache block size and the cache blocks are always
> + * page-aligned.
> + *
> + * So, for example, when clearing a whole page there are only two things that
> + * can be done wrong with "dcbz":
> + *
> + *   (1) Call "dcbz" with an address outside the page you want to zero.
> + *
> + *   (2) Call "dcbz" too few times to actually hit all of the cachelines,
> + *       IE: Use a too-large cacheline stride.
> + *
> + * So as long as we ensure that this number is small enough for the current
> + * CPU everything will operate correctly, albeit with a slight performance
> + * hit, until we get a chance to parse the device-tree for the right value.
> + *
> + * NOTE: Userspace expects an exact value, so none of the above applies after
> + * the device tree has been unflattened and actual values computed.
> + *
> + * See arch/powerpc/include/asm/cache.h for more information.
> + */
> +struct powerpc_caches powerpc_caches = {
> +	/* Data cache sizes */
> +	.dcache_total_bytes  = 0, /* Unknown */
> +	.dcache_block_bytes = L1_CACHE_BYTES_MIN,
> +	.dcache_block_shift = L1_CACHE_SHIFT_MIN,
> +	.dcache_blocks_per_page = (PAGE_SIZE >> L1_CACHE_SHIFT_MIN),
> +
> +	/* Instruction cache sizes */
> +	.icache_total_bytes = 0,
> +	.icache_block_bytes = L1_CACHE_BYTES_MIN,
> +	.icache_block_shift = L1_CACHE_SHIFT_MIN,
> +	.icache_blocks_per_page = (PAGE_SIZE >> L1_CACHE_SHIFT_MIN),
> +
> +	/* Unified cache (assume cache is split by default) */
> +	.ucache_total_bytes = 0,
> +	.ucache_block_bytes = 0,
> +	.ucache_block_shift = 0,
> +	.ucache_blocks_per_page = 0,
> +};
> +EXPORT_SYMBOL_GPL(powerpc_caches);
> +
> +/*
>   * This still seems to be needed... -- paulus
>   */ 
>  struct screen_info screen_info = {
> @@ -349,6 +397,61 @@ const struct seq_operations cpuinfo_op = {
>  	.show =	show_cpuinfo,
>  };
>  
> +/* Helper functions to compute various values from a cache block size */
> +static void __init set_dcache_block_data(u32 bytes)
> +{
> +	u32 shift = __ilog2(bytes);
> +	powerpc_caches.dcache_block_bytes = bytes;
> +	powerpc_caches.dcache_block_shift = shift;
> +	powerpc_caches.dcache_blocks_per_page = (PAGE_SIZE >> shift);
> +}
> +static void __init set_icache_block_data(u32 bytes)
> +{
> +	u32 shift = __ilog2(bytes);
> +	powerpc_caches.icache_block_bytes = bytes;
> +	powerpc_caches.icache_block_shift = shift;
> +	powerpc_caches.icache_blocks_per_page = (PAGE_SIZE >> shift);
> +}
> +
> +/*
> + * Preinitialize the powerpc_caches structure from the cputable.  We will
> + * later scan the device-tree for this information, which may be more
> + * accurate.
> + */
> +void __init initialize_early_cache_info(void)
> +{
> +	set_dcache_block_data(cur_cpu_spec->dcache_bsize);
> +	set_icache_block_data(cur_cpu_spec->icache_bsize);
> +}
> +
> +/*
> + * Initialize the powerpc_caches structure from the device-tree for use by
> + * copy_page(), cache flush routines, and AT_DCACHEBSIZE elf headers.
> + *
> + * In the unlikely event that the device-tree doesn't have this information,
> + * the defaults loaded by initialize_early_cache_info() from the cputable
> + * will be used.
> + */
> +void __init initialize_cache_info(void)
> +{
> +	/* Assume that the cache properties are the same across all nodes */
> +	struct device_node *np = of_find_node_by_type(NULL, "cpu");
> +	u32 value = 0;
> +
> +	/* First check data/instruction cache block sizes */
> +	if (	!of_property_read_u32(np, "d-cache-block-size", &value) ||
> +		!of_property_read_u32(np, "d-cache-line-size", &value))
> +		set_dcache_block_data(value);
> +
> +	if (	!of_property_read_u32(np, "i-cache-block-size", &value) ||
> +		!of_property_read_u32(np, "i-cache-line-size", &value))
> +		set_icache_block_data(value);
> +
> +	/* Also read total cache sizes (no defaults here) */
> +	of_property_read_u32(np, "d-cache-size", &powerpc_caches.dcache_total_bytes);
> +	of_property_read_u32(np, "i-cache-size", &powerpc_caches.icache_total_bytes);
> +}
> +
>  void __init check_for_initrd(void)
>  {
>  #ifdef CONFIG_BLK_DEV_INITRD
> diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
> index 4c67ad7..1ae16ec 100644
> --- a/arch/powerpc/kernel/setup.h
> +++ b/arch/powerpc/kernel/setup.h
> @@ -1,6 +1,7 @@
>  #ifndef _POWERPC_KERNEL_SETUP_H
>  #define _POWERPC_KERNEL_SETUP_H
>  
> +void initialize_cache_info(void);
>  void check_for_initrd(void);
>  void do_init_bootmem(void);
>  void setup_panic(void);
> diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
> index c1ce863..1db2bfb 100644
> --- a/arch/powerpc/kernel/setup_32.c
> +++ b/arch/powerpc/kernel/setup_32.c
> @@ -63,14 +63,6 @@ EXPORT_SYMBOL(vgacon_remap_base);
>  #endif
>  
>  /*
> - * These are used in binfmt_elf.c to put aux entries on the stack
> - * for each elf executable being started.
> - */
> -int dcache_bsize;
> -int icache_bsize;
> -int ucache_bsize;
> -
> -/*
>   * We're called here very early in the boot.  We determine the machine
>   * type and call the appropriate low-level setup functions.
>   *  -- Cort <cort at fsmlabs.com>
> @@ -286,10 +278,13 @@ void __init setup_arch(char **cmdline_p)
>  {
>  	*cmdline_p = cmd_line;
>  
> +	initialize_early_cache_info();
> +
>  	/* so udelay does something sensible, assume <= 1000 bogomips */
>  	loops_per_jiffy = 500000000 / HZ;
>  
>  	unflatten_device_tree();
> +	initialize_cache_info();
>  	check_for_initrd();
>  
>  	if (ppc_md.init_early)
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 1a9dea8..bb686de 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -77,25 +77,6 @@ int boot_cpuid = 0;
>  int __initdata spinning_secondaries;
>  u64 ppc64_pft_size;
>  
> -/* Pick defaults since we might want to patch instructions
> - * before we've read this from the device tree.
> - */
> -struct ppc64_caches ppc64_caches = {
> -	.dline_size = 0x40,
> -	.log_dline_size = 6,
> -	.iline_size = 0x40,
> -	.log_iline_size = 6
> -};
> -EXPORT_SYMBOL_GPL(ppc64_caches);
> -
> -/*
> - * These are used in binfmt_elf.c to put aux entries on the stack
> - * for each elf executable being started.
> - */
> -int dcache_bsize;
> -int icache_bsize;
> -int ucache_bsize;
> -
>  #ifdef CONFIG_SMP
>  
>  static char *smt_enabled_cmdline;
> @@ -265,82 +246,6 @@ void smp_release_cpus(void)
>  #endif /* CONFIG_SMP || CONFIG_KEXEC */
>  
>  /*
> - * Initialize some remaining members of the ppc64_caches and systemcfg
> - * structures
> - * (at least until we get rid of them completely). This is mostly some
> - * cache informations about the CPU that will be used by cache flush
> - * routines and/or provided to userland
> - */
> -static void __init initialize_cache_info(void)
> -{
> -	struct device_node *np;
> -	unsigned long num_cpus = 0;
> -
> -	DBG(" -> initialize_cache_info()\n");
> -
> -	for_each_node_by_type(np, "cpu") {
> -		num_cpus += 1;
> -
> -		/*
> -		 * We're assuming *all* of the CPUs have the same
> -		 * d-cache and i-cache sizes... -Peter
> -		 */
> -		if (num_cpus == 1) {
> -			const u32 *sizep, *lsizep;
> -			u32 size, lsize;
> -
> -			size = 0;
> -			lsize = cur_cpu_spec->dcache_bsize;
> -			sizep = of_get_property(np, "d-cache-size", NULL);
> -			if (sizep != NULL)
> -				size = *sizep;
> -			lsizep = of_get_property(np, "d-cache-block-size",
> -						 NULL);
> -			/* fallback if block size missing */
> -			if (lsizep == NULL)
> -				lsizep = of_get_property(np,
> -							 "d-cache-line-size",
> -							 NULL);
> -			if (lsizep != NULL)
> -				lsize = *lsizep;
> -			if (sizep == 0 || lsizep == 0)
> -				DBG("Argh, can't find dcache properties ! "
> -				    "sizep: %p, lsizep: %p\n", sizep, lsizep);
> -
> -			ppc64_caches.dsize = size;
> -			ppc64_caches.dline_size = lsize;
> -			ppc64_caches.log_dline_size = __ilog2(lsize);
> -			ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
> -
> -			size = 0;
> -			lsize = cur_cpu_spec->icache_bsize;
> -			sizep = of_get_property(np, "i-cache-size", NULL);
> -			if (sizep != NULL)
> -				size = *sizep;
> -			lsizep = of_get_property(np, "i-cache-block-size",
> -						 NULL);
> -			if (lsizep == NULL)
> -				lsizep = of_get_property(np,
> -							 "i-cache-line-size",
> -							 NULL);
> -			if (lsizep != NULL)
> -				lsize = *lsizep;
> -			if (sizep == 0 || lsizep == 0)
> -				DBG("Argh, can't find icache properties ! "
> -				    "sizep: %p, lsizep: %p\n", sizep, lsizep);
> -
> -			ppc64_caches.isize = size;
> -			ppc64_caches.iline_size = lsize;
> -			ppc64_caches.log_iline_size = __ilog2(lsize);
> -			ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
> -		}
> -	}
> -
> -	DBG(" <- initialize_cache_info()\n");
> -}
> -
> -
> -/*
>   * Do some initial setup of the system.  The parameters are those which 
>   * were passed in from the bootloader.
>   */
> @@ -365,10 +270,7 @@ void __init setup_system(void)
>  	 */
>  	unflatten_device_tree();
>  
> -	/*
> -	 * Fill the ppc64_caches & systemcfg structures with informations
> - 	 * retrieved from the device-tree.
> -	 */
> +	/* Fill the powerpc_caches structure with device-tree data */
>  	initialize_cache_info();
>  
>  #ifdef CONFIG_PPC_RTAS
> @@ -423,12 +325,10 @@ void __init setup_system(void)
>  	printk("-----------------------------------------------------\n");
>  	printk("ppc64_pft_size                = 0x%llx\n", ppc64_pft_size);
>  	printk("physicalMemorySize            = 0x%llx\n", memblock_phys_mem_size());
> -	if (ppc64_caches.dline_size != 0x80)
> -		printk("ppc64_caches.dcache_line_size = 0x%x\n",
> -		       ppc64_caches.dline_size);
> -	if (ppc64_caches.iline_size != 0x80)
> -		printk("ppc64_caches.icache_line_size = 0x%x\n",
> -		       ppc64_caches.iline_size);
> +	if (powerpc_caches.dcache_block_bytes != 0x80)
> +		printk("dcache_block_bytes = 0x%x\n", powerpc_caches.dcache_block_bytes);
> +	if (powerpc_caches.icache_block_bytes != 0x80)
> +		printk("icache_block_bytes = 0x%x\n", powerpc_caches.icache_block_bytes);
>  #ifdef CONFIG_PPC_STD_MMU_64
>  	if (htab_address)
>  		printk("htab_address                  = 0x%p\n", htab_address);
> @@ -545,13 +445,7 @@ void __init setup_arch(char **cmdline_p)
>  
>  	*cmdline_p = cmd_line;
>  
> -	/*
> -	 * Set cache line size based on type of cpu as a default.
> -	 * Systems with OF can look in the properties on the cpu node(s)
> -	 * for a possibly more accurate value.
> -	 */
> -	dcache_bsize = ppc64_caches.dline_size;
> -	icache_bsize = ppc64_caches.iline_size;
> +	initialize_early_cache_info();
>  
>  	/* reboot on panic */
>  	panic_timeout = 180;
> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> index 7d14bb6..4a038fb 100644
> --- a/arch/powerpc/kernel/vdso.c
> +++ b/arch/powerpc/kernel/vdso.c
> @@ -726,6 +726,7 @@ static int __init vdso_init(void)
>  	vdso_data->version.major = SYSTEMCFG_MAJOR;
>  	vdso_data->version.minor = SYSTEMCFG_MINOR;
>  	vdso_data->processor = mfspr(SPRN_PVR);
> +
>  	/*
>  	 * Fake the old platform number for pSeries and iSeries and add
>  	 * in LPAR bit if necessary
> @@ -734,29 +735,25 @@ static int __init vdso_init(void)
>  	if (firmware_has_feature(FW_FEATURE_LPAR))
>  		vdso_data->platform |= 1;
>  	vdso_data->physicalMemorySize = memblock_phys_mem_size();
> -	vdso_data->dcache_size = ppc64_caches.dsize;
> -	vdso_data->dcache_line_size = ppc64_caches.dline_size;
> -	vdso_data->icache_size = ppc64_caches.isize;
> -	vdso_data->icache_line_size = ppc64_caches.iline_size;
>  
> -	/* XXXOJN: Blocks should be added to ppc64_caches and used instead */
> -	vdso_data->dcache_block_size = ppc64_caches.dline_size;
> -	vdso_data->icache_block_size = ppc64_caches.iline_size;
> -	vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size;
> -	vdso_data->icache_log_block_size = ppc64_caches.log_iline_size;
> +	/* There are more cache parameters saved for 64-bit than 32-bit */
> +	vdso_data->dcache_size           = powerpc_caches.dcache_total_size;
> +	vdso_data->icache_size           = powerpc_caches.icache_total_size;
> +	vdso_data->dcache_line_size      = powerpc_caches.dcache_block_bytes;
> +	vdso_data->icache_line_size      = powerpc_caches.icache_block_bytes;
>  
>  	/*
>  	 * Calculate the size of the 64 bits vDSO
>  	 */
>  	vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
>  	DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
> -#else
> -	vdso_data->dcache_block_size = L1_CACHE_BYTES;
> -	vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
> -	vdso_data->icache_block_size = L1_CACHE_BYTES;
> -	vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
> -#endif /* CONFIG_PPC64 */
> +#endif
>  
> +	/* Save the cache-block sizes for the VDSO */
> +	vdso_data->dcache_block_size     = powerpc_caches.dcache_block_bytes;
> +	vdso_data->icache_block_size     = powerpc_caches.icache_block_bytes;
> +	vdso_data->dcache_log_block_size = powerpc_caches.dcache_block_shift;
> +	vdso_data->icache_log_block_size = powerpc_caches.icache_block_shift;
>  
>  	/*
>  	 * Calculate the size of the 32 bits vDSO
> diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
> index 53dcb6b..c466977 100644
> --- a/arch/powerpc/lib/copypage_64.S
> +++ b/arch/powerpc/lib/copypage_64.S
> @@ -12,17 +12,17 @@
>  #include <asm/asm-offsets.h>
>  
>          .section        ".toc","aw"
> -PPC64_CACHES:
> -        .tc             ppc64_caches[TC],ppc64_caches
> +POWERPC_CACHES:
> +        .tc             powerpc_caches[TC],powerpc_caches
>          .section        ".text"
>  
>  _GLOBAL(copy_page)
>  	lis	r5,PAGE_SIZE at h
>  	ori	r5,r5,PAGE_SIZE at l
>  BEGIN_FTR_SECTION
> -	ld      r10,PPC64_CACHES at toc(r2)
> -	lwz	r11,DCACHEL1LOGLINESIZE(r10)	/* log2 of cache line size */
> -	lwz     r12,DCACHEL1LINESIZE(r10)	/* get cache line size */
> +	ld      r10,POWERPC_CACHES at toc(r2)
> +	lwz	r11,DCACHE_BLOCK_SHIFT(r10)	/* log2 of cache line size */
> +	lwz     r12,DCACHE_BLOCK_BYTES(r10)	/* get cache line size */
>  	li	r9,0
>  	srd	r8,r5,r11
>  
> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
> index 991ee81..8ad36a9 100644
> --- a/arch/powerpc/mm/Makefile
> +++ b/arch/powerpc/mm/Makefile
> @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
>  
>  ccflags-$(CONFIG_PPC64)	:= -mno-minimal-toc
>  
> -obj-y				:= fault.o mem.o pgtable.o gup.o \
> +obj-y				:= cache.o fault.o mem.o pgtable.o gup.o \
>  				   init_$(CONFIG_WORD_SIZE).o \
>  				   pgtable_$(CONFIG_WORD_SIZE).o
>  obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
> diff --git a/arch/powerpc/mm/cache.c b/arch/powerpc/mm/cache.c
> new file mode 100644
> index 0000000..0fbf2d6
> --- /dev/null
> +++ b/arch/powerpc/mm/cache.c
> @@ -0,0 +1,279 @@
> +#include <linux/kprobes.h>
> +#include <linux/export.h>
> +#include <linux/types.h>
> +
> +#include <asm/cputable.h>
> +#include <asm/system.h>
> +#include <asm/cache.h>
> +#include <asm/page.h>
> +#include <asm/mmu.h>
> +
> +/*
> + * Write any modified data cache blocks out to memory.
> + * Does not invalidate the corresponding cache lines (especially for
> + * any corresponding instruction cache).
> + */
> +void clean_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	unsigned long addr;
> +	FOR_EACH_CACHELINE(addr, start, stop, dcache)
> +		dcbst(addr);
> +	mb();
> +}
> +
> +/*
> + * Write any modified data cache blocks out to memory and invalidate them.
> + * Does not invalidate the corresponding instruction cache blocks.
> + */
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	unsigned long addr;
> +	FOR_EACH_CACHELINE(addr, start, stop, dcache)
> +		dcbf(addr);
> +	mb();
> +}
> +EXPORT_SYMBOL(flush_dcache_range);
> +
> +/*
> + * Like above, but invalidate the D-cache.  This is used by the 8xx
> + * to invalidate the cache so the PPC core doesn't get stale data
> + * from the CPM (no cache snooping here :-).
> + *
> + * invalidate_dcache_range(unsigned long start, unsigned long stop)
> + */
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	unsigned long addr;
> +	FOR_EACH_CACHELINE(addr, start, stop, dcache)
> +		dcbi(addr);
> +	mb();
> +}
> +
> +/*
> + * Unfortunately, we cannot flush individual chunks of the icache on 44x as
> + * we are passed kmapped addresses and we have a virtually-tagged icache.
> + *
> + * The only workaround is to invalidate the whole icache.
> + *
> + * NOTE: The CPU ignores the operands of the iccci instruction, so
> + *       they are passed as dummies.
> + */
> +__kprobes void __flush_icache_range(unsigned long start, unsigned long stop)
> +{
> +	unsigned long addr;
> +
> +	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
> +		return;
> +
> +	/* First ensure that data has been written to memory */
> +	FOR_EACH_CACHELINE(addr, start, stop, dcache)
> +		dcbst(addr);
> +	mb();
> +
> +#ifdef CONFIG_44x
> +	if (mmu_has_feature(MMU_FTR_TYPE_44x)) {
> +		asm volatile("iccci 0, r0" ::: "memory");
> +		return;
> +	}
> +#endif
> +
> +	/* Now discard the corresponding icache */
> +	FOR_EACH_CACHELINE(addr, start, stop, icache)
> +		icbi(addr);
> +	mb();
> +	isync();
> +}
> +EXPORT_SYMBOL(__flush_icache_range);
> +
> +/*
> + * Flush a particular page from the data cache to RAM.
> + * Note: this is necessary because the instruction cache does *not*
> + * snoop from the data cache.
> + * This is a no-op on the 601 which has a unified cache.
> + *
> + *	void __flush_dcache_icache(void *page)
> + */
> +void __flush_dcache_icache(void *page)
> +{
> +	unsigned long base = ((unsigned long)page) & ~(PAGE_SIZE-1);
> +	unsigned long addr;
> +
> +	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
> +		return;
> +
> +	/* First ensure that data has been written to memory */
> +	FOR_EACH_CACHELINE(addr, base, base + PAGE_SIZE, dcache)
> +		dcbst(addr);
> +
> +#ifdef CONFIG_44x
> +	/*
> +	 * We don't flush the icache on 44x. Those have a virtual icache and
> +	 * we don't have access to the virtual address here (it's not the
> +	 * page vaddr but where it's mapped in user space). The flushing of
> +	 * the icache on these is handled elsewhere, when a change in the
> +	 * address space occurs, before returning to user space.
> +	 */
> +	if (mmu_has_feature(MMU_FTR_TYPE_44x))
> +		return;
> +#endif
> +
> +	FOR_EACH_CACHELINE(addr, base, base + PAGE_SIZE, icache)
> +		icbi(addr);
> +
> +	mb();
> +	isync();
> +}
> +
> +/*
> + * Clear pages using the dcbz instruction, which doesn't cause any
> + * memory traffic (except to write out any cache lines which get
> + * displaced).  This only works on cacheable memory.
> + *
> + */
> +void clear_pages(void *page, int order)
> +{
> +	unsigned long addr, base = (unsigned long)page;
> +	FOR_EACH_CACHELINE(addr, base, base + (PAGE_SIZE << order), dcache)
> +		dcbz(addr);
> +}
> +EXPORT_SYMBOL(clear_pages);
> +
> +#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE)
> +/*
> + * Flush a particular page from the data cache to RAM, identified
> + * by its physical address.  We turn off the MMU so we can just use
> + * the physical address (this may be a highmem page without a kernel
> + * mapping).
> + */
> +void __flush_dcache_icache_phys(unsigned long phys_page)
> +{
> +	u32 d_size	= powerpc_caches.dcache_block_bytes;
> +	u32 i_size	= powerpc_caches.icache_block_bytes;
> +	u32 d_per_page	= powerpc_caches.dcache_blocks_per_page;
> +	u32 i_per_page	= powerpc_caches.icache_blocks_per_page;
> +
> +	/* Temporary registers for the ASM to use */
> +	unsigned long old_msr, tmp_msr, d_phys_page, i_phys_page;
> +
> +	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
> +		return;
> +
> +	/* Page base address (used in 2 different loops) */
> +	d_phys_page = i_phys_page = phys_page & ~(PAGE_SIZE - 1);
> +
> +	/*
> +	 * This part needs to be 100% ASM because we disable the MMU, and we
> +	 * can't accidentally let some C code go poking at memory while the
> +	 * MMU isn't enabled.
> +	 *
> +	 * NOTE: This looks blatantly unsafe with respect to interrupts.
> +	 *       Hopefully all the callers provide sufficient protection?
> +	 */
> +	asm volatile(
> +		/* First disable the MMU */
> +		"mfmsr %[old_msr]\n\t"
> +		"rlwinm %[tmp_msr], %[old_msr], 0, 28, 26\n\t"
> +		"mtmsr %[tmp_msr]\n\t"
> +		"isync\n\t"
> +
> +		/* Clean the data cache */
> +		"mtctr %[d_per_page]\n"
> +	"0:	dcbst 0, %[d_phys_page]\n\t"
> +		"add %[d_phys_page], %[d_phys_page], %[d_size]\n\t"
> +		"bdnz 0b\n\t"
> +		"sync\n\t"
> +
> +		/* Invalidate the instruction cache */
> +		"mtctr %[i_per_page]\n"
> +	"0:	icbi 0, %[i_phys_page]\n\t"
> +		"add %[i_phys_page], %[i_phys_page], %[i_size]\n\t"
> +		"bdnz 0b\n\t"
> +
> +		/* Finally, re-enable the MMU */
> +		"sync\n\t"
> +		"mtmsr %[old_msr]\n\t"
> +		"isync\n\t"
> +
> +		/* Outputs: scratch registers and in/out address operands */
> +		: [old_msr]    "=&r" (old_msr),
> +		  [tmp_msr]    "=&r" (tmp_msr),
> +		  [d_phys_page] "=b" (d_phys_page),
> +		  [i_phys_page] "=b" (i_phys_page)
> +
> +		/* Inputs */
> +		: [d_size]     "b" (d_size),
> +		  [i_size]     "b" (i_size),
> +		  [d_per_page] "b" (d_per_page),
> +		  [i_per_page] "b" (i_per_page),
> +		  "[d_phys_page]"  (d_phys_page),
> +		  "[i_phys_page]"  (i_phys_page)
> +
> +		/* Clobbers */
> +		: "memory", "c"
> +	);
> +}
> +#endif /* CONFIG_PPC32 && !CONFIG_BOOKE */
> +
> +#ifdef CONFIG_PPC64
> +/*
> + * Data cache flush that works on non-mapped physical addresses.
> + * Use only for non-LPAR setups!  It also assumes real mode
> + * is cacheable.  Used for flushing out the DART before using
> + * it as uncacheable memory.
> + */
> +void flush_dcache_phys_range(unsigned long start, unsigned long stop)
> +{
> +	/* System data cache block size */
> +	unsigned long bytes = powerpc_caches.dcache_block_bytes;
> +	unsigned long shift = powerpc_caches.dcache_block_shift;
> +
> +	/* Temporary registers for the ASM to use */
> +	unsigned long old_msr, tmp_msr;
> +
> +	/* Compute a start address and number of cachelines */
> +	unsigned long phys_addr = start & ~(bytes - 1);
> +	unsigned long nr_lines = ((stop - phys_addr) + (bytes - 1)) >> shift;
> +
> +	/*
> +	 * This part needs to be 100% ASM because we disable the MMU, and we
> +	 * can't accidentally let some C code go poking at memory while the
> +	 * MMU isn't enabled.
> +	 *
> +	 * NOTE: This looks blatantly unsafe with respect to interrupts.
> +	 *       Hopefully all the callers provide sufficient protection?
> +	 */
> +	asm volatile(
> +		/* First disable the MMU */
> +		"mfmsr %[old_msr]\n\t"
> +		"rlwinm %[tmp_msr], %[old_msr], 0, 28, 26\n\t"
> +		"mtmsr %[tmp_msr]\n\t"
> +		"isync\n\t"
> +
> +		/* Clean the data cache */
> +		"mtctr %[nr_lines]\n"
> +	"0:	dcbst 0, %[phys_addr]\n\t"
> +		"add %[phys_addr], %[phys_addr], %[bytes]\n\t"
> +		"bdnz 0b\n\t"
> +		"sync\n\t"
> +		"isync\n\t"
> +
> +		/* Finally, re-enable the MMU */
> +		"mtmsr %[old_msr]\n\t"
> +		"sync\n\t"
> +		"isync\n\t"
> +
> +		/* Outputs: scratch registers and the in/out address operand */
> +		: [old_msr]  "=&r" (old_msr),
> +		  [tmp_msr]  "=&r" (tmp_msr),
> +		  [phys_addr] "=b" (phys_addr)
> +
> +		/* Inputs */
> +		: [bytes]    "b" (bytes),
> +		  [nr_lines] "b" (nr_lines),
> +		  "[phys_addr]"  (phys_addr)
> +
> +		/* Clobbers */
> +		: "memory", "c"
> +	);
> +}
> +#endif /* CONFIG_PPC64 */
> diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
> index 329be36..3823f64 100644
> --- a/arch/powerpc/mm/dma-noncoherent.c
> +++ b/arch/powerpc/mm/dma-noncoherent.c
> @@ -328,7 +328,7 @@ void __dma_sync(void *vaddr, size_t size, int direction)
>  		 * invalidate only when cache-line aligned otherwise there is
>  		 * the potential for discarding uncommitted data from the cache
>  		 */
> -		if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1)))
> +		if ((start | size) & (powerpc_caches.dcache_block_bytes - 1))
>  			flush_dcache_range(start, end);
>  		else
>  			invalidate_dcache_range(start, end);
> diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
> index 08ab6fe..ac285d9 100644
> --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
> +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
> @@ -394,11 +394,16 @@ restore_regs:
>  
> 
>  /* cache flushing code. copied from arch/ppc/boot/util.S */
> -#define NUM_CACHE_LINES (128*8)
> +#define NUM_CACHE_LINES ((128 * 8) << (L1_CACHE_SHIFT_MAX - L1_CACHE_SHIFT_MIN))
>  
>  /*
>   * Flush data cache
>   * Do this by just reading lots of stuff into the cache.
> + *
> + * NOTE: This does not handle variable-sized cachelines properly, but since
> + *       we are just trying to flush the data cache by reading lots of data,
> + *       this works anyway.  We just make sure we read as many cachelines
> + *       as we could possibly need to overflow the cache on any hardware.
>   */
>  flush_data_cache:
>  	lis	r3,CONFIG_KERNEL_START at h
> @@ -407,6 +412,6 @@ flush_data_cache:
>  	mtctr	r4
>  1:
>  	lwz	r4,0(r3)
> -	addi	r3,r3,L1_CACHE_BYTES	/* Next line, please */
> +	addi	r3,r3,L1_CACHE_BYTES_MIN /* Next line, please */
>  	bdnz	1b
>  	blr
> diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
> index 31a7d3a..8503e38 100644
> --- a/arch/powerpc/platforms/powermac/pci.c
> +++ b/arch/powerpc/platforms/powermac/pci.c
> @@ -1135,7 +1135,7 @@ int pmac_pci_enable_device_hook(struct pci_dev *dev)
>  		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16);
>  
>  		pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
> -				      L1_CACHE_BYTES >> 2);
> +				powerpc_caches.dcache_block_bytes >> 2);
>  	}
>  
>  	return 0;
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index 03a217a..c537d49 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -26,6 +26,7 @@
>  
>  #include <asm/ptrace.h>
>  #include <asm/string.h>
> +#include <asm/cache.h>
>  #include <asm/prom.h>
>  #include <asm/machdep.h>
>  #include <asm/xmon.h>
> @@ -254,16 +255,6 @@ static inline void store_inst(void *p)
>  	asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p));
>  }
>  
> -static inline void cflush(void *p)
> -{
> -	asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p));
> -}
> -
> -static inline void cinval(void *p)
> -{
> -	asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p));
> -}
> -
>  /*
>   * Disable surveillance (the service processor watchdog function)
>   * while we are in xmon.
> @@ -1513,10 +1504,9 @@ static void prregs(struct pt_regs *fp)
>  
>  static void cacheflush(void)
>  {
> -	int cmd;
> -	unsigned long nflush;
> +	unsigned long nflush, i;
>  
> -	cmd = inchar();
> +	int cmd = inchar();
>  	if (cmd != 'i')
>  		termch = cmd;
>  	scanhex((void *)&adrs);
> @@ -1524,23 +1514,30 @@ static void cacheflush(void)
>  		termch = 0;
>  	nflush = 1;
>  	scanhex(&nflush);
> -	nflush = (nflush + L1_CACHE_BYTES - 1) / L1_CACHE_BYTES;
> -	if (setjmp(bus_error_jmp) == 0) {
> -		catch_memory_errors = 1;
> -		sync();
>  
> -		if (cmd != 'i') {
> -			for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES)
> -				cflush((void *) adrs);
> -		} else {
> -			for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES)
> -				cinval((void *) adrs);
> -		}
> -		sync();
> -		/* wait a little while to see if we get a machine check */
> -		__delay(200);
> +	if (setjmp(bus_error_jmp) != 0) {
> +		catch_memory_errors = 0;
> +		return;
>  	}
> -	catch_memory_errors = 0;
> +	catch_memory_errors = 1;
> +	sync();
> +
> +	/* First flush/invalidate data caches */
> +	if (cmd != 'i') {
> +		FOR_EACH_CACHELINE(i, adrs, adrs + nflush, dcache)
> +			dcbf(i);
> +	} else {
> +		FOR_EACH_CACHELINE(i, adrs, adrs + nflush, dcache)
> +			dcbi(i);
> +	}
> +
> +	/* Now invalidate instruction caches */
> +	FOR_EACH_CACHELINE(i, adrs, adrs + nflush, icache)
> +		icbi(i);
> +
> +	sync();
> +	/* wait a little while to see if we get a machine check */
> +	__delay(200);
>  }
>  
>  static unsigned long
> diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
> index 116a49c..04ead15 100644
> --- a/drivers/macintosh/smu.c
> +++ b/drivers/macintosh/smu.c
> @@ -136,7 +136,9 @@ static void smu_start_cmd(void)
>  	/* Flush command and data to RAM */
>  	faddr = (unsigned long)smu->cmd_buf;
>  	fend = faddr + smu->cmd_buf->length + 2;
> -	flush_inval_dcache_range(faddr, fend);
> +	flush_dcache_range(faddr, fend);
> +	mb();
> +	isync();
>  
> 
>  	/* We also disable NAP mode for the duration of the command
> @@ -198,7 +200,9 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
>  		 * reply length (it's only 2 cache lines anyway)
>  		 */
>  		faddr = (unsigned long)smu->cmd_buf;
> -		flush_inval_dcache_range(faddr, faddr + 256);
> +		flush_dcache_range(faddr, faddr + 256);
> +		mb();
> +		isync();
>  
>  		/* Now check ack */
>  		ack = (~cmd->cmd) & 0xff;




More information about the Linuxppc-dev mailing list