powerpc: Merge bitops.h

David Gibson david at gibson.dropbear.id.au
Tue Nov 1 17:28:10 EST 2005


Here's a revised version.  This re-introduces the set_bits() function
from ppc64, which I had removed because I thought it was unused (it exists
on no other arch).  In fact it is used in the powermac interrupt code
(but not on pSeries).

This seems to be running fine on my G5 (ARCH=powerpc), but it still
hasn't been tested on 32-bit, which should probably happen before merging.

- We use LARXL/STCXL macros to generate the right (32 or 64 bit)
  instructions, similar to the LDL/STL macros from ppc_asm.h used in
  fpu.S (see the first sketch after this list).

- ppc32 previously used a full "sync" barrier at the end of
  test_and_*_bit(), whereas ppc64 used an "isync".  The merged version
  uses "isync", since I believe that's sufficient.

- The ppc64 versions of the minix_*() bitmap functions have changed
  semantics.  Previously on ppc64, these functions were big-endian
  (that is, bit 0 was the LSB of the first 64-bit, big-endian word).
  On ppc32 (and x86, for that matter) they were little-endian.  As far
  as I can tell, the big-endian usage was simply wrong - I guess
  no-one ever tried to use minixfs on ppc64.  (The second sketch after
  this list illustrates the little-endian swizzle.)

- On ppc32 find_next_bit() and find_next_zero_bit() are no longer
  inline (they were already out-of-line on ppc64).

- For ppc64, sched_find_first_bit() has moved from mmu_context.h to
  the merged bitops.  What it was doing in mmu_context.h in the first
  place, I have no idea.

- The fls() function is now implemented using the cntlzw instruction
  on ppc64, instead of generic_fls(), as it already was on ppc32 (a
  quick hand-worked check follows this list).

- For ARCH=ppc, this patch requires adding arch/powerpc/lib to the
  arch/ppc/Makefile.  This in turn requires some changes to
  arch/powerpc/lib/Makefile which didn't correctly handle ARCH=ppc.
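
For illustration only, here's a minimal sketch (not part of the patch)
of how the LARXL/STCXL macros and the barriers fit together in the
test_and_*() operations.  The function name is made up; LARXL, STCXL,
EIEIO_ON_SMP and ISYNC_ON_SMP are the ones the patch uses (the real
code also inserts PPC405_ERR77 before the conditional store):

/* The same asm template assembles to lwarx/stwcx. on 32-bit and to
 * ldarx/stdcx. on 64-bit, because adjacent string literals are
 * concatenated at compile time. */
#ifdef CONFIG_PPC64
#define LARXL		"ldarx"
#define STCXL		"stdcx."
#else
#define LARXL		"lwarx"
#define STCXL		"stwcx."
#endif

static inline int test_and_or_long(unsigned long mask, unsigned long *p)
{
	unsigned long old, t;

	__asm__ __volatile__(
	EIEIO_ON_SMP			/* write barrier on entry (SMP only) */
"1:"	LARXL "	%0,0,%3\n"		/* load word, take reservation */
	"or	%1,%0,%2\n"
	STCXL "	%1,0,%3\n"		/* store only if reservation held */
	"bne-	1b"			/* reservation lost: retry */
	ISYNC_ON_SMP			/* isync, not a full sync, on exit */
	: "=&r" (old), "=&r" (t)
	: "r" (mask), "r" (p)
	: "cc", "memory");

	return (old & mask) != 0;
}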
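
Similarly, a hypothetical sketch of the little-endian swizzle behind
the ext2_*()/minix_*() bitmap ops: XORing the bit number with
BITOP_LE_SWIZZLE (0x18 for 32-bit longs, 0x38 for 64-bit) flips the
byte index within the word, mapping byte-oriented little-endian bit
numbers onto the native big-endian word layout.  le_bit_to_native()
is an illustrative name, not part of the patch:

/* BITOP_LE_SWIZZLE is ((BITS_PER_LONG-1) & ~0x7) in the merged
 * header: 0x38 on 64-bit, 0x18 on 32-bit. */
static inline unsigned long le_bit_to_native(unsigned long nr)
{
	return nr ^ BITOP_LE_SWIZZLE;
}

/* Example, 64-bit: LE bit 0 lives in byte 0 of the bitmap; byte 0 of
 * a big-endian doubleword holds native bits 56..63, and indeed
 * le_bit_to_native(0) == 0x38 == 56. */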
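
Finally, a quick hand-worked sanity check of the cntlzw-based fls()
(no new code, just the arithmetic): cntlzw counts leading zero bits
in a 32-bit word, so fls(x) = 32 - cntlzw(x) yields the 1-based
position of the most significant set bit:

	fls(0)          = 32 - cntlzw(0)          = 32 - 32 = 0
	fls(1)          = 32 - cntlzw(1)          = 32 - 31 = 1
	fls(0x80000000) = 32 - cntlzw(0x80000000) = 32 -  0 = 32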

Built and running on G5.

Signed-off-by: David Gibson <david at gibson.dropbear.id.au>

Index: working-2.6/include/asm-ppc/bitops.h
===================================================================
--- working-2.6.orig/include/asm-ppc/bitops.h	2005-10-25 11:59:59.000000000 +1000
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,460 +0,0 @@
-/*
- * bitops.h: Bit string operations on the ppc
- */
-
-#ifdef __KERNEL__
-#ifndef _PPC_BITOPS_H
-#define _PPC_BITOPS_H
-
-#include <linux/config.h>
-#include <linux/compiler.h>
-#include <asm/byteorder.h>
-#include <asm/atomic.h>
-
-/*
- * The test_and_*_bit operations are taken to imply a memory barrier
- * on SMP systems.
- */
-#ifdef CONFIG_SMP
-#define SMP_WMB		"eieio\n"
-#define SMP_MB		"\nsync"
-#else
-#define SMP_WMB
-#define SMP_MB
-#endif /* CONFIG_SMP */
-
-static __inline__ void set_bit(int nr, volatile unsigned long * addr)
-{
-	unsigned long old;
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-	
-	__asm__ __volatile__("\n\
-1:	lwarx	%0,0,%3 \n\
-	or	%0,%0,%2 \n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%0,0,%3 \n\
-	bne-	1b"
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc" );
-}
-
-/*
- * non-atomic version
- */
-static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-
-	*p |= mask;
-}
-
-/*
- * clear_bit doesn't imply a memory barrier
- */
-#define smp_mb__before_clear_bit()	smp_mb()
-#define smp_mb__after_clear_bit()	smp_mb()
-
-static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long old;
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-
-	__asm__ __volatile__("\n\
-1:	lwarx	%0,0,%3 \n\
-	andc	%0,%0,%2 \n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%0,0,%3 \n\
-	bne-	1b"
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-}
-
-/*
- * non-atomic version
- */
-static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-
-	*p &= ~mask;
-}
-
-static __inline__ void change_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long old;
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-
-	__asm__ __volatile__("\n\
-1:	lwarx	%0,0,%3 \n\
-	xor	%0,%0,%2 \n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%0,0,%3 \n\
-	bne-	1b"
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-}
-
-/*
- * non-atomic version
- */
-static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-
-	*p ^= mask;
-}
-
-/*
- * test_and_*_bit do imply a memory barrier (?)
- */
-static __inline__ int test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned int old, t;
-	unsigned int mask = 1 << (nr & 0x1f);
-	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);
-
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%4 \n\
-	or	%1,%0,%3 \n"
-	PPC405_ERR77(0,%4)
-"	stwcx.	%1,0,%4 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=&r" (t), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc", "memory");
-
-	return (old & mask) != 0;
-}
-
-/*
- * non-atomic version
- */
-static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-	unsigned long old = *p;
-
-	*p = old | mask;
-	return (old & mask) != 0;
-}
-
-static __inline__ int test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned int old, t;
-	unsigned int mask = 1 << (nr & 0x1f);
-	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);
-
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%4 \n\
-	andc	%1,%0,%3 \n"
-	PPC405_ERR77(0,%4)
-"	stwcx.	%1,0,%4 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=&r" (t), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc", "memory");
-
-	return (old & mask) != 0;
-}
-
-/*
- * non-atomic version
- */
-static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-	unsigned long old = *p;
-
-	*p = old & ~mask;
-	return (old & mask) != 0;
-}
-
-static __inline__ int test_and_change_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned int old, t;
-	unsigned int mask = 1 << (nr & 0x1f);
-	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);
-
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%4 \n\
-	xor	%1,%0,%3 \n"
-	PPC405_ERR77(0,%4)
-"	stwcx.	%1,0,%4 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=&r" (t), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc", "memory");
-
-	return (old & mask) != 0;
-}
-
-/*
- * non-atomic version
- */
-static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-	unsigned long old = *p;
-
-	*p = old ^ mask;
-	return (old & mask) != 0;
-}
-
-static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
-{
-	return ((addr[nr >> 5] >> (nr & 0x1f)) & 1) != 0;
-}
-
-/* Return the bit position of the most significant 1 bit in a word */
-static __inline__ int __ilog2(unsigned long x)
-{
-	int lz;
-
-	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
-	return 31 - lz;
-}
-
-static __inline__ int ffz(unsigned long x)
-{
-	if ((x = ~x) == 0)
-		return 32;
-	return __ilog2(x & -x);
-}
-
-static inline int __ffs(unsigned long x)
-{
-	return __ilog2(x & -x);
-}
-
-/*
- * ffs: find first bit set. This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static __inline__ int ffs(int x)
-{
-	return __ilog2(x & -x) + 1;
-}
-
-/*
- * fls: find last (most-significant) bit set.
- * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
- */
-static __inline__ int fls(unsigned int x)
-{
-	int lz;
-
-	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
-	return 32 - lz;
-}
-
-/*
- * hweightN: returns the hamming weight (i.e. the number
- * of bits set) of a N-bit word
- */
-
-#define hweight32(x) generic_hweight32(x)
-#define hweight16(x) generic_hweight16(x)
-#define hweight8(x) generic_hweight8(x)
-
-/*
- * Find the first bit set in a 140-bit bitmap.
- * The first 100 bits are unlikely to be set.
- */
-static inline int sched_find_first_bit(const unsigned long *b)
-{
-	if (unlikely(b[0]))
-		return __ffs(b[0]);
-	if (unlikely(b[1]))
-		return __ffs(b[1]) + 32;
-	if (unlikely(b[2]))
-		return __ffs(b[2]) + 64;
-	if (b[3])
-		return __ffs(b[3]) + 96;
-	return __ffs(b[4]) + 128;
-}
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-static __inline__ unsigned long find_next_bit(const unsigned long *addr,
-	unsigned long size, unsigned long offset)
-{
-	unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
-	unsigned int result = offset & ~31UL;
-	unsigned int tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= 31UL;
-	if (offset) {
-		tmp = *p++;
-		tmp &= ~0UL << offset;
-		if (size < 32)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= 32;
-		result += 32;
-	}
-	while (size >= 32) {
-		if ((tmp = *p++) != 0)
-			goto found_middle;
-		result += 32;
-		size -= 32;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= ~0UL >> (32 - size);
-	if (tmp == 0UL)        /* Are any bits set? */
-		return result + size; /* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-}
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-#define find_first_bit(addr, size) \
-	find_next_bit((addr), (size), 0)
-
-/*
- * This implementation of find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h.
- */
-#define find_first_zero_bit(addr, size) \
-	find_next_zero_bit((addr), (size), 0)
-
-static __inline__ unsigned long find_next_zero_bit(const unsigned long *addr,
-	unsigned long size, unsigned long offset)
-{
-	unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
-	unsigned int result = offset & ~31UL;
-	unsigned int tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= 31UL;
-	if (offset) {
-		tmp = *p++;
-		tmp |= ~0UL >> (32-offset);
-		if (size < 32)
-			goto found_first;
-		if (tmp != ~0U)
-			goto found_middle;
-		size -= 32;
-		result += 32;
-	}
-	while (size >= 32) {
-		if ((tmp = *p++) != ~0U)
-			goto found_middle;
-		result += 32;
-		size -= 32;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)        /* Are any bits zero? */
-		return result + size; /* Nope. */
-found_middle:
-	return result + ffz(tmp);
-}
-
-
-#define ext2_set_bit(nr, addr)	__test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
-#define ext2_set_bit_atomic(lock, nr, addr)  test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
-#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
-#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
-
-static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
-{
-	__const__ unsigned char	*ADDR = (__const__ unsigned char *) addr;
-
-	return (ADDR[nr >> 3] >> (nr & 7)) & 1;
-}
-
-/*
- * This implementation of ext2_find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h and modified for a big-endian machine.
- */
-
-#define ext2_find_first_zero_bit(addr, size) \
-        ext2_find_next_zero_bit((addr), (size), 0)
-
-static __inline__ unsigned long ext2_find_next_zero_bit(const void *addr,
-	unsigned long size, unsigned long offset)
-{
-	unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
-	unsigned int result = offset & ~31UL;
-	unsigned int tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= 31UL;
-	if (offset) {
-		tmp = cpu_to_le32p(p++);
-		tmp |= ~0UL >> (32-offset);
-		if (size < 32)
-			goto found_first;
-		if (tmp != ~0U)
-			goto found_middle;
-		size -= 32;
-		result += 32;
-	}
-	while (size >= 32) {
-		if ((tmp = cpu_to_le32p(p++)) != ~0U)
-			goto found_middle;
-		result += 32;
-		size -= 32;
-	}
-	if (!size)
-		return result;
-	tmp = cpu_to_le32p(p);
-found_first:
-	tmp |= ~0U << size;
-	if (tmp == ~0UL)        /* Are any bits zero? */
-		return result + size; /* Nope. */
-found_middle:
-	return result + ffz(tmp);
-}
-
-/* Bitmap functions for the minix filesystem.  */
-#define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr)
-#define minix_set_bit(nr,addr) ((void)ext2_set_bit(nr,addr))
-#define minix_test_and_clear_bit(nr,addr) ext2_clear_bit(nr,addr)
-#define minix_test_bit(nr,addr) ext2_test_bit(nr,addr)
-#define minix_find_first_zero_bit(addr,size) ext2_find_first_zero_bit(addr,size)
-
-#endif /* _PPC_BITOPS_H */
-#endif /* __KERNEL__ */
Index: working-2.6/include/asm-ppc64/bitops.h
===================================================================
--- working-2.6.orig/include/asm-ppc64/bitops.h	2005-10-31 15:20:22.000000000 +1100
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,360 +0,0 @@
-/*
- * PowerPC64 atomic bit operations.
- * Dave Engebretsen, Todd Inglett, Don Reed, Pat McCarthy, Peter Bergner,
- * Anton Blanchard
- *
- * Originally taken from the 32b PPC code.  Modified to use 64b values for
- * the various counters & memory references.
- *
- * Bitops are odd when viewed on big-endian systems.  They were designed
- * on little endian so the size of the bitset doesn't matter (low order bytes
- * come first) as long as the bit in question is valid.
- *
- * Bits are "tested" often using the C expression (val & (1<<nr)) so we do
- * our best to stay compatible with that.  The assumption is that val will
- * be unsigned long for such tests.  As such, we assume the bits are stored
- * as an array of unsigned long (the usual case is a single unsigned long,
- * of course).  Here's an example bitset with bit numbering:
- *
- *   |63..........0|127........64|195.......128|255.......196|
- *
- * This leads to a problem. If an int, short or char is passed as a bitset
- * it will be a bad memory reference since we want to store in chunks
- * of unsigned long (64 bits here) size.
- *
- * There are a few little-endian macros used mostly for filesystem bitmaps,
- * these work on similar bit arrays layouts, but byte-oriented:
- *
- *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
- *
- * The main difference is that bit 3-5 in the bit number field needs to be
- * reversed compared to the big-endian bit fields. This can be achieved
- * by XOR with 0b111000 (0x38).
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _PPC64_BITOPS_H
-#define _PPC64_BITOPS_H
-
-#ifdef __KERNEL__
-
-#include <asm/synch.h>
-
-/*
- * clear_bit doesn't imply a memory barrier
- */
-#define smp_mb__before_clear_bit()	smp_mb()
-#define smp_mb__after_clear_bit()	smp_mb()
-
-static __inline__ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr)
-{
-	return (1UL & (addr[nr >> 6] >> (nr & 63)));
-}
-
-static __inline__ void set_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long old;
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%3		# set_bit\n\
-	or	%0,%0,%2\n\
-	stdcx.	%0,0,%3\n\
-	bne-	1b"
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-}
-
-static __inline__ void clear_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long old;
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%3		# clear_bit\n\
-	andc	%0,%0,%2\n\
-	stdcx.	%0,0,%3\n\
-	bne-	1b"
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-}
-
-static __inline__ void change_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long old;
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%3		# change_bit\n\
-	xor	%0,%0,%2\n\
-	stdcx.	%0,0,%3\n\
-	bne-	1b"
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-}
-
-static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long old, t;
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	ldarx	%0,0,%3		# test_and_set_bit\n\
-	or	%1,%0,%2 \n\
-	stdcx.	%1,0,%3 \n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (old), "=&r" (t)
-	: "r" (mask), "r" (p)
-	: "cc", "memory");
-
-	return (old & mask) != 0;
-}
-
-static __inline__ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long old, t;
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	ldarx	%0,0,%3		# test_and_clear_bit\n\
-	andc	%1,%0,%2\n\
-	stdcx.	%1,0,%3\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (old), "=&r" (t)
-	: "r" (mask), "r" (p)
-	: "cc", "memory");
-
-	return (old & mask) != 0;
-}
-
-static __inline__ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long old, t;
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	ldarx	%0,0,%3		# test_and_change_bit\n\
-	xor	%1,%0,%2\n\
-	stdcx.	%1,0,%3\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (old), "=&r" (t)
-	: "r" (mask), "r" (p)
-	: "cc", "memory");
-
-	return (old & mask) != 0;
-}
-
-static __inline__ void set_bits(unsigned long mask, unsigned long *addr)
-{
-	unsigned long old;
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%3		# set_bit\n\
-	or	%0,%0,%2\n\
-	stdcx.	%0,0,%3\n\
-	bne-	1b"
-	: "=&r" (old), "=m" (*addr)
-	: "r" (mask), "r" (addr), "m" (*addr)
-	: "cc");
-}
-
-/*
- * non-atomic versions
- */
-static __inline__ void __set_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	*p |= mask;
-}
-
-static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	*p &= ~mask;
-}
-
-static __inline__ void __change_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-
-	*p ^= mask;
-}
-
-static __inline__ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-	unsigned long old = *p;
-
-	*p = old | mask;
-	return (old & mask) != 0;
-}
-
-static __inline__ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-	unsigned long old = *p;
-
-	*p = old & ~mask;
-	return (old & mask) != 0;
-}
-
-static __inline__ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
-{
-	unsigned long mask = 1UL << (nr & 0x3f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
-	unsigned long old = *p;
-
-	*p = old ^ mask;
-	return (old & mask) != 0;
-}
-
-/*
- * Return the zero-based bit position (from RIGHT TO LEFT, 63 -> 0) of the
- * most significant (left-most) 1-bit in a double word.
- */
-static __inline__ int __ilog2(unsigned long x)
-{
-	int lz;
-
-	asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x));
-	return 63 - lz;
-}
-
-/*
- * Determines the bit position of the least significant (rightmost) 0 bit
- * in the specified double word. The returned bit position will be zero-based,
- * starting from the right side (63 - 0).
- */
-static __inline__ unsigned long ffz(unsigned long x)
-{
-	/* no zero exists anywhere in the 8 byte area. */
-	if ((x = ~x) == 0)
-		return 64;
-
-	/*
-	 * Calculate the bit position of the least signficant '1' bit in x
-	 * (since x has been changed this will actually be the least signficant
-	 * '0' bit in * the original x).  Note: (x & -x) gives us a mask that
-	 * is the least significant * (RIGHT-most) 1-bit of the value in x.
-	 */
-	return __ilog2(x & -x);
-}
-
-static __inline__ int __ffs(unsigned long x)
-{
-	return __ilog2(x & -x);
-}
-
-/*
- * ffs: find first bit set. This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static __inline__ int ffs(int x)
-{
-	unsigned long i = (unsigned long)x;
-	return __ilog2(i & -i) + 1;
-}
-
-/*
- * fls: find last (most-significant) bit set.
- * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
- */
-#define fls(x) generic_fls(x)
-
-/*
- * hweightN: returns the hamming weight (i.e. the number
- * of bits set) of a N-bit word
- */
-#define hweight64(x) generic_hweight64(x)
-#define hweight32(x) generic_hweight32(x)
-#define hweight16(x) generic_hweight16(x)
-#define hweight8(x) generic_hweight8(x)
-
-extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset);
-#define find_first_zero_bit(addr, size) \
-	find_next_zero_bit((addr), (size), 0)
-
-extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset);
-#define find_first_bit(addr, size) \
-	find_next_bit((addr), (size), 0)
-
-extern unsigned long find_next_zero_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset);
-#define find_first_zero_le_bit(addr, size) \
-	find_next_zero_le_bit((addr), (size), 0)
-
-static __inline__ int test_le_bit(unsigned long nr, __const__ unsigned long * addr)
-{
-	__const__ unsigned char	*ADDR = (__const__ unsigned char *) addr;
-	return (ADDR[nr >> 3] >> (nr & 7)) & 1;
-}
-
-#define test_and_clear_le_bit(nr, addr) \
-	test_and_clear_bit((nr) ^ 0x38, (addr))
-#define test_and_set_le_bit(nr, addr) \
-	test_and_set_bit((nr) ^ 0x38, (addr))
-
-/*
- * non-atomic versions
- */
-
-#define __set_le_bit(nr, addr) \
-	__set_bit((nr) ^ 0x38, (addr))
-#define __clear_le_bit(nr, addr) \
-	__clear_bit((nr) ^ 0x38, (addr))
-#define __test_and_clear_le_bit(nr, addr) \
-	__test_and_clear_bit((nr) ^ 0x38, (addr))
-#define __test_and_set_le_bit(nr, addr) \
-	__test_and_set_bit((nr) ^ 0x38, (addr))
-
-#define ext2_set_bit(nr,addr) \
-	__test_and_set_le_bit((nr), (unsigned long*)addr)
-#define ext2_clear_bit(nr, addr) \
-	__test_and_clear_le_bit((nr), (unsigned long*)addr)
-
-#define ext2_set_bit_atomic(lock, nr, addr) \
-	test_and_set_le_bit((nr), (unsigned long*)addr)
-#define ext2_clear_bit_atomic(lock, nr, addr) \
-	test_and_clear_le_bit((nr), (unsigned long*)addr)
-
-
-#define ext2_test_bit(nr, addr)      test_le_bit((nr),(unsigned long*)addr)
-#define ext2_find_first_zero_bit(addr, size) \
-	find_first_zero_le_bit((unsigned long*)addr, size)
-#define ext2_find_next_zero_bit(addr, size, off) \
-	find_next_zero_le_bit((unsigned long*)addr, size, off)
-
-#define minix_test_and_set_bit(nr,addr)		test_and_set_bit(nr,addr)
-#define minix_set_bit(nr,addr)			set_bit(nr,addr)
-#define minix_test_and_clear_bit(nr,addr)	test_and_clear_bit(nr,addr)
-#define minix_test_bit(nr,addr)			test_bit(nr,addr)
-#define minix_find_first_zero_bit(addr,size)	find_first_zero_bit(addr,size)
-
-#endif /* __KERNEL__ */
-#endif /* _PPC64_BITOPS_H */
Index: working-2.6/arch/powerpc/lib/bitops.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ working-2.6/arch/powerpc/lib/bitops.c	2005-11-01 15:51:56.000000000 +1100
@@ -0,0 +1,150 @@
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+#include <asm/bitops.h>
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	const unsigned long *p = addr + BITOP_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG-1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (offset) {
+		tmp = *(p++);
+		tmp &= (~0UL << offset);
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	while (size & ~(BITS_PER_LONG-1)) {
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+
+found_first:
+	tmp &= (~0UL >> (BITS_PER_LONG - size));
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + __ffs(tmp);
+}
+EXPORT_SYMBOL(find_next_bit);
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+	const unsigned long *p = addr + BITOP_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG-1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (offset) {
+		tmp = *(p++);
+		tmp |= ~0UL >> (BITS_PER_LONG - offset);
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (~tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	while (size & ~(BITS_PER_LONG-1)) {
+		if (~(tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+
+found_first:
+	tmp |= ~0UL << size;
+	if (tmp == ~0UL)	/* Are any bits zero? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + ffz(tmp);
+}
+EXPORT_SYMBOL(find_next_zero_bit);
+
+static inline unsigned int ext2_ilog2(unsigned int x)
+{
+	int lz;
+
+	asm("cntlzw %0,%1": "=r"(lz):"r"(x));
+	return 31 - lz;
+}
+
+static inline unsigned int ext2_ffz(unsigned int x)
+{
+	u32 rc;
+	if ((x = ~x) == 0)
+		return 32;
+	rc = ext2_ilog2(x & -x);
+	return rc;
+}
+
+unsigned long find_next_zero_le_bit(const unsigned long *addr,
+				    unsigned long size, unsigned long offset)
+{
+	const unsigned int *p = ((const unsigned int *)addr) + (offset >> 5);
+	unsigned int result = offset & ~31;
+	unsigned int tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset &= 31;
+	if (offset) {
+		tmp = cpu_to_le32p(p++);
+		tmp |= ~0U >> (32 - offset);	/* bug or feature ? */
+		if (size < 32)
+			goto found_first;
+		if (tmp != ~0)
+			goto found_middle;
+		size -= 32;
+		result += 32;
+	}
+	while (size >= 32) {
+		if ((tmp = cpu_to_le32p(p++)) != ~0)
+			goto found_middle;
+		result += 32;
+		size -= 32;
+	}
+	if (!size)
+		return result;
+	tmp = cpu_to_le32p(p);
+found_first:
+	tmp |= ~0 << size;
+	if (tmp == ~0)		/* Are any bits zero? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + ext2_ffz(tmp);
+}
+EXPORT_SYMBOL(find_next_zero_le_bit);
Index: working-2.6/arch/ppc64/kernel/bitops.c
===================================================================
--- working-2.6.orig/arch/ppc64/kernel/bitops.c	2005-10-25 11:59:53.000000000 +1000
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,147 +0,0 @@
-/*
- * These are too big to be inlined.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/bitops.h>
-#include <asm/byteorder.h>
-
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-				 unsigned long offset)
-{
-	const unsigned long *p = addr + (offset >> 6);
-	unsigned long result = offset & ~63UL;
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= 63UL;
-	if (offset) {
-		tmp = *(p++);
-		tmp |= ~0UL >> (64 - offset);
-		if (size < 64)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= 64;
-		result += 64;
-	}
-	while (size & ~63UL) {
-		if (~(tmp = *(p++)))
-			goto found_middle;
-		result += 64;
-		size -= 64;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + ffz(tmp);
-}
-
-EXPORT_SYMBOL(find_next_zero_bit);
-
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-			    unsigned long offset)
-{
-	const unsigned long *p = addr + (offset >> 6);
-	unsigned long result = offset & ~63UL;
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= 63UL;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < 64)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= 64;
-		result += 64;
-	}
-	while (size & ~63UL) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += 64;
-		size -= 64;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= (~0UL >> (64 - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-}
-
-EXPORT_SYMBOL(find_next_bit);
-
-static inline unsigned int ext2_ilog2(unsigned int x)
-{
-	int lz;
-
-	asm("cntlzw %0,%1": "=r"(lz):"r"(x));
-	return 31 - lz;
-}
-
-static inline unsigned int ext2_ffz(unsigned int x)
-{
-	u32 rc;
-	if ((x = ~x) == 0)
-		return 32;
-	rc = ext2_ilog2(x & -x);
-	return rc;
-}
-
-unsigned long find_next_zero_le_bit(const unsigned long *addr, unsigned long size,
-				    unsigned long offset)
-{
-	const unsigned int *p = ((const unsigned int *)addr) + (offset >> 5);
-	unsigned int result = offset & ~31;
-	unsigned int tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= 31;
-	if (offset) {
-		tmp = cpu_to_le32p(p++);
-		tmp |= ~0U >> (32 - offset);	/* bug or feature ? */
-		if (size < 32)
-			goto found_first;
-		if (tmp != ~0)
-			goto found_middle;
-		size -= 32;
-		result += 32;
-	}
-	while (size >= 32) {
-		if ((tmp = cpu_to_le32p(p++)) != ~0)
-			goto found_middle;
-		result += 32;
-		size -= 32;
-	}
-	if (!size)
-		return result;
-	tmp = cpu_to_le32p(p);
-found_first:
-	tmp |= ~0 << size;
-	if (tmp == ~0)		/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + ext2_ffz(tmp);
-}
-
-EXPORT_SYMBOL(find_next_zero_le_bit);
Index: working-2.6/arch/powerpc/kernel/ppc_ksyms.c
===================================================================
--- working-2.6.orig/arch/powerpc/kernel/ppc_ksyms.c	2005-10-31 15:20:57.000000000 +1100
+++ working-2.6/arch/powerpc/kernel/ppc_ksyms.c	2005-11-01 15:51:56.000000000 +1100
@@ -81,15 +81,6 @@
 EXPORT_SYMBOL(ucSystemType);
 #endif
 
-#if !defined(__INLINE_BITOPS)
-EXPORT_SYMBOL(set_bit);
-EXPORT_SYMBOL(clear_bit);
-EXPORT_SYMBOL(change_bit);
-EXPORT_SYMBOL(test_and_set_bit);
-EXPORT_SYMBOL(test_and_clear_bit);
-EXPORT_SYMBOL(test_and_change_bit);
-#endif /* __INLINE_BITOPS */
-
 EXPORT_SYMBOL(strcpy);
 EXPORT_SYMBOL(strncpy);
 EXPORT_SYMBOL(strcat);
Index: working-2.6/arch/ppc/kernel/bitops.c
===================================================================
--- working-2.6.orig/arch/ppc/kernel/bitops.c	2005-10-25 11:59:53.000000000 +1000
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 1996 Paul Mackerras.
- */
-
-#include <linux/kernel.h>
-#include <linux/bitops.h>
-
-/*
- * If the bitops are not inlined in bitops.h, they are defined here.
- *  -- paulus
- */
-#if !__INLINE_BITOPS
-void set_bit(int nr, volatile void * addr)
-{
-	unsigned long old;
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-	
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%3 \n\
-	or	%0,%0,%2 \n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%0,0,%3 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc" );
-}
-
-void clear_bit(int nr, volatile void *addr)
-{
-	unsigned long old;
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%3 \n\
-	andc	%0,%0,%2 \n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%0,0,%3 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-}
-
-void change_bit(int nr, volatile void *addr)
-{
-	unsigned long old;
-	unsigned long mask = 1 << (nr & 0x1f);
-	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
-
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%3 \n\
-	xor	%0,%0,%2 \n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%0,0,%3 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-}
-
-int test_and_set_bit(int nr, volatile void *addr)
-{
-	unsigned int old, t;
-	unsigned int mask = 1 << (nr & 0x1f);
-	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);
-
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%4 \n\
-	or	%1,%0,%3 \n"
-	PPC405_ERR77(0,%4)
-"	stwcx.	%1,0,%4 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=&r" (t), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-
-	return (old & mask) != 0;
-}
-
-int test_and_clear_bit(int nr, volatile void *addr)
-{
-	unsigned int old, t;
-	unsigned int mask = 1 << (nr & 0x1f);
-	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);
-
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%4 \n\
-	andc	%1,%0,%3 \n"
-	PPC405_ERR77(0,%4)
-"	stwcx.	%1,0,%4 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=&r" (t), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-
-	return (old & mask) != 0;
-}
-
-int test_and_change_bit(int nr, volatile void *addr)
-{
-	unsigned int old, t;
-	unsigned int mask = 1 << (nr & 0x1f);
-	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);
-
-	__asm__ __volatile__(SMP_WMB "\n\
-1:	lwarx	%0,0,%4 \n\
-	xor	%1,%0,%3 \n"
-	PPC405_ERR77(0,%4)
-"	stwcx.	%1,0,%4 \n\
-	bne	1b"
-	SMP_MB
-	: "=&r" (old), "=&r" (t), "=m" (*p)
-	: "r" (mask), "r" (p), "m" (*p)
-	: "cc");
-
-	return (old & mask) != 0;
-}
-#endif /* !__INLINE_BITOPS */
Index: working-2.6/arch/ppc64/kernel/Makefile
===================================================================
--- working-2.6.orig/arch/ppc64/kernel/Makefile	2005-10-31 15:20:57.000000000 +1100
+++ working-2.6/arch/ppc64/kernel/Makefile	2005-11-01 15:51:56.000000000 +1100
@@ -13,7 +13,7 @@
 
 obj-y               +=	irq.o idle.o dma.o \
 			signal.o \
-			align.o bitops.o pacaData.o \
+			align.o pacaData.o \
 			udbg.o ioctl32.o \
 			rtc.o \
 			cpu_setup_power4.o \
Index: working-2.6/include/asm-powerpc/bitops.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ working-2.6/include/asm-powerpc/bitops.h	2005-11-01 15:51:56.000000000 +1100
@@ -0,0 +1,437 @@
+/*
+ * PowerPC atomic bit operations.
+ *
+ * Merged version by David Gibson <david at gibson.dropbear.id.au>.
+ * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
+ * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard.  They
+ * originally took it from the ppc32 code.
+ *
+ * Within a word, bits are numbered LSB first.  Lots of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so for a
+ * ppc64 system the bits end up numbered:
+ *   |63..............0|127............64|191...........128|255...........192|
+ * and on ppc32:
+ *   |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
+ *
+ * There are a few little-endian macros used mostly for filesystem
+ * bitmaps, these work on similar bit arrays layouts, but
+ * byte-oriented:
+ *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
+ *
+ * The main difference is that bits 3-5 (64b) or 3-4 (32b) in the bit
+ * number field needs to be reversed compared to the big-endian bit
+ * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_BITOPS_H
+#define _ASM_POWERPC_BITOPS_H
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <asm/atomic.h>
+#include <asm/synch.h>
+
+/*
+ * clear_bit doesn't imply a memory barrier
+ */
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	smp_mb()
+
+#define BITOP_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
+
+#ifdef CONFIG_PPC64
+#define LARXL		"ldarx"
+#define STCXL		"stdcx."
+#define CNTLZL		"cntlzd"
+#else
+#define LARXL		"lwarx"
+#define STCXL		"stwcx."
+#define CNTLZL		"cntlzw"
+#endif
+
+static __inline__ void set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long old;
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	__asm__ __volatile__(
+"1:"	LARXL "	%0,0,%3	# set_bit\n"
+	"or	%0,%0,%2\n"
+	PPC405_ERR77(0,%3)
+	STCXL "	%0,0,%3\n"
+	"bne-	1b"
+	: "=&r"(old), "=m"(*p)
+	: "r"(mask), "r"(p), "m"(*p)
+	: "cc" );
+}
+
+static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long old;
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	__asm__ __volatile__(
+"1:"	LARXL "	%0,0,%3	# clear_bit\n"
+	"andc	%0,%0,%2\n"
+	PPC405_ERR77(0,%3)
+	STCXL "	%0,0,%3\n"
+	"bne-	1b"
+	: "=&r"(old), "=m"(*p)
+	: "r"(mask), "r"(p), "m"(*p)
+	: "cc" );
+}
+
+static __inline__ void change_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long old;
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	__asm__ __volatile__(
+"1:"	LARXL "	%0,0,%3	# change_bit\n"
+	"xor	%0,%0,%2\n"
+	PPC405_ERR77(0,%3)
+	STCXL "	%0,0,%3\n"
+	"bne-	1b"
+	: "=&r"(old), "=m"(*p)
+	: "r"(mask), "r"(p), "m"(*p)
+	: "cc" );
+}
+
+static __inline__ int test_and_set_bit(unsigned long nr,
+				       volatile unsigned long *addr)
+{
+	unsigned long old, t;
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	__asm__ __volatile__(
+	EIEIO_ON_SMP
+"1:"	LARXL "	%0,0,%3		# test_and_set_bit\n"
+	"or	%1,%0,%2 \n"
+	PPC405_ERR77(0,%3)
+	STCXL "	%1,0,%3 \n"
+	"bne-	1b"
+	ISYNC_ON_SMP
+	: "=&r" (old), "=&r" (t)
+	: "r" (mask), "r" (p)
+	: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+static __inline__ int test_and_clear_bit(unsigned long nr,
+					 volatile unsigned long *addr)
+{
+	unsigned long old, t;
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	__asm__ __volatile__(
+	EIEIO_ON_SMP
+"1:"	LARXL "	%0,0,%3		# test_and_clear_bit\n"
+	"andc	%1,%0,%2 \n"
+	PPC405_ERR77(0,%3)
+	STCXL "	%1,0,%3 \n"
+	"bne-	1b"
+	ISYNC_ON_SMP
+	: "=&r" (old), "=&r" (t)
+	: "r" (mask), "r" (p)
+	: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+static __inline__ int test_and_change_bit(unsigned long nr,
+					  volatile unsigned long *addr)
+{
+	unsigned long old, t;
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	__asm__ __volatile__(
+	EIEIO_ON_SMP
+"1:"	LARXL "	%0,0,%3		# test_and_change_bit\n"
+	"xor	%1,%0,%2 \n"
+	PPC405_ERR77(0,%3)
+	STCXL "	%1,0,%3 \n"
+	"bne-	1b"
+	ISYNC_ON_SMP
+	: "=&r" (old), "=&r" (t)
+	: "r" (mask), "r" (p)
+	: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+static __inline__ void set_bits(unsigned long mask, unsigned long *addr)
+{
+	unsigned long old;
+
+	__asm__ __volatile__(
+"1:"	LARXL "	%0,0,%3		# set_bits\n"
+	"or	%0,%0,%2\n"
+	STCXL "	%0,0,%3\n"
+	"bne-	1b"
+	: "=&r" (old), "=m" (*addr)
+	: "r" (mask), "r" (addr), "m" (*addr)
+	: "cc");
+}
+
+/* Non-atomic versions */
+static __inline__ int test_bit(unsigned long nr,
+			       __const__ volatile unsigned long *addr)
+{
+	return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+static __inline__ void __set_bit(unsigned long nr,
+				 volatile unsigned long *addr)
+{
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	*p  |= mask;
+}
+
+static __inline__ void __clear_bit(unsigned long nr,
+				   volatile unsigned long *addr)
+{
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	*p &= ~mask;
+}
+
+static __inline__ void __change_bit(unsigned long nr,
+				    volatile unsigned long *addr)
+{
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+	*p ^= mask;
+}
+
+static __inline__ int __test_and_set_bit(unsigned long nr,
+					 volatile unsigned long *addr)
+{
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old | mask;
+	return (old & mask) != 0;
+}
+
+static __inline__ int __test_and_clear_bit(unsigned long nr,
+					   volatile unsigned long *addr)
+{
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old & ~mask;
+	return (old & mask) != 0;
+}
+
+static __inline__ int __test_and_change_bit(unsigned long nr,
+					    volatile unsigned long *addr)
+{
+	unsigned long mask = BITOP_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old ^ mask;
+	return (old & mask) != 0;
+}
+
+/*
+ * Return the zero-based bit position (LE, not IBM bit numbering) of
+ * the most significant 1-bit in an unsigned long.
+ */
+static __inline__ int __ilog2(unsigned long x)
+{
+	int lz;
+
+	asm (CNTLZL " %0,%1" : "=r" (lz) : "r" (x));
+	return BITS_PER_LONG - 1 - lz;
+}
+
+/*
+ * Determines the bit position of the least significant 0 bit in the
+ * specified unsigned long.  The returned bit position will be
+ * zero-based, starting from the right side (63/31 - 0).
+ */
+static __inline__ unsigned long ffz(unsigned long x)
+{
+	/* no zero exists anywhere in the word. */
+	if ((x = ~x) == 0)
+		return BITS_PER_LONG;
+
+	/*
+	 * Calculate the bit position of the least significant '1' bit in x
+	 * (since x has been changed, this will actually be the least
+	 * significant '0' bit in the original x).  Note: (x & -x) gives us
+	 * a mask that is the least significant (right-most) 1-bit of x.
+	 */
+	return __ilog2(x & -x);
+}
+
+static __inline__ int __ffs(unsigned long x)
+{
+	return __ilog2(x & -x);
+}
+
+/*
+ * ffs: find first bit set. This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+	unsigned long i = (unsigned long)x;
+	return __ilog2(i & -i) + 1;
+}
+
+/*
+ * fls: find last (most-significant) bit set.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static __inline__ int fls(unsigned int x)
+{
+	int lz;
+
+	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
+	return 32 - lz;
+}
+
+/*
+ * hweightN: returns the hamming weight (i.e. the number
+ * of bits set) of a N-bit word
+ */
+#define hweight64(x) generic_hweight64(x)
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+unsigned long find_next_zero_bit(const unsigned long *addr,
+				 unsigned long size, unsigned long offset);
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
+ */
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+unsigned long find_next_bit(const unsigned long *addr,
+			    unsigned long size, unsigned long offset);
+
+/* Little-endian versions */
+
+static __inline__ int test_le_bit(unsigned long nr,
+				  __const__ unsigned long *addr)
+{
+	__const__ unsigned char	*tmp = (__const__ unsigned char *) addr;
+	return (tmp[nr >> 3] >> (nr & 7)) & 1;
+}
+
+#define __set_le_bit(nr, addr) \
+	__set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+#define __clear_le_bit(nr, addr) \
+	__clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+
+#define test_and_set_le_bit(nr, addr) \
+	test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+#define test_and_clear_le_bit(nr, addr) \
+	test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+
+#define __test_and_set_le_bit(nr, addr) \
+	__test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+#define __test_and_clear_le_bit(nr, addr) \
+	__test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+
+#define find_first_zero_le_bit(addr, size) find_next_zero_le_bit((addr), (size), 0)
+unsigned long find_next_zero_le_bit(const unsigned long *addr,
+				    unsigned long size, unsigned long offset);
+
+/* Bitmap functions for the ext2 filesystem */
+
+#define ext2_set_bit(nr,addr) \
+	__test_and_set_le_bit((nr), (unsigned long*)addr)
+#define ext2_clear_bit(nr, addr) \
+	__test_and_clear_le_bit((nr), (unsigned long*)addr)
+
+#define ext2_set_bit_atomic(lock, nr, addr) \
+	test_and_set_le_bit((nr), (unsigned long*)addr)
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+	test_and_clear_le_bit((nr), (unsigned long*)addr)
+
+#define ext2_test_bit(nr, addr)      test_le_bit((nr),(unsigned long*)addr)
+
+#define ext2_find_first_zero_bit(addr, size) \
+	find_first_zero_le_bit((unsigned long*)addr, size)
+#define ext2_find_next_zero_bit(addr, size, off) \
+	find_next_zero_le_bit((unsigned long*)addr, size, off)
+
+/* Bitmap functions for the minix filesystem.  */
+
+#define minix_test_and_set_bit(nr,addr) \
+	__test_and_set_le_bit(nr, (unsigned long *)addr)
+#define minix_set_bit(nr,addr) \
+	__set_le_bit(nr, (unsigned long *)addr)
+#define minix_test_and_clear_bit(nr,addr) \
+	__test_and_clear_le_bit(nr, (unsigned long *)addr)
+#define minix_test_bit(nr,addr) \
+	test_le_bit(nr, (unsigned long *)addr)
+
+#define minix_find_first_zero_bit(addr,size) \
+	find_first_zero_le_bit((unsigned long *)addr, size)
+
+/*
+ * Every architecture must define this function. It's the fastest
+ * way of searching a 140-bit bitmap where the first 100 bits are
+ * unlikely to be set. It's guaranteed that at least one of the 140
+ * bits is cleared.
+ */
+static inline int sched_find_first_bit(const unsigned long *b)
+{
+#ifdef CONFIG_PPC64
+	if (unlikely(b[0]))
+		return __ffs(b[0]);
+	if (unlikely(b[1]))
+		return __ffs(b[1]) + 64;
+	return __ffs(b[2]) + 128;
+#else
+	if (unlikely(b[0]))
+		return __ffs(b[0]);
+	if (unlikely(b[1]))
+		return __ffs(b[1]) + 32;
+	if (unlikely(b[2]))
+		return __ffs(b[2]) + 64;
+	if (b[3])
+		return __ffs(b[3]) + 96;
+	return __ffs(b[4]) + 128;
+#endif
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_POWERPC_BITOPS_H */
Index: working-2.6/include/asm-ppc64/mmu_context.h
===================================================================
--- working-2.6.orig/include/asm-ppc64/mmu_context.h	2005-10-25 11:59:59.000000000 +1000
+++ working-2.6/include/asm-ppc64/mmu_context.h	2005-11-01 15:51:56.000000000 +1100
@@ -16,21 +16,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-/*
- * Every architecture must define this function. It's the fastest
- * way of searching a 140-bit bitmap where the first 100 bits are
- * unlikely to be set. It's guaranteed that at least one of the 140
- * bits is cleared.
- */
-static inline int sched_find_first_bit(unsigned long *b)
-{
-	if (unlikely(b[0]))
-		return __ffs(b[0]);
-	if (unlikely(b[1]))
-		return __ffs(b[1]) + 64;
-	return __ffs(b[2]) + 128;
-}
-
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 }
Index: working-2.6/arch/ppc/Makefile
===================================================================
--- working-2.6.orig/arch/ppc/Makefile	2005-10-31 15:20:57.000000000 +1100
+++ working-2.6/arch/ppc/Makefile	2005-11-01 15:51:56.000000000 +1100
@@ -66,7 +66,8 @@
 core-y				+= arch/ppc/kernel/ arch/powerpc/kernel/ \
 				   arch/ppc/platforms/ \
 				   arch/ppc/mm/ arch/ppc/lib/ \
-				   arch/ppc/syslib/ arch/powerpc/sysdev/
+				   arch/ppc/syslib/ arch/powerpc/sysdev/ \
+				   arch/powerpc/lib/
 core-$(CONFIG_4xx)		+= arch/ppc/platforms/4xx/
 core-$(CONFIG_83xx)		+= arch/ppc/platforms/83xx/
 core-$(CONFIG_85xx)		+= arch/ppc/platforms/85xx/
Index: working-2.6/arch/powerpc/lib/Makefile
===================================================================
--- working-2.6.orig/arch/powerpc/lib/Makefile	2005-10-31 15:20:57.000000000 +1100
+++ working-2.6/arch/powerpc/lib/Makefile	2005-11-01 15:51:56.000000000 +1100
@@ -3,13 +3,14 @@
 #
 
 ifeq ($(CONFIG_PPC_MERGE),y)
-obj-y			:= string.o
+obj-y			:= string.o strcase.o
+obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 endif
 
-obj-y			+= strcase.o
-obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
+obj-y			+= bitops.o
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
-			   memcpy_64.o usercopy_64.o mem_64.o
+			   memcpy_64.o usercopy_64.o mem_64.o \
+			   strcase.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
 obj-$(CONFIG_XMON)	+= sstep.o
 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/people/dgibson