[PATCH] powerpc: improved byte swapping functions

Gabriel Paubert paubert at iram.es
Thu Oct 6 04:20:31 EST 2005


From: Gabriel Paubert <paubert at iram.es>

The previous versions of ___arch__swab16 and ___arch__swab32 were
not optimal. In most cases this patch makes the generated code
shorter and faster.

Signed-off-by: Gabriel Paubert <paubert at iram.es>

---

Additional notes: 

1) For ___arch__swab16, the trick is to let the compiler
generate a single rlwinm instruction for the final right
shift and cast (see the sketch in note 5 below).

2) For ___arch__swab32, the rotated value passed as an input
operand already has 2 bytes in the right place, so only 2 rlwimi
instructions are necessary to complete the byte swap (again, see
note 5 below).

3) Feel free to edit if you don't like the formatting of the result.

4) I've been reading the thread about how to format patches
and I hope that I got it right. But I believe that the
diffstat output is overkill for such a small patch.

	Regards,
	Gabriel

diff --git a/include/asm-powerpc/byteorder.h b/include/asm-powerpc/byteorder.h
--- a/include/asm-powerpc/byteorder.h
+++ b/include/asm-powerpc/byteorder.h
@@ -42,23 +42,22 @@ static __inline__ void st_le32(volatile 
 
 static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 value)
 {
-	__u16 result;
+	__u32 tmp;
 
-	__asm__("rlwimi %0,%1,8,16,23"
-	    : "=r" (result)
-	    : "r" (value), "0" (value >> 8));
-	return result;
+	__asm__("rlwimi %0,%0,16,8,15"
+		: "=r" (tmp) : "0" (value));
+	return (__u16)(tmp>>8);
 }
 
 static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 value)
 {
 	__u32 result;
 
-	__asm__("rlwimi %0,%1,24,16,23\n\t"
-	    "rlwimi %0,%1,8,8,15\n\t"
-	    "rlwimi %0,%1,24,0,7"
+	__asm__(
+"	rlwimi %0,%1,24,16,23\n"
+"	rlwimi %0,%1,24,0,7\n"
 	    : "=r" (result)
-	    : "r" (value), "0" (value >> 24));
+	    : "r" (value), "0" ((value >> 24)|(value<<8)));
 	return result;
 }
 


