[RFC] Optimize __arch_swab32 and __arch_swab16

Joakim Tjernlund joakim.tjernlund at transmode.se
Thu Aug 11 18:13:34 EST 2011


On PPC, __arch_swab32 and __arch_swab16 generate non-optimal code.
The code doesn't schedule very well, it needs to copy its input register,
and swab16 needs an extra insn to clear its upper bits. I have improved
these functions (see the my__xx versions below). Any problems with the
new asm? If not, I will send a patch.

Below is some example code to illustrate:

#include <stdio.h>

unsigned long __arch_swab32(unsigned long value)
{
	unsigned long result;

	__asm__("rlwimi %0,%1,24,16,23\n\t"
	    "rlwimi %0,%1,8,8,15\n\t"
	    "rlwimi %0,%1,24,0,7"
	    : "=r" (result)
	    : "r" (value), "0" (value >> 24));
	return result;
}

unsigned long my__arch_swab32(unsigned long value)
{
	unsigned long tmp;

	__asm__("rlwimi %0,%0,24,0xffffffff"
		: "+r" (value));
	__asm__("rlwinm %0,%1,16,0xffffffff"
		: "=r" (tmp), "+r" (value));
	__asm__("rlwimi %0,%1,0,0x00ff0000"
		: "+r" (value), "+r" (tmp));
	__asm__("rlwimi %0,%1,0,0x000000ff"
		: "+r" (value), "+r" (tmp));
	return value;
}

unsigned short __arch_swab16(unsigned short value)
{
	unsigned short result;

	__asm__("rlwimi %0,%1,8,16,23"
	    : "=r" (result)
	    : "r" (value), "0" (value >> 8));
	return result;
}

unsigned short my__arch_swab16(unsigned short value)
{
	__asm__("rlwimi %0,%0,16,0x00ff0000"
		: "+r" (value));
	__asm__("rlwinm %0,%0,24,0x0000ffff"
		: "+r"(value));
	return value;
}

/*
 * Run both proposed swab helpers on one sample value and print the
 * input next to each swapped result.
 */
int main(void)
{
	unsigned long x = 0x12345678, y;

	y = my__arch_swab32(x);
	/* %x expects unsigned int, so narrow the unsigned long arguments. */
	printf("swab32 x:%x, y:%x\n", (unsigned int)x, (unsigned int)y);
	/* x is converted to unsigned short here, so only 0x5678 is swapped. */
	y = my__arch_swab16(x);
	printf("swab16 x:%x, y:%x\n", (unsigned int)x, (unsigned int)y);
	return 0;
}

Generated asm:

	.file	"tst.c"
	.section	".text"
	.align 2
	.globl __arch_swab32
	.type	__arch_swab32, @function
	# Compiler output for the baseline __arch_swab32.
	# The argument arrives in %r3 and the result is returned in %r3.
__arch_swab32:
	# Copy the input to %r0, because %r3 is overwritten next.
	mr %r0,%r3
	# Result starts as value >> 24 (the "0" input operand of the asm).
	srwi %r3,%r3,24
#APP
	# Insert bits 0x0000ff00 from the input rotated left by 24.
	rlwimi %r3,%r0,24,16,23
	# Insert bits 0x00ff0000 from the input rotated left by 8.
	rlwimi %r3,%r0,8,8,15
	# Insert bits 0xff000000 from the input rotated left by 24.
	rlwimi %r3,%r0,24,0,7
#NO_APP
	blr
	.size	__arch_swab32, .-__arch_swab32
	.align 2
	.globl my__arch_swab32
	.type	my__arch_swab32, @function
	# Compiler output for the proposed my__arch_swab32.
	# No register copy or shift is emitted outside the inline asm.
my__arch_swab32:
#APP
	# Rotate the value left by 24 in place (full mask, so a plain rotate).
	rlwimi %r3,%r3,24,0xffffffff
	# Scratch copy in %r0: the rotated value rotated left by another 16.
	rlwinm %r0,%r3,16,0xffffffff
	# Patch bits 0x00ff0000 of the result from the scratch copy.
	rlwimi %r3,%r0,0,0x00ff0000
	# Patch bits 0x000000ff of the result from the scratch copy.
	rlwimi %r3,%r0,0,0x000000ff
#NO_APP
	blr
	.size	my__arch_swab32, .-my__arch_swab32
	.align 2
	.globl __arch_swab16
	.type	__arch_swab16, @function
	# Compiler output for the baseline __arch_swab16.
__arch_swab16:
	# Copy the input to %r0, because %r3 is overwritten next.
	mr %r0,%r3
	# Result starts as value >> 8 (the "0" input operand of the asm).
	srwi %r3,%r3,8
#APP
	# Insert bits 0x0000ff00 from the input rotated left by 8.
	rlwimi %r3,%r0,8,16,23
#NO_APP
	# Emitted by the compiler after the asm: mask the result to 16 bits.
	rlwinm %r3,%r3,0,0xffff
	blr
	.size	__arch_swab16, .-__arch_swab16
	.align 2
	.globl my__arch_swab16
	.type	my__arch_swab16, @function
	# Compiler output for the proposed my__arch_swab16.
	# Only the two inline-asm instructions are emitted before the return.
my__arch_swab16:
#APP
	# Copy the low byte into bits 0x00ff0000 of the same register.
	rlwimi %r3,%r3,16,0x00ff0000
	# Rotate left by 24 and keep only the low 16 bits.
	rlwinm %r3,%r3,24,0x0000ffff
#NO_APP
	blr
	.size	my__arch_swab16, .-my__arch_swab16
	# Format strings for the two printf calls in main.
	# The section type is written "@progbits" (the mail archive had
	# rewritten the '@' as " at ").
	.section	.rodata.str1.4,"aMS",@progbits,1
	.align 2
.LC0:
	.string	"swab32 x:%x, y:%x\n"
	.align 2
.LC1:
	.string	"swab16 x:%x, y:%x\n"
	.section	".text"
	.align 2
	.globl main
	.type	main, @function
	# Compiler output for main: calls my__arch_swab32 and my__arch_swab16
	# and prints each result with printf.
	# The relocation operators are written sym@ha / sym@l (the mail
	# archive had rewritten each '@' as " at ").
main:
	# Prologue: 16-byte frame, link register saved at 20(%r1).
	mflr %r0
	lis %r3,0x1234
	stwu %r1,-16(%r1)
	# %r3 = 0x12345678 (22136 == 0x5678), the argument for swab32.
	ori %r3,%r3,22136
	stw %r0,20(%r1)
	bl my__arch_swab32
	# printf(.LC0, 0x12345678, result): %r3 = format, %r4 = x, %r5 = y.
	mr %r5,%r3
	lis %r4,0x1234
	lis %r3,.LC0@ha
	ori %r4,%r4,22136
	la %r3,.LC0@l(%r3)
	bl printf
	# swab16 receives only the low halfword, 0x5678.
	li %r3,22136
	bl my__arch_swab16
	# printf(.LC1, 0x12345678, result): %r3 = format, %r4 = x, %r5 = y.
	lis %r4,0x1234
	mr %r5,%r3
	lis %r3,.LC1@ha
	la %r3,.LC1@l(%r3)
	ori %r4,%r4,22136
	bl printf
	# Epilogue: reload the link register, pop the frame and return.
	lwz %r0,20(%r1)
	addi %r1,%r1,16
	mtlr %r0
	blr
	.size	main, .-main
	.section	.note.GNU-stack,"",@progbits
	.ident	"GCC: (GNU) 3.4.6 (Gentoo 3.4.6-r2, ssp-3.4.6-1.0, pie-8.7.9)"



More information about the Linuxppc-dev mailing list