gcc optimizes loops badly.
Joakim Tjernlund
Joakim.Tjernlund at lumentis.se
Thu Jan 2 01:44:51 EST 2003
I have spent some time optimizing the crc32 function, since JFFS2 uses it heavily. I found that
gcc 2.95.3 optimizes loops badly; even gcc 2.96 RH produces better code for x86 in some cases.
So I optimized the C code a bit and got much better results.
Now I wonder how a recent (>= 3.2) gcc performs. Could somebody run gcc -S -O2 -mregnames on
the functions below and mail me the results?
Jocke
These are different versions of the same crc32 function:
#include <linux/types.h>
extern const __u32 crc32_table[256];
/*
 * Baseline version: return a 32-bit CRC of the contents of the buffer.
 *
 * val - running CRC value to continue from
 * ss  - buffer to checksum; it is read one unsigned byte at a time
 * len - number of bytes to process
 *
 * For each byte, the low 8 bits of (val ^ byte) select an entry of
 * crc32_table, which is XORed with val shifted right by 8 to give the
 * new val.  When len is 0 the loop body never runs and val is returned
 * unchanged.
 */
__u32 crc32org(__u32 val, const void *ss, unsigned int len)
{
const unsigned char *s = ss;
/* Post-decrement form: len is tested, then decremented, on every pass. */
while (len--){
val = crc32_table[(val ^ *s++) & 0xff] ^ (val >> 8);
}
return val;
}
/*
 * Same CRC update as crc32org, with the loop rewritten as a guarded
 * do/while.
 *
 * val - running CRC value to continue from
 * ss  - buffer to checksum; it is read one unsigned byte at a time
 * len - number of bytes to process
 *
 * Returns the updated CRC; val is returned unchanged when len is 0.
 */
__u32 crc32do_while(__u32 val, const void *ss, unsigned int len)
{
const unsigned char *s = ss;
/* The zero-length case is handled once, up front, instead of per pass. */
if(len){
do {
val = crc32_table[(val ^ *s++) & 0xff] ^ (val >> 8);
/* Pre-decrement: the loop continues while the decremented len is non-zero. */
} while (--len);
}
return val;
}
/*
 * Same CRC update as crc32do_while, but the byte pointer is advanced with
 * a pre-increment inside the loop instead of a post-increment.
 *
 * val - running CRC value to continue from
 * ss  - buffer to checksum; it is read one unsigned byte at a time
 * len - number of bytes to process
 *
 * Returns the updated CRC; val is returned unchanged when len is 0.
 */
__u32 crc32do_while_dec(__u32 val, const void *ss, unsigned int len)
{
const unsigned char *s = ss;
if(len){
/*
 * Step back one byte so that the first *(++s) below reads ss[0].
 * NOTE(review): this forms a pointer one element before the start of the
 * buffer.  It is never dereferenced in that state, but ISO C does not
 * define pointer arithmetic that leaves the array object.
 */
--s;
do {
/* Advance first, then load the byte that s now points at. */
val = crc32_table[(val ^ *(++s)) & 0xff] ^ (val >> 8);
} while (--len);
}
return val;
}
and the resulting assembly:
# Assembler preamble emitted by the compiler, followed by the code for
# crc32org (the plain `while (len--)` version).
#
# Register use visible in the code below:
#   r3  = val (also holds the returned CRC)
#   r4  = s, the byte pointer
#   r5  = len
#   r10 = address of crc32_table
#
# The mail archive had rewritten every '@' as " at "; the '@' in
# `.type ...,@function` and in the @ha/@l operators is restored here so
# the listing is valid assembler input again.
	.file "crc32.c"
gcc2_compiled.:
	.section ".text"
	.align 2
	.globl crc32org
	.type crc32org,@function
crc32org:
	cmpwi %cr0,%r5,0  # compare len with 0
	addi %r5,%r5,-1  # len--
	bclr 12,2  # return now if the compare found len == 0
	lis %r9,crc32_table@ha  # high half of the table address
	la %r10,crc32_table@l(%r9)  # r10 = &crc32_table[0]
.L18:
	lbz %r0,0(%r4)  # r0 = *s
	cmpwi %cr0,%r5,0  # explicit compare of len on every pass
	xor %r0,%r3,%r0  # val ^ *s
	rlwinm %r0,%r0,2,22,29  # ((val ^ *s) & 0xff) << 2: byte offset into the table
	lwzx %r11,%r10,%r0  # r11 = crc32_table[(val ^ *s) & 0xff]
	srwi %r9,%r3,8  # r9 = val >> 8
	xor %r3,%r11,%r9  # val = table entry ^ (val >> 8)
	addi %r4,%r4,1  # s++
	addi %r5,%r5,-1  # len--
	bc 4,2,.L18  # loop again if the compare found len != 0
	blr  # return val in r3
.Lfe1:
	.size crc32org,.Lfe1-crc32org
# Code for crc32do_while (the guarded do/while version).
#
# Register use visible in the code below:
#   r3  = val (also holds the returned CRC)
#   r4  = s, the byte pointer
#   r5  = len on entry; the loop itself counts in CTR
#   r10 = address of crc32_table
#
# The mail archive had rewritten every '@' as " at "; the '@' in
# `.type ...,@function` and in the @ha/@l operators is restored here so
# the listing is valid assembler input again.
	.align 2
	.globl crc32do_while
	.type crc32do_while,@function
crc32do_while:
	mr. %r0,%r5  # r0 = len, and record in cr0 whether it is zero
	mtctr %r0  # CTR = len: the hardware count register drives the loop
	bclr 12,2  # return now if len == 0
	lis %r9,crc32_table@ha  # high half of the table address
	la %r10,crc32_table@l(%r9)  # r10 = &crc32_table[0]
.L25:
	lbz %r0,0(%r4)  # r0 = *s
	srwi %r11,%r3,8  # r11 = val >> 8
	xor %r0,%r3,%r0  # val ^ *s
	rlwinm %r0,%r0,2,22,29  # ((val ^ *s) & 0xff) << 2: byte offset into the table
	lwzx %r9,%r10,%r0  # r9 = crc32_table[(val ^ *s) & 0xff]
	addi %r4,%r4,1  # s++
	xor %r3,%r9,%r11  # val = table entry ^ (val >> 8)
	bdnz .L25  # decrement CTR and loop while it is non-zero
	blr  # return val in r3
.Lfe2:
	.size crc32do_while,.Lfe2-crc32do_while
# Code for crc32do_while_dec (do/while with a pre-incremented pointer).
#
# Register use visible in the code below:
#   r3  = val (also holds the returned CRC)
#   r4  = s, the byte pointer
#   r5  = len on entry; the loop itself counts in CTR
#   r10 = address of crc32_table
#
# The mail archive had rewritten every '@' as " at "; the '@' in
# `.type ...,@function` and in the @ha/@l operators is restored here so
# the listing is valid assembler input again.
	.align 2
	.globl crc32do_while_dec
	.type crc32do_while_dec,@function
crc32do_while_dec:
	mr. %r0,%r5  # r0 = len, and record in cr0 whether it is zero
	mtctr %r0  # CTR = len: the hardware count register drives the loop
	bclr 12,2  # return now if len == 0
	lis %r9,crc32_table@ha  # high half of the table address
	la %r10,crc32_table@l(%r9)  # r10 = &crc32_table[0]
	addi %r4,%r4,-1  # --s, so the first load-with-update reads the first byte
.L31:
	lbzu %r0,1(4)  # r4 += 1, then r0 = *r4 (bare 4 is r4, as posted); no separate addi
	srwi %r11,%r3,8  # r11 = val >> 8
	xor %r0,%r3,%r0  # val ^ *s
	rlwinm %r0,%r0,2,22,29  # ((val ^ *s) & 0xff) << 2: byte offset into the table
	lwzx %r9,%r10,%r0  # r9 = crc32_table[(val ^ *s) & 0xff]
	xor %r3,%r9,%r11  # val = table entry ^ (val >> 8)
	bdnz .L31  # decrement CTR and loop while it is non-zero
	blr  # return val in r3
** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/
More information about the Linuxppc-embedded
mailing list