[PATCH v2 22/25] powerpc32: move xxxxx_dcache_range() functions inline

Joakim Tjernlund joakim.tjernlund at transmode.se
Wed Sep 23 06:07:51 AEST 2015


> > And generally the one proposing uglification-for-optimization should provide 
> > the evidence. :-)
> 
> When it comes to gcc, past history is my evidence until proven otherwise :)
> Maybe I will check again ...

OK then:
static inline void mb(void)
{
       __asm__ __volatile__ ("sync" : : : "memory");
}

static inline void dcbf(void *addr)
{
       __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
}
#define L1_CACHE_SHIFT 5	/* log2 of the step used by flush_dcache_range() */
#define L1_CACHE_BYTES  (1 << L1_CACHE_SHIFT)	/* = 32; presumably the L1 cache line size in bytes */
/*
 * Issue dcbf() on every L1_CACHE_BYTES-sized block that overlaps the
 * address range [start, stop), then issue one mb() if any block was touched.
 *
 * start: first address of the range; rounded down to a multiple of
 *        L1_CACHE_BYTES before use.
 * stop:  end of the range.  The iteration count is
 *        ceil((stop - aligned_start) / L1_CACHE_BYTES), so a block-aligned
 *        stop is treated as exclusive.
 *
 * NOTE(review): size is an unsigned int while the operands are unsigned
 * long; that is lossless only where both are the same width (this test
 * targets 32-bit PowerPC) -- confirm before reusing elsewhere.
 */
void flush_dcache_range(unsigned long start, unsigned long stop)
{
       /* Round start down to a L1_CACHE_BYTES boundary. */
       void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
       /* Adding L1_CACHE_BYTES - 1 makes the shift below round up. */
       unsigned int size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
       unsigned int i;

       /* addr += N on a void * is byte arithmetic (GNU C extension). */
       for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
               dcbf(addr);
       /* i is non-zero only if the loop ran; skip the barrier otherwise. */
       if (i)
               mb();   /* sync */
}

gives:
flush_dcache_range:
	stwu %r1,-16(%r1)
	rlwinm %r3,%r3,0,0,26
	addi %r4,%r4,31
	subf %r9,%r3,%r4
	srwi. %r10,%r9,5
	beq %cr0,.L1
	mtctr %r10
	.p2align 4,,15
.L4:
#APP
 # 8 "gccloop.c" 1
	dcbf 0, %r3
 # 0 "" 2
#NO_APP
	addi %r3,%r3,32
	bdnz .L4
#APP
 # 3 "gccloop.c" 1
	sync
 # 0 "" 2
#NO_APP
.L1:
	addi %r1,%r1,16
	blr

good enough :) -- gcc turns the C loop into a CTR-counted loop (mtctr/bdnz) with a single addi per iteration.


More information about the Linuxppc-dev mailing list