[PATCH] powerpc: tiny memcpy_(to|from)io optimisation
Joakim Tjernlund
joakim.tjernlund at transmode.se
Fri May 29 02:13:43 EST 2009
>
> This trivial patch changes memcpy_(to|from)io as to transfer as many
> 32-bit words as possible in 32-bit accesses (in the current solution,
> the last 32-bit word was transferred as 4 byte accesses).
>
> Signed-off-by: Albrecht Dreß <albrecht.dress at arcor.de>
> ---
>
> diff -urpN -X linux-2.6.29.1.orig/Documentation/dontdiff
> linux-2.6.29.1.orig/arch/powerpc/kernel/io.c
> linux-2.6.29.1/arch/powerpc/kernel/io.c
> --- linux-2.6.29.1.orig/arch/powerpc/kernel/io.c 2009-04-02
> 22:55:27.000000000 +0200
> +++ linux-2.6.29.1/arch/powerpc/kernel/io.c 2009-05-27
> 11:36:09.000000000 +0200
> @@ -161,7 +161,7 @@ void _memcpy_fromio(void *dest, const vo
> dest++;
> n--;
> }
> - while(n > 4) {
> + while(n >= 4) {
> *((u32 *)dest) = *((volatile u32 *)vsrc);
> eieio();
> vsrc += 4;
> @@ -190,7 +190,7 @@ void _memcpy_toio(volatile void __iomem
> vdest++;
> n--;
> }
> - while(n > 4) {
> + while(n >= 4) {
> *((volatile u32 *)vdest) = *((volatile u32 *)src);
> src += 4;
> vdest += 4;
Hmm, these do look a bit suboptimal anyway. Is there any reason not to write
them something like the code below (written by me for uClibc a long time ago)?
You will have to add eieio()/sync.
/*
 * Copy n bytes from 'from' to 'to' and return 'to'.
 *
 * PPC can do pre increment and load/store, but not post increment and
 * load/store.  Therefore both cursors are kept biased 4 bytes below the next
 * byte to copy and the code uses *++ptr / *(ptr+4) instead of *ptr++.
 *
 * Strategy: for copies of 8 bytes or more, copy single bytes until the
 * destination is 4-byte aligned, then move 8 bytes per iteration as two
 * 32-bit words.  Whatever is left (< 8 bytes) is finished with at most one
 * more 32-bit word followed by up to 3 single bytes.
 *
 * Only the destination is aligned; the source is read with whatever
 * alignment it happens to have, and copies shorter than 8 bytes skip the
 * alignment step entirely, so the target must tolerate unaligned 32-bit
 * accesses.
 */
void *memcpy(void *to, const void *from, size_t n)
{
	/* The copy word must be exactly 4 bytes because every cursor step below
	   is a multiple of 4.  'unsigned long' is 8 bytes on LP64 targets, which
	   would make each word access spill 4 bytes past its slot. */
	typedef unsigned int word_t;
	unsigned char *dst = (unsigned char *)to - 4;
	const unsigned char *src = (const unsigned char *)from - 4;
	size_t chunks, rem;
	word_t w1, w2;

	/* Refuse to compile if word_t is not 4 bytes wide. */
	(void)sizeof(char[(sizeof(word_t) == 4) ? 1 : -1]);

	if (n >= 8) {
		/* Byte-copy up to the next 4-byte boundary of the destination. */
		rem = (unsigned long)dst % 4;
		if (rem) {
			rem = 4 - rem;
			n -= rem;
			do {
				*(dst + 4) = *(src + 4);
				++src;
				++dst;
			} while (--rem);
		}

		for (chunks = n / 8; chunks; --chunks) {
			/* make gcc to load all data, then store it */
			w1 = *(const word_t *)(src + 4);
			src += 8;
			w2 = *(const word_t *)src;
			*(word_t *)(dst + 4) = w1;
			dst += 8;
			*(word_t *)dst = w2;
		}
	}

	/* Fewer than 8 bytes remain: one optional word, then single bytes. */
	n %= 8;
	if (n >= 4) {
		*(word_t *)(dst + 4) = *(const word_t *)(src + 4);
		src += 4;
		dst += 4;
		n -= 4;
	}
	if (!n)
		return to;

	/* Re-bias from "4 below" to "1 below" so pre-increment hits the next byte. */
	src += 3;
	dst += 3;
	do {
		*++dst = *++src;
	} while (--n);
	return to;
}
More information about the Linuxppc-dev
mailing list