[PATCH] powerpc: tiny memcpy_(to|from)io optimisation

Joakim Tjernlund joakim.tjernlund at transmode.se
Fri May 29 02:13:43 EST 2009


>
> This trivial patch changes memcpy_(to|from)io so as to transfer as many
> 32-bit words as possible in 32-bit accesses (in the current solution,
> the last 32-bit word was transferred as 4 byte accesses).
>
> Signed-off-by: Albrecht Dreß <albrecht.dress at arcor.de>
> ---
>
> diff -urpN -X linux-2.6.29.1.orig/Documentation/dontdiff
> linux-2.6.29.1.orig/arch/powerpc/kernel/io.c
> linux-2.6.29.1/arch/powerpc/kernel/io.c
> --- linux-2.6.29.1.orig/arch/powerpc/kernel/io.c   2009-04-02
> 22:55:27.000000000 +0200
> +++ linux-2.6.29.1/arch/powerpc/kernel/io.c   2009-05-27
> 11:36:09.000000000 +0200
> @@ -161,7 +161,7 @@ void _memcpy_fromio(void *dest, const vo
>         dest++;
>         n--;
>      }
> -   while(n > 4) {
> +   while(n >= 4) {
>         *((u32 *)dest) = *((volatile u32 *)vsrc);
>         eieio();
>         vsrc += 4;
> @@ -190,7 +190,7 @@ void _memcpy_toio(volatile void __iomem
>         vdest++;
>         n--;
>      }
> -   while(n > 4) {
> +   while(n >= 4) {
>         *((volatile u32 *)vdest) = *((volatile u32 *)src);
>         src += 4;
>         vdest += 4;

Hmm, these do look a bit suboptimal anyway. Is there any reason not to write
them something like the code below (written by me for uClibc a long time ago)?
You will have to add eieio()/sync where the I/O accesses need ordering.

void *memcpy(void *to, const void *from, size_t n)
/* PPC can do pre increment and load/store, but not post increment and load/store.
   Therefore use *++ptr instead of *ptr++. */
{
	unsigned long rem, chunks, tmp1, tmp2;
	unsigned char *tmp_to;
	unsigned char *tmp_from = (unsigned char *)from;

	chunks = n / 8;
	tmp_from -= 4;
	tmp_to = to - 4;
	if (!chunks)
		goto lessthan8;
	rem = (unsigned long )tmp_to % 4;
	if (rem)
		goto align;
 copy_chunks:
	do {
		/* make gcc to load all data, then store it */
		tmp1 = *(unsigned long *)(tmp_from+4);
		tmp_from += 8;
		tmp2 = *(unsigned long *)tmp_from;
		*(unsigned long *)(tmp_to+4) = tmp1;
		tmp_to += 8;
		*(unsigned long *)tmp_to = tmp2;
	} while (--chunks);
 lessthan8:
	n = n % 8;
	if (n >= 4) {
		*(unsigned long *)(tmp_to+4) = *(unsigned long *)(tmp_from+4);
		tmp_from += 4;
		tmp_to += 4;
		n = n-4;
	}
	if (!n ) return to;
	tmp_from += 3;
	tmp_to += 3;
	do {
		*++tmp_to = *++tmp_from;
	} while (--n);

	return to;
 align:
	rem = 4 - rem;
	n = n - rem;
	do {
		*(tmp_to+4) = *(tmp_from+4);
		++tmp_from;
		++tmp_to;
	} while (--rem);
	chunks = n / 8;
	if (chunks)
		goto copy_chunks;
	goto lessthan8;
}




More information about the Linuxppc-dev mailing list