[SLOF] [PATCH 3/4] fbuffer: Implement MRMOVE as an accelerated primitive

Alexey Kardashevskiy aik at ozlabs.ru
Mon Aug 3 16:01:11 AEST 2015


On 07/31/2015 11:00 PM, Thomas Huth wrote:
> The character drawing function fb8-draw-character uses "mrmove"
> (which moves main memory contents to IO memory) to copy the data
> of the character from main memory to the frame buffer. However,
> the current implementation of "mrmove"  performs quite badly on
> board-qemu since it triggers a hypercall for each memory access
> (e.g. for each 8 bytes that are transferred).
> But since the KVMPPC_H_LOGICAL_MEMOP hypercall can transfer bigger
> regions at once, we can accelerate the character drawing quite a
> bit by simply mapping the "mrmove" to the same macro that is
> already used for the "rmove".

You described "mrmove" (which is good); please also add a couple of words
on how "rmove" differs from "mrmove".


> For keeping board-js2x in sync,
> this patch also transforms the "mrmove" for js2x into primitives.
>
> Signed-off-by: Thomas Huth <thuth at redhat.com>
> ---
>   include/ppc970/cache.h | 18 ++++++++++++++++++
>   include/ppcp7/cache.h  |  2 ++
>   slof/fs/rmove.fs       | 16 ----------------
>   slof/prim.code         |  6 ++++++
>   slof/prim.in           |  2 +-
>   5 files changed, 27 insertions(+), 17 deletions(-)
>
> diff --git a/include/ppc970/cache.h b/include/ppc970/cache.h
> index b748689..3d27f92 100644
> --- a/include/ppc970/cache.h
> +++ b/include/ppc970/cache.h
> @@ -83,4 +83,22 @@ cache_inhibited_access(uint64_t, 64)
>   		default:		_RMOVE(s, d, size, type_c); break; \
>   	}
>
> +/* main RAM to IO memory move */
> +#define _MRMOVE(s, d, size, t)	\
> +{ \
> +	t *s1=(t *)s, *d1=(t *)d; \


The next patch puts spaces around "=" but not this one.

Also, arguments passed to a macro should always be wrapped in parentheses
where they are used in the macro body, e.g. "(t *)(s)" rather than "(t *)s".




> +	register t tmp; \
> +	while (size > 0) { \
> +		tmp = *s1++; SET_CI; *d1++ = tmp; CLR_CI; size -= sizeof(t); \
> +	} \
> +}
> +
> +#define _FASTMRMOVE(s, d, size) \
> +	switch (((type_u)s | (type_u)d | size) & (sizeof(type_u)-1)) { \
> +		case 0:			_MRMOVE(s, d, size, type_u); break; \
> +		case sizeof(type_l):	_MRMOVE(s, d, size, type_l); break; \
> +		case sizeof(type_w):	_MRMOVE(s, d, size, type_w); break; \
> +		default:		_MRMOVE(s, d, size, type_c); break; \
> +	}
> +


You could have one _FASTMRMOVE() (or even expand it in prim.code) and define
_MRMOVE() per board (it would be ci_rmove() for qemu).





>   #endif
> diff --git a/include/ppcp7/cache.h b/include/ppcp7/cache.h
> index dc68371..69a69c3 100644
> --- a/include/ppcp7/cache.h
> +++ b/include/ppcp7/cache.h
> @@ -122,6 +122,8 @@ static inline void ci_rmove(void *dst, void *src, unsigned long esize,
>   		}							      \
>   	} while(0)
>
> +#define _FASTMRMOVE(s, d, size) _FASTRMOVE(s, d, size)
> +
>   static inline uint16_t bswap16_load(uint64_t addr)
>   {
>   	unsigned int val;
> diff --git a/slof/fs/rmove.fs b/slof/fs/rmove.fs
> index c28dba9..3776ee2 100644
> --- a/slof/fs/rmove.fs
> +++ b/slof/fs/rmove.fs
> @@ -24,22 +24,6 @@ defer '(r!)
>   	-rot bounds ?do dup i '(r!) /(r) +loop drop
>   ;
>
> -: (fwrmove) ( src dest size -- )
> -	>r 0 -rot r> bounds ?do + dup '(r@) i '(r!) /(r) dup +loop 2drop
> -;
> -
> -\ Move from main to device memory
> -: mrmove ( src dest size -- )
> -	3dup or or 7 AND CASE
> -		0 OF ['] x@ ['] rx! /x ENDOF
> -		4 OF ['] l@ ['] rl! /l ENDOF
> -		2 OF ['] w@ ['] rw! /w ENDOF
> -		dup OF ['] c@ ['] rb! /c ENDOF
> -	ENDCASE
> -	( We already know that source and destination do not overlap )
> -	to /(r) to '(r!) to '(r@) (fwrmove)
> -;
> -
>   : rfill ( addr size pattern -- )
>   	3dup drop or 7 AND CASE
>   		0 OF ['] rx! /x ENDOF
> diff --git a/slof/prim.code b/slof/prim.code
> index 9fbed71..77e550d 100644
> --- a/slof/prim.code
> +++ b/slof/prim.code
> @@ -520,6 +520,12 @@ PRIM(RMOVE)
>
>   	MIRP
>
> +PRIM(MRMOVE)
> +	type_u size = ((dp--)->u);
> +	type_u *d = (type_u *)((dp--)->u);
> +	type_u *s = (type_u *)((dp--)->u);
> +	_FASTMRMOVE(s, d, size);
> +	MIRP
>
>   // String compare, case insensitive:
>   // : string=ci  ( str1 len1 str2 len2 -- equal? )
> diff --git a/slof/prim.in b/slof/prim.in
> index 7a0d6a2..f323aed 100644
> --- a/slof/prim.in
> +++ b/slof/prim.in
> @@ -104,8 +104,8 @@ cod(SEMICOLON)
>   cod(EXECUTE)
>
>   cod(MOVE)
> -// cod(RMOVE64)

This change does not belong in this patch.


>   cod(RMOVE)
> +cod(MRMOVE)
>   cod(ZCOUNT)
>   con(HASH-SIZE HASHSIZE)
>   cod(HASH)
>


-- 
Alexey


More information about the SLOF mailing list