[SLOF] [PATCH 3/4] fbuffer: Implement MRMOVE as an accelerated primitive

Nikunj A Dadhania nikunj at linux.vnet.ibm.com
Mon Aug 3 20:37:36 AEST 2015


Thomas Huth <thuth at redhat.com> writes:

> The character drawing function fb8-draw-character uses "mrmove"
> (which moves main memory contents to IO memory) to copy the data
> of the character from main memory to the frame buffer. However,
> the current implementation of "mrmove" performs quite badly on
> board-qemu since it triggers a hypercall for each memory access
> (e.g. for each 8 bytes that are transferred).
> But since the KVMPPC_H_LOGICAL_MEMOP hypercall can transfer bigger
> regions at once, we can accelerate the character drawing quite a
> bit by simply mapping the "mrmove" to the same macro that is
> already used for the "rmove". For keeping board-js2x in sync,
> this patch also transforms the "mrmove" for js2x into primitives.
>
> Signed-off-by: Thomas Huth <thuth at redhat.com>

I don't have a js2x handy; did you test this on js2x?
Apart from that:

Reviewed-by: Nikunj A Dadhania <nikunj at linux.vnet.ibm.com>

> ---
>  include/ppc970/cache.h | 18 ++++++++++++++++++
>  include/ppcp7/cache.h  |  2 ++
>  slof/fs/rmove.fs       | 16 ----------------
>  slof/prim.code         |  6 ++++++
>  slof/prim.in           |  2 +-
>  5 files changed, 27 insertions(+), 17 deletions(-)
>
> diff --git a/include/ppc970/cache.h b/include/ppc970/cache.h
> index b748689..3d27f92 100644
> --- a/include/ppc970/cache.h
> +++ b/include/ppc970/cache.h
> @@ -83,4 +83,22 @@ cache_inhibited_access(uint64_t, 64)
>  		default:		_RMOVE(s, d, size, type_c); break; \
>  	}
>
> +/* main RAM to IO memory move */
> +#define _MRMOVE(s, d, size, t)	\
> +{ \
> +	t *s1=(t *)s, *d1=(t *)d; \
> +	register t tmp; \
> +	while (size > 0) { \
> +		tmp = *s1++; SET_CI; *d1++ = tmp; CLR_CI; size -= sizeof(t); \
> +	} \
> +}
> +
> +#define _FASTMRMOVE(s, d, size) \
> +	switch (((type_u)s | (type_u)d | size) & (sizeof(type_u)-1)) { \
> +		case 0:			_MRMOVE(s, d, size, type_u); break; \
> +		case sizeof(type_l):	_MRMOVE(s, d, size, type_l); break; \
> +		case sizeof(type_w):	_MRMOVE(s, d, size, type_w); break; \
> +		default:		_MRMOVE(s, d, size, type_c); break; \
> +	}
> +
>  #endif
> diff --git a/include/ppcp7/cache.h b/include/ppcp7/cache.h
> index dc68371..69a69c3 100644
> --- a/include/ppcp7/cache.h
> +++ b/include/ppcp7/cache.h
> @@ -122,6 +122,8 @@ static inline void ci_rmove(void *dst, void *src, unsigned long esize,
>  		}							      \
>  	} while(0)
>
> +#define _FASTMRMOVE(s, d, size) _FASTRMOVE(s, d, size)
> +
>  static inline uint16_t bswap16_load(uint64_t addr)
>  {
>  	unsigned int val;
> diff --git a/slof/fs/rmove.fs b/slof/fs/rmove.fs
> index c28dba9..3776ee2 100644
> --- a/slof/fs/rmove.fs
> +++ b/slof/fs/rmove.fs
> @@ -24,22 +24,6 @@ defer '(r!)
>  	-rot bounds ?do dup i '(r!) /(r) +loop drop
>  ;
>
> -: (fwrmove) ( src dest size -- )
> -	>r 0 -rot r> bounds ?do + dup '(r@) i '(r!) /(r) dup +loop 2drop
> -;
> -
> -\ Move from main to device memory
> -: mrmove ( src dest size -- )
> -	3dup or or 7 AND CASE
> -		0 OF ['] x@ ['] rx! /x ENDOF
> -		4 OF ['] l@ ['] rl! /l ENDOF
> -		2 OF ['] w@ ['] rw! /w ENDOF
> -		dup OF ['] c@ ['] rb! /c ENDOF
> -	ENDCASE
> -	( We already know that source and destination do not overlap )
> -	to /(r) to '(r!) to '(r@) (fwrmove)
> -;
> -
>  : rfill ( addr size pattern -- )
>  	3dup drop or 7 AND CASE
>  		0 OF ['] rx! /x ENDOF
> diff --git a/slof/prim.code b/slof/prim.code
> index 9fbed71..77e550d 100644
> --- a/slof/prim.code
> +++ b/slof/prim.code
> @@ -520,6 +520,12 @@ PRIM(RMOVE)
>
>  	MIRP
>
> +PRIM(MRMOVE)
> +	type_u size = ((dp--)->u);
> +	type_u *d = (type_u *)((dp--)->u);
> +	type_u *s = (type_u *)((dp--)->u);
> +	_FASTMRMOVE(s, d, size);
> +	MIRP
>
>  // String compare, case insensitive:
>  // : string=ci  ( str1 len1 str2 len2 -- equal? )
> diff --git a/slof/prim.in b/slof/prim.in
> index 7a0d6a2..f323aed 100644
> --- a/slof/prim.in
> +++ b/slof/prim.in
> @@ -104,8 +104,8 @@ cod(SEMICOLON)
>  cod(EXECUTE)
>
>  cod(MOVE)
> -// cod(RMOVE64)
>  cod(RMOVE)
> +cod(MRMOVE)
>  cod(ZCOUNT)
>  con(HASH-SIZE HASHSIZE)
>  cod(HASH)
> -- 
> 1.8.3.1



More information about the SLOF mailing list