[SLOF] [PATCH 3/4] fbuffer: Implement MRMOVE as an accelerated primitive
Alexey Kardashevskiy
aik at ozlabs.ru
Mon Aug 3 16:01:11 AEST 2015
On 07/31/2015 11:00 PM, Thomas Huth wrote:
> The character drawing function fb8-draw-character uses "mrmove"
> (which moves main memory contents to IO memory) to copy the data
> of the character from main memory to the frame buffer. However,
> the current implementation of "mrmove" performs quite badly on
> board-qemu since it triggers a hypercall for each memory access
> (e.g. for each 8 bytes that are transferred).
> But since the KVMPPC_H_LOGICAL_MEMOP hypercall can transfer bigger
> regions at once, we can accelerate the character drawing quite a
> bit by simply mapping the "mrmove" to the same macro that is
> already used for the "rmove".
You described "mrmove" (which is good); please also add a couple of words on
how "rmove" differs from "mrmove".
> For keeping board-js2x in sync,
> this patch also transforms the "mrmove" for js2x into primitives.
>
> Signed-off-by: Thomas Huth <thuth at redhat.com>
> ---
> include/ppc970/cache.h | 18 ++++++++++++++++++
> include/ppcp7/cache.h | 2 ++
> slof/fs/rmove.fs | 16 ----------------
> slof/prim.code | 6 ++++++
> slof/prim.in | 2 +-
> 5 files changed, 27 insertions(+), 17 deletions(-)
>
> diff --git a/include/ppc970/cache.h b/include/ppc970/cache.h
> index b748689..3d27f92 100644
> --- a/include/ppc970/cache.h
> +++ b/include/ppc970/cache.h
> @@ -83,4 +83,22 @@ cache_inhibited_access(uint64_t, 64)
> default: _RMOVE(s, d, size, type_c); break; \
> }
>
> +/* main RAM to IO memory move */
> +#define _MRMOVE(s, d, size, t) \
> +{ \
> + t *s1=(t *)s, *d1=(t *)d; \
The next patch puts spaces around "=", but this one does not.
Also, arguments passed to a macro should always be wrapped in parentheses where they are used.
> + register t tmp; \
> + while (size > 0) { \
> + tmp = *s1++; SET_CI; *d1++ = tmp; CLR_CI; size -= sizeof(t); \
> + } \
> +}
> +
> +#define _FASTMRMOVE(s, d, size) \
> + switch (((type_u)s | (type_u)d | size) & (sizeof(type_u)-1)) { \
> + case 0: _MRMOVE(s, d, size, type_u); break; \
> + case sizeof(type_l): _MRMOVE(s, d, size, type_l); break; \
> + case sizeof(type_w): _MRMOVE(s, d, size, type_w); break; \
> + default: _MRMOVE(s, d, size, type_c); break; \
> + }
> +
You could have a single _FASTMRMOVE() (or even expand it in prim.code) and define
_MRMOVE() per board (for qemu it would be ci_rmove()).
> #endif
> diff --git a/include/ppcp7/cache.h b/include/ppcp7/cache.h
> index dc68371..69a69c3 100644
> --- a/include/ppcp7/cache.h
> +++ b/include/ppcp7/cache.h
> @@ -122,6 +122,8 @@ static inline void ci_rmove(void *dst, void *src, unsigned long esize,
> } \
> } while(0)
>
> +#define _FASTMRMOVE(s, d, size) _FASTRMOVE(s, d, size)
> +
> static inline uint16_t bswap16_load(uint64_t addr)
> {
> unsigned int val;
> diff --git a/slof/fs/rmove.fs b/slof/fs/rmove.fs
> index c28dba9..3776ee2 100644
> --- a/slof/fs/rmove.fs
> +++ b/slof/fs/rmove.fs
> @@ -24,22 +24,6 @@ defer '(r!)
> -rot bounds ?do dup i '(r!) /(r) +loop drop
> ;
>
> -: (fwrmove) ( src dest size -- )
> - >r 0 -rot r> bounds ?do + dup '(r@) i '(r!) /(r) dup +loop 2drop
> -;
> -
> -\ Move from main to device memory
> -: mrmove ( src dest size -- )
> - 3dup or or 7 AND CASE
> - 0 OF ['] x@ ['] rx! /x ENDOF
> - 4 OF ['] l@ ['] rl! /l ENDOF
> - 2 OF ['] w@ ['] rw! /w ENDOF
> - dup OF ['] c@ ['] rb! /c ENDOF
> - ENDCASE
> - ( We already know that source and destination do not overlap )
> - to /(r) to '(r!) to '(r@) (fwrmove)
> -;
> -
> : rfill ( addr size pattern -- )
> 3dup drop or 7 AND CASE
> 0 OF ['] rx! /x ENDOF
> diff --git a/slof/prim.code b/slof/prim.code
> index 9fbed71..77e550d 100644
> --- a/slof/prim.code
> +++ b/slof/prim.code
> @@ -520,6 +520,12 @@ PRIM(RMOVE)
>
> MIRP
>
> +PRIM(MRMOVE)
> + type_u size = ((dp--)->u);
> + type_u *d = (type_u *)((dp--)->u);
> + type_u *s = (type_u *)((dp--)->u);
> + _FASTMRMOVE(s, d, size);
> + MIRP
>
> // String compare, case insensitive:
> // : string=ci ( str1 len1 str2 len2 -- equal? )
> diff --git a/slof/prim.in b/slof/prim.in
> index 7a0d6a2..f323aed 100644
> --- a/slof/prim.in
> +++ b/slof/prim.in
> @@ -104,8 +104,8 @@ cod(SEMICOLON)
> cod(EXECUTE)
>
> cod(MOVE)
> -// cod(RMOVE64)
Removing this commented-out line does not belong in this patch.
> cod(RMOVE)
> +cod(MRMOVE)
> cod(ZCOUNT)
> con(HASH-SIZE HASHSIZE)
> cod(HASH)
>
--
Alexey
More information about the SLOF
mailing list