[SLOF] [PATCH 3/4] fbuffer: Implement MRMOVE as an accelerated primitive

Thomas Huth thuth at redhat.com
Fri Jul 31 23:00:07 AEST 2015


The character drawing function fb8-draw-character uses "mrmove"
(which moves main memory contents to IO memory) to copy the data
of the character from main memory to the frame buffer. However,
the current implementation of "mrmove" performs quite badly on
board-qemu since it triggers a hypercall for each memory access
(e.g. for each 8 bytes that are transferred).
But since the KVMPPC_H_LOGICAL_MEMOP hypercall can transfer bigger
regions at once, we can accelerate the character drawing quite a
bit by simply mapping the "mrmove" to the same macro that is
already used for the "rmove". To keep board-js2x in sync,
this patch also turns the "mrmove" for js2x into a primitive.

Signed-off-by: Thomas Huth <thuth at redhat.com>
---
 include/ppc970/cache.h | 18 ++++++++++++++++++
 include/ppcp7/cache.h  |  2 ++
 slof/fs/rmove.fs       | 16 ----------------
 slof/prim.code         |  6 ++++++
 slof/prim.in           |  2 +-
 5 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/include/ppc970/cache.h b/include/ppc970/cache.h
index b748689..3d27f92 100644
--- a/include/ppc970/cache.h
+++ b/include/ppc970/cache.h
@@ -83,4 +83,22 @@ cache_inhibited_access(uint64_t, 64)
 		default:		_RMOVE(s, d, size, type_c); break; \
 	}
 
+/* main RAM to IO memory move: copies from s to d in elements of type t, decrementing size by sizeof(t) per element */
+#define _MRMOVE(s, d, size, t)	\
+{ \
+	t *s1=(t *)s, *d1=(t *)d; /* typed cursors over source and destination */ \
+	register t tmp; /* holds the element between the load and the store */ \
+	while (size > 0) { /* size itself is consumed, so the caller's variable is modified */ \
+		tmp = *s1++; SET_CI; *d1++ = tmp; CLR_CI; size -= sizeof(t); /* load precedes SET_CI; only the store sits between SET_CI and CLR_CI (presumably cache-inhibited mode - confirm) */ \
+	} \
+}
+
+#define _FASTMRMOVE(s, d, size) /* choose the _MRMOVE element type from the low bits of s|d|size */ \
+	switch (((type_u)s | (type_u)d | size) & (sizeof(type_u)-1)) { \
+		case 0:			_MRMOVE(s, d, size, type_u); break; /* no low bits set in any of the three */ \
+		case sizeof(type_l):	_MRMOVE(s, d, size, type_l); break; /* low bits equal sizeof(type_l) exactly */ \
+		case sizeof(type_w):	_MRMOVE(s, d, size, type_w); break; /* low bits equal sizeof(type_w) exactly */ \
+		default:		_MRMOVE(s, d, size, type_c); break; /* every other bit pattern falls back to type_c */ \
+	}
+
 #endif
diff --git a/include/ppcp7/cache.h b/include/ppcp7/cache.h
index dc68371..69a69c3 100644
--- a/include/ppcp7/cache.h
+++ b/include/ppcp7/cache.h
@@ -122,6 +122,8 @@ static inline void ci_rmove(void *dst, void *src, unsigned long esize,
 		}							      \
 	} while(0)
 
+#define _FASTMRMOVE(s, d, size) _FASTRMOVE(s, d, size) /* alias: the main-RAM-to-IO move simply expands to _FASTRMOVE here */
+
 static inline uint16_t bswap16_load(uint64_t addr)
 {
 	unsigned int val;
diff --git a/slof/fs/rmove.fs b/slof/fs/rmove.fs
index c28dba9..3776ee2 100644
--- a/slof/fs/rmove.fs
+++ b/slof/fs/rmove.fs
@@ -24,22 +24,6 @@ defer '(r!)
 	-rot bounds ?do dup i '(r!) /(r) +loop drop
 ;
 
-: (fwrmove) ( src dest size -- )
-	>r 0 -rot r> bounds ?do + dup '(r@) i '(r!) /(r) dup +loop 2drop
-;
-
-\ Move from main to device memory
-: mrmove ( src dest size -- )
-	3dup or or 7 AND CASE
-		0 OF ['] x@ ['] rx! /x ENDOF
-		4 OF ['] l@ ['] rl! /l ENDOF
-		2 OF ['] w@ ['] rw! /w ENDOF
-		dup OF ['] c@ ['] rb! /c ENDOF
-	ENDCASE
-	( We already know that source and destination do not overlap )
-	to /(r) to '(r!) to '(r@) (fwrmove)
-;
-
 : rfill ( addr size pattern -- )
 	3dup drop or 7 AND CASE
 		0 OF ['] rx! /x ENDOF
diff --git a/slof/prim.code b/slof/prim.code
index 9fbed71..77e550d 100644
--- a/slof/prim.code
+++ b/slof/prim.code
@@ -520,6 +520,12 @@ PRIM(RMOVE)
 
 	MIRP
 
+PRIM(MRMOVE)
+	type_u size = ((dp--)->u);	// popped first, so size is the topmost cell: ( src dest size -- )
+	type_u *d = (type_u *)((dp--)->u);	// destination address, popped second
+	type_u *s = (type_u *)((dp--)->u);	// source address, popped last
+	_FASTMRMOVE(s, d, size);	// platform macro from cache.h performs the actual copy
+	MIRP
 
 // String compare, case insensitive:
 // : string=ci  ( str1 len1 str2 len2 -- equal? )
diff --git a/slof/prim.in b/slof/prim.in
index 7a0d6a2..f323aed 100644
--- a/slof/prim.in
+++ b/slof/prim.in
@@ -104,8 +104,8 @@ cod(SEMICOLON)
 cod(EXECUTE)
 
 cod(MOVE)
-// cod(RMOVE64)
 cod(RMOVE)
+cod(MRMOVE)
 cod(ZCOUNT)
 con(HASH-SIZE HASHSIZE)
 cod(HASH)
-- 
1.8.3.1



More information about the SLOF mailing list