[SLOF] [PATCH v2 3/6] fbuffer: Implement MRMOVE as an accelerated primitive

Thomas Huth thuth at redhat.com
Tue Aug 4 05:30:58 AEST 2015


The character drawing function fb8-draw-character uses "mrmove"
(which moves main memory contents to IO memory) to copy the data
of the character from main memory to the frame buffer. However,
the current implementation of "mrmove" performs quite badly on
board-qemu since it triggers a hypercall for each memory access
(e.g. for each 8 bytes that are transferred).
But since the KVMPPC_H_LOGICAL_MEMOP hypercall can transfer bigger
regions at once, we can accelerate the character drawing quite a
bit by simply mapping the "mrmove" to the same macro that is
already used for the "rmove" (which is normally only used for
copying from IO memory to IO memory, but on board-qemu it does
not matter). To keep board-js2x in sync, this patch also
turns the "mrmove" for js2x into a primitive.

Signed-off-by: Thomas Huth <thuth at redhat.com>
---
 include/ppc970/cache.h | 18 ++++++++++++++++++
 include/ppcp7/cache.h  |  2 ++
 slof/fs/rmove.fs       | 16 ----------------
 slof/prim.code         |  6 ++++++
 slof/prim.in           |  1 +
 5 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/include/ppc970/cache.h b/include/ppc970/cache.h
index b748689..a53585b 100644
--- a/include/ppc970/cache.h
+++ b/include/ppc970/cache.h
@@ -83,4 +83,22 @@ cache_inhibited_access(uint64_t, 64)
 		default:		_RMOVE(s, d, size, type_c); break; \
 	}
 
+/* main RAM to IO memory move */
+#define FAST_MRMOVE_TYPED(s, d, size, t)	\
+{ \
+	t *s1 = (s), *d1 = (d); \
+	register t tmp; \
+	while (size > 0) { \
+		tmp = *s1++; SET_CI; *d1++ = tmp; CLR_CI; size -= sizeof(t); \
+	} \
+}
+
+#define FAST_MRMOVE(s, d, size) \
+	switch (((type_u)(s) | (type_u)(d) | (size)) & (sizeof(type_u)-1)) { \
+	case 0:		FAST_MRMOVE_TYPED(s, d, size, type_u); break; \
+	case 4:		FAST_MRMOVE_TYPED(s, d, size, type_l); break; \
+	case 2: case 6:	FAST_MRMOVE_TYPED(s, d, size, type_w); break; \
+	default:	FAST_MRMOVE_TYPED(s, d, size, type_c); break; \
+	}
+
 #endif
diff --git a/include/ppcp7/cache.h b/include/ppcp7/cache.h
index dc68371..64bcb00 100644
--- a/include/ppcp7/cache.h
+++ b/include/ppcp7/cache.h
@@ -122,6 +122,8 @@ static inline void ci_rmove(void *dst, void *src, unsigned long esize,
 		}							      \
 	} while(0)
 
+#define FAST_MRMOVE(s, d, size) _FASTRMOVE(s, d, size)
+
 static inline uint16_t bswap16_load(uint64_t addr)
 {
 	unsigned int val;
diff --git a/slof/fs/rmove.fs b/slof/fs/rmove.fs
index c28dba9..3776ee2 100644
--- a/slof/fs/rmove.fs
+++ b/slof/fs/rmove.fs
@@ -24,22 +24,6 @@ defer '(r!)
 	-rot bounds ?do dup i '(r!) /(r) +loop drop
 ;
 
-: (fwrmove) ( src dest size -- )
-	>r 0 -rot r> bounds ?do + dup '(r@) i '(r!) /(r) dup +loop 2drop
-;
-
-\ Move from main to device memory
-: mrmove ( src dest size -- )
-	3dup or or 7 AND CASE
-		0 OF ['] x@ ['] rx! /x ENDOF
-		4 OF ['] l@ ['] rl! /l ENDOF
-		2 OF ['] w@ ['] rw! /w ENDOF
-		dup OF ['] c@ ['] rb! /c ENDOF
-	ENDCASE
-	( We already know that source and destination do not overlap )
-	to /(r) to '(r!) to '(r@) (fwrmove)
-;
-
 : rfill ( addr size pattern -- )
 	3dup drop or 7 AND CASE
 		0 OF ['] rx! /x ENDOF
diff --git a/slof/prim.code b/slof/prim.code
index 9fbed71..cb6e201 100644
--- a/slof/prim.code
+++ b/slof/prim.code
@@ -520,6 +520,12 @@ PRIM(RMOVE)
 
 	MIRP
 
+PRIM(MRMOVE)
+	type_u size = TOS.u; POP;
+	void *d = TOS.a; POP;
+	void *s = TOS.a; POP;
+	FAST_MRMOVE(s, d, size);
+	MIRP
 
 // String compare, case insensitive:
 // : string=ci  ( str1 len1 str2 len2 -- equal? )
diff --git a/slof/prim.in b/slof/prim.in
index 7a0d6a2..c291535 100644
--- a/slof/prim.in
+++ b/slof/prim.in
@@ -106,6 +106,7 @@ cod(EXECUTE)
 cod(MOVE)
 // cod(RMOVE64)
 cod(RMOVE)
+cod(MRMOVE)
 cod(ZCOUNT)
 con(HASH-SIZE HASHSIZE)
 cod(HASH)
-- 
1.8.3.1



More information about the SLOF mailing list