[SLOF] [PATCH v2 4/6] fbuffer: Implement RFILL as an accelerated primitive
Thomas Huth
thuth at redhat.com
Tue Aug 4 05:30:59 AEST 2015
By implementing RFILL as an accelerated primitive, we can get a huge
speed-up of the screen erasing function. On board-js2x,
it writes the pattern directly into the IO region, and on
board-qemu it uses the KVMPPC_H_LOGICAL_MEMOP hypercall to
copy the pattern from a temporary buffer into the IO region.
Signed-off-by: Thomas Huth <thuth at redhat.com>
---
include/ppc970/cache.h | 24 ++++++++++++++++++++++++
include/ppcp7/cache.h | 12 ++++++++++++
slof/fs/base.fs | 2 --
slof/fs/rmove.fs | 37 -------------------------------------
slof/prim.code | 7 +++++++
slof/prim.in | 1 +
6 files changed, 44 insertions(+), 39 deletions(-)
delete mode 100644 slof/fs/rmove.fs
diff --git a/include/ppc970/cache.h b/include/ppc970/cache.h
index a53585b..2bac433 100644
--- a/include/ppc970/cache.h
+++ b/include/ppc970/cache.h
@@ -101,4 +101,28 @@ cache_inhibited_access(uint64_t, 64)
default: FAST_MRMOVE_TYPED(s, d, size, type_c); break; \
}
+/* fill IO memory with pattern: low byte of pat is replicated into every byte of a t-sized word; size must be a multiple of sizeof(t) and is consumed in place */
+#define FAST_RFILL_TYPED(dst, size, pat, t) \
+{ \
+ t *d1 = (dst); \
+ register t tmp = 0; \
+ int i = sizeof(t); \
+ while (i-- > 0) { /* one pass per byte of t; the decrement is what ends the loop */ \
+ tmp <<= 8; tmp |= (pat) & 0xff; \
+ } \
+ SET_CI; /* NOTE(review): presumably enables cache-inhibited access for the stores below - confirm against the SET_CI definition */ \
+ while (size > 0) { \
+ *d1++ = tmp; size -= sizeof(t); \
+ } \
+ CLR_CI; \
+}
+
+#define FAST_RFILL(dst, size, pat) /* pick the store type for FAST_RFILL_TYPED from the combined alignment of dst and size */ \
+ switch (((type_u)dst | size) & (sizeof(type_u)-1)) { /* low bits of (address OR length) */ \
+ case 0: FAST_RFILL_TYPED(dst, size, pat, type_u); break; \
+ case 4: FAST_RFILL_TYPED(dst, size, pat, type_l); break; \
+ case 2: case 6: FAST_RFILL_TYPED(dst, size, pat, type_w); break; \
+ default: FAST_RFILL_TYPED(dst, size, pat, type_c); break; /* every remaining value falls back to type_c stores */ \
+ }
+
#endif
diff --git a/include/ppcp7/cache.h b/include/ppcp7/cache.h
index 64bcb00..c64b4c6 100644
--- a/include/ppcp7/cache.h
+++ b/include/ppcp7/cache.h
@@ -124,6 +124,18 @@ static inline void ci_rmove(void *dst, void *src, unsigned long esize,
#define FAST_MRMOVE(s, d, size) _FASTRMOVE(s, d, size)
+#define FAST_RFILL(dst, size, pat) do { /* fill a local buffer with byte pat, then move it to dst chunk by chunk */ \
+ type_u buf[64]; \
+ char *d = (char *)(dst); \
+ memset(buf, pat, size < sizeof(buf) ? size : sizeof(buf)); /* only initialise as much of buf as will be moved */ \
+ while (size > sizeof(buf)) { /* whole-buffer chunks; note that size is modified in place */ \
+ FAST_MRMOVE(buf, d, sizeof(buf)); \
+ d += sizeof(buf); \
+ size -= sizeof(buf); \
+ } \
+ FAST_MRMOVE(buf, d, size); /* remaining tail, at most sizeof(buf) bytes */ \
+ } while(0)
+
static inline uint16_t bswap16_load(uint64_t addr)
{
unsigned int val;
diff --git a/slof/fs/base.fs b/slof/fs/base.fs
index e71e087..03e77e5 100644
--- a/slof/fs/base.fs
+++ b/slof/fs/base.fs
@@ -579,8 +579,6 @@ defer cursor-off ( -- )
#include "debug.fs"
\ provide 7.5.3.1 Dictionary search
#include "dictionary.fs"
-\ block data access for IO devices - ought to be implemented in engine
-#include "rmove.fs"
\ provide a simple run time preprocessor
#include <preprocessor.fs>
diff --git a/slof/fs/rmove.fs b/slof/fs/rmove.fs
deleted file mode 100644
index 3776ee2..0000000
--- a/slof/fs/rmove.fs
+++ /dev/null
@@ -1,37 +0,0 @@
-\ *****************************************************************************
-\ * Copyright (c) 2004, 2008 IBM Corporation
-\ * All rights reserved.
-\ * This program and the accompanying materials
-\ * are made available under the terms of the BSD License
-\ * which accompanies this distribution, and is available at
-\ * http://www.opensource.org/licenses/bsd-license.php
-\ *
-\ * Contributors:
-\ * IBM Corporation - initial implementation
-\ ****************************************************************************/
-
-defer '(r@)
-defer '(r!)
-1 VALUE /(r)
-
-
-\ The rest of the code already implemented in prim.in
-\ In the end all of this should be moved over there and this file terminated
-
-: (rfill) ( addr size pattern 'r! /r -- )
- to /(r) to '(r!) ff and
- dup 8 lshift or dup 10 lshift or dup 20 lshift or
- -rot bounds ?do dup i '(r!) /(r) +loop drop
-;
-
-: rfill ( addr size pattern -- )
- 3dup drop or 7 AND CASE
- 0 OF ['] rx! /x ENDOF
- 4 OF ['] rl! /l ENDOF
- 2 OF ['] rw! /w ENDOF
- dup OF ['] rb! /c ENDOF
- ENDCASE (rfill)
-;
-
-
-
diff --git a/slof/prim.code b/slof/prim.code
index cb6e201..bb9e036 100644
--- a/slof/prim.code
+++ b/slof/prim.code
@@ -527,6 +527,13 @@ PRIM(MRMOVE)
FAST_MRMOVE(s, d, size);
MIRP
+PRIM(RFILL)
+ type_u pat = TOS.u; POP; /* operands are popped in reverse order: fill pattern first */
+ type_u size = TOS.u; POP; /* then the size of the region */
+ void *dst = TOS.a; POP; /* then the destination address */
+ FAST_RFILL(dst, size, pat); /* platform-specific macro from the included cache.h */
+ MIRP
+
// String compare, case insensitive:
// : string=ci ( str1 len1 str2 len2 -- equal? )
PRIM(STRING_X3d_CI)
diff --git a/slof/prim.in b/slof/prim.in
index c291535..a9bb625 100644
--- a/slof/prim.in
+++ b/slof/prim.in
@@ -107,6 +107,7 @@ cod(MOVE)
// cod(RMOVE64)
cod(RMOVE)
cod(MRMOVE)
+cod(RFILL)
cod(ZCOUNT)
con(HASH-SIZE HASHSIZE)
cod(HASH)
--
1.8.3.1
More information about the SLOF
mailing list