[PATCH 1/2] powerpc: string: implement optimized memset variants
Naveen N. Rao
naveen.n.rao at linux.vnet.ibm.com
Tue Mar 28 06:37:40 AEDT 2017
Based on Matthew Wilcox's patches for other architectures.
Signed-off-by: Naveen N. Rao <naveen.n.rao at linux.vnet.ibm.com>
---
arch/powerpc/include/asm/string.h | 24 ++++++++++++++++++++++++
arch/powerpc/lib/mem_64.S | 19 ++++++++++++++++++-
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index da3cdffca440..b0e82466d4e8 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -23,6 +23,30 @@ extern void * memmove(void *,const void *,__kernel_size_t);
extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
+#ifdef CONFIG_PPC64
+#define __HAVE_ARCH_MEMSET16
+#define __HAVE_ARCH_MEMSET32
+#define __HAVE_ARCH_MEMSET64
+
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
+
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+ return __memset16(p, v, n * 2);
+}
+
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+ return __memset32(p, v, n * 4);
+}
+
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+ return __memset64(p, v, n * 8);
+}
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_STRING_H */
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 85fa9869aec5..ec531de99996 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -13,6 +13,23 @@
#include <asm/ppc_asm.h>
#include <asm/export.h>
+_GLOBAL(__memset16)
+ rlwimi r4,r4,16,0,15
+ /* fall through */
+
+_GLOBAL(__memset32)
+ rldimi r4,r4,32,0
+ /* fall through */
+
+_GLOBAL(__memset64)
+ neg r0,r3
+ andi. r0,r0,7
+ cmplw cr1,r5,r0
+ b .Lms
+EXPORT_SYMBOL(__memset16)
+EXPORT_SYMBOL(__memset32)
+EXPORT_SYMBOL(__memset64)
+
_GLOBAL(memset)
neg r0,r3
rlwimi r4,r4,8,16,23
@@ -20,7 +37,7 @@ _GLOBAL(memset)
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
- PPC_MTOCRF(1,r0)
+.Lms: PPC_MTOCRF(1,r0)
mr r6,r3
blt cr1,8f
beq+ 3f /* if already 8-byte aligned */
--
2.11.1
More information about the Linuxppc-dev
mailing list