[Skiboot] [PATCH v2] libc/string: speed up common string functions

Nicholas Piggin npiggin at gmail.com
Fri May 10 14:47:09 AEST 2019


Use compiler builtins for the string functions, and compile the
libc/string/ directory with -O2.

This reduces the number of instructions executed when booting skiboot
in mambo by 2.9 million in slow-sim mode, or 3.8 million in normal
mode, for an image size increase of less than 1kB.

This can result in the compiler warning about more cases of string
function problems.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 core/init.c                  |  6 ++---
 libc/include/string.h        | 44 ++++++++++++++++++++----------------
 libc/string/Makefile.inc     |  2 ++
 libc/string/memchr.c         |  7 +++---
 libc/string/memcmp.c         |  7 +++---
 libc/string/memcpy.c         | 27 ++++++++++++++--------
 libc/string/memcpy_from_ci.c |  4 ++--
 libc/string/memmove.c        | 28 +++++++++--------------
 libc/string/memset.c         |  8 +++----
 libc/string/strcasecmp.c     |  5 ++--
 libc/string/strcat.c         |  8 ++++---
 libc/string/strchr.c         |  6 ++---
 libc/string/strcmp.c         |  7 ++----
 libc/string/strcpy.c         |  6 ++---
 libc/string/strdup.c         |  4 +++-
 libc/string/strlen.c         | 10 ++++----
 libc/string/strncasecmp.c    |  6 ++---
 libc/string/strncmp.c        |  7 +++---
 libc/string/strncpy.c        |  6 ++---
 libc/string/strrchr.c        |  6 ++---
 libc/string/strstr.c         |  8 ++++---
 libc/string/strtok.c         |  6 ++---
 22 files changed, 112 insertions(+), 106 deletions(-)

diff --git a/core/init.c b/core/init.c
index c97a1c3ee..9f60d7855 100644
--- a/core/init.c
+++ b/core/init.c
@@ -385,7 +385,7 @@ static bool load_kernel(void)
 		 */
 		if (kernel_entry < EXCEPTION_VECTORS_END) {
 			cpu_set_sreset_enable(false);
-			memcpy(NULL, old_vectors, EXCEPTION_VECTORS_END);
+			memcpy_null(NULL, old_vectors, EXCEPTION_VECTORS_END);
 			sync_icache();
 		} else {
 			/* Hack for STB in Mambo, assume at least 4kb in mem */
@@ -787,7 +787,7 @@ static void setup_branch_null_catcher(void)
         * ABI v1 (ie. big endian).  This will be broken if we ever
         * move to ABI v2 (ie little endian)
         */
-       memcpy(0, bn, 16);
+       memcpy_null(0, bn, 16);
 }
 #else
 static void setup_branch_null_catcher(void)
@@ -824,7 +824,7 @@ void copy_exception_vectors(void)
 	/* Backup previous vectors as this could contain a kernel
 	 * image.
 	 */
-	memcpy(old_vectors, NULL, EXCEPTION_VECTORS_END);
+	memcpy_null(old_vectors, NULL, EXCEPTION_VECTORS_END);
 
 	/* Copy from 0x100 to EXCEPTION_VECTORS_END, avoid below 0x100 as
 	 * this is the boot flag used by CPUs still potentially entering
diff --git a/libc/include/string.h b/libc/include/string.h
index d2597bb6f..f3d6117fb 100644
--- a/libc/include/string.h
+++ b/libc/include/string.h
@@ -15,28 +15,34 @@
 
 #include "stddef.h"
 
-char *strcpy(char *dest, const char *src);
-char *strncpy(char *dest, const char *src, size_t n);
-char *strcat(char *dest, const char *src);
-int strcmp(const char *s1, const char *s2);
-int strncmp(const char *s1, const char *s2, size_t n);
-int strcasecmp(const char *s1, const char *s2);
-int strncasecmp(const char *s1, const char *s2, size_t n);
-char *strchr(const char *s, int c);
-char *strrchr(const char *s, int c);
-char *strrchr(const char *s, int c);
-size_t strlen(const char *s);
-size_t strnlen(const char *s, size_t n);
-char *strstr(const char *hay, const char *needle);
+#define strcpy __builtin_strcpy
+#define strncpy __builtin_strncpy
+#define strcat __builtin_strcat
+#define strcmp __builtin_strcmp
+#define strncmp __builtin_strncmp
+#define strcasecmp __builtin_strcasecmp
+#define strncasecmp __builtin_strncasecmp
+#define strchr __builtin_strchr
+#define strrchr __builtin_strrchr
+#define strlen __builtin_strlen
+#define strlen __builtin_strlen
+size_t strnlen(const char *s, size_t maxlen);
+#define strstr __builtin_strstr
+#define strdup __builtin_strdup
 char *strtok(char *src, const char *pattern);
-char *strdup(const char *src);
 
-void *memset(void *s, int c, size_t n);
-void *memchr(const void *s, int c, size_t n);
-void *memcpy(void *dest, const void *src, size_t n);
+#define memset __builtin_memset
+#define memchr __builtin_memchr
+#define memcpy __builtin_memcpy
+#define memmove __builtin_memmove
+#define memcmp __builtin_memcmp
+static inline void *memcpy_null(void *dest, const void *src, size_t n)
+{
+	asm("" : "+r"(dest));
+	asm("" : "+r"(src));
+	return memcpy(dest, src, n);
+}
 void *memcpy_from_ci(void *destpp, const void *srcpp, size_t len);
-void *memmove(void *dest, const void *src, size_t n);
-int memcmp(const void *s1, const void *s2, size_t n);
 
 static inline int ffs(unsigned long val)
 {
diff --git a/libc/string/Makefile.inc b/libc/string/Makefile.inc
index 26582aa05..2f038219f 100644
--- a/libc/string/Makefile.inc
+++ b/libc/string/Makefile.inc
@@ -19,3 +19,5 @@ STRING_OBJS = strcat.o strchr.o strrchr.o strcmp.o strcpy.o strlen.o \
 STRING = $(LIBCDIR)/string/built-in.a
 $(STRING): $(STRING_OBJS:%=$(LIBCDIR)/string/%)
 
+CFLAGS_SKIP_libc/string/ += -Os
+CFLAGS_libc/string/ += -O2
diff --git a/libc/string/memchr.c b/libc/string/memchr.c
index c3fe751c6..db9a147c6 100644
--- a/libc/string/memchr.c
+++ b/libc/string/memchr.c
@@ -10,11 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include "string.h"
+#include <stddef.h>
 
-
-void *
-memchr(const void *ptr, int c, size_t n)
+void *memchr(const void *ptr, int c, size_t n);
+void *memchr(const void *ptr, int c, size_t n)
 {
 	unsigned char ch = (unsigned char)c;
 	const unsigned char *p = ptr;
diff --git a/libc/string/memcmp.c b/libc/string/memcmp.c
index 3b69cefb9..b270b597b 100644
--- a/libc/string/memcmp.c
+++ b/libc/string/memcmp.c
@@ -10,11 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include "string.h"
+#include <stddef.h>
 
-
-int
-memcmp(const void *ptr1, const void *ptr2, size_t n)
+int memcmp(const void *ptr1, const void *ptr2, size_t n);
+int memcmp(const void *ptr1, const void *ptr2, size_t n)
 {
 	const unsigned char *p1 = ptr1;
 	const unsigned char *p2 = ptr2;
diff --git a/libc/string/memcpy.c b/libc/string/memcpy.c
index 00f419b80..26f953d2e 100644
--- a/libc/string/memcpy.c
+++ b/libc/string/memcpy.c
@@ -10,18 +10,27 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include "string.h"
+#include <stddef.h>
+#include <ccan/short_types/short_types.h>
 
-void *
-memcpy(void *dest, const void *src, size_t n)
+void *memcpy(void *dest, const void *src, size_t n);
+void *memcpy(void *dest, const void *src, size_t n)
 {
-	char *cdest;
-	const char *csrc = src;
+	void *ret = dest;
 
-	cdest = dest;
-	while (n-- > 0) {
-		*cdest++ = *csrc++;
+	while (n >= 8) {
+		*(uint64_t *)dest = *(uint64_t *)src;
+		dest += 8;
+		src += 8;
+		n -= 8;
 	}
 
-	return dest;
+	while (n > 0) {
+		*(uint8_t *)dest = *(uint8_t *)src;
+		dest += 1;
+		src += 1;
+		n -= 1;
+	}
+
+	return ret;
 }
diff --git a/libc/string/memcpy_from_ci.c b/libc/string/memcpy_from_ci.c
index 02affa38c..4c5582fe5 100644
--- a/libc/string/memcpy_from_ci.c
+++ b/libc/string/memcpy_from_ci.c
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include <string.h>
 #include <ccan/short_types/short_types.h>
 #include <io.h>
+#include <string.h>
 
-void* memcpy_from_ci(void *destpp, const void *srcpp, size_t len)
+void *memcpy_from_ci(void *destpp, const void *srcpp, size_t len)
 {
 	const size_t block = sizeof(uint64_t);
 	unsigned long int destp = (long int) destpp;
diff --git a/libc/string/memmove.c b/libc/string/memmove.c
index 3acf1a973..76aef6c87 100644
--- a/libc/string/memmove.c
+++ b/libc/string/memmove.c
@@ -10,33 +10,27 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include "string.h"
+#include <stddef.h>
 
-
-void *
-memmove(void *dest, const void *src, size_t n)
+void *memcpy(void *dest, const void *src, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
+void *memmove(void *dest, const void *src, size_t n)
 {
-	char *cdest;
-	const char *csrc;
-	int i;
-
 	/* Do the buffers overlap in a bad way? */
 	if (src < dest && src + n >= dest) {
+		char *cdest;
+		const char *csrc;
+		int i;
+
 		/* Copy from end to start */
 		cdest = dest + n - 1;
 		csrc = src + n - 1;
 		for (i = 0; i < n; i++) {
 			*cdest-- = *csrc--;
 		}
-	}
-	else {
+		return dest;
+	} else {
 		/* Normal copy is possible */
-		cdest = dest;
-		csrc = src;
-		for (i = 0; i < n; i++) {
-			*cdest++ = *csrc++;
-		}
+		return memcpy(dest, src, n);
 	}
-
-	return dest;
 }
diff --git a/libc/string/memset.c b/libc/string/memset.c
index dae43660f..f96a0231f 100644
--- a/libc/string/memset.c
+++ b/libc/string/memset.c
@@ -10,12 +10,12 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include "string.h"
-
 #define CACHE_LINE_SIZE 128
 
-void *
-memset(void *dest, int c, size_t size)
+#include <stddef.h>
+
+void *memset(void *dest, int c, size_t size);
+void *memset(void *dest, int c, size_t size)
 {
 	unsigned char *d = (unsigned char *)dest;
 	unsigned long big_c = 0;
diff --git a/libc/string/strcasecmp.c b/libc/string/strcasecmp.c
index f75294fb9..ba1aedb5f 100644
--- a/libc/string/strcasecmp.c
+++ b/libc/string/strcasecmp.c
@@ -10,11 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
 #include <ctype.h>
 
-int
-strcasecmp(const char *s1, const char *s2)
+int strcasecmp(const char *s1, const char *s2);
+int strcasecmp(const char *s1, const char *s2)
 {
 	while (*s1 != 0 && *s2 != 0) {
 		if (toupper(*s1) != toupper(*s2))
diff --git a/libc/string/strcat.c b/libc/string/strcat.c
index 936e5b103..329cc88e1 100644
--- a/libc/string/strcat.c
+++ b/libc/string/strcat.c
@@ -10,10 +10,12 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
+#include <stddef.h>
 
-char *
-strcat(char *dst, const char *src)
+size_t strlen(const char *s);
+char *strcpy(char *dst, const char *src);
+char *strcat(char *dst, const char *src);
+char *strcat(char *dst, const char *src)
 {
 	size_t p;
 
diff --git a/libc/string/strchr.c b/libc/string/strchr.c
index 528a319c9..88f25f96b 100644
--- a/libc/string/strchr.c
+++ b/libc/string/strchr.c
@@ -10,10 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
+#include <stddef.h>
 
-char *
-strchr(const char *s, int c)
+char *strchr(const char *s, int c);
+char *strchr(const char *s, int c)
 {
 	char cb = c;
 
diff --git a/libc/string/strcmp.c b/libc/string/strcmp.c
index 48eaed246..5afbae2a7 100644
--- a/libc/string/strcmp.c
+++ b/libc/string/strcmp.c
@@ -10,11 +10,8 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
-
-
-int
-strcmp(const char *s1, const char *s2)
+int strcmp(const char *s1, const char *s2);
+int strcmp(const char *s1, const char *s2)
 {
 	while (*s1 != 0 && *s2 != 0) {
 		if (*s1 != *s2)
diff --git a/libc/string/strcpy.c b/libc/string/strcpy.c
index 48eb62cb5..514be1714 100644
--- a/libc/string/strcpy.c
+++ b/libc/string/strcpy.c
@@ -10,10 +10,8 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
-
-char *
-strcpy(char *dst, const char *src)
+char *strcpy(char *dst, const char *src);
+char *strcpy(char *dst, const char *src)
 {
 	char *ptr = dst;
 
diff --git a/libc/string/strdup.c b/libc/string/strdup.c
index be91e233b..b0a4b4d70 100644
--- a/libc/string/strdup.c
+++ b/libc/string/strdup.c
@@ -10,9 +10,11 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
 #include <stdlib.h>
 
+size_t strlen(const char *s);
+void *memcpy(void *dest, const void *src, size_t n);
+char *strdup(const char *src);
 char *strdup(const char *src)
 {
 	size_t len = strlen(src) + 1;
diff --git a/libc/string/strlen.c b/libc/string/strlen.c
index 5b408e7ef..f3c5a8362 100644
--- a/libc/string/strlen.c
+++ b/libc/string/strlen.c
@@ -10,10 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
+#include <stddef.h>
 
-size_t
-strlen(const char *s)
+size_t strlen(const char *s);
+size_t strlen(const char *s)
 {
 	size_t len = 0;
 
@@ -25,8 +25,8 @@ strlen(const char *s)
 	return len;
 }
 
-size_t
-strnlen(const char *s, size_t n)
+size_t strnlen(const char *s, size_t n);
+size_t strnlen(const char *s, size_t n)
 {
 	size_t len = 0;
 
diff --git a/libc/string/strncasecmp.c b/libc/string/strncasecmp.c
index 4140931e3..c6b158e60 100644
--- a/libc/string/strncasecmp.c
+++ b/libc/string/strncasecmp.c
@@ -10,12 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
 #include <ctype.h>
 
-
-int
-strncasecmp(const char *s1, const char *s2, size_t n)
+int strncasecmp(const char *s1, const char *s2, size_t n);
+int strncasecmp(const char *s1, const char *s2, size_t n)
 {
 	if (n < 1)
 		return 0;
diff --git a/libc/string/strncmp.c b/libc/string/strncmp.c
index a886736a9..a5422c0dc 100644
--- a/libc/string/strncmp.c
+++ b/libc/string/strncmp.c
@@ -10,11 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
+#include <stddef.h>
 
-
-int
-strncmp(const char *s1, const char *s2, size_t n)
+int strncmp(const char *s1, const char *s2, size_t n);
+int strncmp(const char *s1, const char *s2, size_t n)
 {
 	if (n < 1)
 		return 0;
diff --git a/libc/string/strncpy.c b/libc/string/strncpy.c
index 0f41f93c9..621c89b64 100644
--- a/libc/string/strncpy.c
+++ b/libc/string/strncpy.c
@@ -10,10 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
+#include <stddef.h>
 
-char *
-strncpy(char *dst, const char *src, size_t n)
+char *strncpy(char *dst, const char *src, size_t n);
+char *strncpy(char *dst, const char *src, size_t n)
 {
 	char *ret = dst;
 
diff --git a/libc/string/strrchr.c b/libc/string/strrchr.c
index 6652fad34..262a68287 100644
--- a/libc/string/strrchr.c
+++ b/libc/string/strrchr.c
@@ -10,10 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
+#include <stddef.h>
 
-char *
-strrchr(const char *s, int c)
+char *strrchr(const char *s, int c);
+char *strrchr(const char *s, int c)
 {
 	char *last = NULL;
 	char cb = c;
diff --git a/libc/string/strstr.c b/libc/string/strstr.c
index a6e96187f..cd9ccae9a 100644
--- a/libc/string/strstr.c
+++ b/libc/string/strstr.c
@@ -10,10 +10,12 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
+#include <stddef.h>
 
-char *
-strstr(const char *hay, const char *needle)
+size_t strlen(const char *s);
+int strncmp(const char *s1, const char *s2, size_t n);
+char *strstr(const char *hay, const char *needle);
+char *strstr(const char *hay, const char *needle)
 {
 	char *pos;
 	size_t hlen, nlen;
diff --git a/libc/string/strtok.c b/libc/string/strtok.c
index aa42d77ed..fcc3fce32 100644
--- a/libc/string/strtok.c
+++ b/libc/string/strtok.c
@@ -10,10 +10,10 @@
  *     IBM Corporation - initial implementation
  *****************************************************************************/
 
-#include <string.h>
+#include <stddef.h>
 
-char *
-strtok(char *src, const char *pattern)
+char *strtok(char *src, const char *pattern);
+char *strtok(char *src, const char *pattern)
 {
 	static char *nxtTok;
 	char *retVal = NULL;
-- 
2.20.1



More information about the Skiboot mailing list