[Cbe-oss-dev] [PATCH] libspe2: setup initial registers with trampoline, rather than the regs file

Jeremy Kerr jk at ozlabs.org
Wed Aug 13 17:16:49 EST 2008


Currently, libspe directly accesses a context's "regs" file to set up
the initial register state. This has two drawbacks:

 * the regs file is not available for NOSCHED contexts; and

 * accessing the regs file requires the context to be de-scheduled.
   Although contexts currently start in the saved state, this isn't
   a defined behaviour of spufs, so setting up the regs file in this
   manner may cause a performance penalty with future kernels. This
   will also be the case when already-loaded contexts are being
   re-used with a new program.

This change alters the method used to set up the initial register set.
Instead of accessing the regs file, we place a small trampoline area
in the area of memory to be later used as the stack. This trampoline
contains the initial values of r3-r6, plus the program entry point.

The trampoline code loads the register set, sets up the initial stack
pointer, then branches to the program.

Signed-off-by: Jeremy Kerr <jk at ozlabs.org>

--

Update: stack pointer is r1, not r0.

---

 spebase/Makefile |    2 -
 spebase/regs.c   |  104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 spebase/regs.h   |   36 +++++++++++++++++++
 spebase/run.c    |   58 +++++++++---------------------
 4 files changed, 159 insertions(+), 41 deletions(-)

Index: libspe2/spebase/regs.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ libspe2/spebase/regs.h	2008-08-13 16:06:25.000000000 +1000
@@ -0,0 +1,36 @@
+/*
+ * libspe2 - A wrapper library to adapt the JSRE SPU usage model to SPUFS
+ * Copyright (C) 2008 IBM Corp.
+ *
+ * Author: Jeremy Kerr <jk at ozlabs.org>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License,
+ * or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _regs_h_
+#define _regs_h_
+
+#include "spebase.h"
+
+struct spe_reg_state {
+	struct spe_reg128 r3, r4, r5, r6;
+	struct spe_reg128 entry;
+};
+
+int _base_spe_setup_registers(struct spe_context *spe,
+		struct spe_reg_state *regs,
+		unsigned int *entry);
+
+#endif
Index: libspe2/spebase/regs.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ libspe2/spebase/regs.c	2008-08-13 17:14:12.000000000 +1000
@@ -0,0 +1,104 @@
+/*
+ * libspe2 - A wrapper library to adapt the JSRE SPU usage model to SPUFS
+ * Copyright (C) 2008 IBM Corp.
+ *
+ * Author: Jeremy Kerr <jk at ozlabs.org>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License,
+ * or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "spebase.h"
+#include "regs.h"
+
+/**
+ * A little PIC trampoline that is written to the end of local store, which
+ * will later be overwritten by the stack.
+ *
+ * This trampoline provides an area for a struct spe_reg_state, and a little
+ * code to load the appropriate areas of the reg_state into the actual regs,
+ * then branch to the entry point of the program
+ *
+ * After loading this trampoline, we need to copy the spe_reg_state struct
+ * into the base address of the trampoline.
+ */
+
+#if 0
+reg_state:
+	.space	80			/* sizeof(spe_reg_state) */
+_start:
+	lqr	r3,reg_state + 0	/* r3 = spe_reg_state.r3 */
+	lqr	r4,reg_state + 16	/* r4 = spe_reg_state.r4 */
+	lqr	r5,reg_state + 32	/* r5 = spe_reg_state.r5 */
+
+	/* we have two alignment requirements here: reg_state needs to sit
+	 * on a quadword boundary, and the bisl instruction needs to be
+	 * the last word before the backchain pointer. So, align here, then
+	 * add three instructions after the alignment, leaving bisl on the
+	 * 4th word. */
+	.balign	16
+
+	lqr	r6,reg_state + 48	/* r6 = spe_reg_state.r6 */
+	lqr	r1,reg_state + 64	/* r1 = spe_reg_state.entry */
+	il	r2,0			/* stack size: 0 = default */
+
+	bisl	r1,r1			/* branch to the program entry, and
+					   set the stack pointer to the
+					   following word */
+backchain:
+	/* initial stack backchain pointer - NULL*/
+	.long	0x0
+	.long	0x0
+	.long	0x0
+	.long	0x0
+#endif
+static uint32_t reg_setup_trampoline[] = {
+/* reg_state: */
+	[sizeof(struct spe_reg_state) / sizeof(uint32_t)] =
+/* _start: */
+	0x33fff603, /* lqr     r3,0 <reg_state>       */
+	0x33fff784, /* lqr     r4,10 <reg_state+0x10> */
+	0x33fff905, /* lqr     r5,20 <reg_state+0x20> */
+	0x00200000, /* lnop                           */
+	0x33fffa06, /* lqr     r6,30 <reg_state+0x30> */
+	0x33fffb81, /* lqr     r1,40 <reg_state+0x40> */
+	0x40800002, /* il      r2,0                   */
+	0x35200081, /* bisl    r1,r1                  */
+/* backchain: */
+	0x00000000, /* stop                           */
+	0x00000000, /* stop                           */
+	0x00000000, /* stop                           */
+	0x00000000, /* stop                           */
+};
+
+int _base_spe_setup_registers(struct spe_context *spe,
+		struct spe_reg_state *regs,
+		unsigned int *entry)
+{
+	unsigned int base_addr = LS_SIZE - sizeof(reg_setup_trampoline);
+
+	memcpy(spe->base_private->mem_mmap_base + base_addr,
+			reg_setup_trampoline, sizeof(reg_setup_trampoline));
+
+	memcpy(spe->base_private->mem_mmap_base + base_addr,
+			regs, sizeof(*regs));
+
+	*entry = base_addr + sizeof(struct spe_reg_state);
+
+	return 0;
+}
+
Index: libspe2/spebase/Makefile
===================================================================
--- libspe2.orig/spebase/Makefile	2008-08-13 16:06:16.000000000 +1000
+++ libspe2/spebase/Makefile	2008-08-13 16:06:25.000000000 +1000
@@ -31,7 +31,7 @@ libspebase_SONAME	:= libspebase.so.${MAJ
 
 libspebase_OBJS := create.o  elf_loader.o load.o run.o image.o lib_builtin.o \
 				default_c99_handler.o default_posix1_handler.o default_libea_handler.o \
-				dma.o mbox.o accessors.o info.o
+				dma.o mbox.o accessors.o info.o regs.o
 
 CFLAGS += -I..
 CFLAGS += -D_ATFILE_SOURCE
Index: libspe2/spebase/run.c
===================================================================
--- libspe2.orig/spebase/run.c	2008-08-13 16:06:16.000000000 +1000
+++ libspe2/spebase/run.c	2008-08-13 16:06:25.000000000 +1000
@@ -35,6 +35,7 @@
 #include "elf_loader.h"
 #include "lib_builtin.h"
 #include "spebase.h"
+#include "regs.h"
 
 /*Thread-local variable for use by the debugger*/
 __thread struct spe_context_info {
@@ -51,27 +52,6 @@ static void cleanupspeinfo(struct spe_co
 	__spe_current_active_context = tmp;
 }
 
-static int set_regs(struct spe_context *spe, void *regs)
-{
-	int fd_regs, rc;
-
-	fd_regs = openat(spe->base_private->fd_spe_dir, "regs", O_RDWR);
-	if (fd_regs < 0) {
-		DEBUG_PRINTF("Could not open SPE regs file.\n");
-		errno = EFAULT;
-		return -1;
-	}
-
-	rc = write(fd_regs, regs, 2048);
-
-	close(fd_regs);
-
-	if (rc < 0)
-		return -1;
-
-	return 0;
-}
-
 static int issue_isolated_exit(struct spe_context *spe)
 {
 	struct spe_spu_control_area *cntl_area =
@@ -128,43 +108,41 @@ int _base_spe_context_run(spe_context_pt
 				(SPE_ISOLATE | SPE_ISOLATE_EMULATE))) {
 
 		addr64 argp64, envp64, tid64, ls64;
-		unsigned int regs[128][4];
+		struct spe_reg_state reg_state;
 
 		/* setup parameters */
 		argp64.ull = (uint64_t)(unsigned long)argp;
 		envp64.ull = (uint64_t)(unsigned long)envp;
 		tid64.ull = (uint64_t)(unsigned long)spe;
+		ls64.ull = (uint64_t)(unsigned long)
+				spe->base_private->mem_mmap_base;
 
 		/* make sure the register values are 0 */
-		memset(regs, 0, sizeof(regs));
-
-		/* set sensible values for stack_ptr and stack_size */
-		regs[1][0] = (unsigned int) LS_SIZE - 16; 	/* stack_ptr */
-		regs[2][0] = 0; 							/* stack_size ( 0 = default ) */
+		memset(&reg_state, 0, sizeof(reg_state));
 
 		if (runflags & SPE_RUN_USER_REGS) {
 			/* When SPE_USER_REGS is set, argp points to an array
 			 * of 3x128b registers to be passed directly to the SPE
 			 * program.
 			 */
-			memcpy(regs[3], argp, sizeof(unsigned int) * 12);
+			memcpy(&reg_state, argp, 3 * sizeof(struct spe_reg128));
 		} else {
-			regs[3][0] = tid64.ui[0];
-			regs[3][1] = tid64.ui[1];
+			reg_state.r3.slot[0] = tid64.ui[0];
+			reg_state.r3.slot[1] = tid64.ui[1];
 
-			regs[4][0] = argp64.ui[0];
-			regs[4][1] = argp64.ui[1];
+			reg_state.r4.slot[0] = argp64.ui[0];
+			reg_state.r4.slot[1] = argp64.ui[1];
+
+			reg_state.r5.slot[0] = envp64.ui[0];
+			reg_state.r5.slot[1] = envp64.ui[1];
 
-			regs[5][0] = envp64.ui[0];
-			regs[5][1] = envp64.ui[1];
 		}
-		
-		/* Store the LS base address in R6 */
-		ls64.ull = (uint64_t)(unsigned long)spe->base_private->mem_mmap_base;
-		regs[6][0] = ls64.ui[0];
-		regs[6][1] = ls64.ui[1];
+		reg_state.r6.slot[0] = ls64.ui[0];
+		reg_state.r6.slot[1] = ls64.ui[1];
+
+		reg_state.entry.slot[0] = tmp_entry;
 
-		if (set_regs(spe, regs))
+		if (_base_spe_setup_registers(spe, &reg_state, &tmp_entry))
 			return -1;
 	}
 



More information about the cbe-oss-dev mailing list