[Cbe-oss-dev] [PATCH] libspe2: setup initial registers with trampoline, rather than the regs file
Jeremy Kerr
jk at ozlabs.org
Wed Aug 13 17:16:49 EST 2008
Currently, libspe directly accesses a context's "regs" file to set up
the initial register state. This has two drawbacks:
* the regs file is not available for NOSCHED contexts; and
* accessing the regs file requires the context to be de-scheduled.
Although contexts currently start in the saved state, this isn't
a defined behaviour of spufs, so setting up the regs file in this
manner may cause a performance penalty with future kernels. This
will also be the case when already-loaded contexts are being
re-used with a new program.
This change alters the method used to set up the initial register set.
Instead of accessing the regs file, we place a small trampoline area
in the area of memory to be later used as the stack. This trampoline
contains the initial values of r3-r6, plus the program entry point.
The trampoline code loads the register set, sets up the initial stack
pointer, then branches to the program.
Signed-off-by: Jeremy Kerr <jk at ozlabs.org>
--
Update: stack pointer is r1, not r0.
---
spebase/Makefile | 2 -
spebase/regs.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
spebase/regs.h | 36 +++++++++++++++++++
spebase/run.c | 58 +++++++++---------------------
4 files changed, 159 insertions(+), 41 deletions(-)
Index: libspe2/spebase/regs.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ libspe2/spebase/regs.h 2008-08-13 16:06:25.000000000 +1000
@@ -0,0 +1,36 @@
+/*
+ * libspe2 - A wrapper library to adapt the JSRE SPU usage model to SPUFS
+ * Copyright (C) 2008 IBM Corp.
+ *
+ * Author: Jeremy Kerr <jk at ozlabs.org>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License,
+ * or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _regs_h_
+#define _regs_h_
+
+#include "spebase.h"
+
+struct spe_reg_state {
+ struct spe_reg128 r3, r4, r5, r6;
+ struct spe_reg128 entry;
+};
+
+int _base_spe_setup_registers(struct spe_context *spe,
+ struct spe_reg_state *regs,
+ unsigned int *entry);
+
+#endif
Index: libspe2/spebase/regs.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ libspe2/spebase/regs.c 2008-08-13 17:14:12.000000000 +1000
@@ -0,0 +1,104 @@
+/*
+ * libspe2 - A wrapper library to adapt the JSRE SPU usage model to SPUFS
+ * Copyright (C) 2008 IBM Corp.
+ *
+ * Author: Jeremy Kerr <jk at ozlabs.org>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License,
+ * or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "spebase.h"
+#include "regs.h"
+
+/**
+ * A little PIC trampoline that is written to the end of local store, which
+ * will later be overwritten by the stack.
+ *
+ * This trampoline provides an area for a struct spe_reg_state, and a little
+ * code to load the appropriate areas of the reg_state into the actual regs,
+ * then branch to the entry point of the program
+ *
+ * After loading this trampoline, we need to copy the spe_reg_state struct
+ * into the base address of the trampoline.
+ */
+
+#if 0
+reg_state:
+ .space 80 /* sizeof(spe_reg_state) */
+_start:
+ lqr r3,reg_state + 0 /* r3 = spe_reg_state.r3 */
+ lqr r4,reg_state + 16 /* r4 = spe_reg_state.r4 */
+ lqr r5,reg_state + 32 /* r5 = spe_reg_state.r5 */
+
+ /* we have two alignment requirements here: reg_state needs to sit
+ * on a quadword boundary, and the bisl instruction needs to be
+ * the last word before the backchain pointer. So, align here, then
+ * add three instructions after the alignment, leaving bisl on the
+ * 4th word. */
+ .balign 16
+
+ lqr r6,reg_state + 48 /* r6 = spe_reg_state.r6 */
+ lqr r1,reg_state + 64 /* r1 = spe_reg_state.entry */
+ il r2,0 /* stack size: 0 = default */
+
+ bisl r1,r1 /* branch to the program entry, and
+ set the stack pointer to the
+ following word */
+backchain:
+ /* initial stack backchain pointer - NULL*/
+ .long 0x0
+ .long 0x0
+ .long 0x0
+ .long 0x0
+#endif
+static uint32_t reg_setup_trampoline[] = {
+/* reg_state: */
+ [sizeof(struct spe_reg_state) / sizeof(uint32_t)] =
+/* _start: */
+ 0x33fff603, /* lqr r3,0 <reg_state> */
+ 0x33fff784, /* lqr r4,10 <reg_state+0x10> */
+ 0x33fff905, /* lqr r5,20 <reg_state+0x20> */
+ 0x00200000, /* lnop */
+ 0x33fffa06, /* lqr r6,30 <reg_state+0x30> */
+ 0x33fffb81, /* lqr r1,40 <reg_state+0x40> */
+ 0x40800002, /* il r2,0 */
+ 0x35200081, /* bisl r1,r1 */
+/* backchain: */
+ 0x00000000, /* stop */
+ 0x00000000, /* stop */
+ 0x00000000, /* stop */
+ 0x00000000, /* stop */
+};
+
+int _base_spe_setup_registers(struct spe_context *spe,
+ struct spe_reg_state *regs,
+ unsigned int *entry)
+{
+ unsigned int base_addr = LS_SIZE - sizeof(reg_setup_trampoline);
+
+ memcpy(spe->base_private->mem_mmap_base + base_addr,
+ reg_setup_trampoline, sizeof(reg_setup_trampoline));
+
+ memcpy(spe->base_private->mem_mmap_base + base_addr,
+ regs, sizeof(*regs));
+
+ *entry = base_addr + sizeof(struct spe_reg_state);
+
+ return 0;
+}
+
Index: libspe2/spebase/Makefile
===================================================================
--- libspe2.orig/spebase/Makefile 2008-08-13 16:06:16.000000000 +1000
+++ libspe2/spebase/Makefile 2008-08-13 16:06:25.000000000 +1000
@@ -31,7 +31,7 @@ libspebase_SONAME := libspebase.so.${MAJ
libspebase_OBJS := create.o elf_loader.o load.o run.o image.o lib_builtin.o \
default_c99_handler.o default_posix1_handler.o default_libea_handler.o \
- dma.o mbox.o accessors.o info.o
+ dma.o mbox.o accessors.o info.o regs.o
CFLAGS += -I..
CFLAGS += -D_ATFILE_SOURCE
Index: libspe2/spebase/run.c
===================================================================
--- libspe2.orig/spebase/run.c 2008-08-13 16:06:16.000000000 +1000
+++ libspe2/spebase/run.c 2008-08-13 16:06:25.000000000 +1000
@@ -35,6 +35,7 @@
#include "elf_loader.h"
#include "lib_builtin.h"
#include "spebase.h"
+#include "regs.h"
/*Thread-local variable for use by the debugger*/
__thread struct spe_context_info {
@@ -51,27 +52,6 @@ static void cleanupspeinfo(struct spe_co
__spe_current_active_context = tmp;
}
-static int set_regs(struct spe_context *spe, void *regs)
-{
- int fd_regs, rc;
-
- fd_regs = openat(spe->base_private->fd_spe_dir, "regs", O_RDWR);
- if (fd_regs < 0) {
- DEBUG_PRINTF("Could not open SPE regs file.\n");
- errno = EFAULT;
- return -1;
- }
-
- rc = write(fd_regs, regs, 2048);
-
- close(fd_regs);
-
- if (rc < 0)
- return -1;
-
- return 0;
-}
-
static int issue_isolated_exit(struct spe_context *spe)
{
struct spe_spu_control_area *cntl_area =
@@ -128,43 +108,41 @@ int _base_spe_context_run(spe_context_pt
(SPE_ISOLATE | SPE_ISOLATE_EMULATE))) {
addr64 argp64, envp64, tid64, ls64;
- unsigned int regs[128][4];
+ struct spe_reg_state reg_state;
/* setup parameters */
argp64.ull = (uint64_t)(unsigned long)argp;
envp64.ull = (uint64_t)(unsigned long)envp;
tid64.ull = (uint64_t)(unsigned long)spe;
+ ls64.ull = (uint64_t)(unsigned long)
+ spe->base_private->mem_mmap_base;
/* make sure the register values are 0 */
- memset(regs, 0, sizeof(regs));
-
- /* set sensible values for stack_ptr and stack_size */
- regs[1][0] = (unsigned int) LS_SIZE - 16; /* stack_ptr */
- regs[2][0] = 0; /* stack_size ( 0 = default ) */
+ memset(&reg_state, 0, sizeof(reg_state));
if (runflags & SPE_RUN_USER_REGS) {
/* When SPE_USER_REGS is set, argp points to an array
* of 3x128b registers to be passed directly to the SPE
* program.
*/
- memcpy(regs[3], argp, sizeof(unsigned int) * 12);
+ memcpy(&reg_state, argp, 3 * sizeof(struct spe_reg128));
} else {
- regs[3][0] = tid64.ui[0];
- regs[3][1] = tid64.ui[1];
+ reg_state.r3.slot[0] = tid64.ui[0];
+ reg_state.r3.slot[1] = tid64.ui[1];
- regs[4][0] = argp64.ui[0];
- regs[4][1] = argp64.ui[1];
+ reg_state.r4.slot[0] = argp64.ui[0];
+ reg_state.r4.slot[1] = argp64.ui[1];
+
+ reg_state.r5.slot[0] = envp64.ui[0];
+ reg_state.r5.slot[1] = envp64.ui[1];
- regs[5][0] = envp64.ui[0];
- regs[5][1] = envp64.ui[1];
}
-
- /* Store the LS base address in R6 */
- ls64.ull = (uint64_t)(unsigned long)spe->base_private->mem_mmap_base;
- regs[6][0] = ls64.ui[0];
- regs[6][1] = ls64.ui[1];
+ reg_state.r6.slot[0] = ls64.ui[0];
+ reg_state.r6.slot[1] = ls64.ui[1];
+
+ reg_state.entry.slot[0] = tmp_entry;
- if (set_regs(spe, regs))
+ if (_base_spe_setup_registers(spe, &reg_state, &tmp_entry))
return -1;
}
More information about the cbe-oss-dev
mailing list