[Cbe-oss-dev] [PATCH 22/22]MARS/task: context switch reduce stack consumption

Yuji Mano yuji.mano at am.sony.com
Fri Mar 20 07:55:01 EST 2009


Previously the non-volatile registers that need to be saved/restored during a
task context save/restore were placed onto the task's stack space before being
DMA'd into the task context save area in host storage. However, this required an
extra 768 bytes of available task stack space.

This patch removes the need for the extra 768 bytes of task stack by using the
free space in the workload module designated area that was reclaimed by the task
module code size reduction.

Signed-off-by: Yuji Mano <yuji.mano at am.sony.com>
---
 task/src/common/task_internal_types.h |    2 
 task/src/host/lib/task.c              |    3 -
 task/src/mpu/module/task_module.c     |   40 ++++++++++++---
 task/src/mpu/module/task_switch.S     |   86 ++++++++++++++++++++++++----------
 4 files changed, 97 insertions(+), 34 deletions(-)

--- a/task/src/common/task_internal_types.h
+++ b/task/src/common/task_internal_types.h
@@ -48,6 +48,8 @@
 #define MARS_TASK_CONTEXT_ALIGN			MARS_WORKLOAD_CONTEXT_ALIGN
 #define MARS_TASK_CONTEXT_SAVE_ALIGN		128
 
+#define MARS_TASK_REGISTER_SAVE_AREA_SIZE 	(16 * (127 - 80 + 1)) /* 48 regs ($80-$127) * 16 bytes = 768 */
+
 struct mars_task_context {
 	uint8_t workload_reserved[MARS_WORKLOAD_RESERVED_SIZE];
 	uint8_t pad[MARS_TASK_CONTEXT_SIZE - MARS_WORKLOAD_RESERVED_SIZE - 128];
--- a/task/src/host/lib/task.c
+++ b/task/src/host/lib/task.c
@@ -207,7 +207,8 @@ int mars_task_create(struct mars_context
 		/* allocate context save area */
 		task->context_save_area_ea =
 			mars_ea_memalign(MARS_TASK_CONTEXT_SAVE_ALIGN,
-					 context_save_size);
+					 context_save_size +
+					 MARS_TASK_REGISTER_SAVE_AREA_SIZE);
 		if (!task->context_save_area_ea) {
 			mars_workload_queue_add_cancel(mars, workload_id);
 			return MARS_ERROR_MEMORY;
--- a/task/src/mpu/module/task_module.c
+++ b/task/src/mpu/module/task_module.c
@@ -47,13 +47,12 @@
 #define MARS_TASK_MODULE_DMA_SIZE_MAX		16384
 #define MARS_TASK_MODULE_DMA_SIZE_MASK		0x7f
 
-#define MARS_TASK_REGISTER_SAVE_AREA_SIZE 	(16 * (127 - 80 + 1))
-
 /* global task variables */
 static struct mars_task_context *task;
 
 /* called by task_switch.S */
 void __module_main(void);
+void __dma_registers(void *ptr, int put);
 void __task_save(void *task_heap);
 void __task_restore(int task_cached);
 
@@ -102,37 +101,62 @@ static void dma(void *ls, uint64_t ea, i
 		mars_module_dma_get(ls, ea, size, MARS_TASK_MODULE_DMA_TAG);
 }
 
+/*
+ * Structure of Context Save Area
+ *                                      High Address
+ * +------------------------------------+
+ * |       Local Storage (stack)        |
+ * +------------------------------------+
+ * | Local Storage (text + data + heap) |
+ * +------------------------------------+
+ * |      Non-volatile Registers        |
+ * +------------------------------------+
+ *                                      Low Address
+ */
 static void dma_context(uint32_t low_size, uint32_t high_size, int put)
 {
 	/* save or restore data segment and heap (low address) */
-	dma((void *)task->data_vaddr, task->context_save_area_ea,
+	dma((void *)task->data_vaddr, task->context_save_area_ea +
+	    MARS_TASK_REGISTER_SAVE_AREA_SIZE,
 	    low_size, put);
 
 	/* save or restore stack (high address) */
 	dma((void *)MARS_TASK_BASE_ADDR + MARS_TASK_CONTEXT_SAVE_SIZE_MAX -
-	    high_size, task->context_save_area_ea + low_size,
+	    high_size, task->context_save_area_ea +
+	    MARS_TASK_REGISTER_SAVE_AREA_SIZE + low_size,
 	    high_size, put);
 
 	dma_wait();
 }
 
+void __dma_registers(void *ptr, int put)
+{
+	/* dma registers state to/from the context save area */
+	dma(ptr, task->context_save_area_ea, MARS_TASK_REGISTER_SAVE_AREA_SIZE,
+	    put);
+
+	dma_wait();
+}
+
 void __task_save(void *task_heap)
 {
 	/* save workload stack pointer */
 	task->stack = (uint32_t)__task_stack;
 
-	/* save context MPU storage state */
+	/* save data segment and heap size (low address) */
 	task->context_save_area_low_size =
-		((uintptr_t)task_heap -
-		 task->data_vaddr + MARS_TASK_MODULE_DMA_SIZE_MASK) &
+		((uintptr_t)task_heap - task->data_vaddr +
+		 MARS_TASK_MODULE_DMA_SIZE_MASK) &
 		~MARS_TASK_MODULE_DMA_SIZE_MASK;
 
+	/* save used stack size (high address) */
 	task->context_save_area_high_size =
 		(MARS_TASK_BASE_ADDR + MARS_TASK_CONTEXT_SAVE_SIZE_MAX -
-		 (uintptr_t)__task_stack + MARS_TASK_REGISTER_SAVE_AREA_SIZE +
+		 (uintptr_t)__task_stack +
 		 MARS_TASK_MODULE_DMA_SIZE_MASK) &
 		~MARS_TASK_MODULE_DMA_SIZE_MASK;
 
+	/* save context MPU storage state */
 	dma_context(task->context_save_area_low_size,
 		    task->context_save_area_high_size, 1);
 }
--- a/task/src/mpu/module/task_switch.S
+++ b/task/src/mpu/module/task_switch.S
@@ -35,6 +35,9 @@
  * LIBRARY OR THE USE OR OTHER DEALINGS IN THE LIBRARY.
  */
 
+#define NUM_REGS	48	/* number of registers to be saved/restored */
+
+
 .section .bss
 
 /* void *__module_stack */
@@ -49,6 +52,12 @@ __module_stack:
 __task_stack:
 .space 16
 
+/* void *__work_stack */
+.align	4
+.global	__work_stack
+__work_stack:
+.space (NUM_REGS + 3) * 16
+
 
 .text
 
@@ -89,20 +98,22 @@ task_exit:
 .type	task_save, @function
 task_save:
 	stqd	$LR, 16($SP)			/* save link register */
-	stqd	$SP, -48($SP)			/* save back chain */
-	ai	$SP, $SP, -48			/* push stack frame */
+	stqd	$SP, -32($SP)			/* save back chain */
+	ai	$SP, $SP, -32			/* push stack frame */
 
-	stqd    $4, 32($SP)			/* save function param */
+	stqa	$4, __work_stack		/* save func param */
 
 	stqa	$SP, __task_stack		/* save task stack */
+
 	lqa	$SP, __module_stack		/* restore module stack */
-	brsl	$LR, __registers_save		/* save registers */
+
 	brsl	$LR, __task_save		/* call task save body */
 
-	lqa	$2, __task_stack		/* load saved task stack ptr */
-	lqd	$3, 32($2)			/* load saved function param */
+	brsl	$LR, __registers_save		/* save registers */
+
+	lqa	$2, __work_stack		/* restore func param */
 
-	brz	$3, mars_module_workload_yield	/* module yield (no return) */
+	brz	$2, mars_module_workload_yield	/* module yield (no return) */
 	br	mars_module_workload_wait	/* module wait (no return) */
 
 .size	task_save, .-task_save
@@ -116,12 +127,18 @@ task_restore:
 	stqd	$SP, -32($SP)			/* save back chain */
 	ai	$SP, $SP, -32			/* push stack frame */
 
-	brsl	$LR, __task_restore		/* call task restore body */
+	stqa	$3, __work_stack		/* save func param */
+
 	brsl	$LR, __registers_restore	/* restore registers */
+
+	lqa	$3, __work_stack		/* restore func param */
+
+	brsl	$LR, __task_restore		/* call task restore body */
+
 	lqa	$SP, __task_stack		/* restore task stack */
 	sync					/* sync before execution */
 
-	ai	$SP, $SP, 48			/* pop task_save stack frame */
+	ai	$SP, $SP, 32			/* pop task_save stack frame */
 	lqd	$LR, 16($SP)			/* restore link register */
 	bi	$LR				/* return from task_save call */
 
@@ -129,30 +146,28 @@ task_restore:
 
 
 /*
- * Registers are saved/restored on task stack.
+ * Registers are saved/restored to task module work stack
  *
- * Task Stack               High Address
- * +------------------------+
- * |  Used Task Stack Area  |
- * +------------------------+ <--- STACK_PTR
+ *                          High Address
+ * +------------------------+ <--- TOP OF STACK   (__work_stack + 48 + 768)
  * | Non-volatile Registers |          |
  * |         $127           |          |
  * |         $126           |          |
  * |         $125           |       (size)
- * |         ...            |    NUM_REGS * 16
+ * |         ...            |    NUM_REGS * 16   (48 * 16 = 768)
  * |         $82            |          |
  * |         $81            |          |
  * |         $80            |          |
- * +------------------------+ <--- REGS_PTR
+ * +------------------------+ <--- REGS_PTR      (__work_stack + 48)
  * |    Work Code Block     |
- * +------------------------+ <--- CODE_PTR + 16
+ * +------------------------+ <--- CODE_PTR + 16 (__work_stack + 32)
  * |    Loop Code Block     |
- * +------------------------+ <--- CODE_PTR
+ * +------------------------+ <--- CODE_PTR      (__work_stack + 16)
+ * |    Save Func Param     |
+ * +------------------------+ <--- __work_stack
  *                          Low Address
  */
 
-#define NUM_REGS	48	/* number of registers to be saved/restored */
-#define STACK_PTR	$73	/* current task stack pointer */
 #define CODE_PTR	$74	/* where code will be loaded into stack */
 #define REGS_PTR	$75	/* where regs will be placed into stack */
 #define REGS_INC	$76	/* register increment value for instruction */
@@ -161,18 +176,28 @@ task_restore:
 #define INST_MASK	$79	/* mask instruction to store or load */
 
 __registers_save:
+	stqd	$LR, 16($SP)			/* save link register */
+	stqd	$SP, -32($SP)			/* save back chain pointer */
+	ai	$SP, $SP, -32			/* push stack frame */
+
 	il	INST_MASK, 0			/* no mask - default is stqd */
 	br	body				/* jump to the body */
 
 __registers_restore:
+	stqd	$LR, 16($SP)			/* save link register */
+	stqd	$SP, -32($SP)			/* save back chain pointer */
+	ai	$SP, $SP, -32			/* push stack frame */
+
+	ila	$3, __work_stack + 48		/* ptr = __work_stack + 48 */
+	il	$4, 0				/* restore so put = 0 */
+	brsl	$LR, __dma_registers		/* call __dma_registers(ptr, 0) */
+
 	il	INST_MASK, 16			/* set necessary bits */
 	shlqbyi	INST_MASK, INST_MASK, 15	/* shift to correct position */
 
 body:
-	lqa	STACK_PTR, __task_stack		/* load task stack pointer */
-	il	TEMP, -NUM_REGS * 16		/* size of reg save area */
-	a	REGS_PTR, STACK_PTR, TEMP	/* set reg load addr pointer */
-	ai	CODE_PTR, REGS_PTR, -2 * 16	/* set code load addr pointer */
+	ila	CODE_PTR, __work_stack + 16	/* set code load addr pointer */
+	ila	REGS_PTR, __work_stack + 48	/* set regs load addr pointer */
 	lqr	TEMP, loop			/* load loop code to register */
 	stqd	TEMP, 0(CODE_PTR)		/* store loop code to stack */
 	lqr	INST, inst			/* load inst code to register */
@@ -181,7 +206,18 @@ body:
 	shlqbyi	REGS_INC, REGS_INC, 12		/* shift to correct word slot */
 	il	TEMP, NUM_REGS - 1		/* initialize loop counter */
 	sync					/* wait till loop code stored */
-	bi	CODE_PTR			/* jump and never return */
+	bisl	$LR, CODE_PTR			/* jump and return */
+
+	brnz	INST_MASK, done			/* restore called so done */
+
+	ila	$3, __work_stack + 48		/* ptr = __work_stack + 48 */
+	il	$4, 1				/* regs save so put = 1 */
+	brsl	$LR, __dma_registers		/* call __dma_registers(ptr, 1) */
+
+done:
+	ai	$SP, $SP, 32			/* pop stack frame */
+	lqd	$LR, 16($SP)			/* load saved link register */
+	bi	$LR				/* done so return to caller */
 
 	.balignl	16, 0			/* align self modifying code */
 loop:






More information about the cbe-oss-dev mailing list