[Cbe-oss-dev] [PATCH 12/28]MARS/base: kernel scheduler fix

Fri Feb 6 13:31:31 EST 2009

This addresses some bugs and performance issues in the scheduler implementation.

This adds a new layer of per-block info bits stored inside the workload
queue header. The block bits summarize each workload queue block: the maximum
priority among its workloads in the ready state, whether the block holds any
workloads in the ready state, whether it holds any workloads in the waiting
state, and so on.
This avoids having to DMA each workload queue block and search all of its
workload bits just to get that information.

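* queue header now inherits from mars_mutex (it starts with the lock word, so
  the header itself can be locked like a mutex)
* queue header holds the block info bits (one 16-bit entry per block)
* queue header block info bits must be updated after each workload state change
* workload blocks reduced from 100 to 50, max workloads reduced from 1500 to
  750 (50 x 16-bit entries fill the remainder of the 128-byte header)
* scheduler only needs to fetch and search the workload queue blocks that have
  waiting workloads, or that hold the highest-priority workload to be
  scheduled next.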

Signed-off-by: Yuji Mano <yuji.mano at am.sony.com>
---
 base/src/common/workload_internal_types.h |   39 ++++-
 base/src/host/lib/workload_queue.c        |   76 +++++++++-
 base/src/mpu/kernel/kernel.c              |  227 +++++++++++++++++++++---------
 3 files changed, 270 insertions(+), 72 deletions(-)

--- a/base/src/common/workload_internal_types.h
+++ b/base/src/common/workload_internal_types.h
@@ -63,8 +63,8 @@
 #define MARS_WORKLOAD_SIGNAL_ON			0x1	/* signal set on */
 
 #define MARS_WORKLOAD_PER_BLOCK			15	/* wl/block */
-#define MARS_WORKLOAD_NUM_BLOCKS		100	/* total blocks */
-#define MARS_WORKLOAD_MAX			1500	/* blocks * wl/block */
+#define MARS_WORKLOAD_NUM_BLOCKS		50	/* total blocks */
+#define MARS_WORKLOAD_MAX			750	/* blocks * wl/block */
 
 #define MARS_WORKLOAD_QUEUE_ALIGN		128	/* align to 128 bytes */
 #define MARS_WORKLOAD_QUEUE_HEADER_ALIGN	128	/* align to 128 bytes */
@@ -74,6 +74,37 @@
 #define MARS_WORKLOAD_QUEUE_FLAG_NONE		0x0	/* no flag set */
 #define MARS_WORKLOAD_QUEUE_FLAG_EXIT		0x1	/* exit flag */
 
+#define MARS_WORKLOAD_BLOCK_PRIORITY_MIN	MARS_WORKLOAD_PRIORITY_MIN
+#define MARS_WORKLOAD_BLOCK_PRIORITY_MAX	MARS_WORKLOAD_PRIORITY_MAX
+
+#define MARS_WORKLOAD_BLOCK_COUNTER_MIN		0x00
+#define MARS_WORKLOAD_BLOCK_COUNTER_MAX		0x3f
+
+#define MARS_WORKLOAD_BLOCK_READY_OFF		0x0
+#define MARS_WORKLOAD_BLOCK_READY_ON		0x1
+
+#define MARS_WORKLOAD_BLOCK_WAITING_OFF		0x0
+#define MARS_WORKLOAD_BLOCK_WAITING_ON		0x1
+/*
+ * MARS workload queue header block bits (16-bits)
+ * ------------------------------------------
+ * |[15.....8]|[7.....2]|[   1   ]|[   0   ]|
+ * ------------------------------------------
+ * |  8-bits  | 6-bits  |  1-bit  |  1-bit  |
+ * ------------------------------------------
+ * | PRIORITY | COUNTER |  READY  | WAITING |
+ * ------------------------------------------
+ */
+#define MARS_BITS_SHIFT_BLOCK_PRIORITY		8
+#define MARS_BITS_SHIFT_BLOCK_COUNTER		2
+#define MARS_BITS_SHIFT_BLOCK_READY		1
+#define MARS_BITS_SHIFT_BLOCK_WAITING		0
+
+#define MARS_BITS_MASK_BLOCK_PRIORITY		0x000000000000ff00ULL
+#define MARS_BITS_MASK_BLOCK_COUNTER		0x00000000000000fcULL
+#define MARS_BITS_MASK_BLOCK_READY		0x0000000000000002ULL
+#define MARS_BITS_MASK_BLOCK_WAITING		0x0000000000000001ULL
+
 /*
  * MARS workload queue block workload bits (64-bits)
  * ------------------------------------------------------------------
@@ -107,10 +138,12 @@
 
 /* 128 byte workload queue header structure */
 struct mars_workload_queue_header {
+	uint32_t lock;
+	uint32_t pad;
 	uint64_t queue_ea;
 	uint64_t context_ea;
 	uint32_t flag;
-	uint8_t pad[MARS_WORKLOAD_QUEUE_HEADER_SIZE - 20];
+	uint16_t bits[MARS_WORKLOAD_NUM_BLOCKS];
 } __attribute__((aligned(MARS_WORKLOAD_QUEUE_HEADER_ALIGN)));
 
 /* 128 byte workload queue block structure */
--- a/base/src/host/lib/workload_queue.c
+++ b/base/src/host/lib/workload_queue.c
@@ -74,6 +74,8 @@ static inline uint64_t get_block_bits_ea
 
 static void init_header(uint64_t queue_ea)
 {
+	int block;
+	uint16_t bits = 0;
 	struct mars_workload_queue *queue;
 
 	/* prepare work area for queue header */
@@ -87,8 +89,20 @@ static void init_header(uint64_t queue_e
 	queue->header.context_ea =
 		queue_ea + offsetof(struct mars_workload_queue, context);
 
+	/* create initial bit pattern of workload queue header */
+	MARS_BITS_SET(&bits, BLOCK_PRIORITY, MARS_WORKLOAD_BLOCK_PRIORITY_MIN);
+	MARS_BITS_SET(&bits, BLOCK_COUNTER, MARS_WORKLOAD_BLOCK_COUNTER_MIN);
+	MARS_BITS_SET(&bits, BLOCK_READY, MARS_WORKLOAD_BLOCK_READY_OFF);
+	MARS_BITS_SET(&bits, BLOCK_WAITING, MARS_WORKLOAD_BLOCK_WAITING_OFF);
+
+	for (block = 0; block < MARS_WORKLOAD_NUM_BLOCKS; block++)
+		queue->header.bits[block] = bits;
+
 	/* update queue header on EA */
 	mars_ea_put(queue_ea, queue, sizeof(struct mars_workload_queue_header));
+
+	/* reset mutex portion of queue header */
+	mars_mutex_reset(queue_ea);
 }
 
 static void init_blocks(uint64_t queue_ea)
@@ -456,13 +470,71 @@ int mars_workload_queue_schedule_begin(s
 			   NULL);
 }
 
+/*
+ * Recompute the queue header info bits (READY, WAITING, PRIORITY) for the
+ * block containing workload 'id', based on that block's workload bits.
+ */
+static void update_header_bits(struct mars_context *mars, uint16_t id)
+{
+	int block = id / MARS_WORKLOAD_PER_BLOCK;
+	int index;
+	uint64_t queue_ea = mars->workload_queue_ea;
+	uint64_t block_ea = get_block_ea(queue_ea, block);
+	uint64_t header_bits_ea;
+	uint16_t header_bits;
+	uint8_t block_ready = MARS_WORKLOAD_BLOCK_READY_OFF;
+	uint8_t block_waiting = MARS_WORKLOAD_BLOCK_WAITING_OFF;
+	uint8_t block_priority = MARS_WORKLOAD_BLOCK_PRIORITY_MIN;
+
+	/* search through currently locked queue block workload bits */
+	for (index = 0; index < MARS_WORKLOAD_PER_BLOCK; index++) {
+		uint64_t bits_ea = get_block_bits_ea(block_ea, index);
+		uint64_t bits = mars_ea_get_uint64(bits_ea);
+		uint8_t state = MARS_BITS_GET(&bits, WORKLOAD_STATE);
+
+		/* workload state is ready so check priority */
+		if (state == MARS_WORKLOAD_STATE_READY) {
+			uint8_t priority = MARS_BITS_GET(&bits,
+							 WORKLOAD_PRIORITY);
+
+			/* track the highest priority among ready workloads */
+			if (priority > block_priority)
+				block_priority = priority;
+
+			/* at least one ready workload: mark block as ready */
+			block_ready = MARS_WORKLOAD_BLOCK_READY_ON;
+		} else if (state == MARS_WORKLOAD_STATE_WAITING) {
+			/* at least one waiting workload: mark block waiting */
+			block_waiting = MARS_WORKLOAD_BLOCK_WAITING_ON;
+		}
+	}
+
+	/* lock the queue header */
+	mars_mutex_lock(queue_ea);
+
+	/* read-modify-write the header info bits for this queue block */
+	header_bits_ea = queue_ea +
+			 offsetof(struct mars_workload_queue_header, bits) +
+			 sizeof(uint16_t) * block;
+	header_bits = mars_ea_get_uint16(header_bits_ea);
+
+	MARS_BITS_SET(&header_bits, BLOCK_READY, block_ready);
+	MARS_BITS_SET(&header_bits, BLOCK_WAITING, block_waiting);
+	MARS_BITS_SET(&header_bits, BLOCK_PRIORITY, block_priority);
+
+	mars_ea_put_uint16(header_bits_ea, header_bits);
+
+	/* unlock the queue header */
+	mars_mutex_unlock(queue_ea);
+}
+
 int mars_workload_queue_schedule_end(struct mars_context *mars,
 				     uint16_t id)
 {
 	return change_state(mars, id, NULL,
 			    MARS_WORKLOAD_STATE_SCHEDULING,
 			    MARS_WORKLOAD_STATE_READY,
-			    NULL);
+			    update_header_bits);
 }
 
 int mars_workload_queue_schedule_cancel(struct mars_context *mars,
@@ -566,5 +638,5 @@ int mars_workload_queue_signal_send(stru
 	return change_bits(mars, id, NULL,
 			   check_state_bits_not, MARS_WORKLOAD_STATE_NONE,
 			   set_signal_bits, MARS_WORKLOAD_SIGNAL_ON,
-			   NULL);
+			   update_header_bits);
 }
--- a/base/src/mpu/kernel/kernel.c
+++ b/base/src/mpu/kernel/kernel.c
@@ -48,9 +48,6 @@
 #include "kernel_internal_types.h"
 #include "workload_internal_types.h"
 
-#define MARS_WORKLOAD_RESERVED_NONE	0
-#define MARS_WORKLOAD_RESERVED		1
-
 /* kernel */
 void *__kernel_stack;
 static struct mars_kernel_params kernel_params;
@@ -218,6 +215,74 @@ static uint64_t get_block_bits(uint16_t 
 	return block_bits;
 }
 
+static void update_header_bits(int block)
+{
+	int i;
+	uint16_t *block_bits = &queue_header.bits[block];
+	uint8_t block_ready = MARS_WORKLOAD_BLOCK_READY_OFF;
+	uint8_t block_waiting = MARS_WORKLOAD_BLOCK_WAITING_OFF;
+	uint8_t block_priority = MARS_WORKLOAD_BLOCK_PRIORITY_MIN;
+
+	/* scan workload bits in queue_block, the currently locked block */
+	for (i = 0; i < MARS_WORKLOAD_PER_BLOCK; i++) {
+		uint64_t *bits = &queue_block.bits[i];
+		uint8_t state = MARS_BITS_GET(bits, WORKLOAD_STATE);
+
+		/* workload state is ready so check priority */
+		if (state == MARS_WORKLOAD_STATE_READY) {
+			uint8_t priority = MARS_BITS_GET(bits,
+							 WORKLOAD_PRIORITY);
+
+			/* track the highest priority among ready workloads */
+			if (priority > block_priority)
+				block_priority = priority;
+
+			/* at least one ready workload: mark block as ready */
+			block_ready = MARS_WORKLOAD_BLOCK_READY_ON;
+		} else if (state == MARS_WORKLOAD_STATE_WAITING) {
+			/* at least one waiting workload: mark block waiting */
+			block_waiting = MARS_WORKLOAD_BLOCK_WAITING_ON;
+		}
+	}
+
+	/* lock the queue header (queue_header is handed in as the mutex) */
+	mars_mutex_lock_get(kernel_params.workload_queue_ea,
+			    (struct mars_mutex *)&queue_header);
+
+	/* set ready, waiting and priority bits of this block's header entry */
+	MARS_BITS_SET(block_bits, BLOCK_READY, block_ready);
+	MARS_BITS_SET(block_bits, BLOCK_WAITING, block_waiting);
+	MARS_BITS_SET(block_bits, BLOCK_PRIORITY, block_priority);
+
+	/* unlock the queue header */
+	mars_mutex_unlock_put(kernel_params.workload_queue_ea,
+			      (struct mars_mutex *)&queue_header);
+}
+
+static void update_header_bits_counter(int block, int reset)
+{
+	uint16_t *block_bits = &queue_header.bits[block];
+	uint8_t block_counter = MARS_WORKLOAD_BLOCK_COUNTER_MIN;
+
+	/* lock the queue header (queue_header is handed in as the mutex) */
+	mars_mutex_lock_get(kernel_params.workload_queue_ea,
+			    (struct mars_mutex *)&queue_header);
+
+	/* no reset requested: increment current counter, saturating at max */
+	if (!reset) {
+		block_counter = MARS_BITS_GET(block_bits, BLOCK_COUNTER);
+		if (block_counter < MARS_WORKLOAD_BLOCK_COUNTER_MAX)
+			block_counter++;
+	}
+
+	/* store the counter (COUNTER_MIN when reset is non-zero) */
+	MARS_BITS_SET(block_bits, BLOCK_COUNTER, block_counter);
+
+	/* unlock the queue header */
+	mars_mutex_unlock_put(kernel_params.workload_queue_ea,
+			      (struct mars_mutex *)&queue_header);
+}
+
 static int workload_query(uint16_t id, int query)
 {
 	uint64_t bits = get_block_bits(id);
@@ -430,6 +495,9 @@ static void schedule_end_callback(uint16
 	/* put the workload context into workload queue */
 	dma_put((void *)&schedule_workload, get_workload_ea(id),
 		sizeof(struct mars_workload_context));
+
+	/* update queue header bits */
+	update_header_bits(id / MARS_WORKLOAD_PER_BLOCK);
 }
 
 static int workload_schedule_end(uint16_t id)
@@ -484,12 +552,16 @@ static struct mars_kernel_syscalls kerne
 	host_signal_send
 };
 
-static int search_block(int block)
+static int search_block(int block, int ready)
 {
 	int i;
 	int index = -1;
 	uint8_t max_priority = 0;
 	uint16_t max_counter = 0;
+	uint64_t block_ea = get_block_ea(block);
+
+	/* lock the queue block */
+	mars_mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
 
 	/* search through all workloads in block */
 	for (i = 0; i < MARS_WORKLOAD_PER_BLOCK; i++) {
@@ -501,7 +573,7 @@ static int search_block(int block)
 		uint16_t counter = MARS_BITS_GET(bits, WORKLOAD_COUNTER);
 
 		/* found workload in ready state */
-		if (state == MARS_WORKLOAD_STATE_READY) {
+		if (ready && state == MARS_WORKLOAD_STATE_READY) {
 			/* compare priority and counter with previous ones */
 			if (index < 0 || priority > max_priority ||
 			  (priority == max_priority && counter > max_counter)) {
@@ -515,7 +587,7 @@ static int search_block(int block)
 				MARS_BITS_SET(bits, WORKLOAD_COUNTER,
 					      counter + 1);
 		/* found workload in waiting state */
-		} else if (state == MARS_WORKLOAD_STATE_WAITING) {
+		} else if (!ready && state == MARS_WORKLOAD_STATE_WAITING) {
 			/* waiting for workload to finish so check status */
 			if (wait_id != MARS_WORKLOAD_ID_NONE) {
 				struct mars_workload_queue_block wait_block;
@@ -550,6 +622,9 @@ static int search_block(int block)
 						MARS_WORKLOAD_ID_NONE);
 					MARS_BITS_SET(bits, WORKLOAD_STATE,
 						MARS_WORKLOAD_STATE_READY);
+
+					/* update queue header bits */
+					update_header_bits(block);
 				}
 			/* waiting for signal so check signal bit and reset */
 			} else if (signal == MARS_WORKLOAD_SIGNAL_ON) {
@@ -557,23 +632,14 @@ static int search_block(int block)
 					      MARS_WORKLOAD_SIGNAL_OFF);
 				MARS_BITS_SET(bits, WORKLOAD_STATE,
 					      MARS_WORKLOAD_STATE_READY);
+
+				/* update queue header bits */
+				update_header_bits(block);
 			}
 		}
 	}
 
-	/* returns -1 if no runnable workload found */
-	return index;
-}
-
-static int reserve_block(int block)
-{
-	int index;
-	uint64_t block_ea = get_block_ea(block);
-
-	mars_mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
-
-	/* set the workload index */
-	index = search_block(block);
+	/* index is set so reserve the runnable workload */
 	if (index >= 0) {
 		/* update the current state of the workload */
 		MARS_BITS_SET(&queue_block.bits[index], WORKLOAD_STATE,
@@ -582,10 +648,16 @@ static int reserve_block(int block)
 		/* reset the counter for reserved workload */
 		MARS_BITS_SET(&queue_block.bits[index], WORKLOAD_COUNTER,
 			      MARS_WORKLOAD_COUNTER_MIN);
+
+		/* update queue header bits and reset block counter */
+		update_header_bits(block);
+		update_header_bits_counter(block, 1);
 	}
 
+	/* unlock the queue block */
 	mars_mutex_unlock_put(block_ea, (struct mars_mutex *)&queue_block);
 
+	/* returns -1 if no runnable workload found */
 	return index;
 }
 
@@ -599,36 +671,57 @@ static void notify_host_bits(uint64_t bl
 	host_signal_send(bits_ea);
 }
 
-static void release_block(int block, int index)
+static int reserve_workload(void)
 {
-	uint64_t block_ea = get_block_ea(block);
-
-	mars_mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
+	int i;
+	int block = -1;
+	int index = -1;
+	uint8_t max_block_priority = 0;
+	uint16_t max_block_counter = 0;
 
-	/* update current workload state in workload queue block */
-	MARS_BITS_SET(&queue_block.bits[index], WORKLOAD_STATE, workload_state);
+	/* get the workload queue header */
+	dma_get(&queue_header, kernel_params.workload_queue_ea,
+		sizeof(struct mars_workload_queue_header));
 
-	mars_mutex_unlock_put(block_ea, (struct mars_mutex *)&queue_block);
+	/* return exit status if exit flag is set from host */
+	if (queue_header.flag == MARS_WORKLOAD_QUEUE_FLAG_EXIT)
+		return MARS_KERNEL_STATUS_EXIT;
 
-	if (workload_state == MARS_WORKLOAD_STATE_FINISHED)
-		notify_host_bits(block_ea, index);
-}
+	/* search workload queue header for highest priority ready block that
+	 * has waited the longest in ready state */
+	for (i = 0; i < MARS_WORKLOAD_NUM_BLOCKS; i++) {
+		uint16_t *bits = &queue_header.bits[i];
+		uint8_t block_ready    = MARS_BITS_GET(bits, BLOCK_READY);
+		uint8_t block_waiting  = MARS_BITS_GET(bits, BLOCK_WAITING);
+		uint8_t block_priority = MARS_BITS_GET(bits, BLOCK_PRIORITY);
+		uint16_t block_counter = MARS_BITS_GET(bits, BLOCK_COUNTER);
+
+		/* block is ready so check scheduling conditions */
+		if (block_ready &&
+		    (block < 0 || block_priority > max_block_priority ||
+		    (block_priority == max_block_priority &&
+		     block_counter > max_block_counter))) {
+			block = i;
+			max_block_priority = block_priority;
+			max_block_counter = block_counter;
 
-static int reserve_workload(void)
-{
-	int block;
-	int index = -1;
+			/* increment block counter */
+			update_header_bits_counter(block, 0);
+		}
 
-	/* search workload queue blocks until runnable workload reserved */
-	for (block = 0; block < MARS_WORKLOAD_NUM_BLOCKS; block++) {
-		index = reserve_block(block);
-		if (index >= 0)
-			break;
+		/* block is waiting so check block */
+		if (block_waiting)
+			search_block(i, 0);
 	}
 
 	/* no runnable workload found */
+	if (block < 0)
+		return MARS_KERNEL_STATUS_IDLE;
+
+	/* search block for workload index to run */
+	index = search_block(block, 1);
 	if (index < 0)
-		return MARS_WORKLOAD_RESERVED_NONE;
+		return MARS_KERNEL_STATUS_IDLE;
 
 	/* set global workload info based on workload block and index */
 	workload_id = MARS_WORKLOAD_PER_BLOCK * block + index;
@@ -637,19 +730,33 @@ static int reserve_workload(void)
 	/* get the workload context code from workload queue */
 	dma_get(&workload, workload_ea, sizeof(struct mars_workload_context));
 
-	return MARS_WORKLOAD_RESERVED;
+	return MARS_KERNEL_STATUS_BUSY;
 }
 
 static void release_workload(void)
 {
 	int block = workload_id / MARS_WORKLOAD_PER_BLOCK;
 	int index = workload_id % MARS_WORKLOAD_PER_BLOCK;
+	uint64_t block_ea = get_block_ea(block);
 
 	/* put the workload context into workload queue */
 	dma_put(&workload, workload_ea, sizeof(struct mars_workload_context));
 
-	/* release block reservation */
-	release_block(block, index);
+	/* lock the queue block */
+	mars_mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
+
+	/* update current workload state in workload queue block */
+	MARS_BITS_SET(&queue_block.bits[index], WORKLOAD_STATE, workload_state);
+
+	/* update queue header bits */
+	update_header_bits(block);
+
+	/* unlock the queue block */
+	mars_mutex_unlock_put(block_ea, (struct mars_mutex *)&queue_block);
+
+	/* workload state is finished so notify host */
+	if (workload_state == MARS_WORKLOAD_STATE_FINISHED)
+		notify_host_bits(block_ea, index);
 }
 
 static void __attribute__((noinline)) run_workload(void)
@@ -694,35 +801,21 @@ static void load_workload_module(void)
 
 static int scheduler(void)
 {
-	int status;
+	int status = reserve_workload();
 
-	/* get the workload queue header */
-	dma_get(&queue_header,
-		kernel_params.workload_queue_ea +
-		offsetof(struct mars_workload_queue, header),
-		sizeof(struct mars_workload_queue_header));
-
-	/* return exit status if exit flag is set from host */
-	if (queue_header.flag == MARS_WORKLOAD_QUEUE_FLAG_EXIT)
-		return MARS_KERNEL_STATUS_EXIT;
+	/* workload reserved */
+	if (status == MARS_KERNEL_STATUS_BUSY) {
+		/* load the workload module */
+		load_workload_module();
 
-	/* reserve next workload to run or return idle status if none found */
-	status = reserve_workload();
+		/* run workload */
+		run_workload();
 
-	/* return idle status if no workload was reserved */
-	if (status == MARS_WORKLOAD_RESERVED_NONE)
-		return MARS_KERNEL_STATUS_IDLE;
-
-	/* load the workload module */
-	load_workload_module();
-
-	/* run workload */
-	run_workload();
-
-	/* release reservation of current workload */
-	release_workload();
+		/* release reservation of current workload */
+		release_workload();
+	}
 
-	return MARS_KERNEL_STATUS_BUSY;
+	return status;
 }
 
 static void get_params(uint64_t kernel_params_ea)