[Cbe-oss-dev] [PATCH 12/28] MARS/base: kernel scheduler fix
Yuji Mano
yuji.mano at am.sony.com
Fri Feb 6 13:31:31 EST 2009
This addresses some bugs and performance issues in the scheduler implementation.
This adds a new layer of workload queue block bits stored inside the workload
queue header. The block bits summarize the state of each block: the maximum
priority of its workloads in the ready state, whether the block holds any
workloads in the ready state, whether it holds any workloads in the waiting
state, etc.
This avoids having to DMA each workload queue block and search through all of
its workload bits just to obtain that information.
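As an illustration only (not part of the patch), the 16-bit block bits
described above could be packed and unpacked as in the standalone sketch
below; the helper names are hypothetical, while the field widths and
positions mirror the MARS_BITS_SHIFT_BLOCK_* / MARS_BITS_MASK_BLOCK_*
definitions added to workload_internal_types.h in this patch:

/* Illustrative only -- hypothetical helpers; layout mirrors the block
 * bits added in workload_internal_types.h:
 *   [15..8] PRIORITY, [7..2] COUNTER, [1] READY, [0] WAITING
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t pack_block_bits(uint8_t priority, uint8_t counter,
                                uint8_t ready, uint8_t waiting)
{
        return ((uint16_t)priority << 8) |
               ((uint16_t)(counter & 0x3f) << 2) |
               ((uint16_t)(ready & 0x1) << 1) |
               ((uint16_t)(waiting & 0x1) << 0);
}

int main(void)
{
        uint16_t bits = pack_block_bits(200, 3, 1, 0);

        /* prints: priority=200 counter=3 ready=1 waiting=0 */
        printf("priority=%u counter=%u ready=%u waiting=%u\n",
               bits >> 8, (bits >> 2) & 0x3f,
               (bits >> 1) & 0x1, bits & 0x1);
        return 0;
}
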
* queue header is now inherited from mars_mutex
* queue header holds block info bits
* queue header block info bits must be updated after each workload state change
* workload blocks reduced to 50, max workloads reduced to 750
* scheduler only needs to fetch and search workload queue blocks that have
waiting workloads or hold the workload with the highest priority to be
scheduled next
Signed-off-by: Yuji Mano <yuji.mano at am.sony.com>
---
base/src/common/workload_internal_types.h | 39 ++++-
base/src/host/lib/workload_queue.c | 76 +++++++++-
base/src/mpu/kernel/kernel.c | 227 +++++++++++++++++++++---------
3 files changed, 270 insertions(+), 72 deletions(-)
--- a/base/src/common/workload_internal_types.h
+++ b/base/src/common/workload_internal_types.h
@@ -63,8 +63,8 @@
#define MARS_WORKLOAD_SIGNAL_ON 0x1 /* signal set on */
#define MARS_WORKLOAD_PER_BLOCK 15 /* wl/block */
-#define MARS_WORKLOAD_NUM_BLOCKS 100 /* total blocks */
-#define MARS_WORKLOAD_MAX 1500 /* blocks * wl/block */
+#define MARS_WORKLOAD_NUM_BLOCKS 50 /* total blocks */
+#define MARS_WORKLOAD_MAX 750 /* blocks * wl/block */
#define MARS_WORKLOAD_QUEUE_ALIGN 128 /* align to 128 bytes */
#define MARS_WORKLOAD_QUEUE_HEADER_ALIGN 128 /* align to 128 bytes */
@@ -74,6 +74,37 @@
#define MARS_WORKLOAD_QUEUE_FLAG_NONE 0x0 /* no flag set */
#define MARS_WORKLOAD_QUEUE_FLAG_EXIT 0x1 /* exit flag */
+#define MARS_WORKLOAD_BLOCK_PRIORITY_MIN MARS_WORKLOAD_PRIORITY_MIN
+#define MARS_WORKLOAD_BLOCK_PRIORITY_MAX MARS_WORKLOAD_PRIORITY_MAX
+
+#define MARS_WORKLOAD_BLOCK_COUNTER_MIN 0x00
+#define MARS_WORKLOAD_BLOCK_COUNTER_MAX 0x3f
+
+#define MARS_WORKLOAD_BLOCK_READY_OFF 0x0
+#define MARS_WORKLOAD_BLOCK_READY_ON 0x1
+
+#define MARS_WORKLOAD_BLOCK_WAITING_OFF 0x0
+#define MARS_WORKLOAD_BLOCK_WAITING_ON 0x1
+
+/*
+ * MARS workload queue header block bits (16-bits)
+ * ----------------------------------------
+ * |[15.....8]|[7.....2]| [ 1 ] |  [ 0 ]  |
+ * ----------------------------------------
+ * |  8-bits  | 6-bits  | 1-bit |  1-bit  |
+ * ----------------------------------------
+ * | PRIORITY | COUNTER | READY | WAITING |
+ * ----------------------------------------
+ */
+#define MARS_BITS_SHIFT_BLOCK_PRIORITY 8
+#define MARS_BITS_SHIFT_BLOCK_COUNTER 2
+#define MARS_BITS_SHIFT_BLOCK_READY 1
+#define MARS_BITS_SHIFT_BLOCK_WAITING 0
+
+#define MARS_BITS_MASK_BLOCK_PRIORITY 0x000000000000ff00ULL
+#define MARS_BITS_MASK_BLOCK_COUNTER 0x00000000000000fcULL
+#define MARS_BITS_MASK_BLOCK_READY 0x0000000000000002ULL
+#define MARS_BITS_MASK_BLOCK_WAITING 0x0000000000000001ULL
+
/*
* MARS workload queue block workload bits (64-bits)
* ------------------------------------------------------------------
@@ -107,10 +138,12 @@
/* 128 byte workload queue header structure */
struct mars_workload_queue_header {
+ uint32_t lock;
+ uint32_t pad;
uint64_t queue_ea;
uint64_t context_ea;
uint32_t flag;
- uint8_t pad[MARS_WORKLOAD_QUEUE_HEADER_SIZE - 20];
+ uint16_t bits[MARS_WORKLOAD_NUM_BLOCKS];
} __attribute__((aligned(MARS_WORKLOAD_QUEUE_HEADER_ALIGN)));
/* 128 byte workload queue block structure */
--- a/base/src/host/lib/workload_queue.c
+++ b/base/src/host/lib/workload_queue.c
@@ -74,6 +74,8 @@ static inline uint64_t get_block_bits_ea
static void init_header(uint64_t queue_ea)
{
+ int block;
+ uint16_t bits = 0;
struct mars_workload_queue *queue;
/* prepare work area for queue header */
@@ -87,8 +89,20 @@ static void init_header(uint64_t queue_e
queue->header.context_ea =
queue_ea + offsetof(struct mars_workload_queue, context);
+ /* create initial bit pattern of workload queue header */
+ MARS_BITS_SET(&bits, BLOCK_PRIORITY, MARS_WORKLOAD_BLOCK_PRIORITY_MIN);
+ MARS_BITS_SET(&bits, BLOCK_COUNTER, MARS_WORKLOAD_BLOCK_COUNTER_MIN);
+ MARS_BITS_SET(&bits, BLOCK_READY, MARS_WORKLOAD_BLOCK_READY_OFF);
+ MARS_BITS_SET(&bits, BLOCK_WAITING, MARS_WORKLOAD_BLOCK_WAITING_OFF);
+
+ for (block = 0; block < MARS_WORKLOAD_NUM_BLOCKS; block++)
+ queue->header.bits[block] = bits;
+
/* update queue header on EA */
mars_ea_put(queue_ea, queue, sizeof(struct mars_workload_queue_header));
+
+ /* reset mutex portion of queue header */
+ mars_mutex_reset(queue_ea);
}
static void init_blocks(uint64_t queue_ea)
@@ -456,13 +470,71 @@ int mars_workload_queue_schedule_begin(s
NULL);
}
+static void update_header_bits(struct mars_context *mars, uint16_t id)
+{
+ int block = id / MARS_WORKLOAD_PER_BLOCK;
+ int index;
+ uint64_t queue_ea;
+ uint64_t block_ea;
+ uint64_t header_bits_ea;
+ uint16_t header_bits;
+ uint8_t block_ready = MARS_WORKLOAD_BLOCK_READY_OFF;
+ uint8_t block_waiting = MARS_WORKLOAD_BLOCK_WAITING_OFF;
+ uint8_t block_priority = MARS_WORKLOAD_BLOCK_PRIORITY_MIN;
+
+ queue_ea = mars->workload_queue_ea;
+
+ block_ea = get_block_ea(queue_ea, block);
+
+ /* search through currently locked queue block workload bits */
+ for (index = 0; index < MARS_WORKLOAD_PER_BLOCK; index++) {
+ uint64_t bits_ea = get_block_bits_ea(block_ea, index);
+ uint64_t bits = mars_ea_get_uint64(bits_ea);
+ uint8_t state = MARS_BITS_GET(&bits, WORKLOAD_STATE);
+
+ /* workload state is ready so check priority */
+ if (state == MARS_WORKLOAD_STATE_READY) {
+ uint8_t priority = MARS_BITS_GET(&bits,
+ WORKLOAD_PRIORITY);
+
+ /* set block priority if higher than current */
+ if (priority > block_priority)
+ block_priority = priority;
+
+ /* set block ready bit in header bits for block */
+ block_ready = MARS_WORKLOAD_BLOCK_READY_ON;
+ } else if (state == MARS_WORKLOAD_STATE_WAITING) {
+ /* set block waiting bit in header bits for block */
+ block_waiting = MARS_WORKLOAD_BLOCK_WAITING_ON;
+ }
+ }
+
+ /* lock the queue header */
+ mars_mutex_lock(queue_ea);
+
+ /* set the info bits inside queue header for this queue block */
+ header_bits_ea = queue_ea +
+ offsetof(struct mars_workload_queue_header, bits) +
+ sizeof(uint16_t) * block;
+ header_bits = mars_ea_get_uint16(header_bits_ea);
+
+ MARS_BITS_SET(&header_bits, BLOCK_READY, block_ready);
+ MARS_BITS_SET(&header_bits, BLOCK_WAITING, block_waiting);
+ MARS_BITS_SET(&header_bits, BLOCK_PRIORITY, block_priority);
+
+ mars_ea_put_uint16(header_bits_ea, header_bits);
+
+ /* unlock the queue header */
+ mars_mutex_unlock(queue_ea);
+}
+
int mars_workload_queue_schedule_end(struct mars_context *mars,
uint16_t id)
{
return change_state(mars, id, NULL,
MARS_WORKLOAD_STATE_SCHEDULING,
MARS_WORKLOAD_STATE_READY,
- NULL);
+ update_header_bits);
}
int mars_workload_queue_schedule_cancel(struct mars_context *mars,
@@ -566,5 +638,5 @@ int mars_workload_queue_signal_send(stru
return change_bits(mars, id, NULL,
check_state_bits_not, MARS_WORKLOAD_STATE_NONE,
set_signal_bits, MARS_WORKLOAD_SIGNAL_ON,
- NULL);
+ update_header_bits);
}
--- a/base/src/mpu/kernel/kernel.c
+++ b/base/src/mpu/kernel/kernel.c
@@ -48,9 +48,6 @@
#include "kernel_internal_types.h"
#include "workload_internal_types.h"
-#define MARS_WORKLOAD_RESERVED_NONE 0
-#define MARS_WORKLOAD_RESERVED 1
-
/* kernel */
void *__kernel_stack;
static struct mars_kernel_params kernel_params;
@@ -218,6 +215,74 @@ static uint64_t get_block_bits(uint16_t
return block_bits;
}
+static void update_header_bits(int block)
+{
+ int i;
+ uint16_t *block_bits = &queue_header.bits[block];
+ uint8_t block_ready = MARS_WORKLOAD_BLOCK_READY_OFF;
+ uint8_t block_waiting = MARS_WORKLOAD_BLOCK_WAITING_OFF;
+ uint8_t block_priority = MARS_WORKLOAD_BLOCK_PRIORITY_MIN;
+
+ /* search through currently locked queue block workload bits */
+ for (i = 0; i < MARS_WORKLOAD_PER_BLOCK; i++) {
+ uint64_t *bits = &queue_block.bits[i];
+ uint8_t state = MARS_BITS_GET(bits, WORKLOAD_STATE);
+
+ /* workload state is ready so check priority */
+ if (state == MARS_WORKLOAD_STATE_READY) {
+ uint8_t priority = MARS_BITS_GET(bits,
+ WORKLOAD_PRIORITY);
+
+ /* set block priority if higher than current */
+ if (priority > block_priority)
+ block_priority = priority;
+
+ /* set block ready bit in header bits for block */
+ block_ready = MARS_WORKLOAD_BLOCK_READY_ON;
+ } else if (state == MARS_WORKLOAD_STATE_WAITING) {
+ /* set block waiting bit in header bits for block */
+ block_waiting = MARS_WORKLOAD_BLOCK_WAITING_ON;
+ }
+ }
+
+ /* lock the queue header */
+ mars_mutex_lock_get(kernel_params.workload_queue_ea,
+ (struct mars_mutex *)&queue_header);
+
+ /* set the info bits inside queue header for this queue block */
+ MARS_BITS_SET(block_bits, BLOCK_READY, block_ready);
+ MARS_BITS_SET(block_bits, BLOCK_WAITING, block_waiting);
+ MARS_BITS_SET(block_bits, BLOCK_PRIORITY, block_priority);
+
+ /* unlock the queue header */
+ mars_mutex_unlock_put(kernel_params.workload_queue_ea,
+ (struct mars_mutex *)&queue_header);
+}
+
+static void update_header_bits_counter(int block, int reset)
+{
+ uint16_t *block_bits = &queue_header.bits[block];
+ uint8_t block_counter = MARS_WORKLOAD_BLOCK_COUNTER_MIN;
+
+ /* lock the queue header */
+ mars_mutex_lock_get(kernel_params.workload_queue_ea,
+ (struct mars_mutex *)&queue_header);
+
+ /* reset is not specified so increment current block counter */
+ if (!reset) {
+ block_counter = MARS_BITS_GET(block_bits, BLOCK_COUNTER);
+ if (block_counter < MARS_WORKLOAD_BLOCK_COUNTER_MAX)
+ block_counter++;
+ }
+
+ /* set the block counter bits */
+ MARS_BITS_SET(block_bits, BLOCK_COUNTER, block_counter);
+
+ /* unlock the queue header */
+ mars_mutex_unlock_put(kernel_params.workload_queue_ea,
+ (struct mars_mutex *)&queue_header);
+}
+
static int workload_query(uint16_t id, int query)
{
uint64_t bits = get_block_bits(id);
@@ -430,6 +495,9 @@ static void schedule_end_callback(uint16
/* put the workload context into workload queue */
dma_put((void *)&schedule_workload, get_workload_ea(id),
sizeof(struct mars_workload_context));
+
+ /* update queue header bits */
+ update_header_bits(id / MARS_WORKLOAD_PER_BLOCK);
}
static int workload_schedule_end(uint16_t id)
@@ -484,12 +552,16 @@ static struct mars_kernel_syscalls kerne
host_signal_send
};
-static int search_block(int block)
+static int search_block(int block, int ready)
{
int i;
int index = -1;
uint8_t max_priority = 0;
uint16_t max_counter = 0;
+ uint64_t block_ea = get_block_ea(block);
+
+ /* lock the queue block */
+ mars_mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
/* search through all workloads in block */
for (i = 0; i < MARS_WORKLOAD_PER_BLOCK; i++) {
@@ -501,7 +573,7 @@ static int search_block(int block)
uint16_t counter = MARS_BITS_GET(bits, WORKLOAD_COUNTER);
/* found workload in ready state */
- if (state == MARS_WORKLOAD_STATE_READY) {
+ if (ready && state == MARS_WORKLOAD_STATE_READY) {
/* compare priority and counter with previous ones */
if (index < 0 || priority > max_priority ||
(priority == max_priority && counter > max_counter)) {
@@ -515,7 +587,7 @@ static int search_block(int block)
MARS_BITS_SET(bits, WORKLOAD_COUNTER,
counter + 1);
/* found workload in waiting state */
- } else if (state == MARS_WORKLOAD_STATE_WAITING) {
+ } else if (!ready && state == MARS_WORKLOAD_STATE_WAITING) {
/* waiting for workload to finish so check status */
if (wait_id != MARS_WORKLOAD_ID_NONE) {
struct mars_workload_queue_block wait_block;
@@ -550,6 +622,9 @@ static int search_block(int block)
MARS_WORKLOAD_ID_NONE);
MARS_BITS_SET(bits, WORKLOAD_STATE,
MARS_WORKLOAD_STATE_READY);
+
+ /* update queue header bits */
+ update_header_bits(block);
}
/* waiting for signal so check signal bit and reset */
} else if (signal == MARS_WORKLOAD_SIGNAL_ON) {
@@ -557,23 +632,14 @@ static int search_block(int block)
MARS_WORKLOAD_SIGNAL_OFF);
MARS_BITS_SET(bits, WORKLOAD_STATE,
MARS_WORKLOAD_STATE_READY);
+
+ /* update queue header bits */
+ update_header_bits(block);
}
}
}
- /* returns -1 if no runnable workload found */
- return index;
-}
-
-static int reserve_block(int block)
-{
- int index;
- uint64_t block_ea = get_block_ea(block);
-
- mars_mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
-
- /* set the workload index */
- index = search_block(block);
+ /* index is set so reserve the runnable workload */
if (index >= 0) {
/* update the current state of the workload */
MARS_BITS_SET(&queue_block.bits[index], WORKLOAD_STATE,
@@ -582,10 +648,16 @@ static int reserve_block(int block)
/* reset the counter for reserved workload */
MARS_BITS_SET(&queue_block.bits[index], WORKLOAD_COUNTER,
MARS_WORKLOAD_COUNTER_MIN);
+
+ /* update queue header bits and reset block counter */
+ update_header_bits(block);
+ update_header_bits_counter(block, 1);
}
+ /* unlock the queue block */
mars_mutex_unlock_put(block_ea, (struct mars_mutex *)&queue_block);
+ /* returns -1 if no runnable workload found */
return index;
}
@@ -599,36 +671,57 @@ static void notify_host_bits(uint64_t bl
host_signal_send(bits_ea);
}
-static void release_block(int block, int index)
+static int reserve_workload(void)
{
- uint64_t block_ea = get_block_ea(block);
-
- mars_mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
+ int i;
+ int block = -1;
+ int index = -1;
+ uint8_t max_block_priority = 0;
+ uint16_t max_block_counter = 0;
- /* update current workload state in workload queue block */
- MARS_BITS_SET(&queue_block.bits[index], WORKLOAD_STATE, workload_state);
+ /* get the workload queue header */
+ dma_get(&queue_header, kernel_params.workload_queue_ea,
+ sizeof(struct mars_workload_queue_header));
- mars_mutex_unlock_put(block_ea, (struct mars_mutex *)&queue_block);
+ /* return exit status if exit flag is set from host */
+ if (queue_header.flag == MARS_WORKLOAD_QUEUE_FLAG_EXIT)
+ return MARS_KERNEL_STATUS_EXIT;
- if (workload_state == MARS_WORKLOAD_STATE_FINISHED)
- notify_host_bits(block_ea, index);
-}
+ /* search workload queue header for highest priority ready block that
+ * has waited the longest in ready state */
+ for (i = 0; i < MARS_WORKLOAD_NUM_BLOCKS; i++) {
+ uint16_t *bits = &queue_header.bits[i];
+ uint8_t block_ready = MARS_BITS_GET(bits, BLOCK_READY);
+ uint8_t block_waiting = MARS_BITS_GET(bits, BLOCK_WAITING);
+ uint8_t block_priority = MARS_BITS_GET(bits, BLOCK_PRIORITY);
+ uint16_t block_counter = MARS_BITS_GET(bits, BLOCK_COUNTER);
+
+ /* block is ready so check scheduling conditions */
+ if (block_ready &&
+ (block < 0 || block_priority > max_block_priority ||
+ (block_priority == max_block_priority &&
+ block_counter > max_block_counter))) {
+ block = i;
+ max_block_priority = block_priority;
+ max_block_counter = block_counter;
-static int reserve_workload(void)
-{
- int block;
- int index = -1;
+ /* increment block counter */
+ update_header_bits_counter(block, 0);
+ }
- /* search workload queue blocks until runnable workload reserved */
- for (block = 0; block < MARS_WORKLOAD_NUM_BLOCKS; block++) {
- index = reserve_block(block);
- if (index >= 0)
- break;
+ /* block is waiting so check block */
+ if (block_waiting)
+ search_block(i, 0);
}
/* no runnable workload found */
+ if (block < 0)
+ return MARS_KERNEL_STATUS_IDLE;
+
+ /* search block for workload index to run */
+ index = search_block(block, 1);
if (index < 0)
- return MARS_WORKLOAD_RESERVED_NONE;
+ return MARS_KERNEL_STATUS_IDLE;
/* set global workload info based on workload block and index */
workload_id = MARS_WORKLOAD_PER_BLOCK * block + index;
@@ -637,19 +730,33 @@ static int reserve_workload(void)
/* get the workload context code from workload queue */
dma_get(&workload, workload_ea, sizeof(struct mars_workload_context));
- return MARS_WORKLOAD_RESERVED;
+ return MARS_KERNEL_STATUS_BUSY;
}
static void release_workload(void)
{
int block = workload_id / MARS_WORKLOAD_PER_BLOCK;
int index = workload_id % MARS_WORKLOAD_PER_BLOCK;
+ uint64_t block_ea = get_block_ea(block);
/* put the workload context into workload queue */
dma_put(&workload, workload_ea, sizeof(struct mars_workload_context));
- /* release block reservation */
- release_block(block, index);
+ /* lock the queue block */
+ mars_mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
+
+ /* update current workload state in workload queue block */
+ MARS_BITS_SET(&queue_block.bits[index], WORKLOAD_STATE, workload_state);
+
+ /* update queue header bits */
+ update_header_bits(block);
+
+ /* unlock the queue block */
+ mars_mutex_unlock_put(block_ea, (struct mars_mutex *)&queue_block);
+
+ /* workload state is finished so notify host */
+ if (workload_state == MARS_WORKLOAD_STATE_FINISHED)
+ notify_host_bits(block_ea, index);
}
static void __attribute__((noinline)) run_workload(void)
@@ -694,35 +801,21 @@ static void load_workload_module(void)
static int scheduler(void)
{
- int status;
+ int status = reserve_workload();
- /* get the workload queue header */
- dma_get(&queue_header,
- kernel_params.workload_queue_ea +
- offsetof(struct mars_workload_queue, header),
- sizeof(struct mars_workload_queue_header));
-
- /* return exit status if exit flag is set from host */
- if (queue_header.flag == MARS_WORKLOAD_QUEUE_FLAG_EXIT)
- return MARS_KERNEL_STATUS_EXIT;
+ /* workload reserved */
+ if (status == MARS_KERNEL_STATUS_BUSY) {
+ /* load the workload module */
+ load_workload_module();
- /* reserve next workload to run or return idle status if none found */
- status = reserve_workload();
+ /* run workload */
+ run_workload();
- /* return idle status if no workload was reserved */
- if (status == MARS_WORKLOAD_RESERVED_NONE)
- return MARS_KERNEL_STATUS_IDLE;
-
- /* load the workload module */
- load_workload_module();
-
- /* run workload */
- run_workload();
-
- /* release reservation of current workload */
- release_workload();
+ /* release reservation of current workload */
+ release_workload();
+ }
- return MARS_KERNEL_STATUS_BUSY;
+ return status;
}
static void get_params(uint64_t kernel_params_ea)
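
For reference, the block selection that the reworked reserve_workload()
performs against the queue header bits amounts to the policy sketched
below. This is an illustrative standalone version with hypothetical
struct and function names, not code from the patch:

#include <stdint.h>

#define NUM_BLOCKS 50  /* matches MARS_WORKLOAD_NUM_BLOCKS after this patch */

/* hypothetical unpacked view of one queue header block bits entry */
struct block_info {
        uint8_t ready;     /* block has at least one READY workload      */
        uint8_t priority;  /* max priority among its READY workloads     */
        uint8_t counter;   /* scheduling passes since the block last ran */
};

/* pick the ready block with the highest priority, breaking ties in
 * favor of the block that has waited the longest (largest counter);
 * returns -1 when no block is ready */
int pick_block(const struct block_info info[NUM_BLOCKS])
{
        int i, best = -1;
        uint8_t best_priority = 0;
        uint8_t best_counter = 0;

        for (i = 0; i < NUM_BLOCKS; i++) {
                if (!info[i].ready)
                        continue;
                if (best < 0 || info[i].priority > best_priority ||
                    (info[i].priority == best_priority &&
                     info[i].counter > best_counter)) {
                        best = i;
                        best_priority = info[i].priority;
                        best_counter = info[i].counter;
                }
        }
        return best;
}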