[Cbe-oss-dev] [PATCH 6/10] MARS: workload queue block use bit fields

Thu Aug 28 15:49:18 EST 2008

Yuji Mano wrote:
> This patch changes the workload queue block structure to use a bitfield rather
> than having separate arrays for each member and also allows us to specify
> smaller sizes for members that don't need all 8-bits.
> 
> Signed-off-by: Yuji Mano <yuji.mano at am.sony.com>
> 
> ---
>  include/common/mars/mars_workload_types.h |   24 ++++++++----
>  src/host/lib/mars_workload_queue.c        |   56 +++++++++++++++---------------
>  src/mpu/kernel/mars_kernel_scheduler.c    |   51 ++++++++++++++-------------
>  src/mpu/kernel/mars_kernel_workload.c     |   33 +++++++++--------
>  4 files changed, 89 insertions(+), 75 deletions(-)
> 
> --- a/include/common/mars/mars_workload_types.h
> +++ b/include/common/mars/mars_workload_types.h
> @@ -61,8 +61,8 @@ extern "C" {
>  #define MARS_WORKLOAD_COUNTER_MIN		0x0000	/* minimum counter */
>  #define MARS_WORKLOAD_COUNTER_MAX		0xffff	/* maximum counter */
>  
> -#define MARS_WORKLOAD_SIGNAL_ON			1	/* signal set on */
> -#define MARS_WORKLOAD_SIGNAL_OFF		0	/* signal set off */
> +#define MARS_WORKLOAD_SIGNAL_ON			0x1	/* signal set on */
> +#define MARS_WORKLOAD_SIGNAL_OFF		0x0	/* signal set off */
>  
>  #define MARS_WORKLOAD_MAX			1024	/* wl max */
>  #define MARS_WORKLOAD_PER_BLOCK			16	/* wl per block */
> @@ -70,9 +70,10 @@ extern "C" {
>  
>  #define MARS_WORKLOAD_CONTEXT_SIZE		128	/* size of 128 bytes */
>  #define MARS_WORKLOAD_CONTEXT_ALIGN		128	/* align to 128 bytes */
> +#define MARS_WORKLOAD_QUEUE_ALIGN		128	/* align to 128 bytes */
>  #define MARS_WORKLOAD_QUEUE_HEADER_ALIGN	128	/* align to 128 bytes */
>  #define MARS_WORKLOAD_QUEUE_BLOCK_ALIGN		128	/* align to 128 bytes */
> -#define MARS_WORKLOAD_QUEUE_ALIGN		128	/* align to 128 bytes */
> +#define MARS_WORKLOAD_QUEUE_BLOCK_BITS_ALIGN	8	/* align to 8 bytes */
>  
>  /* mars workload context */
>  struct mars_workload_context {
> @@ -87,14 +88,19 @@ struct mars_workload_queue_header {
>  	uint8_t flag;
>  } __attribute__((aligned(MARS_WORKLOAD_QUEUE_HEADER_ALIGN)));
>  
> +/* 8 byte workload queue block bits structure */
> +struct mars_workload_queue_block_bits {
> +	uint64_t type:4;
> +	uint64_t state:4;
> +	uint64_t priority:8;
> +	uint64_t signal:1;
> +	uint64_t wait:16;
> +	uint64_t counter:16;
> +} __attribute__((aligned(MARS_WORKLOAD_QUEUE_BLOCK_BITS_ALIGN)));

The changes are essentially OK, but I suggest not using C bit-fields
syntax and rather using explicit bit mask, so that you can guarantee
binary compatibility of MARS and/or enable various combinations of hosts
and MPUs.

> +
>  /* 128 byte workload queue block structure */
>  struct mars_workload_queue_block {
> -	uint8_t type[MARS_WORKLOAD_PER_BLOCK];
> -	uint8_t state[MARS_WORKLOAD_PER_BLOCK];
> -	uint8_t priority[MARS_WORKLOAD_PER_BLOCK];
> -	uint16_t counter[MARS_WORKLOAD_PER_BLOCK];
> -	uint8_t signal[MARS_WORKLOAD_PER_BLOCK];
> -	uint16_t wait[MARS_WORKLOAD_PER_BLOCK];
> +	struct mars_workload_queue_block_bits bits[MARS_WORKLOAD_PER_BLOCK];
>  } __attribute__((aligned(MARS_WORKLOAD_QUEUE_BLOCK_ALIGN)));
>  
>  /* mars workload queue structure */
> --- a/src/host/lib/mars_workload_queue.c
> +++ b/src/host/lib/mars_workload_queue.c
> @@ -88,7 +88,7 @@ int workload_queue_add(struct mars_workl
>  
>  		for (i = 0; i < MARS_WORKLOAD_NUM_BLOCKS; i++) {
>  			for (j = 0; j < MARS_WORKLOAD_PER_BLOCK; j++) {
> -				if (queue->block[i].state[j] ==
> +				if (queue->block[i].bits[j].state ==
>  					MARS_WORKLOAD_STATE_NONE) {
>  					block = i;
>  					index = j;
> @@ -107,12 +107,12 @@ int workload_queue_add(struct mars_workl
>  		sizeof(struct mars_workload_context));
>  
>  	/* update workload queue header info */
> -	queue->block[block].type[index] = type;
> -	queue->block[block].state[index] = MARS_WORKLOAD_STATE_FINISHED;
> -	queue->block[block].priority[index] = MARS_WORKLOAD_PRIORITY_MIN;
> -	queue->block[block].counter[index] = MARS_WORKLOAD_COUNTER_MIN;
> -	queue->block[block].signal[index] = MARS_WORKLOAD_SIGNAL_OFF;
> -	queue->block[block].wait[index] = MARS_WORKLOAD_ID_NONE;
> +	queue->block[block].bits[index].type = type;
> +	queue->block[block].bits[index].state = MARS_WORKLOAD_STATE_FINISHED;
> +	queue->block[block].bits[index].priority = MARS_WORKLOAD_PRIORITY_MIN;
> +	queue->block[block].bits[index].counter = MARS_WORKLOAD_COUNTER_MIN;
> +	queue->block[block].bits[index].signal = MARS_WORKLOAD_SIGNAL_OFF;
> +	queue->block[block].bits[index].wait = MARS_WORKLOAD_ID_NONE;
>  	queue->header.count++;
>  
>  	return MARS_SUCCESS;
> @@ -128,7 +128,7 @@ int workload_queue_remove(struct mars_wo
>  	int block = id / MARS_WORKLOAD_PER_BLOCK;
>  	int index = id % MARS_WORKLOAD_PER_BLOCK;
>  
> -	MARS_CHECK_RET(queue->block[block].state[index] ==
> +	MARS_CHECK_RET(queue->block[block].bits[index].state ==
>  			MARS_WORKLOAD_STATE_FINISHED, MARS_ERROR_STATE);
>  
>  	/* copy workload context out from workload queue */
> @@ -136,12 +136,12 @@ int workload_queue_remove(struct mars_wo
>  		sizeof(struct mars_workload_context));
>  
>  	/* update workload queue info */
> -	queue->block[block].type[index] = MARS_WORKLOAD_TYPE_NONE;
> -	queue->block[block].state[index] = MARS_WORKLOAD_STATE_NONE;
> -	queue->block[block].priority[index] = MARS_WORKLOAD_PRIORITY_MIN;
> -	queue->block[block].counter[index] = MARS_WORKLOAD_COUNTER_MIN;
> -	queue->block[block].signal[index] = MARS_WORKLOAD_SIGNAL_OFF;
> -	queue->block[block].wait[index] = MARS_WORKLOAD_ID_NONE;
> +	queue->block[block].bits[index].type = MARS_WORKLOAD_TYPE_NONE;
> +	queue->block[block].bits[index].state = MARS_WORKLOAD_STATE_NONE;
> +	queue->block[block].bits[index].priority = MARS_WORKLOAD_PRIORITY_MIN;
> +	queue->block[block].bits[index].counter = MARS_WORKLOAD_COUNTER_MIN;
> +	queue->block[block].bits[index].signal = MARS_WORKLOAD_SIGNAL_OFF;
> +	queue->block[block].bits[index].wait = MARS_WORKLOAD_ID_NONE;
>  	queue->header.count--;
>  
>  	return MARS_SUCCESS;
> @@ -157,7 +157,7 @@ int workload_queue_context(struct mars_w
>  	int block = id / MARS_WORKLOAD_PER_BLOCK;
>  	int index = id % MARS_WORKLOAD_PER_BLOCK;
>  
> -	MARS_CHECK_RET(queue->block[block].state[index] !=
> +	MARS_CHECK_RET(queue->block[block].bits[index].state !=
>  			MARS_WORKLOAD_STATE_NONE, MARS_ERROR_STATE);
>  
>  	*workload = &queue->context[id];
> @@ -174,14 +174,14 @@ int workload_queue_schedule(struct mars_
>  	int block = id / MARS_WORKLOAD_PER_BLOCK;
>  	int index = id % MARS_WORKLOAD_PER_BLOCK;
>  
> -	MARS_CHECK_RET(queue->block[block].state[index] ==
> +	MARS_CHECK_RET(queue->block[block].bits[index].state ==
>  			MARS_WORKLOAD_STATE_FINISHED, MARS_ERROR_STATE);
>  
> -	queue->block[block].state[index] = MARS_WORKLOAD_STATE_READY;
> -	queue->block[block].priority[index] = priority;
> -	queue->block[block].counter[index] = MARS_WORKLOAD_COUNTER_MIN;
> -	queue->block[block].signal[index] = MARS_WORKLOAD_SIGNAL_OFF;
> -	queue->block[block].wait[index] = MARS_WORKLOAD_ID_NONE;
> +	queue->block[block].bits[index].state = MARS_WORKLOAD_STATE_READY;
> +	queue->block[block].bits[index].priority = priority;
> +	queue->block[block].bits[index].counter = MARS_WORKLOAD_COUNTER_MIN;
> +	queue->block[block].bits[index].signal = MARS_WORKLOAD_SIGNAL_OFF;
> +	queue->block[block].bits[index].wait = MARS_WORKLOAD_ID_NONE;
>  
>  	return MARS_SUCCESS;
>  }
> @@ -194,10 +194,11 @@ int workload_queue_wait(struct mars_work
>  	int block = id / MARS_WORKLOAD_PER_BLOCK;
>  	int index = id % MARS_WORKLOAD_PER_BLOCK;
>  
> -	MARS_CHECK_RET(queue->block[block].state[index] !=
> +	MARS_CHECK_RET(queue->block[block].bits[index].state !=
>  			MARS_WORKLOAD_STATE_NONE, MARS_ERROR_STATE);
>  
> -	while (queue->block[block].state[index] != MARS_WORKLOAD_STATE_FINISHED)
> +	while (queue->block[block].bits[index].state !=
> +		MARS_WORKLOAD_STATE_FINISHED)
>  		sched_yield();
>  
>  	return MARS_SUCCESS;
> @@ -211,10 +212,11 @@ int workload_queue_try_wait(struct mars_
>  	int block = id / MARS_WORKLOAD_PER_BLOCK;
>  	int index = id % MARS_WORKLOAD_PER_BLOCK;
>  
> -	MARS_CHECK_RET(queue->block[block].state[index] !=
> +	MARS_CHECK_RET(queue->block[block].bits[index].state !=
>  			MARS_WORKLOAD_STATE_NONE, MARS_ERROR_STATE);
>  
> -	if (queue->block[block].state[index] != MARS_WORKLOAD_STATE_FINISHED)
> +	if (queue->block[block].bits[index].state !=
> +		MARS_WORKLOAD_STATE_FINISHED)
>  		return MARS_ERROR_BUSY;
>  
>  	return MARS_SUCCESS;
> @@ -228,10 +230,10 @@ int workload_queue_signal_send(struct ma
>  	int block = id / MARS_WORKLOAD_PER_BLOCK;
>  	int index = id % MARS_WORKLOAD_PER_BLOCK;
>  
> -	MARS_CHECK_RET(queue->block[block].state[index] !=
> +	MARS_CHECK_RET(queue->block[block].bits[index].state !=
>  			MARS_WORKLOAD_STATE_NONE, MARS_ERROR_STATE);
>  
> -	queue->block[block].signal[index] = MARS_WORKLOAD_SIGNAL_ON;
> +	queue->block[block].bits[index].signal = MARS_WORKLOAD_SIGNAL_ON;
>  
>  	return MARS_SUCCESS;
>  }
> --- a/src/mpu/kernel/mars_kernel_scheduler.c
> +++ b/src/mpu/kernel/mars_kernel_scheduler.c
> @@ -73,33 +73,34 @@ static int search_block(int block)
>  	 * and pick the workload that has been waiting the longest
>  	 */
>  	for (i = 0; i < MARS_WORKLOAD_PER_BLOCK; i++) {
> -		struct mars_workload_queue_block *p = &queue_block;
> +		struct mars_workload_queue_block_bits *bits
> +			= &queue_block.bits[i];
>  
> -		switch (p->state[i]) {
> +		switch (bits->state) {
>  		case MARS_WORKLOAD_STATE_READY:
>  			/* priority greater than max priority so select */
> -			if ((int)p->priority[i] > max_priority) {
> +			if ((int)bits->priority > max_priority) {
>  				index = i;
> -				max_count = p->counter[i];
> -				max_priority = p->priority[i];
> +				max_count = bits->counter;
> +				max_priority = bits->priority;
>  			/* priority equal and wait counter greater so select */
> -			} else if ((int)p->priority[i] == max_priority &&
> -				(int)p->counter[i] > max_count) {
> +			} else if ((int)bits->priority == max_priority &&
> +				(int)bits->counter > max_count) {
>  				index = i;
> -				max_count = p->counter[i];
> +				max_count = bits->counter;
>  			}
>  			/* increment wait counter without overflowing */
> -			if (p->counter[i] < MARS_WORKLOAD_COUNTER_MAX)
> -				p->counter[i]++;
> +			if (bits->counter < MARS_WORKLOAD_COUNTER_MAX)
> +				bits->counter++;
>  			break;
>  		case MARS_WORKLOAD_STATE_WAITING:
>  			/* waiting for workload to finish so check status */
> -			if (p->wait[i] != MARS_WORKLOAD_ID_NONE) {
> +			if (bits->wait != MARS_WORKLOAD_ID_NONE) {
>  				struct mars_workload_queue_block wait_block;
>  				struct mars_workload_queue_block *p_wait_block;
>  
> -				int bl = p->wait[i] / MARS_WORKLOAD_PER_BLOCK;
> -				int id = p->wait[i] % MARS_WORKLOAD_PER_BLOCK;
> +				int bl = bits->wait / MARS_WORKLOAD_PER_BLOCK;
> +				int id = bits->wait % MARS_WORKLOAD_PER_BLOCK;
>  
>  				/* check if workload id is in the same block */
>  				if (block != bl) {
> @@ -109,19 +110,19 @@ static int search_block(int block)
>  					p_wait_block = &wait_block;
>  				} else {
>  					/* set pointer to check current block */
> -					p_wait_block = p;
> +					p_wait_block = &queue_block;
>  				}
>  
>  				/* check if workload is finished and reset */
> -				if (p_wait_block->state[id] ==
> +				if (p_wait_block->bits[id].state ==
>  					MARS_WORKLOAD_STATE_FINISHED) {
> -					p->wait[i] = MARS_WORKLOAD_ID_NONE;
> -					p->state[i] = MARS_WORKLOAD_STATE_READY;
> +					bits->wait = MARS_WORKLOAD_ID_NONE;
> +					bits->state = MARS_WORKLOAD_STATE_READY;
>  				}
>  			/* waiting for signal so check signal bit and reset */
> -			} else if (p->signal[i] == MARS_WORKLOAD_SIGNAL_ON) {
> -				p->signal[i] = MARS_WORKLOAD_SIGNAL_OFF;
> -				p->state[i] = MARS_WORKLOAD_STATE_READY;
> +			} else if (bits->signal == MARS_WORKLOAD_SIGNAL_ON) {
> +				bits->signal = MARS_WORKLOAD_SIGNAL_OFF;
> +				bits->state = MARS_WORKLOAD_STATE_READY;
>  				i--;
>  			}
>  			break;
> @@ -153,10 +154,12 @@ static int reserve_block(int block)
>  		index = search_block(block);
>  		if (index >= 0) {
>  			/* update the current state of the workload */
> -			queue_block.state[index] = MARS_WORKLOAD_STATE_RUNNING;
> +			queue_block.bits[index].state =
> +				MARS_WORKLOAD_STATE_RUNNING;
>  
>  			/* reset the counter for reserved workload */
> -			queue_block.counter[index] = MARS_WORKLOAD_COUNTER_MIN;
> +			queue_block.bits[index].counter =
> +				MARS_WORKLOAD_COUNTER_MIN;
>  		}
>  
>  		/* attempt to write back workload queue block to cache line */
> @@ -189,7 +192,7 @@ static void release_block(int block, int
>  			sizeof(struct mars_workload_queue_block) * block);
>  
>  		/* update current workload state in workload queue block */
> -		queue_block.state[index] = workload_state;
> +		queue_block.bits[index].state = workload_state;
>  
>  		/* attempt to write back workload queue block to cache line */
>  		status = atomic_put(&queue_block,
> @@ -223,7 +226,7 @@ int reserve_workload(void)
>  
>  	/* set global workload info based on workload block and index */
>  	workload_index = MARS_WORKLOAD_PER_BLOCK * block + index;
> -	workload_type = queue_block.type[index];
> +	workload_type = queue_block.bits[index].type;
>  	workload_ea = queue_header.context_ea +
>  			workload_index * sizeof(struct mars_workload_context);
>  
> --- a/src/mpu/kernel/mars_kernel_workload.c
> +++ b/src/mpu/kernel/mars_kernel_workload.c
> @@ -144,13 +144,14 @@ int workload_schedule(uint16_t workload_
>  			sizeof(struct mars_workload_queue_block) * block);
>  
>  		/* make sure workload is in the correct state */
> -		if (queue_block.state[index] != MARS_WORKLOAD_STATE_FINISHED) {
> +		if (queue_block.bits[index].state !=
> +			MARS_WORKLOAD_STATE_FINISHED) {
>  			atomic_event_restore();
>  			return MARS_ERROR_STATE;
>  		}
>  
>  		/* get information of workload to schedule */
> -		schedule_workload_type = queue_block.type[index];
> +		schedule_workload_type = queue_block.bits[index].type;
>  		schedule_workload_ea = queue_header.context_ea +
>  			workload_id * sizeof(struct mars_workload_context);
>  
> @@ -175,11 +176,11 @@ int workload_schedule(uint16_t workload_
>  					sizeof(struct mars_workload_context),
>  					MARS_DMA_TAG);
>  
> -		queue_block.state[index] = MARS_WORKLOAD_STATE_READY;
> -		queue_block.priority[index] = priority;
> -		queue_block.counter[index] = MARS_WORKLOAD_COUNTER_MIN;
> -		queue_block.signal[index] = MARS_WORKLOAD_SIGNAL_OFF;
> -		queue_block.wait[index] = MARS_WORKLOAD_ID_NONE;
> +		queue_block.bits[index].state = MARS_WORKLOAD_STATE_READY;
> +		queue_block.bits[index].priority = priority;
> +		queue_block.bits[index].counter = MARS_WORKLOAD_COUNTER_MIN;
> +		queue_block.bits[index].signal = MARS_WORKLOAD_SIGNAL_OFF;
> +		queue_block.bits[index].wait = MARS_WORKLOAD_ID_NONE;
>  
>  		/* attempt to write back workload queue block to cache line */
>  		status = atomic_put(&queue_block,
> @@ -216,13 +217,13 @@ int workload_wait(uint16_t workload_id)
>  			sizeof(struct mars_workload_queue_block) * block);
>  
>  		/* make sure workload is initialized */
> -		if (queue_block.state[index] == MARS_WORKLOAD_STATE_NONE) {
> +		if (queue_block.bits[index].state == MARS_WORKLOAD_STATE_NONE) {
>  			atomic_event_restore();
>  			return MARS_ERROR_STATE;
>  		}
>  
>  		/* set the workload id to wait for */
> -		queue_block.wait[index] = workload_id;
> +		queue_block.bits[index].wait = workload_id;
>  
>  		/* attempt to write back workload queue block to cache line */
>  		status = atomic_put(&queue_block,
> @@ -253,10 +254,11 @@ int workload_try_wait(uint16_t workload_
>  		offsetof(struct mars_workload_queue, block) +
>  		sizeof(struct mars_workload_queue_block) * block);
>  
> -	MARS_CHECK_RET(queue_block.state[index] != MARS_WORKLOAD_STATE_NONE,
> +	MARS_CHECK_RET(queue_block.bits[index].state !=
> +			MARS_WORKLOAD_STATE_NONE,
>  			MARS_ERROR_STATE);
>  
> -	if (queue_block.state[index] != MARS_WORKLOAD_STATE_FINISHED)
> +	if (queue_block.bits[index].state != MARS_WORKLOAD_STATE_FINISHED)
>  		return MARS_ERROR_BUSY;
>  
>  	return MARS_SUCCESS;
> @@ -282,13 +284,13 @@ int workload_signal_send(uint16_t worklo
>  			sizeof(struct mars_workload_queue_block) * block);
>  
>  		/* make sure workload is initialized */
> -		if (queue_block.state[index] == MARS_WORKLOAD_STATE_NONE) {
> +		if (queue_block.bits[index].state == MARS_WORKLOAD_STATE_NONE) {
>  			atomic_event_restore();
>  			return MARS_ERROR_STATE;
>  		}
>  
>  		/* set the workload signal */
> -		queue_block.signal[index] = MARS_WORKLOAD_SIGNAL_ON;
> +		queue_block.bits[index].signal = MARS_WORKLOAD_SIGNAL_ON;
>  
>  		/* attempt to write back workload queue block to cache line */
>  		status = atomic_put(&queue_block,
> @@ -333,11 +335,12 @@ int workload_signal_try_wait(void)
>  		offsetof(struct mars_workload_queue, block) +
>  		sizeof(struct mars_workload_queue_block) * block);
>  
> -	MARS_CHECK_RET(queue_block.state[index] != MARS_WORKLOAD_STATE_NONE,
> +	MARS_CHECK_RET(queue_block.bits[index].state !=
> +			MARS_WORKLOAD_STATE_NONE,
>  			MARS_ERROR_STATE);
>  
>  	/* return busy if task has not received signal */
> -	if (queue_block.signal[index] != MARS_WORKLOAD_SIGNAL_ON)
> +	if (queue_block.bits[index].signal != MARS_WORKLOAD_SIGNAL_ON)
>  		return MARS_ERROR_BUSY;
>  
>  	return MARS_SUCCESS;
> 
> 
> 

-- 
(ASAYAMA Kazunori
  (asayama at sm.sony.co.jp))
t