[Cbe-oss-dev] [PATCH 1/2]MARS/base: add dma kernel syscalls

Tue Feb 10 10:31:00 EST 2009

This modifies the internal dma implementation inside the kernel to support dma
transfers larger than 16KB and makes the dma API available through syscalls.

The purpose of this change is to allow for loading of workload modules larger
than 16KB and to reduce the code size of workload modules that will most likely
need to use the same dma routines.

Signed-off-by: Yuji Mano <yuji.mano at am.sony.com>
---
 base/include/mpu/mars/module.h          |   61 ++++++++++++++++++--
 base/src/common/kernel_internal_types.h |   12 +++
 base/src/mpu/kernel/Makefile.am         |    1 
 base/src/mpu/kernel/dma.c               |   97 ++++++++++++++++++++++++++++++++
 base/src/mpu/kernel/kernel.c            |   69 +++++++++++-----------
 base/src/mpu/lib/module.c               |   20 +++++-
 6 files changed, 218 insertions(+), 42 deletions(-)

--- a/base/include/mpu/mars/module.h
+++ b/base/include/mpu/mars/module.h
@@ -293,6 +293,17 @@ void mars_module_workload_finish(void);
 
 /**
  * \ingroup group_mars_workload_module
+ * \brief <b>[MPU]</b> Notify host a particular 32-bit area is modified.
+ *
+ * \param[in] watch_point_ea	- ea of modified area
+ *
+ * \return
+ *	MARS_SUCCESS		- signal sent to host
+ */
+int mars_module_host_signal_send(uint64_t watch_point_ea);
+
+/**
+ * \ingroup group_mars_workload_module
  * \brief <b>[MPU]</b> Locks a mutex.
  *
  * This function locks a mutex and blocks other requests to lock it.
@@ -327,14 +338,56 @@ int mars_module_mutex_unlock_put(uint64_
 
 /**
  * \ingroup group_mars_workload_module
- * \brief <b>[MPU]</b> Notify host a particular 32-bit area is modified.
+ * \brief <b>[MPU]</b> DMA transfer from host storage to MPU storage.
  *
- * \param[in] watch_point_ea	- ea of modified area
+ * This function begins a DMA transfer request from host storage to MPU storage.
+ * Transfer completion is not guaranteed until calling \ref mars_module_dma_wait
+ * with the corresponding tag used to request the transfer.
+ *
+ * \param[in] ls		- address of MPU storage to transfer to
+ * \param[in] ea		- ea of host storage to transfer from
+ * \param[in] size		- size of dma transfer
+ * \param[in] tag		- tag of dma transfer
+ * \return
+ *	MARS_SUCCESS		- successfully started data transfer
+ * \n	MARS_ERROR_PARAMS	- invalid tag specified
+ * \n	MARS_ERROR_ALIGN	- ls or ea not aligned properly
+ */
+int mars_module_dma_get(void *ls, uint64_t ea, uint32_t size, uint32_t tag);
+
+/**
+ * \ingroup group_mars_workload_module
+ * \brief <b>[MPU]</b> DMA transfer from MPU storage to host storage.
  *
+ * This function begins a DMA transfer request from MPU storage to host storage.
+ * Transfer completion is not guaranteed until calling \ref mars_module_dma_wait
+ * with the corresponding tag used to request the transfer.
+ *
+ * \param[in] ls		- address of MPU storage to transfer from
+ * \param[in] ea		- ea of host storage to transfer to
+ * \param[in] size		- size of dma transfer
+ * \param[in] tag		- tag of dma transfer
  * \return
- *	MARS_SUCCESS		- signal sent to host
+ *	MARS_SUCCESS		- successfully started data transfer
+ * \n	MARS_ERROR_PARAMS	- invalid tag specified
+ * \n	MARS_ERROR_ALIGN	- ls or ea not aligned properly
  */
-int mars_module_host_signal_send(uint64_t watch_point_ea);
+int mars_module_dma_put(const void *ls, uint64_t ea, uint32_t size,
+			uint32_t tag);
+
+/**
+ * \ingroup group_mars_workload_module
+ * \brief <b>[MPU]</b> Waits for completion of requested DMA transfer.
+ *
+ * This function waits until completion of all previously started DMA transfer
+ * requests with the same tag.
+ *
+ * \param[in] tag		- tag of dma transfer
+ * \return
+ *	MARS_SUCCESS		- successfully waited for transfer completion
+ * \n	MARS_ERROR_PARAMS	- invalid tag specified
+ */
+int mars_module_dma_wait(uint32_t tag);
 
 #if defined(__cplusplus)
 }
--- a/base/src/common/kernel_internal_types.h
+++ b/base/src/common/kernel_internal_types.h
@@ -76,10 +76,15 @@ struct mars_kernel_syscalls {
 	int  (*workload_schedule_end)(uint16_t id);
 	int  (*workload_schedule_cancel)(uint16_t id);
 
+	int  (*host_signal_send)(uint64_t watch_point_ea);
+
 	int  (*mutex_lock_get)(uint64_t mutex_ea, struct mars_mutex *mutex);
 	int  (*mutex_unlock_put)(uint64_t mutex_ea, struct mars_mutex *mutex);
 
-	int  (*host_signal_send)(uint64_t watch_point_ea);
+	int  (*dma_get)(void *ls, uint64_t ea, uint32_t size, uint32_t tag);
+	int  (*dma_put)(const void *ls, uint64_t ea, uint32_t size,
+			uint32_t tag);
+	int  (*dma_wait)(uint32_t tag);
 };
 
 /* mars kernel ticks */
@@ -101,6 +106,11 @@ struct mars_kernel_params {
 int mutex_lock_get(uint64_t mutex_ea, struct mars_mutex *mutex);
 int mutex_unlock_put(uint64_t mutex_ea, struct mars_mutex *mutex);
 
+/* mars kernel dma */
+int dma_get(void *ls, uint64_t ea, uint32_t size, uint32_t tag);
+int dma_put(const void *ls, uint64_t ea, uint32_t size, uint32_t tag);
+int dma_wait(uint32_t tag);
+
 /* mars module entry */
 void mars_module_entry(struct mars_kernel_syscalls *syscalls);
 
--- a/base/src/mpu/kernel/Makefile.am
+++ b/base/src/mpu/kernel/Makefile.am
@@ -96,6 +96,7 @@ noinst_PROGRAMS = mars_kernel
 
 mars_kernel_SOURCES = \
 	$(srcdir)/../../../src/common/*.h \
+	dma.c \
 	kernel.c \
 	mutex.c
 
--- /dev/null
+++ b/base/src/mpu/kernel/dma.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2008 Sony Corporation of America
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this Library and associated documentation files (the
+ * "Library"), to deal in the Library without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Library, and to
+ * permit persons to whom the Library is furnished to do so, subject to
+ * the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be
+ *  included in all copies or substantial portions of the Library.
+ *
+ *  If you modify the Library, you may copy and distribute your modified
+ *  version of the Library in object code or as an executable provided
+ *  that you also do one of the following:
+ *
+ *   Accompany the modified version of the Library with the complete
+ *   corresponding machine-readable source code for the modified version
+ *   of the Library; or,
+ *
+ *   Accompany the modified version of the Library with a written offer
+ *   for a complete machine-readable copy of the corresponding source
+ *   code of the modified version of the Library.
+ *
+ *
+ * THE LIBRARY IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * LIBRARY OR THE USE OR OTHER DEALINGS IN THE LIBRARY.
+ */
+
+#include <stdint.h>
+#include <spu_mfcio.h>
+
+#include "mars/error.h"
+
+#include "kernel_internal_types.h"
+
+#define MARS_DMA_TAG_MAX	31
+#define MARS_DMA_SIZE_MAX	16384
+#define MARS_DMA_ALIGN_MASK	0xf
+
+/*
+ * Queue a DMA of any size as MFC commands of at most MARS_DMA_SIZE_MAX
+ * bytes each (put != 0: mfc_put, else mfc_get); does not wait for them.
+ */
+static int dma_large(void *ls, uint64_t ea, uint32_t size, uint32_t tag,
+		     int put)
+{
+	if (tag > MARS_DMA_TAG_MAX)
+		return MARS_ERROR_PARAMS;
+	if (((uintptr_t)ls & MARS_DMA_ALIGN_MASK) || (ea & MARS_DMA_ALIGN_MASK))
+		return MARS_ERROR_ALIGN;
+
+	while (size) {
+		uint32_t block_size = (size < MARS_DMA_SIZE_MAX) ?
+				      size : MARS_DMA_SIZE_MAX;
+
+		if (put)
+			mfc_put((volatile void *)ls, ea, block_size, tag, 0, 0);
+		else
+			mfc_get((volatile void *)ls, ea, block_size, tag, 0, 0);
+		/* cast: arithmetic on void * is a GNU extension, not ISO C */
+		ls = (char *)ls + block_size;
+		ea += block_size;
+		size -= block_size;
+	}
+	return MARS_SUCCESS;
+}
+
+int dma_get(void *ls, uint64_t ea, uint32_t size, uint32_t tag)
+{
+	return dma_large(ls, ea, size, tag, 0);	/* put = 0: issues mfc_get */
+}
+
+int dma_put(const void *ls, uint64_t ea, uint32_t size, uint32_t tag)
+{
+	return dma_large((void *)ls, ea, size, tag, 1);	/* put = 1: mfc_put */
+}
+
+/* Block until all DMA transfers previously issued with 'tag' complete. */
+int dma_wait(uint32_t tag)
+{
+	if (tag > MARS_DMA_TAG_MAX)
+		return MARS_ERROR_PARAMS;
+	/* unsigned literal: with tag 31 a signed (1 << tag) would overflow */
+	mfc_write_tag_mask(1U << tag);
+	mfc_write_tag_update_all();
+	mfc_read_tag_status();
+	return MARS_SUCCESS;
+}
--- a/base/src/mpu/kernel/kernel.c
+++ b/base/src/mpu/kernel/kernel.c
@@ -102,32 +102,6 @@ static void kernel_memcpy(void *dst, con
 	}
 }
 
-static void dma_get(void *ls, uint64_t ea, uint32_t size)
-{
-	mfc_get((volatile void *)ls, ea, size, MARS_KERNEL_DMA_TAG, 0, 0);
-	mfc_write_tag_mask(1 << MARS_KERNEL_DMA_TAG);
-	mfc_write_tag_update_all();
-	mfc_read_tag_status();
-}
-
-static void dma_put(void *ls, uint64_t ea, uint32_t size)
-{
-	mfc_put((volatile void *)ls, ea, size, MARS_KERNEL_DMA_TAG, 0, 0);
-	mfc_write_tag_mask(1 << MARS_KERNEL_DMA_TAG);
-	mfc_write_tag_update_all();
-	mfc_read_tag_status();
-}
-
-static int host_signal_send(uint64_t watch_point_ea)
-{
-#ifdef ENABLE_COND_WAIT_FUTEX
-	spu_write_out_mbox((uint32_t)(watch_point_ea >> 32));
-	spu_write_out_intr_mbox((uint32_t)(watch_point_ea & 0xffffffff));
-#endif
-
-	return MARS_SUCCESS;
-}
-
 static uint32_t get_ticks(void)
 {
 	return kernel_params.kernel_ticks.offset - spu_read_decrementer();
@@ -169,7 +143,8 @@ static struct mars_workload_context *get
 
 	/* get the workload context from workload queue */
 	dma_get((void *)&ret_workload, get_workload_ea(id),
-		sizeof(struct mars_workload_context));
+		sizeof(struct mars_workload_context), MARS_KERNEL_DMA_TAG);
+	dma_wait(MARS_KERNEL_DMA_TAG);
 
 	return &ret_workload;
 }
@@ -474,7 +449,8 @@ static void schedule_begin_callback(uint
 
 	/* get the workload context from workload queue */
 	dma_get((void *)&schedule_workload, get_workload_ea(id),
-		sizeof(struct mars_workload_context));
+		sizeof(struct mars_workload_context), MARS_KERNEL_DMA_TAG);
+	dma_wait(MARS_KERNEL_DMA_TAG);
 }
 
 static int workload_schedule_begin(uint16_t id, uint8_t priority,
@@ -508,7 +484,8 @@ static void schedule_end_callback(uint16
 
 	/* put the workload context into workload queue */
 	dma_put((void *)&schedule_workload, get_workload_ea(id),
-		sizeof(struct mars_workload_context));
+		sizeof(struct mars_workload_context), MARS_KERNEL_DMA_TAG);
+	dma_wait(MARS_KERNEL_DMA_TAG);
 
 	/* update queue header bits */
 	update_header_bits(id / MARS_WORKLOAD_PER_BLOCK);
@@ -536,6 +513,16 @@ static int workload_schedule_cancel(uint
 			    NULL);
 }
 
+static int host_signal_send(uint64_t watch_point_ea)
+{
+#ifdef ENABLE_COND_WAIT_FUTEX
+	spu_write_out_mbox((uint32_t)(watch_point_ea >> 32));
+	spu_write_out_intr_mbox((uint32_t)(watch_point_ea & 0xffffffff));
+#endif
+
+	return MARS_SUCCESS;
+}
+
 static struct mars_kernel_syscalls kernel_syscalls =
 {
 	get_ticks,
@@ -553,9 +540,12 @@ static struct mars_kernel_syscalls kerne
 	workload_schedule_begin,
 	workload_schedule_end,
 	workload_schedule_cancel,
+	host_signal_send,
 	mutex_lock_get,
 	mutex_unlock_put,
-	host_signal_send
+	dma_get,
+	dma_put,
+	dma_wait
 };
 
 static int search_block(int block, int ready)
@@ -607,7 +597,9 @@ static int search_block(int block, int r
 				if (block != bl) {
 					/* fetch the necessary block */
 					dma_get(&wait_block, get_block_ea(bl),
-						sizeof(wait_block));
+						sizeof(wait_block),
+						MARS_KERNEL_DMA_TAG);
+					dma_wait(MARS_KERNEL_DMA_TAG);
 
 					/* set pointer to check fetched block */
 					p_wait_block = &wait_block;
@@ -699,7 +691,8 @@ static int reserve_workload(void)
 
 	/* get the workload queue header */
 	dma_get(&queue_header, kernel_params.workload_queue_ea,
-		sizeof(struct mars_workload_queue_header));
+		sizeof(struct mars_workload_queue_header), MARS_KERNEL_DMA_TAG);
+	dma_wait(MARS_KERNEL_DMA_TAG);
 
 	/* return exit status if exit flag is set from host */
 	if (queue_header.flag == MARS_WORKLOAD_QUEUE_FLAG_EXIT)
@@ -746,7 +739,9 @@ static int reserve_workload(void)
 	workload_ea = get_workload_ea(workload_id);
 
 	/* get the workload context code from workload queue */
-	dma_get(&workload, workload_ea, sizeof(struct mars_workload_context));
+	dma_get(&workload, workload_ea, sizeof(struct mars_workload_context),
+		MARS_KERNEL_DMA_TAG);
+	dma_wait(MARS_KERNEL_DMA_TAG);
 
 	return MARS_KERNEL_STATUS_BUSY;
 }
@@ -758,7 +753,9 @@ static void release_workload(void)
 	uint64_t block_ea = get_block_ea(block);
 
 	/* put the workload context into workload queue */
-	dma_put(&workload, workload_ea, sizeof(struct mars_workload_context));
+	dma_put(&workload, workload_ea, sizeof(struct mars_workload_context),
+		MARS_KERNEL_DMA_TAG);
+	dma_wait(MARS_KERNEL_DMA_TAG);
 
 	/* lock the queue block */
 	mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
@@ -806,7 +803,9 @@ static void load_workload_module(void)
 			  sizeof(struct mars_workload_module))) {
 		/* load the exec code into mpu storage from host storage */
 		dma_get((void *)MARS_WORKLOAD_MODULE_BASE_ADDR,
-			workload.module.exec_ea, workload.module.exec_size);
+			workload.module.exec_ea, workload.module.exec_size,
+			MARS_KERNEL_DMA_TAG);
+		dma_wait(MARS_KERNEL_DMA_TAG);
 
 		/* store the current cached workload module ea */
 		kernel_memcpy(&cached_workload_module, &workload.module,
--- a/base/src/mpu/lib/module.c
+++ b/base/src/mpu/lib/module.c
@@ -162,6 +162,11 @@ void mars_module_workload_finish(void)
 	(*kernel_syscalls->workload_exit)(MARS_WORKLOAD_STATE_FINISHED);
 }
 
+int mars_module_host_signal_send(uint64_t watch_point_ea)
+{
+	return (*kernel_syscalls->host_signal_send)(watch_point_ea);
+}
+
 int mars_module_mutex_lock_get(uint64_t mutex_ea, struct mars_mutex *mutex)
 {
 	return (*kernel_syscalls->mutex_lock_get)(mutex_ea, mutex);
@@ -172,7 +177,18 @@ int mars_module_mutex_unlock_put(uint64_
 	return (*kernel_syscalls->mutex_unlock_put)(mutex_ea, mutex);
 }
 
-int mars_module_host_signal_send(uint64_t watch_point_ea)
+int mars_module_dma_get(void *ls, uint64_t ea, uint32_t size, uint32_t tag)
 {
-	return (*kernel_syscalls->host_signal_send)(watch_point_ea);
+	return (*kernel_syscalls->dma_get)(ls, ea, size, tag);
+}
+
+int mars_module_dma_put(const void *ls, uint64_t ea, uint32_t size,
+			uint32_t tag)
+{
+	return (*kernel_syscalls->dma_put)(ls, ea, size, tag);
+}
+
+int mars_module_dma_wait(uint32_t tag)
+{
+	return (*kernel_syscalls->dma_wait)(tag);
 }