[Cbe-oss-dev] [RFC 4/4] spufs: SPE side implementation of kspu

Geoff Levand geoffrey.levand at am.sony.com
Wed Aug 1 05:12:58 EST 2007


Sebastian Siewior wrote:
> The SPU part of KSPU which consists of a multiplexor and one helper
> function. The multiplexor invokes the offloaded functions and performs multi
> buffering (DMA_BUFFERS=2 -> double buffering, DMA_BUFFERS=3 -> triple ...).
> The offloaded function cares only about processing the buffer and arranging
> the transfer of the result. Waiting for the transfers to complete as well as
> signaling the completion of functions is taken care of by the multiplexor.
> 
> Signed-off-by: Sebastian Siewior <sebastian at breakpoint.cc>
> 
> --- /dev/null
> +++ b/arch/powerpc/platforms/cell/spufs/kspu_util.h
> @@ -0,0 +1,30 @@
> +#ifndef KSPU_UTIL_H
> +#define KSPU_UTIL_H
> +#include <linux/wait.h>
> +
> +struct kspu_code {
> +	const unsigned int *code;
> +	unsigned int code_len;
> +	unsigned int kspu_data_offset;
> +	unsigned int queue_mask;
> +	unsigned int queue_entr_size;
> +};
> +
> +struct notify_cb_info {
> +	void *notify;
> +};
> +
> +struct kspu_context {
> +	struct spu_context *spu_ctx;
> +	wait_queue_head_t newitem_wq;
> +	void **notify_cb_info;
> +	unsigned int last_notified;
> +	struct kspu_code *spu_code;
> +	struct task_struct *thread;
> +	/* spinlock protects qlen + work_queue */
> +	spinlock_t queue_lock;
> +	unsigned int qlen;
> +	struct list_head work_queue;
> +};


Since this is where struct kspu_context is defined, shouldn't this patch
be applied earlier in your series, or the kspu_util.h hunks be moved to an
earlier patch?


> +
> +#endif
> --- /dev/null
> +++ b/arch/powerpc/platforms/cell/spufs/spu_main.c
> @@ -0,0 +1,122 @@
> +/*
> + * This code can be considered as crt0.S
> + * Compile with -O[123S] and make sure that here is only one function
> + * that starts at 0x0
> + * Author: Sebastian Siewior <sebastian at breakpoint.cc>
> + * License: GPLv2
> + */
> +#include <asm/kspu/merged_code.h>
> +#include <spu_mfcio.h>
> +#include "spu_runtime.h"
> +
> +#define barrier() __asm__ __volatile__("": : :"memory")
> +
> +static spu_operation spu_funcs[TOTAL_SPU_FUNCS] __attribute__((aligned(16))) = {
> +	[SPU_FUNC_nop] = spu_nop,
> +};
> +
> +static unsigned char kspu_buff[DMA_BUFFERS][DMA_MAX_TRANS_SIZE];
> +
> +void _start(void) __attribute__((noreturn));
> +void _start(void)
> +{
> +	struct kernel_spu_data *spu_data;
> +
> +	spu_data = (struct kernel_spu_data *) KERNEL_SPU_DATA_OFFSET;
> +
> +	while (37) {


This is a bit strange; the convention is to use while (1), which you use elsewhere.


> +		struct kspu_job *kjob;
> +		void *dma_buff;
> +		unsigned int consumed;
> +		unsigned int outstanding;
> +		unsigned int cur_req;
> +		unsigned int cur_item;
> +		unsigned int cur_buf;
> +		unsigned int i;
> +
> +		spu_stop(1);
> +		/*
> +		 * Once started, it is guaranteed that atleast DMA_BUFFERS *2
> +		 * requests are in ring buffer. The work order is:
> +		 * 1. request DMA_BUFFERS transfers, every in a seperate buffer
> +		 *    with its own tag.
> +		 * 2. process those buffers and request new ones.
> +		 * 3. if more than (DMA_BUFFERS *2) are available, than the
> +		 *    main loop begins:
> +		 *   - wait for tag to finish transfers
> +		 *   - notify done work
> +		 *   - process request
> +		 *   - write back
> +		 * 4. if no more request are available, process the last
> +		 *    DMA_BUFFERS request that are left, write them back and
> +		 *    wait until that transfers completes and spu_stop()
> +		 */
> +
...
> --- /dev/null
> +++ b/include/asm-powerpc/kspu/merged_code.h
> @@ -0,0 +1,43 @@
> +#ifndef KSPU_MERGED_CODE_H
> +#define KSPU_MERGED_CODE_H
> +
> +#define KSPU_LS_SIZE 0x40000
> +
> +#define RB_SLOTS 256
> +#define RB_MASK (RB_SLOTS-1)
> +
> +#define DMA_MAX_TRANS_SIZE (16 * 1024)
> +#define DMA_BUFFERS   2
> +#define DMA_BUFF_MASK (DMA_BUFFERS-1)
> +#define ALL_DMA_BUFFS ((1 << DMA_BUFFERS)-1)
> +
> +enum SPU_FUNCTIONS {
> +	SPU_FUNC_nop,
> +
> +	TOTAL_SPU_FUNCS,
> +};
> +
> +struct kspu_job {
> +	enum SPU_FUNCTIONS operation __attribute__((aligned(16)));
> +	unsigned long long in __attribute__((aligned(16)));
> +	unsigned int in_size __attribute__((aligned(16)));
> +	union {
> +	} __attribute__((aligned(16)));
> +};

What is this empty union for?

> +
> +typedef void (*spu_operation)(struct kspu_job *kjob, void *buffer,
> +		unsigned int buf_num);


Would this type be better as spu_operation_t?


> +struct kspu_ring_data {
> +	unsigned int consumed __attribute__((aligned(16)));
> +	unsigned int outstanding __attribute__((aligned(16)));
> +};
> +
> +struct kernel_spu_data {
> +	struct kspu_ring_data kspu_ring_data __attribute__((aligned(16)));
> +	struct kspu_job work_item[RB_SLOTS] __attribute__((aligned(16)));
> +};
> +
> +#define KERNEL_SPU_DATA_OFFSET (KSPU_LS_SIZE - sizeof(struct kernel_spu_data))
> +
> +#endif
> 







More information about the cbe-oss-dev mailing list