[Cbe-oss-dev] [PATCH 1/3] spufs: Add marker-based tracing facility

Jeremy Kerr jk at ozlabs.org
Fri Jan 11 15:03:26 EST 2008


From: Christoph Hellwig <hch at lst.de>

This adds markers two important points in the spufs code and a new
module (sputrace.ko) that allows reading these out through a proc file.

Long-term I'd rather see something like lttng extended to use the spufs
instrumentation, but for now I think this is a good enough quick
solution.  We'll probably want to add various addition event in addition
to that ones I have already.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Signed-off-by: Jeremy Kerr <jk at ozlabs.org>

---
 arch/powerpc/platforms/cell/Kconfig          |    7 
 arch/powerpc/platforms/cell/spufs/Makefile   |    2 
 arch/powerpc/platforms/cell/spufs/file.c     |    6 
 arch/powerpc/platforms/cell/spufs/sched.c    |   28 ++-
 arch/powerpc/platforms/cell/spufs/spufs.h    |    5 
 arch/powerpc/platforms/cell/spufs/sputrace.c |  250 +++++++++++++++++++++++++++
 6 files changed, 291 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index e1e2f6a..68d0b17 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -54,6 +54,13 @@ config SPU_FS_64K_LS
 	  uses 4K pages. This can improve performances of applications
 	  using multiple SPEs by lowering the TLB pressure on them.
 
+config SPU_TRACE
+	tristate "SPU event tracing support"
+	depends on SPU_FS && MARKERS
+	help
+	  This option allows reading a trace of spu-related events through
+	  the sputrace file in procfs.
+
 config SPU_BASE
 	bool
 	default n
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
index d3a349f..99610a6 100644
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -4,6 +4,8 @@ spufs-y += inode.o file.o context.o syscalls.o coredump.o
 spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
 spufs-y += switch.o fault.o lscsa_alloc.o
 
+obj-$(CONFIG_SPU_TRACE)	+= sputrace.o
+
 # Rules to build switch.o with the help of SPU tool chain
 SPU_CROSS	:= spu-
 SPU_CC		:= $(SPU_CROSS)gcc
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 3fcd064..1018acd 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -29,6 +29,7 @@
 #include <linux/poll.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
+#include <linux/marker.h>
 
 #include <asm/io.h>
 #include <asm/semaphore.h>
@@ -358,6 +359,8 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
 	struct spu_context *ctx = vma->vm_file->private_data;
 	unsigned long area, offset = address - vma->vm_start;
 
+	spu_context_nospu_trace(spufs_ps_nopfn__enter, ctx);
+
 	offset += vma->vm_pgoff << PAGE_SHIFT;
 	if (offset >= ps_size)
 		return NOPFN_SIGBUS;
@@ -375,11 +378,14 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
 
 	if (ctx->state == SPU_STATE_SAVED) {
 		up_read(&current->mm->mmap_sem);
+		spu_context_nospu_trace(spufs_ps_nopfn__sleep, ctx);
 		spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+		spu_context_trace(spufs_ps_nopfn__wake, ctx, ctx->spu);
 		down_read(&current->mm->mmap_sem);
 	} else {
 		area = ctx->spu->problem_phys + ps_offs;
 		vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT);
+		spu_context_trace(spufs_ps_nopfn__insert, ctx, ctx->spu);
 	}
 
 	spu_release(ctx);
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 8c8af11..939e62a 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -39,6 +39,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/marker.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -240,8 +241,8 @@ EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
  */
 static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
-		 spu->number, spu->node);
+	spu_context_trace(spu_bind_context__enter, ctx, spu);
+
 	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
 
 	if (ctx->flags & SPU_CREATE_NOSCHED)
@@ -423,8 +424,8 @@ static int has_affinity(struct spu_context *ctx)
  */
 static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
-		 spu->pid, spu->number, spu->node);
+	spu_context_trace(spu_unbind_context__enter, ctx, spu);
+
 	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
 
  	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
@@ -552,6 +553,8 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
 	struct spu *spu, *aff_ref_spu;
 	int node, n;
 
+	spu_context_nospu_trace(spu_get_idle__enter, ctx);
+
 	if (ctx->gang) {
 		mutex_lock(&ctx->gang->aff_mutex);
 		if (has_affinity(ctx)) {
@@ -570,8 +573,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
 			if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
 				ctx->gang->aff_ref_spu = NULL;
 			mutex_unlock(&ctx->gang->aff_mutex);
-
-			return NULL;
+			goto not_found;
 		}
 		mutex_unlock(&ctx->gang->aff_mutex);
 	}
@@ -589,12 +591,14 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
 		mutex_unlock(&cbe_spu_info[node].list_mutex);
 	}
 
+ not_found:
+	spu_context_nospu_trace(spu_get_idle__not_found, ctx);
 	return NULL;
 
  found:
 	spu->alloc_state = SPU_USED;
 	mutex_unlock(&cbe_spu_info[node].list_mutex);
-	pr_debug("Got SPU %d %d\n", spu->number, spu->node);
+	spu_context_trace(spu_get_idle__found, ctx, spu);
 	spu_init_channels(spu);
 	return spu;
 }
@@ -611,6 +615,8 @@ static struct spu *find_victim(struct spu_context *ctx)
 	struct spu *spu;
 	int node, n;
 
+	spu_context_nospu_trace(spu_find_vitim__enter, ctx);
+
 	/*
 	 * Look for a possible preemption candidate on the local node first.
 	 * If there is no candidate look at the other nodes.  This isn't
@@ -664,6 +670,8 @@ static struct spu *find_victim(struct spu_context *ctx)
 				goto restart;
 			}
 
+			spu_context_trace(__spu_deactivate__unload, ctx, spu);
+
 			mutex_lock(&cbe_spu_info[node].list_mutex);
 			cbe_spu_info[node].nr_active--;
 			spu_unbind_context(spu, victim);
@@ -846,6 +854,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
  */
 void spu_deactivate(struct spu_context *ctx)
 {
+	spu_context_nospu_trace(spu_deactivate__enter, ctx);
 	__spu_deactivate(ctx, 1, MAX_PRIO);
 }
 
@@ -859,6 +868,7 @@ void spu_deactivate(struct spu_context *ctx)
  */
 void spu_yield(struct spu_context *ctx)
 {
+	spu_context_nospu_trace(spu_yield__enter, ctx);
 	if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
 		mutex_lock(&ctx->state_mutex);
 		__spu_deactivate(ctx, 0, MAX_PRIO);
@@ -888,11 +898,15 @@ static noinline void spusched_tick(struct spu_context *ctx)
 		goto out;
 
 	spu = ctx->spu;
+
+	spu_context_trace(spusched_tick__preempt, ctx, spu);
+
 	new = grab_runnable_context(ctx->prio + 1, spu->node);
 	if (new) {
 		spu_unschedule(spu, ctx);
 		spu_add_to_rq(ctx);
 	} else {
+		spu_context_nospu_trace(spusched_tick__newslice, ctx);
 		ctx->time_slice++;
 	}
 out:
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 0e11403..795a1b5 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -325,4 +325,9 @@ extern void spu_free_lscsa(struct spu_state *csa);
 extern void spuctx_switch_state(struct spu_context *ctx,
 		enum spu_utilization_state new_state);
 
+#define spu_context_trace(name, ctx, spu) \
+	trace_mark(name, "%p %p", ctx, spu);
+#define spu_context_nospu_trace(name, ctx) \
+	trace_mark(name, "%p", ctx);
+
 #endif
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
new file mode 100644
index 0000000..2b1953f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2007 IBM Deutschland Entwicklung GmbH
+ *	Released under GPL v2.
+ *
+ * Partially based on net/ipv4/tcp_probe.c.
+ *
+ * Simple tracing facility for spu contexts.
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/proc_fs.h>
+#include <linux/wait.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include "spufs.h"
+
+struct spu_probe {
+	const char *name;
+	const char *format;
+	marker_probe_func *probe_func;
+};
+
+struct sputrace {
+	ktime_t tstamp;
+	int owner_tid; /* owner */
+	int curr_tid;
+	const char *name;
+	int number;
+};
+
+static int bufsize __read_mostly = 16384;
+MODULE_PARM_DESC(bufsize, "Log buffer size (number of records)");
+module_param(bufsize, int, 0);
+
+
+static DEFINE_SPINLOCK(sputrace_lock);
+static DECLARE_WAIT_QUEUE_HEAD(sputrace_wait);
+static ktime_t sputrace_start;
+static unsigned long sputrace_head, sputrace_tail;
+static struct sputrace *sputrace_log;
+
+static int sputrace_used(void)
+{
+	return (sputrace_head - sputrace_tail) % bufsize;
+}
+
+static inline int sputrace_avail(void)
+{
+	return bufsize - sputrace_used();
+}
+
+static int sputrace_sprint(char *tbuf, int n)
+{
+	const struct sputrace *t = sputrace_log + sputrace_tail % bufsize;
+	struct timespec tv =
+		ktime_to_timespec(ktime_sub(t->tstamp, sputrace_start));
+
+	return snprintf(tbuf, n,
+		"[%lu.%09lu] %d: %s (thread = %d, spu = %d)\n",
+		(unsigned long) tv.tv_sec,
+		(unsigned long) tv.tv_nsec,
+		t->owner_tid,
+		t->name,
+		t->curr_tid,
+		t->number);
+}
+
+static ssize_t sputrace_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	int error = 0, cnt = 0;
+
+	if (!buf || len < 0)
+		return -EINVAL;
+
+	while (cnt < len) {
+		char tbuf[128];
+		int width;
+
+		error = wait_event_interruptible(sputrace_wait,
+						 sputrace_used() > 0);
+		if (error)
+			break;
+
+		spin_lock(&sputrace_lock);
+		if (sputrace_head == sputrace_tail) {
+			spin_unlock(&sputrace_lock);
+			continue;
+		}
+
+		width = sputrace_sprint(tbuf, sizeof(tbuf));
+		if (width < len)
+			sputrace_tail = (sputrace_tail + 1) % bufsize;
+		spin_unlock(&sputrace_lock);
+
+		if (width >= len)
+			break;
+
+		error = copy_to_user(buf + cnt, tbuf, width);
+		if (error)
+			break;
+		cnt += width;
+	}
+
+	return cnt == 0 ? error : cnt;
+}
+
+static int sputrace_open(struct inode *inode, struct file *file)
+{
+	spin_lock(&sputrace_lock);
+	sputrace_head = sputrace_tail = 0;
+	sputrace_start = ktime_get();
+	spin_unlock(&sputrace_lock);
+
+	return 0;
+}
+
+static const struct file_operations sputrace_fops = {
+	.owner	= THIS_MODULE,
+	.open	= sputrace_open,
+	.read	= sputrace_read,
+};
+
+static void sputrace_log_item(const char *name, struct spu_context *ctx,
+		struct spu *spu)
+{
+	spin_lock(&sputrace_lock);
+	if (sputrace_avail() > 1) {
+		struct sputrace *t = sputrace_log + sputrace_head;
+
+		t->tstamp = ktime_get();
+		t->owner_tid = ctx->tid;
+		t->name = name;
+		t->curr_tid = current->pid;
+		t->number = spu ? spu->number : -1;
+
+		sputrace_head = (sputrace_head + 1) % bufsize;
+	} else {
+		printk(KERN_WARNING
+		       "sputrace: lost samples due to full buffer.\n");
+	}
+	spin_unlock(&sputrace_lock);
+
+	wake_up(&sputrace_wait);
+}
+
+static void spu_context_event(const struct marker *mdata,
+		void *private, const char *format, ...)
+{
+	struct spu_probe *p = mdata->private;
+	va_list ap;
+	struct spu_context *ctx;
+	struct spu *spu;
+
+	va_start(ap, format);
+	ctx = va_arg(ap, struct spu_context *);
+	spu = va_arg(ap, struct spu *);
+
+	sputrace_log_item(p->name, ctx, spu);
+	va_end(ap);
+}
+
+static void spu_context_nospu_event(const struct marker *mdata,
+		void *private, const char *format, ...)
+{
+	struct spu_probe *p = mdata->private;
+	va_list ap;
+	struct spu_context *ctx;
+
+	va_start(ap, format);
+	ctx = va_arg(ap, struct spu_context *);
+
+	sputrace_log_item(p->name, ctx, NULL);
+	va_end(ap);
+}
+
+struct spu_probe spu_probes[] = {
+	{ "spu_bind_context__enter", "%p %p", spu_context_event },
+	{ "spu_unbind_context__enter", "%p %p", spu_context_event },
+	{ "spu_get_idle__enter", "%p", spu_context_nospu_event },
+	{ "spu_get_idle__found", "%p %p", spu_context_event },
+	{ "spu_get_idle__not_found", "%p", spu_context_nospu_event },
+	{ "spu_find_victim__enter", "%p", spu_context_nospu_event },
+	{ "spusched_tick__preempt", "%p %p", spu_context_event },
+	{ "spusched_tick__newslice", "%p", spu_context_nospu_event },
+	{ "spu_yield__enter", "%p", spu_context_nospu_event },
+	{ "spu_deactivate__enter", "%p", spu_context_nospu_event },
+	{ "__spu_deactivate__unload", "%p %p", spu_context_event },
+	{ "spufs_ps_nopfn__enter", "%p", spu_context_nospu_event },
+	{ "spufs_ps_nopfn__sleep", "%p", spu_context_nospu_event },
+	{ "spufs_ps_nopfn__wake", "%p %p", spu_context_event },
+	{ "spufs_ps_nopfn__insert", "%p %p", spu_context_event },
+	{ "spu_acquire_saved__enter", "%p", spu_context_nospu_event },
+	{ "destroy_spu_context__enter", "%p", spu_context_nospu_event },
+};
+
+static int __init sputrace_init(void)
+{
+	struct proc_dir_entry *entry;
+	int i, error = -ENOMEM;
+
+	sputrace_log = kcalloc(sizeof(struct sputrace),
+				bufsize, GFP_KERNEL);
+	if (!sputrace_log)
+		goto out;
+
+	entry = create_proc_entry("sputrace", S_IRUSR, NULL);
+	if (!entry)
+		goto out_free_log;
+	entry->proc_fops = &sputrace_fops;
+
+	for (i = 0; i < ARRAY_SIZE(spu_probes); i++) {
+		struct spu_probe *p = &spu_probes[i];
+
+		error = marker_probe_register(p->name, p->format,
+					      p->probe_func, p);
+		if (error)
+			printk(KERN_INFO "Unable to register probe %s\n",
+					p->name);
+
+		error = marker_arm(p->name);
+		if (error)
+			printk(KERN_INFO "Unable to arm probe %s\n", p->name);
+	}
+
+	return 0;
+
+out_free_log:
+	kfree(sputrace_log);
+out:
+	return -ENOMEM;
+}
+
+static void __exit sputrace_exit(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(spu_probes); i++)
+		marker_probe_unregister(spu_probes[i].name);
+
+	remove_proc_entry("sputrace", NULL);
+	kfree(sputrace_log);
+}
+
+module_init(sputrace_init);
+module_exit(sputrace_exit);
+
+MODULE_LICENSE("GPL");



More information about the cbe-oss-dev mailing list