[Cbe-oss-dev] spufs: add spu stats in sysfs and ctx stat file to spufs

Andre Detsch adetsch at br.ibm.com
Fri Jul 13 00:54:49 EST 2007


Subject: spufs: add spu stats in sysfs and ctx stat file to spufs

From: André Detsch <adetsch at br.ibm.com>

This patch exports per-context statistics in spufs, as well as spu
statistics in sysfs.

It was formed by merging:
"spufs: add spu stats in sysfs"   From: Christoph Hellwig
"spufs: add stat file to spufs"   From: Christoph Hellwig
"spufs: Fix libassist accounting" From: Jeremy Kerr
"spusched: fix spu utilization statistics" From: Luke Browning
And some adjustments by me, following suggestions on cbe-oss-dev.

Having separate patches was making the review process harder than it
should be, as we ended up integrating the spu and ctx statistics
accounting much more tightly than in the first implementation.
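
For reference, both new files print a single line in the same field
order, taken from spu_stat_show()/spufs_show_stat() below: the
utilization state name, the four state times (user, system, iowait,
loaded/idle) in milliseconds, then voluntary and involuntary context
switches, SLB faults, hash faults, minor faults, major faults,
class 2 interrupts, and the libassist count.  A minimal sketch of a
userspace reader for the per-context file (the mount point and
context name are illustrative, assuming spufs is mounted at /spu):

	#include <stdio.h>

	int main(void)
	{
		char state[16];
		unsigned long long v[12];
		/* hypothetical context directory under a /spu mount */
		FILE *f = fopen("/spu/my_ctx/stat", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		/* state name first, then twelve 64-bit counters */
		if (fscanf(f, "%15s %llu %llu %llu %llu %llu %llu %llu "
			   "%llu %llu %llu %llu %llu", state,
			   &v[0], &v[1], &v[2], &v[3], &v[4], &v[5],
			   &v[6], &v[7], &v[8], &v[9], &v[10], &v[11]) == 13)
			printf("state=%s user=%llums system=%llums "
			       "iowait=%llums loaded=%llums\n",
			       state, v[0], v[1], v[2], v[3]);
		fclose(f);
		return 0;
	}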

Signed-off-by: André Detsch <adetsch at br.ibm.com>

Index: linux-2.6.22-rc7/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- linux-2.6.22-rc7.orig/arch/powerpc/platforms/cell/spu_base.c
+++ linux-2.6.22-rc7/arch/powerpc/platforms/cell/spu_base.c
@@ -203,7 +203,7 @@ static int __spu_trap_data_seg(struct sp
 		spu->slb_replace = 0;
 
 	spu_restart_dma(spu);
-
+	spu->stats.slb_flt++;
 	return 0;
 }
 
@@ -359,6 +359,7 @@ spu_irq_class_2(int irq, void *data)
 	if (stat & 0x10) /* SPU mailbox threshold */
 		spu->wbox_callback(spu);
 
+	spu->stats.class2_intr++;
 	return stat ? IRQ_HANDLED : IRQ_NONE;
 }
 
@@ -596,6 +597,7 @@ static int __init create_spu(void *data)
 	int ret;
 	static int number;
 	unsigned long flags;
+	struct timespec ts;
 
 	ret = -ENOMEM;
 	spu = kzalloc(sizeof (*spu), GFP_KERNEL);
@@ -634,6 +636,10 @@ static int __init create_spu(void *data)
 	spin_unlock_irqrestore(&spu_full_list_lock, flags);
 	mutex_unlock(&spu_full_list_mutex);
 
+	spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
+	ktime_get_ts(&ts);
+	spu->stats.tstamp = timespec_to_ns(&ts);
+
 	INIT_LIST_HEAD(&spu->aff_list);
 
 	goto out;
@@ -648,6 +654,53 @@ out:
 	return ret;
 }
 
+static const char *spu_state_names[] = {
+	"user", "system", "iowait", "idle"
+};
+
+static unsigned long long spu_acct_time(struct spu *spu,
+		enum spu_utilization_state state)
+{
+	struct timespec ts;
+	unsigned long long time = spu->stats.times[state];
+
+	/*
+	 * If the spu is idle or the context is stopped, utilization
+	 * statistics are not updated.  Apply the time delta from the
+	 * last recorded state of the spu.
+	 */
+	if (spu->stats.util_state == state) {
+		ktime_get_ts(&ts);
+		time += timespec_to_ns(&ts) - spu->stats.tstamp;
+	}
+
+	return time / NSEC_PER_MSEC;
+}
+
+
+static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
+{
+	struct spu *spu = container_of(sysdev, struct spu, sysdev);
+
+	return sprintf(buf, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		spu_state_names[spu->stats.util_state],
+		spu_acct_time(spu, SPU_UTIL_USER),
+		spu_acct_time(spu, SPU_UTIL_SYSTEM),
+		spu_acct_time(spu, SPU_UTIL_IOWAIT),
+		spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
+		spu->stats.vol_ctx_switch,
+		spu->stats.invol_ctx_switch,
+		spu->stats.slb_flt,
+		spu->stats.hash_flt,
+		spu->stats.min_flt,
+		spu->stats.maj_flt,
+		spu->stats.class2_intr,
+		spu->stats.libassist);
+}
+
+static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL);
+
 /* Hardcoded affinity idxs for QS20 */
 #define SPES_PER_BE 8
 static int QS20_reg_idxs[SPES_PER_BE] =   { 0, 2, 4, 6, 7, 5, 3, 1 };
@@ -815,6 +868,8 @@ static int __init init_spu_base(void)
 #endif
 	mutex_unlock(&spu_full_list_mutex);
 
+	spu_add_sysdev_attr(&attr_stat);
+
 	if (of_has_vicinity()) {
 		init_aff_fw_vicinity();
 	} else {
Index: linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/fault.c
===================================================================
--- linux-2.6.22-rc7.orig/arch/powerpc/platforms/cell/spufs/fault.c
+++ linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/fault.c
@@ -33,7 +33,8 @@
  * function. Currently, there are a few corner cases that we haven't had
  * to handle fortunately.
  */
-static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
+static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+		unsigned long dsisr, unsigned *flt)
 {
 	struct vm_area_struct *vma;
 	unsigned long is_write;
@@ -73,7 +74,8 @@ good_area:
 			goto bad_area;
 	}
 	ret = 0;
-	switch (handle_mm_fault(mm, vma, ea, is_write)) {
+	*flt = handle_mm_fault(mm, vma, ea, is_write);
+	switch (*flt) {
 	case VM_FAULT_MINOR:
 		current->min_flt++;
 		break;
@@ -153,6 +155,7 @@ int spufs_handle_class1(struct spu_conte
 {
 	u64 ea, dsisr, access;
 	unsigned long flags;
+	unsigned flt = 0;
 	int ret;
 
 	/*
@@ -178,9 +181,15 @@ int spufs_handle_class1(struct spu_conte
 	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
 		return 0;
 
+	spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
+
 	pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
 		dsisr, ctx->state);
 
+	ctx->stats.hash_flt++;
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		ctx->spu->stats.hash_flt++;
+
 	/* we must not hold the lock when entering spu_handle_mm_fault */
 	spu_release(ctx);
 
@@ -192,7 +201,7 @@ int spufs_handle_class1(struct spu_conte
 
 	/* hashing failed, so try the actual fault handler */
 	if (ret)
-		ret = spu_handle_mm_fault(current->mm, ea, dsisr);
+		ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt);
 
 	spu_acquire(ctx);
 	/*
@@ -201,11 +210,23 @@ int spufs_handle_class1(struct spu_conte
 	 * In case of unhandled error report the problem to user space.
 	 */
 	if (!ret) {
+		if (flt == VM_FAULT_MINOR)
+			ctx->stats.min_flt++;
+		else
+			ctx->stats.maj_flt++;
+		if (ctx->state == SPU_STATE_RUNNABLE) {
+			if (flt == VM_FAULT_MINOR)
+				ctx->spu->stats.min_flt++;
+			else
+				ctx->spu->stats.maj_flt++;
+		}
+
 		if (ctx->spu)
 			ctx->ops->restart_dma(ctx);
 	} else
 		spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
 
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(spufs_handle_class1);
Index: linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/file.c
===================================================================
--- linux-2.6.22-rc7.orig/arch/powerpc/platforms/cell/spufs/file.c
+++ linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/file.c
@@ -2060,6 +2060,95 @@ static const struct file_operations spuf
 	.release	= single_release,
 };
 
+static const char *ctx_state_names[] = {
+	"user", "system", "iowait", "loaded"
+};
+
+static unsigned long long spufs_acct_time(struct spu_context *ctx,
+		enum spu_utilization_state state)
+{
+	struct timespec ts;
+	unsigned long long time = ctx->stats.times[state];
+
+	/*
+	 * In general, utilization statistics are updated by the controlling
+	 * thread as the spu context moves through various well defined
+	 * state transitions, but if the context is lazily loaded its
+	 * utilization statistics are not updated as the controlling thread
+	 * is not tightly coupled with the execution of the spu context.  We
+	 * calculate and apply the time delta from the last recorded state
+	 * of the spu context.
+	 */
+	if (ctx->spu && ctx->stats.util_state == state) {
+		ktime_get_ts(&ts);
+		time += timespec_to_ns(&ts) - ctx->stats.tstamp;
+	}
+
+	return time / NSEC_PER_MSEC;
+}
+
+static unsigned long long spufs_slb_flts(struct spu_context *ctx)
+{
+	unsigned long long slb_flts = ctx->stats.slb_flt;
+
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		slb_flts += (ctx->spu->stats.slb_flt -
+			     ctx->stats.slb_flt_base);
+	}
+
+	return slb_flts;
+}
+
+static unsigned long long spufs_class2_intrs(struct spu_context *ctx)
+{
+	unsigned long long class2_intrs = ctx->stats.class2_intr;
+
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		class2_intrs += (ctx->spu->stats.class2_intr -
+				 ctx->stats.class2_intr_base);
+	}
+
+	return class2_intrs;
+}
+
+
+static int spufs_show_stat(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+
+	spu_acquire(ctx);
+	seq_printf(s, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		ctx_state_names[ctx->stats.util_state],
+		spufs_acct_time(ctx, SPU_UTIL_USER),
+		spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
+		spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
+		spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
+		ctx->stats.vol_ctx_switch,
+		ctx->stats.invol_ctx_switch,
+		spufs_slb_flts(ctx),
+		ctx->stats.hash_flt,
+		ctx->stats.min_flt,
+		ctx->stats.maj_flt,
+		spufs_class2_intrs(ctx),
+		ctx->stats.libassist);
+	spu_release(ctx);
+	return 0;
+}
+
+static int spufs_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_stat_fops = {
+	.open		= spufs_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
 struct tree_descr spufs_dir_contents[] = {
 	{ "capabilities", &spufs_caps_fops, 0444, },
 	{ "mem",  &spufs_mem_fops,  0666, },
@@ -2094,6 +2183,7 @@ struct tree_descr spufs_dir_contents[] =
 	{ "dma_info", &spufs_dma_info_fops, 0444, },
 	{ "proxydma_info", &spufs_proxydma_info_fops, 0444, },
 	{ "tid", &spufs_tid_fops, 0444, },
+	{ "stat", &spufs_stat_fops, 0444, },
 	{},
 };
 
@@ -2118,6 +2208,7 @@ struct tree_descr spufs_dir_nosched_cont
 	{ "phys-id", &spufs_id_ops, 0666, },
 	{ "object-id", &spufs_object_id_ops, 0666, },
 	{ "tid", &spufs_tid_fops, 0444, },
+	{ "stat", &spufs_stat_fops, 0444, },
 	{},
 };
 
Index: linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/run.c
===================================================================
--- linux-2.6.22-rc7.orig/arch/powerpc/platforms/cell/spufs/run.c
+++ linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/run.c
@@ -128,6 +128,8 @@ out:
 
 static int spu_run_init(struct spu_context *ctx, u32 * npc)
 {
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
 	if (ctx->flags & SPU_CREATE_ISOLATE) {
 		unsigned long runcntl;
 
@@ -153,6 +155,8 @@ static int spu_run_init(struct spu_conte
 		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
 	}
 
+	spuctx_switch_state(ctx, SPU_UTIL_USER);
+
 	return 0;
 }
 
@@ -163,6 +167,8 @@ static int spu_run_fini(struct spu_conte
 
 	*status = ctx->ops->status_read(ctx);
 	*npc = ctx->ops->npc_read(ctx);
+
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
 	spu_release(ctx);
 
 	if (signal_pending(current))
@@ -332,6 +338,9 @@ long spufs_run_spu(struct file *file, st
 		spu = ctx->spu;
 		if (unlikely(ret))
 			break;
+
+		spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
 		if (unlikely(test_bit(SPU_SCHED_NOTIFY_ACTIVE,
 				      &ctx->sched_flags))) {
 			clear_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
@@ -363,6 +372,11 @@ long spufs_run_spu(struct file *file, st
 				      SPU_STATUS_STOPPED_BY_HALT |
 				       SPU_STATUS_SINGLE_STEP)));
 
+	if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	    (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) &&
+	    (ctx->state == SPU_STATE_RUNNABLE))
+		ctx->stats.libassist++;
+
 	spu_disable_spu(ctx);
 	ret = spu_run_fini(ctx, npc, &status);
 	spu_yield(ctx);
Index: linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-2.6.22-rc7.orig/arch/powerpc/platforms/cell/spufs/sched.c
+++ linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/sched.c
@@ -393,10 +393,16 @@ static void spu_bind_context(struct spu 
 	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
 		 spu->number, spu->node);
 
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
 	if (ctx->flags & SPU_CREATE_NOSCHED)
 		atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
 	if (!list_empty(&ctx->aff_list))
 		atomic_inc(&ctx->gang->aff_sched_count);
+
+	ctx->stats.slb_flt_base = spu->stats.slb_flt;
+	ctx->stats.class2_intr_base = spu->stats.class2_intr;
+
 	spu->ctx = ctx;
 	spu->flags = 0;
 	ctx->spu = spu;
@@ -416,6 +422,8 @@ static void spu_bind_context(struct spu 
 	spu_cpu_affinity_set(spu, raw_smp_processor_id());
 	spu_switch_notify(spu, ctx);
 	ctx->state = SPU_STATE_RUNNABLE;
+
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
 }
 
 /**
@@ -428,6 +436,8 @@ static void spu_unbind_context(struct sp
 	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
 		 spu->pid, spu->number, spu->node);
 
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
  	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
 		atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
  	if (!list_empty(&ctx->aff_list))
@@ -447,9 +457,17 @@ static void spu_unbind_context(struct sp
 	spu->pid = 0;
 	spu->tgid = 0;
 	ctx->ops = &spu_backing_ops;
-	ctx->spu = NULL;
 	spu->flags = 0;
 	spu->ctx = NULL;
+
+	ctx->stats.slb_flt +=
+		(spu->stats.slb_flt - ctx->stats.slb_flt_base);
+	ctx->stats.class2_intr +=
+		(spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+	/* This maps the underlying spu state to idle */
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	ctx->spu = NULL;
 }
 
 /**
@@ -595,6 +613,8 @@ static struct spu *find_victim(struct sp
 			}
 			spu_remove_from_active_list(spu);
 			spu_unbind_context(spu, victim);
+			victim->stats.invol_ctx_switch++;
+			spu->stats.invol_ctx_switch++;
 			mutex_unlock(&victim->state_mutex);
 			/*
 			 * We need to break out of the wait loop in spu_run
@@ -620,7 +640,6 @@ static struct spu *find_victim(struct sp
  */
 int spu_activate(struct spu_context *ctx, unsigned long flags)
 {
-
 	do {
 		struct spu *spu;
 
@@ -693,6 +712,8 @@ static int __spu_deactivate(struct spu_c
 		if (new || force) {
 			spu_remove_from_active_list(spu);
 			spu_unbind_context(spu, ctx);
+			ctx->stats.vol_ctx_switch++;
+			spu->stats.vol_ctx_switch++;
 			spu_free(spu);
 			if (new)
 				wake_up(&new->stop_wq);
@@ -765,6 +786,8 @@ static void spusched_tick(struct spu_con
 
 			__spu_remove_from_active_list(spu);
 			spu_unbind_context(spu, ctx);
+			ctx->stats.invol_ctx_switch++;
+			spu->stats.invol_ctx_switch++;
 			spu_free(spu);
 			wake_up(&new->stop_wq);
 			/*
Index: linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/spufs.h
===================================================================
--- linux-2.6.22-rc7.orig/arch/powerpc/platforms/cell/spufs/spufs.h
+++ linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -40,6 +40,7 @@ enum {
 struct spu_context_ops;
 struct spu_gang;
 
+
 /* ctx->sched_flags */
 enum {
 	SPU_SCHED_NOTIFY_ACTIVE,
@@ -95,6 +96,24 @@ struct spu_context {
 	int policy;
 	int prio;
 
+	/* statistics */
+	struct {
+		/* updates protected by ctx->state_mutex */
+		enum spu_utilization_state util_state;
+		unsigned long long tstamp;		/* time of last state switch */
+		unsigned long long times[SPU_UTIL_MAX];
+		unsigned long long vol_ctx_switch;
+		unsigned long long invol_ctx_switch;
+		unsigned long long min_flt;
+		unsigned long long maj_flt;
+		unsigned long long hash_flt;
+		unsigned long long slb_flt;
+		unsigned long long slb_flt_base; /* # at last ctx switch */
+		unsigned long long class2_intr;
+		unsigned long long class2_intr_base; /* # at last ctx switch */
+		unsigned long long libassist;
+	} stats;
+
 	struct list_head aff_list;
 	int aff_head;
 	int aff_offset;
@@ -283,4 +302,41 @@ struct spufs_coredump_reader {
 extern struct spufs_coredump_reader spufs_coredump_read[];
 extern int spufs_coredump_num_notes;
 
+/*
+ * This function is a little bit too large for an inline, but
+ * as fault.c is built into the kernel we can't move it out of
+ * line.
+ */
+static inline void spuctx_switch_state(struct spu_context *ctx,
+		enum spu_utilization_state new_state)
+{
+	unsigned long long curtime;
+	signed long long delta;
+	struct timespec ts;
+	struct spu *spu;
+	enum spu_utilization_state old_state;
+
+	ktime_get_ts(&ts);
+	curtime = timespec_to_ns(&ts);
+	delta = curtime - ctx->stats.tstamp;
+
+	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
+	WARN_ON(delta < 0);
+
+	spu = ctx->spu;
+	old_state = ctx->stats.util_state;
+	ctx->stats.util_state = new_state;
+	ctx->stats.tstamp = curtime;
+
+	/*
+	 * Update the physical SPU utilization statistics.
+	 */
+	if (spu) {
+		ctx->stats.times[old_state] += delta;
+		spu->stats.times[old_state] += delta;
+		spu->stats.util_state = new_state;
+		spu->stats.tstamp = curtime;
+	}
+}
+
 #endif
Index: linux-2.6.22-rc7/include/asm-powerpc/spu.h
===================================================================
--- linux-2.6.22-rc7.orig/include/asm-powerpc/spu.h
+++ linux-2.6.22-rc7/include/asm-powerpc/spu.h
@@ -106,6 +106,14 @@ struct spu_context;
 struct spu_runqueue;
 struct device_node;
 
+enum spu_utilization_state {
+	SPU_UTIL_USER,
+	SPU_UTIL_SYSTEM,
+	SPU_UTIL_IOWAIT,
+	SPU_UTIL_IDLE_LOADED,
+	SPU_UTIL_MAX
+};
+
 struct spu {
 	const char *name;
 	unsigned long local_store_phys;
@@ -161,6 +169,21 @@ struct spu {
 
 	int has_mem_affinity;
 	struct list_head aff_list;
+
+	struct {
+		/* protected by interrupt reentrancy */
+		enum spu_utilization_state util_state;
+		unsigned long long tstamp;
+		unsigned long long times[SPU_UTIL_MAX];
+		unsigned long long vol_ctx_switch;
+		unsigned long long invol_ctx_switch;
+		unsigned long long min_flt;
+		unsigned long long maj_flt;
+		unsigned long long hash_flt;
+		unsigned long long slb_flt;
+		unsigned long long class2_intr;
+		unsigned long long libassist;
+	} stats;
 };
 
 struct cbe_spu_info {
Index: linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/context.c
===================================================================
--- linux-2.6.22-rc7.orig/arch/powerpc/platforms/cell/spufs/context.c
+++ linux-2.6.22-rc7/arch/powerpc/platforms/cell/spufs/context.c
@@ -61,6 +61,7 @@ struct spu_context *alloc_spu_context(st
 		spu_gang_add_ctx(gang, ctx);
 	ctx->cpus_allowed = current->cpus_allowed;
 	spu_set_timeslice(ctx);
+	ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
 
 	atomic_inc(&nr_spu_contexts);
 	goto out;
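
A usage note on the sysfs side: since spu_acct_time() reports times in
milliseconds and folds in the delta since the last state switch, two
reads of the per-SPU file are enough for a simple utilization sampler.
A sketch, assuming the sysdev attribute appears as
/sys/devices/system/spu/spu0/stat (the path is an assumption based on
the sysdev class name):

	#include <stdio.h>
	#include <unistd.h>

	/* read the "user" time (second field, in ms) from a stat file */
	static int read_user_ms(const char *path, unsigned long long *ms)
	{
		char state[16];
		FILE *f = fopen(path, "r");

		if (!f)
			return -1;
		if (fscanf(f, "%15s %llu", state, ms) != 2) {
			fclose(f);
			return -1;
		}
		fclose(f);
		return 0;
	}

	int main(void)
	{
		/* assumed sysfs path for the first physical SPU */
		const char *path = "/sys/devices/system/spu/spu0/stat";
		unsigned long long a, b;

		if (read_user_ms(path, &a))
			return 1;
		sleep(1);
		if (read_user_ms(path, &b))
			return 1;
		/* (b - a) ms of user time out of a 1000 ms sample window */
		printf("spu0 user utilization: ~%llu%%\n", (b - a) / 10);
		return 0;
	}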


