[Cbe-oss-dev] [PATCH] fix physical spu utilization statistics

Luke Browning lukebr at linux.vnet.ibm.com
Sun Jul 8 08:43:46 EST 2007


This patch fixes physical spu utilization.  

SPU contexts only accrue SPU time while they are loaded or running on an
SPU.  Time spent on the runqueue is not an interesting physical spu
utilization statistic. This patch fixes that bug by only measure time
when the context is associated with an SPU. 

This patch also use the timebase register to improve granularity of time
stamping and provide a more repeatable statistic. Many of the critical
sections that are measured fall into the few microsecond level.  ie.
context switching.

For SPE contexts, utilization should be interpreted as:

The "user" category measures the time spent executing SPE application
code.  The "system" utilization category is a measure of system overhead
and includes time spent context switching and executing remote system
calls.  The physical SPU is not available to be used to schedule other
SPU contexts while in this state.  The "iowait" time measures time spent
processing page faults and provides an indication of the potential
benefit that be realized by tuning the I/O subsystem and/or the
application.  The "loaded" statistic reflects the time spent lazily
loaded.  

Statistics are also collected at the Logical SPUs level.  In general,
the SPE context statistics are simply added to the corresponding logical
SPE utilization bucket, except the SPE context "loaded" category is
added to the system spu "idle" bucket.

Fixes made so far:
- white space formatting
- unbind ctxt state transition marked as loaded, so that underlying 
  physical spu is marked as idle. The state of the spu context in this 
  case doesn't matter, since it is not bound to a physical spu.

Signed-off-by: Luke Browning <lukebrowning at us.ibm.com>
---
Index: linux-2.6.22-rc5/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- linux-2.6.22-rc5.orig/arch/powerpc/platforms/cell/spu_base.c	2007-07-04 11:57:31.000000000 -0300
+++ linux-2.6.22-rc5/arch/powerpc/platforms/cell/spu_base.c	2007-07-05 13:19:05.000000000 -0300
@@ -637,8 +637,8 @@
 
 	INIT_LIST_HEAD(&spu->aff_list);
 
-	spu->stats.utilization_state = SPU_UTIL_IDLE;
-	spu->stats.tstamp = jiffies;
+	spu->stats.util_state = SPU_UTIL_IDLE;
+	spu->stats.tstamp = get_tbl();
 
 	goto out;
 
@@ -661,10 +661,15 @@
 {
 	unsigned long long time = spu->stats.times[state];
 
-	if (spu->stats.utilization_state == state)
-		time += jiffies - spu->stats.tstamp;
+	/*
+	 * If the spu is idle or the context is stopped, utilization
+	 * statistics are not updated.  Apply the time delta from the
+	 * last recorded state of the spu.
+	 */
+	if (spu->stats.util_state == state)
+		time += get_tbl() - spu->stats.tstamp;
 
-	return jiffies_to_msecs(time);
+	return time / tb_ticks_per_usec / 1000;			/* msecs */
 }
 
 
@@ -674,7 +679,7 @@
 
 	return sprintf(buf, "%s %llu %llu %llu %llu "
 		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
-		spu_state_names[spu->stats.utilization_state],
+		spu_state_names[spu->stats.util_state],
 		spu_acct_time(spu, SPU_UTIL_USER),
 		spu_acct_time(spu, SPU_UTIL_SYSTEM),
 		spu_acct_time(spu, SPU_UTIL_IOWAIT),
Index: linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/context.c
===================================================================
--- linux-2.6.22-rc5.orig/arch/powerpc/platforms/cell/spufs/context.c	2007-07-04 11:57:31.000000000 -0300
+++ linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/context.c	2007-07-04 12:32:38.000000000 -0300
@@ -61,8 +61,7 @@
 		spu_gang_add_ctx(gang, ctx);
 	ctx->cpus_allowed = current->cpus_allowed;
 	spu_set_timeslice(ctx);
-	ctx->stats.execution_state = SPUCTX_UTIL_USER;
-	ctx->stats.tstamp = jiffies;
+	ctx->stats.util_state = SPUCTX_UTIL_USER;
 
 	atomic_inc(&nr_spu_contexts);
 	goto out;
Index: linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/fault.c
===================================================================
--- linux-2.6.22-rc5.orig/arch/powerpc/platforms/cell/spufs/fault.c	2007-07-04 11:57:31.000000000 -0300
+++ linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/fault.c	2007-07-04 12:32:38.000000000 -0300
@@ -187,10 +187,8 @@
 		dsisr, ctx->state);
 
 	ctx->stats.hash_flt++;
-	if (ctx->state == SPU_STATE_RUNNABLE) {
+	if (ctx->state == SPU_STATE_RUNNABLE)
 		ctx->spu->stats.hash_flt++;
-		spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT);
-	}
 
 	/* we must not hold the lock when entering spu_handle_mm_fault */
 	spu_release(ctx);
Index: linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/file.c
===================================================================
--- linux-2.6.22-rc5.orig/arch/powerpc/platforms/cell/spufs/file.c	2007-07-04 11:57:31.000000000 -0300
+++ linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/file.c	2007-07-05 13:19:30.000000000 -0300
@@ -2065,14 +2065,23 @@
 };
 
 static unsigned long long spufs_acct_time(struct spu_context *ctx,
-		enum spuctx_execution_state state)
+		enum spuctx_utilization_state state)
 {
 	unsigned long time = ctx->stats.times[state];
 
-	if (ctx->stats.execution_state == state)
-		time += jiffies - ctx->stats.tstamp;
+	/*
+	 * In general, utilization statistics are updated by the controlling
+	 * thread as the spu context moves through various well defined
+	 * state transitions, but if the context is lazily loaded its
+	 * utilization statistics are not updated as the controlling thread
+	 * is not tightly coupled with the execution of the spu context.  We
+	 * calculate and apply the time delta from the last recorded state
+	 * of the spu context.
+	 */
+	if (ctx->spu && ctx->stats.util_state == state)
+		time += get_tbl() - ctx->stats.tstamp;
 
-	return jiffies_to_msecs(time);
+	return time / tb_ticks_per_usec / 1000;			/* msecs */
 }
 
 static unsigned long long spufs_slb_flts(struct spu_context *ctx)
@@ -2107,7 +2116,7 @@
 	spu_acquire(ctx);
 	seq_printf(s, "%s %llu %llu %llu %llu "
 		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
-		ctx_state_names[ctx->stats.execution_state],
+		ctx_state_names[ctx->stats.util_state],
 		spufs_acct_time(ctx, SPUCTX_UTIL_USER),
 		spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM),
 		spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT),
Index: linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/run.c
===================================================================
--- linux-2.6.22-rc5.orig/arch/powerpc/platforms/cell/spufs/run.c	2007-07-04 11:57:31.000000000 -0300
+++ linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/run.c	2007-07-05 13:20:58.000000000 -0300
@@ -128,6 +128,9 @@
 
 static int spu_run_init(struct spu_context *ctx, u32 * npc)
 {
+
+	spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
+
 	if (ctx->flags & SPU_CREATE_ISOLATE) {
 		unsigned long runcntl;
 
@@ -153,6 +156,8 @@
 		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
 	}
 
+	spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
+
 	return 0;
 }
 
@@ -161,8 +166,12 @@
 {
 	int ret = 0;
 
+	spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
+
 	*status = ctx->ops->status_read(ctx);
 	*npc = ctx->ops->npc_read(ctx);
+
+	spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED);
 	spu_release(ctx);
 
 	if (signal_pending(current))
@@ -332,6 +341,9 @@
 		spu = ctx->spu;
 		if (unlikely(ret))
 			break;
+
+		spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
+
 		if (unlikely(test_bit(SPU_SCHED_NOTIFY_ACTIVE,
 				      &ctx->sched_flags))) {
 			clear_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
Index: linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-2.6.22-rc5.orig/arch/powerpc/platforms/cell/spufs/sched.c	2007-07-04 11:57:31.000000000 -0300
+++ linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/sched.c	2007-07-07 19:27:37.000000000 -0300
@@ -392,6 +392,9 @@
 {
 	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
 		 spu->number, spu->node);
+
+	spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
+
 	if (ctx->flags & SPU_CREATE_NOSCHED)
 		atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
 	if (!list_empty(&ctx->aff_list))
@@ -404,6 +407,7 @@
 	spu->flags = 0;
 	ctx->spu = spu;
 	ctx->ops = &spu_hw_ops;
+	ctx->state = SPU_STATE_RUNNABLE;
 	spu->pid = current->pid;
 	spu->tgid = current->tgid;
 	spu_associate_mm(spu, ctx->owner);
@@ -418,8 +422,8 @@
 	spu->timestamp = jiffies;
 	spu_cpu_affinity_set(spu, raw_smp_processor_id());
 	spu_switch_notify(spu, ctx);
-	ctx->state = SPU_STATE_RUNNABLE;
-	spu_switch_state(spu, SPU_UTIL_SYSTEM);
+
+	spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED);
 }
 
 /**
@@ -432,7 +436,7 @@
 	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
 		 spu->pid, spu->number, spu->node);
 
-	spu_switch_state(spu, SPU_UTIL_IDLE);
+	spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
 
  	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
 		atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
@@ -443,7 +447,6 @@
 	spu_unmap_mappings(ctx);
 	spu_save(&ctx->csa, spu);
 	spu->timestamp = jiffies;
-	ctx->state = SPU_STATE_SAVED;
 	spu->ibox_callback = NULL;
 	spu->wbox_callback = NULL;
 	spu->stop_callback = NULL;
@@ -453,14 +456,18 @@
 	spu->pid = 0;
 	spu->tgid = 0;
 	ctx->ops = &spu_backing_ops;
-	ctx->spu = NULL;
-	spu->flags = 0;
 	spu->ctx = NULL;
+	spu->flags = 0;
 
 	ctx->stats.slb_flt +=
 		(spu->stats.slb_flt - ctx->stats.slb_flt_base);
 	ctx->stats.class2_intr +=
 		(spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+	/* This maps the underlying spu state to idle */
+	spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED);
+	ctx->spu = NULL;
+	ctx->state = SPU_STATE_SAVED;
 }
 
 /**
@@ -633,8 +640,6 @@
  */
 int spu_activate(struct spu_context *ctx, unsigned long flags)
 {
-	spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
-
 	do {
 		struct spu *spu;
 
@@ -738,7 +743,6 @@
 	}
 
 	__spu_deactivate(ctx, 1, MAX_PRIO);
-	spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
 }
 
 /**
@@ -753,12 +757,7 @@
 {
 	if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
 		mutex_lock(&ctx->state_mutex);
-		if (__spu_deactivate(ctx, 0, MAX_PRIO))
-			spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
-		else {
-			spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED);
-			spu_switch_state(ctx->spu, SPU_UTIL_USER);
-		}
+		__spu_deactivate(ctx, 0, MAX_PRIO);
 		mutex_unlock(&ctx->state_mutex);
 	}
 }
Index: linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/spufs.h
===================================================================
--- linux-2.6.22-rc5.orig/arch/powerpc/platforms/cell/spufs/spufs.h	2007-07-04 11:57:31.000000000 -0300
+++ linux-2.6.22-rc5/arch/powerpc/platforms/cell/spufs/spufs.h	2007-07-05 13:27:19.000000000 -0300
@@ -49,12 +49,14 @@
  * This is the state for spu utilization reporting to userspace.
  * Because this state is visible to userspace it must never change and needs
  * to be kept strictly separate from any internal state kept by the kernel.
+ *
+ * Must be kept in sync with spu_utilization_state.
  */
-enum spuctx_execution_state {
+enum spuctx_utilization_state {
 	SPUCTX_UTIL_USER = 0,
 	SPUCTX_UTIL_SYSTEM,
 	SPUCTX_UTIL_IOWAIT,
-	SPUCTX_UTIL_LOADED,
+	SPUCTX_UTIL_LOADED,	/* mapped to SPU_UTIL_IDLE at system spu level*/
 	SPUCTX_UTIL_MAX
 };
 
@@ -115,7 +117,7 @@
 	/* statistics */
 	struct {
 		/* updates protected by ctx->state_mutex */
-		enum spuctx_execution_state execution_state;
+		enum spuctx_utilization_state util_state;
 		unsigned long tstamp;		/* time of last ctx switch */
 		unsigned long times[SPUCTX_UTIL_MAX];
 		unsigned long long vol_ctx_switch;
@@ -320,30 +322,31 @@
  * line.
  */
 static inline void spuctx_switch_state(struct spu_context *ctx,
-		enum spuctx_execution_state new_state)
+		enum spuctx_utilization_state new_state)
 {
-	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
-
-	if (ctx->stats.execution_state != new_state) {
-		unsigned long curtime = jiffies;
+	unsigned long long curtime = get_tbl();
+	unsigned long long delta = curtime - ctx->stats.tstamp;
+	struct spu *spu;
+	enum spuctx_utilization_state old_state;
 
-		ctx->stats.times[ctx->stats.execution_state] +=
-				 curtime - ctx->stats.tstamp;
-		ctx->stats.tstamp = curtime;
-		ctx->stats.execution_state = new_state;
-	}
-}
+	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
 
-static inline void spu_switch_state(struct spu *spu,
-		enum spuctx_execution_state new_state)
-{
-	if (spu->stats.utilization_state != new_state) {
-		unsigned long curtime = jiffies;
+	if ((signed long) delta < 0)
+		return;
 
-		spu->stats.times[spu->stats.utilization_state] +=
-				 curtime - spu->stats.tstamp;
+	spu = ctx->spu;
+	old_state = ctx->stats.util_state;
+	ctx->stats.util_state = new_state;
+	ctx->stats.tstamp = curtime;
+
+	/*
+	 * Update the physical SPU utilization statistics.
+	 */
+	if (spu) {
+		ctx->stats.times[old_state] += delta;
+		spu->stats.times[old_state] += delta;
+		spu->stats.util_state = new_state;
 		spu->stats.tstamp = curtime;
-		spu->stats.utilization_state = new_state;
 	}
 }
 
Index: linux-2.6.22-rc5/include/asm-powerpc/spu.h
===================================================================
--- linux-2.6.22-rc5.orig/include/asm-powerpc/spu.h	2007-07-04 11:57:31.000000000 -0300
+++ linux-2.6.22-rc5/include/asm-powerpc/spu.h	2007-07-04 12:32:38.000000000 -0300
@@ -106,9 +106,15 @@
 struct spu_runqueue;
 struct device_node;
 
+/*
+ * MUST be kept in sync with spuctx_utilization_statistics as those
+ * utilization buckets are dumped into the spu structure's. The spu_context
+ * category SPUCTX_UTIL_LOAD is dumped into SPU_UTIL_IDLE as the lazily loaded
+ * context may be preempted by any context that is ready to run.
+ */
 enum spu_utilization_state {
-	SPU_UTIL_SYSTEM,
 	SPU_UTIL_USER,
+	SPU_UTIL_SYSTEM,
 	SPU_UTIL_IOWAIT,
 	SPU_UTIL_IDLE,
 	SPU_UTIL_MAX
@@ -172,9 +178,9 @@
 
 	struct {
 		/* protected by interrupt reentrancy */
-		enum spu_utilization_state utilization_state;
-		unsigned long tstamp;		/* time of last ctx switch */
-		unsigned long times[SPU_UTIL_MAX];
+		enum spu_utilization_state util_state;
+		unsigned long long tstamp;
+		unsigned long long times[SPU_UTIL_MAX];
 		unsigned long long vol_ctx_switch;
 		unsigned long long invol_ctx_switch;
 		unsigned long long min_flt;





More information about the cbe-oss-dev mailing list