[Cbe-oss-dev] [patch 2/5] Add support to OProfile for profiling Cell/B.E. SPUs
Olof Johansson
olof at lixom.net
Tue Jul 3 09:56:23 EST 2007
Hi,
Can you run this through checkpatch? Lots of little style issues throughout. Some
of them are pointed out below.
Also, only the IBM processors have their reg_setup functions converted
from void to int. Please fix the rest of them as well.
On Tue, Jun 19, 2007 at 12:42:49AM +0200, Arnd Bergmann wrote:
> Index: linux-2.6/arch/powerpc/configs/cell_defconfig
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/configs/cell_defconfig
> +++ linux-2.6/arch/powerpc/configs/cell_defconfig
> @@ -1492,7 +1492,8 @@ CONFIG_HAS_IOPORT=y
> # Instrumentation Support
> #
> CONFIG_PROFILING=y
> -CONFIG_OPROFILE=y
> +CONFIG_OPROFILE=m
> +CONFIG_OPROFILE_CELL=y
Why switch to module in this patch? Doesn't seem related?
> Index: linux-2.6/arch/powerpc/oprofile/cell/pr_util.h
> ===================================================================
> --- /dev/null
> +++ linux-2.6/arch/powerpc/oprofile/cell/pr_util.h
> @@ -0,0 +1,90 @@
> + /*
> + * Cell Broadband Engine OProfile Support
> + *
> + * (C) Copyright IBM Corporation 2006
> + *
> + * Author: Maynard Johnson <maynardj at us.ibm.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef PR_UTIL_H
> +#define PR_UTIL_H
> +
> +#include <linux/cpumask.h>
> +#include <linux/oprofile.h>
> +#include <asm/cell-pmu.h>
> +#include <asm/spu.h>
> +
> +#include "../../platforms/cell/cbe_regs.h"
Can't that be <platforms/cell/cbe_regs.h>?
> Index: linux-2.6/arch/powerpc/oprofile/cell/spu_profiler.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6/arch/powerpc/oprofile/cell/spu_profiler.c
> @@ -0,0 +1,220 @@
> +/*
> + * Cell Broadband Engine OProfile Support
> + *
> + * (C) Copyright IBM Corporation 2006
> + *
> + * Authors: Maynard Johnson <maynardj at us.ibm.com>
> + * Carl Love <carll at us.ibm.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/hrtimer.h>
> +#include <linux/smp.h>
> +#include <linux/slab.h>
> +#include <asm/cell-pmu.h>
> +#include <asm/time.h>
> +#include "pr_util.h"
> +
> +#define TRACE_ARRAY_SIZE 1024
> +#define SCALE_SHIFT 14
> +
> +static u32 * samples;
static u32 *samples;
> +static int spu_prof_running = 0;
> +static unsigned int profiling_interval = 0;
Don't initialize these to 0 — static variables are zero-initialized by
default, so the explicit initializers are unnecessary.
> +extern int spu_prof_num_nodes;
> +
> +
> +#define NUM_SPU_BITS_TRBUF 16
> +#define SPUS_PER_TB_ENTRY 4
> +#define SPUS_PER_NODE 8
> +
> +#define SPU_PC_MASK 0xFFFF
> +
> +static spinlock_t sample_array_lock=SPIN_LOCK_UNLOCKED;
Don't do this. DEFINE_SPINLOCK() should be used, since that'll make lockdep
work properly.
> +unsigned long sample_array_lock_flags;
> +
> +void set_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
> +{
> + unsigned long nsPerCyc;
Mixed-case variable names (nsPerCyc)? Kernel style is lower_case, e.g. ns_per_cyc.
> + if (!freq_khz)
> + freq_khz = ppc_proc_freq/1000;
> +
> + /* To calculate a timeout in nanoseconds, the basic
> + * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
> + * To avoid floating point math, we use the scale math
> + * technique as described in linux/jiffies.h. We use
> + * a scale factor of SCALE_SHIFT,which provides 4 decimal places
> + * of precision, which is close enough for the purpose at hand.
> + *
> + * The value of the timeout should be small enough that the hw
> + * trace buffer will not get more then a bout 1/3 full for the
> + * maximum user specified (the LFSR value) hw sampling frequency.
> + * This is to ensure the trace buffer will never fill even if the
> + * kernel thread scheduling varies under a heavey system load.
> + */
> +
> + nsPerCyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
> + profiling_interval = (nsPerCyc * cycles_reset) >> SCALE_SHIFT;
> +
> +}
> +
> +/*
> + * Extract SPU PC from trace buffer entry
> + */
> +static void spu_pc_extract(int cpu, int entry)
> +{
> + /* the trace buffer is 128 bits */
> + u64 trace_buffer[2];
> + u64 spu_mask;
> + int spu;
> +
> + spu_mask = SPU_PC_MASK;
> +
> + /* Each SPU PC is 16 bits; hence, four spus in each of
> + * the two 64-bit buffer entries that make up the
> + * 128-bit trace_buffer entry. Process two 64-bit values
> + * simultaneously.
> + * trace[0] SPU PC contents are: 0 1 2 3
> + * trace[1] SPU PC contents are: 4 5 6 7
> + */
> +
> + cbe_read_trace_buffer(cpu, trace_buffer);
> +
> + for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
> + /* spu PC trace entry is upper 16 bits of the
> + * 18 bit SPU program counter
> + */
> + samples[spu * TRACE_ARRAY_SIZE + entry]
> + = (spu_mask & trace_buffer[0]) << 2;
> + samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
> + = (spu_mask & trace_buffer[1]) << 2;
> +
> + trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
> + trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
> + }
> +}
> +
> +static int cell_spu_pc_collection(int cpu)
> +{
> + u32 trace_addr;
> + int entry;
> +
> + /* process the collected SPU PC for the node */
> +
> + entry = 0;
> +
> + trace_addr = cbe_read_pm(cpu, trace_address);
> + while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY))
> + {
> + /* there is data in the trace buffer to process */
> + spu_pc_extract(cpu, entry);
> +
> + entry++;
> +
> + if (entry >= TRACE_ARRAY_SIZE)
> + /* spu_samples is full */
> + break;
> +
> + trace_addr = cbe_read_pm(cpu, trace_address);
> + }
> +
> + return(entry);
> +}
> +
> +
> +static enum hrtimer_restart profile_spus(struct hrtimer * timer)
> +{
> + ktime_t kt;
> + int cpu, node, k, num_samples, spu_num;
> +
> + if (!spu_prof_running)
> + goto stop;
> +
> + for_each_online_cpu(cpu) {
> + if (cbe_get_hw_thread_id(cpu))
> + continue;
> +
> + node = cbe_cpu_to_node(cpu);
> +
> + /* There should only be on kernel thread at a time processing
> + * the samples. In the very unlikely case that the processing
> + * is taking a very long time and multiple kernel threads are
> + * started to process the samples. Make sure only one kernel
> + * thread is working on the samples array at a time. The
> + * sample array must be loaded and then processed for a given
> + * cpu. The sample array is not per cpu.
> + */
> + spin_lock_irqsave(&sample_array_lock,
> + sample_array_lock_flags);
> + num_samples = cell_spu_pc_collection(cpu);
> +
> + if (num_samples == 0) {
> + spin_unlock_irqrestore(&sample_array_lock,
> + sample_array_lock_flags);
> + continue;
> + }
> +
> + for (k = 0; k < SPUS_PER_NODE; k++) {
> + spu_num = k + (node * SPUS_PER_NODE);
> + spu_sync_buffer(spu_num,
> + samples + (k * TRACE_ARRAY_SIZE),
> + num_samples);
> + }
> +
> + spin_unlock_irqrestore(&sample_array_lock,
> + sample_array_lock_flags);
> +
> + }
> + smp_wmb();
Why do you need this barrier here?
> +
> + kt = ktime_set(0, profiling_interval);
> + if (!spu_prof_running)
> + goto stop;
> + hrtimer_forward(timer, timer->base->get_time(), kt);
> + return HRTIMER_RESTART;
> +
> + stop:
> + printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
> + return HRTIMER_NORESTART;
> +}
> +
> +static struct hrtimer timer;
> +/*
> + * Entry point for SPU profiling.
> + * NOTE: SPU profiling is done system-wide, not per-CPU.
> + *
> + * cycles_reset is the count value specified by the user when
> + * setting up OProfile to count SPU_CYCLES.
> + */
> +void start_spu_profiling(unsigned int cycles_reset) {
> +
> + ktime_t kt;
> +
> + pr_debug("timer resolution: %lu\n",
> + TICK_NSEC);
> + kt = ktime_set(0, profiling_interval);
> + hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + timer.expires = kt;
> + timer.function = profile_spus;
> +
> + /* Allocate arrays for collecting SPU PC samples */
> + samples = (u32 *) kzalloc(SPUS_PER_NODE *
> + TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
> +
> + spu_prof_running = 1;
> + hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
> +}
> +
> +void stop_spu_profiling(void)
> +{
> + spu_prof_running = 0;
> + hrtimer_cancel(&timer);
> + kfree(samples);
> + pr_debug("SPU_PROF: stop_spu_profiling issued\n");
> +}
> Index: linux-2.6/arch/powerpc/oprofile/cell/spu_task_sync.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6/arch/powerpc/oprofile/cell/spu_task_sync.c
> @@ -0,0 +1,464 @@
> +/*
> + * Cell Broadband Engine OProfile Support
> + *
> + * (C) Copyright IBM Corporation 2006
> + *
> + * Author: Maynard Johnson <maynardj at us.ibm.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +/* The purpose of this file is to handle SPU event task switching
> + * and to record SPU context information into the OProfile
> + * event buffer.
> + *
> + * Additionally, the spu_sync_buffer function is provided as a helper
> + * for recoding actual SPU program counter samples to the event buffer.
> + */
> +#include <linux/dcookies.h>
> +#include <linux/kref.h>
> +#include <linux/mm.h>
> +#include <linux/module.h>
> +#include <linux/notifier.h>
> +#include <linux/numa.h>
> +#include <linux/oprofile.h>
> +#include <linux/spinlock.h>
> +#include "pr_util.h"
> +
> +#define RELEASE_ALL 9999
> +
> +static spinlock_t buffer_lock = SPIN_LOCK_UNLOCKED;
> +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED;
Same thing here with DEFINE_SPINLOCK()
> +static int num_spu_nodes;
> +int spu_prof_num_nodes;
> +int last_guard_val[MAX_NUMNODES * 8];
> +
> +/* Container for caching information about an active SPU task. */
> +struct cached_info {
> + struct vma_to_fileoffset_map * map;
> + struct spu * the_spu; /* needed to access pointer to local_store */
> + struct kref cache_ref;
> +};
> +
> +static struct cached_info * spu_info[MAX_NUMNODES * 8];
There are spaces between the '*' and the variable name all over this patch;
kernel style is "struct foo *bar", with no space after the '*'.
> +
> +static void destroy_cached_info(struct kref * kref)
> +{
> + struct cached_info * info;
> + info = container_of(kref, struct cached_info, cache_ref);
> + vma_map_free(info->map);
> + kfree(info);
> + module_put(THIS_MODULE);
> +}
> +
> +/* Return the cached_info for the passed SPU number.
> + * ATTENTION: Callers are responsible for obtaining the
> + * cache_lock if needed prior to invoking this function.
> + */
> +static struct cached_info * get_cached_info(struct spu * the_spu, int spu_num)
> +{
> + struct kref * ref;
> + struct cached_info * ret_info;
> + if (spu_num >= num_spu_nodes) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: Invalid index %d into spu info cache\n",
> + __FUNCTION__, __LINE__, spu_num);
> + ret_info = NULL;
> + goto out;
> + }
> + if (!spu_info[spu_num] && the_spu) {
> + ref = spu_get_profile_private_kref(the_spu->ctx);
> + if (ref) {
> + spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
> + kref_get(&spu_info[spu_num]->cache_ref);
> + }
> + }
> +
> + ret_info = spu_info[spu_num];
> + out:
> + return ret_info;
> +}
> +
> +
> +/* Looks for cached info for the passed spu. If not found, the
> + * cached info is created for the passed spu.
> + * Returns 0 for success; otherwise, -1 for error.
> + */
> +static int
> +prepare_cached_spu_info(struct spu * spu, unsigned long objectId)
> +{
> + unsigned long flags;
> + struct vma_to_fileoffset_map * new_map;
> + int retval = 0;
> + struct cached_info * info;
> +
> + /* We won't bother getting cache_lock here since
> + * don't do anything with the cached_info that's returned.
> + */
> + info = get_cached_info(spu, spu->number);
> +
> + if (info) {
> + pr_debug("Found cached SPU info.\n");
> + goto out;
> + }
> +
> + /* Create cached_info and set spu_info[spu->number] to point to it.
> + * spu->number is a system-wide value, not a per-node value.
> + */
> + info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
> + if (!info) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: create vma_map failed\n",
> + __FUNCTION__, __LINE__);
> + retval = -ENOMEM;
> + goto err_alloc;
> + }
> + new_map = create_vma_map(spu, objectId);
> + if (!new_map) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: create vma_map failed\n",
> + __FUNCTION__, __LINE__);
> + retval = -ENOMEM;
> + goto err_alloc;
> + }
> +
> + pr_debug("Created vma_map\n");
> + info->map = new_map;
> + info->the_spu = spu;
> + kref_init(&info->cache_ref);
> + spin_lock_irqsave(&cache_lock, flags);
> + spu_info[spu->number] = info;
> + /* Increment count before passing off ref to SPUFS. */
> + kref_get(&info->cache_ref);
> +
> + /* We increment the module refcount here since SPUFS is
> + * responsible for the final destruction of the cached_info,
> + * and it must be able to access the destroy_cached_info()
> + * function defined in the OProfile module. We decrement
> + * the module refcount in destroy_cached_info.
> + */
> + try_module_get(THIS_MODULE);
> + spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
> + destroy_cached_info);
> + spin_unlock_irqrestore(&cache_lock, flags);
> + goto out;
> +
> +err_alloc:
> + kfree(info);
> +out:
> + return retval;
> +}
> +
> +/*
> + * NOTE: The caller is responsible for locking the
> + * cache_lock prior to calling this function.
> + */
> +static int release_cached_info(int spu_index)
> +{
> + int index, end;
> + if (spu_index == RELEASE_ALL) {
> + end = num_spu_nodes;
> + index = 0;
> + } else {
> + if (spu_index >= num_spu_nodes) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: "
> + "Invalid index %d into spu info cache\n",
> + __FUNCTION__, __LINE__, spu_index);
Indentation is off here.
> + goto out;
> + }
> + end = spu_index +1;
> + index = spu_index;
> + }
> + for (; index < end; index++) {
> + if (spu_info[index]) {
> + kref_put(&spu_info[index]->cache_ref,
> + destroy_cached_info);
> + spu_info[index] = NULL;
> + }
> + }
> +
> +out:
> + return 0;
> +}
> +
> +/* The source code for fast_get_dcookie was "borrowed"
> + * from drivers/oprofile/buffer_sync.c.
> + */
> +
> +/* Optimisation. We can manage without taking the dcookie sem
> + * because we cannot reach this code without at least one
> + * dcookie user still being registered (namely, the reader
> + * of the event buffer).
> + */
> +static inline unsigned long fast_get_dcookie(struct dentry * dentry,
> + struct vfsmount * vfsmnt)
> +{
> + unsigned long cookie;
> +
> + if (dentry->d_cookie)
> + return (unsigned long)dentry;
> + get_dcookie(dentry, vfsmnt, &cookie);
> + return cookie;
> +}
> +
> +/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
> + * which corresponds loosely to "application name". Also, determine
> + * the offset for the SPU ELF object. If computed offset is
> + * non-zero, it implies an embedded SPU object; otherwise, it's a
> + * separate SPU binary, in which case we retrieve it's dcookie.
> + * For the embedded case, we must determine if SPU ELF is embedded
> + * in the executable application or another file (i.e., shared lib).
> + * If embedded in a shared lib, we must get the dcookie and return
> + * that to the caller.
> + */
> +static unsigned long
> +get_exec_dcookie_and_offset(struct spu * spu, unsigned int * offsetp,
> + unsigned long * spu_bin_dcookie,
> + unsigned long spu_ref)
> +{
> + unsigned long app_cookie = 0;
> + unsigned int my_offset = 0;
> + struct file * app = NULL;
> + struct vm_area_struct * vma;
> + struct mm_struct * mm = spu->mm;
> +
> + if (!mm)
> + goto out;
> +
> + for (vma = mm->mmap; vma; vma = vma->vm_next) {
> + if (!vma->vm_file)
> + continue;
> + if (!(vma->vm_flags & VM_EXECUTABLE))
> + continue;
> + app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
> + vma->vm_file->f_vfsmnt);
> + pr_debug("got dcookie for %s\n",
> + vma->vm_file->f_dentry->d_name.name);
> + app = vma->vm_file;
> + break;
> + }
> +
> + for (vma = mm->mmap; vma; vma = vma->vm_next) {
> + if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
> + continue;
> + my_offset = spu_ref - vma->vm_start;
> + if (!vma->vm_file)
> + goto fail_no_image_cookie;
> +
> + pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
> + my_offset, spu_ref,
> + vma->vm_file->f_dentry->d_name.name);
> + *offsetp = my_offset;
> + break;
> + }
> +
> + *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
> + vma->vm_file->f_vfsmnt);
> + pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
> +
> +out:
> + return app_cookie;
> +
> +fail_no_image_cookie:
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: Cannot find dcookie for SPU binary\n",
> + __FUNCTION__, __LINE__);
> + goto out;
> +}
> +
> +
> +
> +/* This function finds or creates cached context information for the
> + * passed SPU and records SPU context information into the OProfile
> + * event buffer.
> + */
> +static int process_context_switch(struct spu * spu, unsigned long objectId)
> +{
> + unsigned long flags;
> + int retval;
> + unsigned int offset = 0;
> + unsigned long spu_cookie = 0, app_dcookie;
> +
> + retval = prepare_cached_spu_info(spu, objectId);
> + if (retval)
> + goto out;
> +
> + /* Get dcookie first because a mutex_lock is taken in that
> + * code path, so interrupts must not be disabled.
> + */
> + app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
> + if (!app_dcookie || !spu_cookie) {
> + retval = -ENOENT;
> + goto out;
> + }
> +
> + /* Record context info in event buffer */
> + spin_lock_irqsave(&buffer_lock, flags);
> + add_event_entry(ESCAPE_CODE);
> + add_event_entry(SPU_CTX_SWITCH_CODE);
> + add_event_entry(spu->number);
> + add_event_entry(spu->pid);
> + add_event_entry(spu->tgid);
> + add_event_entry(app_dcookie);
> + add_event_entry(spu_cookie);
> + add_event_entry(offset);
> + spin_unlock_irqrestore(&buffer_lock, flags);
> + smp_wmb();
> +out:
> + return retval;
> +}
> +
> +/*
> + * This function is invoked on either a bind_context or unbind_context.
> + * If called for an unbind_context, the val arg is 0; otherwise,
> + * it is the object-id value for the spu context.
> + * The data arg is of type 'struct spu *'.
> + */
> +static int spu_active_notify(struct notifier_block * self, unsigned long val,
> + void * data)
> +{
> + int retval;
> + unsigned long flags;
> + struct spu *the_spu = data;
> + pr_debug("SPU event notification arrived\n");
> + if (!val){
> + spin_lock_irqsave(&cache_lock, flags);
> + retval = release_cached_info(the_spu->number);
> + spin_unlock_irqrestore(&cache_lock, flags);
> + } else {
> + retval = process_context_switch(the_spu, val);
> + }
> + return retval;
> +}
> +
> +static struct notifier_block spu_active = {
> + .notifier_call = spu_active_notify,
> +};
> +
> +/* The main purpose of this function is to synchronize
> + * OProfile with SPUFS by registering to be notified of
> + * SPU task switches.
> + *
> + * NOTE: When profiling SPUs, we must ensure that only
> + * spu_sync_start is invoked and not the generic sync_start
> + * in drivers/oprofile/oprof.c. A return value of
> + * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
> + * accomplish this.
> + */
> +int spu_sync_start(void) {
The opening brace of a function definition should go on its own line, not at
the end of the declaration.
> + int k;
> + int ret = SKIP_GENERIC_SYNC;
> + int register_ret;
> + unsigned long flags = 0;
> + spu_prof_num_nodes = number_of_online_nodes();
> + num_spu_nodes = spu_prof_num_nodes * 8;
> +
> + spin_lock_irqsave(&buffer_lock, flags);
> + add_event_entry(ESCAPE_CODE);
> + add_event_entry(SPU_PROFILING_CODE);
> + add_event_entry(num_spu_nodes);
> + spin_unlock_irqrestore(&buffer_lock, flags);
> +
> + /* Register for SPU events */
> + register_ret = spu_switch_event_register(&spu_active);
> + if (register_ret) {
> + ret = SYNC_START_ERROR;
> + goto out;
> + }
> +
> + for (k = 0; k < (MAX_NUMNODES * 8); k++)
> + last_guard_val[k] = 0;
> + pr_debug("spu_sync_start -- running.\n");
> +out:
> + return ret;
> +}
> +
> +/* Record SPU program counter samples to the oprofile event buffer. */
> +void spu_sync_buffer(int spu_num, unsigned int * samples,
> + int num_samples)
> +{
> + unsigned long long file_offset;
> + unsigned long flags;
> + int i;
> + struct vma_to_fileoffset_map * map;
> + struct spu * the_spu;
> + unsigned long long spu_num_ll = spu_num;
> + unsigned long long spu_num_shifted = spu_num_ll << 32;
> + struct cached_info * c_info;
> +
> + /* We need to obtain the cache_lock here because it's
> + * possible that after getting the cached_info, the SPU job
> + * corresponding to this cached_info may end, thus resulting
> + * in the destruction of the cached_info.
> + */
> + spin_lock_irqsave(&cache_lock, flags);
> + c_info = get_cached_info(NULL, spu_num);
> + if (!c_info) {
> + /* This legitimately happens when the SPU task ends before all
> + * samples are recorded. No big deal -- so we just drop a few samples.
> + */
Indentation is off here as well.
> + pr_debug("SPU_PROF: No cached SPU contex "
> + "for SPU #%d. Dropping samples.\n", spu_num);
> + goto out;
> + }
> +
> + map = c_info->map;
> + the_spu = c_info->the_spu;
> + spin_lock(&buffer_lock);
> + for (i = 0; i < num_samples; i++) {
> + unsigned int sample = *(samples+i);
> + int grd_val = 0;
> + file_offset = 0;
> + if (sample == 0)
> + continue;
> + file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
> +
> + /* If overlays are used by this SPU application, the guard
> + * value is non-zero, indicating which overlay section is in
> + * use. We need to discard samples taken during the time
> + * period which an overlay occurs (i.e., guard value changes).
> + */
> + if (grd_val && grd_val != last_guard_val[spu_num]) {
> + last_guard_val[spu_num] = grd_val;
> + /* Drop the rest of the samples. */
> + break;
> + }
> +
> + /* For now, we'll drop samples that can't be mapped.
> + * This can happen for generated stubs executed from
> + * the SPU stack. Do we need to record these somehow?
> + */
> + if (unlikely(file_offset == 0xffffffff))
> + continue;
> + add_event_entry(file_offset | spu_num_shifted);
> + }
> + spin_unlock(&buffer_lock);
> +out:
> + spin_unlock_irqrestore(&cache_lock, flags);
> +}
> +
> +
> +int spu_sync_stop(void)
> +{
> + unsigned long flags = 0;
> + int ret = spu_switch_event_unregister(&spu_active);
> + if (ret) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: spu_switch_event_unregister returned %d\n",
> + __FUNCTION__, __LINE__, ret);
> + goto out;
> + }
> +
> + spin_lock_irqsave(&cache_lock, flags);
> + ret = release_cached_info(RELEASE_ALL);
> + spin_unlock_irqrestore(&cache_lock, flags);
> +out:
> + pr_debug("spu_sync_stop -- done.\n");
> + return ret;
> +}
> +
> +
> Index: linux-2.6/arch/powerpc/oprofile/cell/vma_map.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6/arch/powerpc/oprofile/cell/vma_map.c
> @@ -0,0 +1,279 @@
> + /*
> + * Cell Broadband Engine OProfile Support
> + *
> + * (C) Copyright IBM Corporation 2006
> + *
> + * Author: Maynard Johnson <maynardj at us.ibm.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +/* The code in this source file is responsible for generating
> + * vma-to-fileOffset maps for both overlay and non-overlay SPU
> + * applications.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/string.h>
> +#include <linux/uaccess.h>
> +#include <linux/elf.h>
> +#include "pr_util.h"
> +
> +
> +void vma_map_free(struct vma_to_fileoffset_map *map)
> +{
> + while (map) {
> + struct vma_to_fileoffset_map *next = map->next;
> + kfree(map);
> + map = next;
> + }
> +}
> +
> +unsigned int
> +vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
> + const struct spu * aSpu, int * grd_val)
> +{
> + u32 offset = 0xffffffff;
> + u32 ovly_grd;
> + for (; map; map = map->next) {
> + if (vma < map->vma || vma >= map->vma + map->size)
> + continue;
> +
> + if (map->guard_ptr) {
> + ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
> + if (ovly_grd != map->guard_val)
> + continue;
> + *grd_val = ovly_grd;
> + }
> + offset = vma - map->vma + map->offset;
> + break;
> + }
> +
> + return offset;
> +}
> +
> +static struct vma_to_fileoffset_map *
> +vma_map_add(struct vma_to_fileoffset_map * map, unsigned int vma,
> + unsigned int size, unsigned int offset, unsigned int guard_ptr,
> + unsigned int guard_val)
> +{
> + struct vma_to_fileoffset_map * new =
> + kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
> + if (!new) {
> + printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
> + __FUNCTION__, __LINE__);
> + vma_map_free(map);
> + return NULL;
> + }
> +
> + new->next = map;
> + new->vma = vma;
> + new->size = size;
> + new->offset = offset;
> + new->guard_ptr = guard_ptr;
> + new->guard_val = guard_val;
> +
> + return new;
> +}
> +
> +
> +/* Parse SPE ELF header and generate a list of vma_maps.
> + * A pointer to the first vma_map in the generated list
> + * of vma_maps is returned. */
> +struct vma_to_fileoffset_map * create_vma_map(const struct spu * aSpu,
> + unsigned long spu_elf_start)
> +{
> + static const unsigned char expected[EI_PAD] = {
> + [EI_MAG0] = ELFMAG0,
> + [EI_MAG1] = ELFMAG1,
> + [EI_MAG2] = ELFMAG2,
> + [EI_MAG3] = ELFMAG3,
> + [EI_CLASS] = ELFCLASS32,
> + [EI_DATA] = ELFDATA2MSB,
> + [EI_VERSION] = EV_CURRENT,
> + [EI_OSABI] = ELFOSABI_NONE
> + };
> +
> + int grd_val;
> + struct vma_to_fileoffset_map * map = NULL;
> + struct spu_overlay_info ovly;
> + unsigned int overlay_tbl_offset = -1;
> + unsigned long phdr_start, shdr_start;
> + Elf32_Ehdr ehdr;
> + Elf32_Phdr phdr;
> + Elf32_Shdr shdr, shdr_str;
> + Elf32_Sym sym;
> + int i, j;
> + char name[32];
> +
> + unsigned int ovly_table_sym = 0;
> + unsigned int ovly_buf_table_sym = 0;
> + unsigned int ovly_table_end_sym = 0;
> + unsigned int ovly_buf_table_end_sym = 0;
> + unsigned long ovly_table;
> + unsigned int n_ovlys;
> +
> + /* Get and validate ELF header. */
> +
> + if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
> + goto fail;
> +
> + if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
> + __FUNCTION__, __LINE__);
> + goto fail;
> + }
> + if (ehdr.e_machine != EM_SPU) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
> + __FUNCTION__, __LINE__);
> + goto fail;
> + }
> + if (ehdr.e_type != ET_EXEC) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: Unexpected e_type parsing SPU ELF\n",
> + __FUNCTION__, __LINE__);
> + goto fail;
> + }
> + phdr_start = spu_elf_start + ehdr.e_phoff;
> + shdr_start = spu_elf_start + ehdr.e_shoff;
> +
> + /* Traverse program headers. */
> + for (i = 0; i < ehdr.e_phnum; i++) {
> + if (copy_from_user(&phdr,
> + (void *) (phdr_start + i * sizeof(phdr)),
> + sizeof(phdr)))
> + goto fail;
> +
> + if (phdr.p_type != PT_LOAD)
> + continue;
> + if (phdr.p_flags & (1 << 27))
> + continue;
> +
> + map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
> + phdr.p_offset, 0, 0);
> + if (!map)
> + goto fail;
> + }
> +
> + pr_debug("SPU_PROF: Created non-overlay maps\n");
> + /* Traverse section table and search for overlay-related symbols. */
> + for (i = 0; i < ehdr.e_shnum; i++) {
> + if (copy_from_user(&shdr,
> + (void *) (shdr_start + i * sizeof(shdr)),
> + sizeof(shdr)))
> + goto fail;
> +
> + if (shdr.sh_type != SHT_SYMTAB)
> + continue;
> + if (shdr.sh_entsize != sizeof (sym))
> + continue;
> +
> + if (copy_from_user(&shdr_str,
> + (void *) (shdr_start + shdr.sh_link *
> + sizeof(shdr)),
> + sizeof(shdr)))
> + goto fail;
> +
> + if (shdr_str.sh_type != SHT_STRTAB)
> + goto fail;;
> +
> + for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
> + if (copy_from_user(&sym, (void *) (spu_elf_start +
> + shdr.sh_offset + j *
> + sizeof (sym)),
> + sizeof (sym)))
> + goto fail;
> +
> + if (copy_from_user(name, (void *)
> + (spu_elf_start + shdr_str.sh_offset +
> + sym.st_name),
> + 20))
> + goto fail;
> +
> + if (memcmp(name, "_ovly_table", 12) == 0)
> + ovly_table_sym = sym.st_value;
> + if (memcmp(name, "_ovly_buf_table", 16) == 0)
> + ovly_buf_table_sym = sym.st_value;
> + if (memcmp(name, "_ovly_table_end", 16) == 0)
> + ovly_table_end_sym = sym.st_value;
> + if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
> + ovly_buf_table_end_sym = sym.st_value;
> + }
> + }
> +
> + /* If we don't have overlays, we're done. */
> + if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
> + || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
> + pr_debug("SPU_PROF: No overlay table found\n");
> + goto out;
> + }
> + else {
> + pr_debug("SPU_PROF: Overlay table found\n");
> + }
> +
> + /* The _ovly_table symbol represents a table with one entry
> + * per overlay section. The _ovly_buf_table symbol represents
> + * a table with one entry per overlay region.
> + * The struct spu_overlay_info gives the structure of the _ovly_table
> + * entries. The structure of _ovly_table_buf is simply one
> + * u32 word per entry.
> + */
> + overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym, aSpu, &grd_val);
> + if (overlay_tbl_offset < 0) {
> + printk(KERN_ERR "SPU_PROF: "
> + "%s, line %d: Error finding SPU overlay table\n",
> + __FUNCTION__, __LINE__);
> + goto fail;
> + }
> + ovly_table = spu_elf_start + overlay_tbl_offset;
> +
> + n_ovlys = (ovly_table_end_sym -
> + ovly_table_sym) / sizeof (ovly);
> +
> + /* Traverse overlay table. */
> + for (i = 0; i < n_ovlys; i++) {
> + if (copy_from_user(&ovly, (void *)
> + (ovly_table + i * sizeof (ovly)),
> + sizeof (ovly)))
> + goto fail;
> +
> + /* The ovly.vma/size/offset arguments are analogous to the same
> + * arguments used above for non-overlay maps. The final two
> + * args are referred to as the guard pointer and the guard
> + * value.
> + * The guard pointer is an entry in the _ovly_buf_table,
> + * computed using ovly.buf as the index into the table. Since
> + * ovly.buf values begin at '1' to reference the first (or 0th)
> + * entry in the _ovly_buf_table, the computation subtracts 1
> + * from ovly.buf.
> + * The guard value is stored in the _ovly_buf_table entry and
> + * is an index (starting at 1) back to the _ovly_table entry
> + * that is pointing at this _ovly_buf_table entry. So, for
> + * example, for an overlay scenario with one overlay segment
> + * and two overlay sections:
> + * - Section 1 points to the first entry of the
> + * _ovly_buf_table, which contains a guard value
> + * of '1', referencing the first (index=0) entry of
> + * _ovly_table.
> + * - Section 2 points to the second entry of the
> + * _ovly_buf_table, which contains a guard value
> + * of '2', referencing the second (index=1) entry of
> + * _ovly_table.
> + */
> + map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
> + ovly_buf_table_sym + (ovly.buf - 1) * 4, i + 1);
> + if (!map)
> + goto fail;
> + }
> + goto out;
> +
> + fail:
> + map = NULL;
> + out:
> + return map;
> +}
> Index: linux-2.6/arch/powerpc/oprofile/common.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/oprofile/common.c
> +++ linux-2.6/arch/powerpc/oprofile/common.c
> @@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
> static struct op_counter_config ctr[OP_MAX_COUNTER];
> static struct op_system_config sys;
>
> +static int op_powerpc_flag;
Bad variable name here. Took me a while to realize it's just used
to communicate errors from the per-cpu inits back to the global init
function.
> +
> static void op_handle_interrupt(struct pt_regs *regs)
> {
> model->handle_interrupt(regs, ctr);
> @@ -36,25 +38,41 @@ static void op_handle_interrupt(struct p
>
> static void op_powerpc_cpu_setup(void *dummy)
> {
> - model->cpu_setup(ctr);
> + int ret;
> +
> + ret = model->cpu_setup(ctr);
> +
> + if (ret != 0)
> + op_powerpc_flag = ret;
> }
>
> static int op_powerpc_setup(void)
> {
> int err;
>
> + op_powerpc_flag = 0;
> +
> /* Grab the hardware */
> err = reserve_pmc_hardware(op_handle_interrupt);
> if (err)
> return err;
>
> /* Pre-compute the values to stuff in the hardware registers. */
> - model->reg_setup(ctr, &sys, model->num_counters);
> + op_powerpc_flag = model->reg_setup(ctr, &sys, model->num_counters);
>
> - /* Configure the registers on all cpus. */
> + if (op_powerpc_flag)
> + goto out;
> +
> + /* Configure the registers on all cpus. If an error occurs on one
> + * of the cpus, op_powerpc_flag will be set to the error */
> on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
>
> - return 0;
> +out: if (op_powerpc_flag) {
> + /* error on setup release the performance counter hardware */
> + release_pmc_hardware();
> + }
> +
> + return op_powerpc_flag;
> }
>
> static void op_powerpc_shutdown(void)
> @@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
>
> static void op_powerpc_cpu_start(void *dummy)
> {
> - model->start(ctr);
> + /* If any of the cpus have return an error, set the
> + * global flag to the error so it can be returned
> + * to the generic OProfile caller.
> + */
> + int ret;
> +
> + ret = model->start(ctr);
> + if (ret != 0)
> + op_powerpc_flag = ret;
> }
>
> static int op_powerpc_start(void)
> {
> + op_powerpc_flag = 0;
> +
> if (model->global_start)
> - model->global_start(ctr);
> - if (model->start)
> + return model->global_start(ctr);
> + if (model->start) {
> on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
> - return 0;
> + return op_powerpc_flag;
> + }
> + return -EIO; /* No start function is defined for this
> + power architecture */
> }
>
> static inline void op_powerpc_cpu_stop(void *dummy)
> @@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct opr
>
> switch (cur_cpu_spec->oprofile_type) {
> #ifdef CONFIG_PPC64
> -#ifdef CONFIG_PPC_CELL_NATIVE
> +#ifdef CONFIG_OPROFILE_CELL
> case PPC_OPROFILE_CELL:
> if (firmware_has_feature(FW_FEATURE_LPAR))
> return -ENODEV;
> model = &op_model_cell;
> + ops->sync_start = model->sync_start;
> + ops->sync_stop = model->sync_stop;
> break;
> #endif
> case PPC_OPROFILE_RS64:
> Index: linux-2.6/arch/powerpc/oprofile/Kconfig
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/oprofile/Kconfig
> +++ linux-2.6/arch/powerpc/oprofile/Kconfig
> @@ -15,3 +15,10 @@ config OPROFILE
>
> If unsure, say N.
>
> +config OPROFILE_CELL
> + bool "OProfile for Cell Broadband Engine"
> + depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
> + default y
> + help
> + Profiling of Cell BE SPUs requires special support enabled
> + by this option.
> Index: linux-2.6/arch/powerpc/oprofile/Makefile
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/oprofile/Makefile
> +++ linux-2.6/arch/powerpc/oprofile/Makefile
> @@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../driv
> timer_int.o )
>
> oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
> -oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
> +oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
> + cell/spu_profiler.o cell/vma_map.o \
> + cell/spu_task_sync.o
> oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
> oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
> oprofile-$(CONFIG_6xx) += op_model_7450.o
> Index: linux-2.6/arch/powerpc/oprofile/op_model_cell.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/oprofile/op_model_cell.c
> +++ linux-2.6/arch/powerpc/oprofile/op_model_cell.c
> @@ -5,8 +5,8 @@
> *
> * Author: David Erb (djerb at us.ibm.com)
> * Modifications:
> - * Carl Love <carll at us.ibm.com>
> - * Maynard Johnson <maynardj at us.ibm.com>
> + * Carl Love <carll at us.ibm.com>
> + * Maynard Johnson <maynardj at us.ibm.com>
> *
> * This program is free software; you can redistribute it and/or
> * modify it under the terms of the GNU General Public License
> @@ -38,12 +38,23 @@
>
> #include "../platforms/cell/interrupt.h"
> #include "../platforms/cell/cbe_regs.h"
> +#include "cell/pr_util.h"
> +
> +/*
> + * spu_cycle_reset is the number of cycles between samples.
> + * This variable is used for SPU profiling and should ONLY be set
> + * at the beginning of cell_reg_setup; otherwise, it's read-only.
> + */
> +static unsigned int spu_cycle_reset = 0;
> +
> +#define NUM_SPUS_PER_NODE 8
> +#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
>
> #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */
> -#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
> - * PPU_CYCLES event
> - */
> -#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
> +#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
> + * PPU_CYCLES event
> + */
> +#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
>
> #define NUM_THREADS 2 /* number of physical threads in
> * physical processor
> @@ -51,6 +62,7 @@
> #define NUM_TRACE_BUS_WORDS 4
> #define NUM_INPUT_BUS_WORDS 2
>
> +#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
>
> struct pmc_cntrl_data {
> unsigned long vcntr;
> @@ -62,11 +74,10 @@ struct pmc_cntrl_data {
> /*
> * ibm,cbe-perftools rtas parameters
> */
> -
> struct pm_signal {
> u16 cpu; /* Processor to modify */
> - u16 sub_unit; /* hw subunit this applies to (if applicable) */
> - short int signal_group; /* Signal Group to Enable/Disable */
> + u16 sub_unit; /* hw subunit this applies to (if applicable)*/
> + short int signal_group; /* Signal Group to Enable/Disable */
> u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event
> * Bus Word(s) (bitmask)
> */
> @@ -112,21 +123,42 @@ static DEFINE_PER_CPU(unsigned long[NR_P
>
> static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
>
> -/* Interpetation of hdw_thread:
> +/*
> + * The CELL profiling code makes rtas calls to setup the debug bus to
> + * route the performance signals. Additionally, SPU profiling requires
> + * a second rtas call to setup the hardware to capture the SPU PCs.
> + * The EIO error value is returned if the token lookups or the rtas
> + * call fail. The EIO error number is the best choice of the existing
> + * error numbers. The probability of rtas related error is very low. But
> + * by returning EIO and printing additional information to dmesg the user
> + * will know that OProfile did not start and dmesg will tell them why.
> + * OProfile does not support returning errors on Stop. Not a huge issue
> + * since failure to reset the debug bus or stop the SPU PC collection is
> + * not a fatal issue. Chances are if the Stop failed, Start doesn't work
> + * either.
> + */
> +
> +/*
> + * Interpretation of hdw_thread:
> * 0 - even virtual cpus 0, 2, 4,...
> * 1 - odd virtual cpus 1, 3, 5, ...
> + *
> + * FIXME: this is strictly wrong, we need to clean this up in a number
> + * of places. It works for now. -arnd
> */
> static u32 hdw_thread;
>
> static u32 virt_cntr_inter_mask;
> static struct timer_list timer_virt_cntr;
>
> -/* pm_signal needs to be global since it is initialized in
> +/*
> + * pm_signal needs to be global since it is initialized in
> * cell_reg_setup at the time when the necessary information
> * is available.
> */
> static struct pm_signal pm_signal[NR_PHYS_CTRS];
> -static int pm_rtas_token;
> +static int pm_rtas_token; /* token for debug bus setup call */
> +static int spu_rtas_token; /* token for SPU cycle profiling */
>
> static u32 reset_value[NR_PHYS_CTRS];
> static int num_counters;
> @@ -147,8 +179,8 @@ rtas_ibm_cbe_perftools(int subfunc, int
> {
> u64 paddr = __pa(address);
>
> - return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru,
> - paddr >> 32, paddr & 0xffffffff, length);
> + return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
> + passthru, paddr >> 32, paddr & 0xffffffff, length);
> }
>
> static void pm_rtas_reset_signals(u32 node)
> @@ -156,12 +188,13 @@ static void pm_rtas_reset_signals(u32 no
> int ret;
> struct pm_signal pm_signal_local;
>
> - /* The debug bus is being set to the passthru disable state.
> - * However, the FW still expects atleast one legal signal routing
> - * entry or it will return an error on the arguments. If we don't
> - * supply a valid entry, we must ignore all return values. Ignoring
> - * all return values means we might miss an error we should be
> - * concerned about.
> + /*
> + * The debug bus is being set to the passthru disable state.
> + * However, the FW still expects at least one legal signal routing
> + * entry or it will return an error on the arguments. If we don't
> + * supply a valid entry, we must ignore all return values. Ignoring
> + * all return values means we might miss an error we should be
> + * concerned about.
> */
>
> /* fw expects physical cpu #. */
> @@ -175,18 +208,24 @@ static void pm_rtas_reset_signals(u32 no
> &pm_signal_local,
> sizeof(struct pm_signal));
>
> - if (ret)
> + if (unlikely(ret))
> + /*
> + * Not a fatal error. For Oprofile stop, the oprofile
> + * functions do not support returning an error for
> + * failure to stop OProfile.
> + */
> printk(KERN_WARNING "%s: rtas returned: %d\n",
> __FUNCTION__, ret);
> }
>
> -static void pm_rtas_activate_signals(u32 node, u32 count)
> +static int pm_rtas_activate_signals(u32 node, u32 count)
> {
> int ret;
> int i, j;
> struct pm_signal pm_signal_local[NR_PHYS_CTRS];
>
> - /* There is no debug setup required for the cycles event.
> + /*
> + * There is no debug setup required for the cycles event.
> * Note that only events in the same group can be used.
> * Otherwise, there will be conflicts in correctly routing
> + * the signals on the debug bus. It is the responsibility
> @@ -213,10 +252,14 @@ static void pm_rtas_activate_signals(u32
> pm_signal_local,
> i * sizeof(struct pm_signal));
>
> - if (ret)
> + if (unlikely(ret)) {
> printk(KERN_WARNING "%s: rtas returned: %d\n",
> __FUNCTION__, ret);
> + return -EIO;
> + }
> }
> +
> + return 0;
> }
>
> /*
> @@ -260,11 +303,12 @@ static void set_pm_event(u32 ctr, int ev
> pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
> pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
>
> - /* Some of the islands signal selection is based on 64 bit words.
> + /*
> + * Some of the islands signal selection is based on 64 bit words.
> * The debug bus words are 32 bits, the input words to the performance
> * counters are defined as 32 bits. Need to convert the 64 bit island
> * specification to the appropriate 32 input bit and bus word for the
> - * performance counter event selection. See the CELL Performance
> + * performance counter event selection. See the CELL Performance
> * monitoring signals manual and the Perf cntr hardware descriptions
> * for the details.
> */
> @@ -298,6 +342,7 @@ static void set_pm_event(u32 ctr, int ev
> input_bus[j] = i;
> pm_regs.group_control |=
> (i << (31 - i));
> +
> break;
> }
> }
> @@ -309,7 +354,8 @@ out:
>
> static void write_pm_cntrl(int cpu)
> {
> - /* Oprofile will use 32 bit counters, set bits 7:10 to 0
> + /*
> + * Oprofile will use 32 bit counters, set bits 7:10 to 0
> * pmregs.pm_cntrl is a global
> */
>
> @@ -326,7 +372,8 @@ static void write_pm_cntrl(int cpu)
> if (pm_regs.pm_cntrl.freeze == 1)
> val |= CBE_PM_FREEZE_ALL_CTRS;
>
> - /* Routine set_count_mode must be called previously to set
> + /*
> + * Routine set_count_mode must be called previously to set
> * the count mode based on the user selection of user and kernel.
> */
> val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
> @@ -336,7 +383,8 @@ static void write_pm_cntrl(int cpu)
> static inline void
> set_count_mode(u32 kernel, u32 user)
> {
> - /* The user must specify user and kernel if they want them. If
> + /*
> + * The user must specify user and kernel if they want them. If
> * neither is specified, OProfile will count in hypervisor mode.
> * pm_regs.pm_cntrl is a global
> */
> @@ -364,7 +412,7 @@ static inline void enable_ctr(u32 cpu, u
>
> /*
> * Oprofile is expected to collect data on all CPUs simultaneously.
> - * However, there is one set of performance counters per node. There are
> + * However, there is one set of performance counters per node. There are
> * two hardware threads or virtual CPUs on each node. Hence, OProfile must
> * multiplex in time the performance counter collection on the two virtual
> * CPUs. The multiplexing of the performance counters is done by this
> @@ -377,19 +425,19 @@ static inline void enable_ctr(u32 cpu, u
> * pair of per-cpu arrays is used for storing the previous and next
> * pmc values for a given node.
> * NOTE: We use the per-cpu variable to improve cache performance.
> + *
> + * This routine will alternate loading the virtual counters for
> + * virtual CPUs
> */
> static void cell_virtual_cntr(unsigned long data)
> {
> - /* This routine will alternate loading the virtual counters for
> - * virtual CPUs
> - */
> int i, prev_hdw_thread, next_hdw_thread;
> u32 cpu;
> unsigned long flags;
>
> - /* Make sure that the interrupt_hander and
> - * the virt counter are not both playing with
> - * the counters on the same node.
> + /*
> + * Make sure that the interrupt_handler and the virt counter are
> + * not both playing with the counters on the same node.
> */
>
> spin_lock_irqsave(&virt_cntr_lock, flags);
> @@ -400,22 +448,25 @@ static void cell_virtual_cntr(unsigned l
> hdw_thread = 1 ^ hdw_thread;
> next_hdw_thread = hdw_thread;
>
> - for (i = 0; i < num_counters; i++)
> - /* There are some per thread events. Must do the
> + /*
> + * There are some per thread events. Must do the
> * set event, for the thread that is being started
> */
> + for (i = 0; i < num_counters; i++)
> set_pm_event(i,
> pmc_cntrl[next_hdw_thread][i].evnts,
> pmc_cntrl[next_hdw_thread][i].masks);
>
> - /* The following is done only once per each node, but
> + /*
> + * The following is done only once per each node, but
> * we need cpu #, not node #, to pass to the cbe_xxx functions.
> */
> for_each_online_cpu(cpu) {
> if (cbe_get_hw_thread_id(cpu))
> continue;
>
> - /* stop counters, save counter values, restore counts
> + /*
> + * stop counters, save counter values, restore counts
> * for previous thread
> */
> cbe_disable_pm(cpu);
> @@ -428,7 +479,7 @@ static void cell_virtual_cntr(unsigned l
> == 0xFFFFFFFF)
> /* If the cntr value is 0xffffffff, we must
> * reset that to 0xfffffff0 when the current
> - * thread is restarted. This will generate a
> + * thread is restarted. This will generate a
> * new interrupt and make sure that we never
> * restore the counters to the max value. If
> * the counters were restored to the max value,
> @@ -444,13 +495,15 @@ static void cell_virtual_cntr(unsigned l
> next_hdw_thread)[i]);
> }
>
> - /* Switch to the other thread. Change the interrupt
> + /*
> + * Switch to the other thread. Change the interrupt
> * and control regs to be scheduled on the CPU
> * corresponding to the thread to execute.
> */
> for (i = 0; i < num_counters; i++) {
> if (pmc_cntrl[next_hdw_thread][i].enabled) {
> - /* There are some per thread events.
> + /*
> + * There are some per thread events.
> * Must do the set event, enable_cntr
> * for each cpu.
> */
> @@ -482,17 +535,42 @@ static void start_virt_cntrs(void)
> }
>
> /* This function is called once for all cpus combined */
> -static void
> -cell_reg_setup(struct op_counter_config *ctr,
> - struct op_system_config *sys, int num_ctrs)
> +static int cell_reg_setup(struct op_counter_config *ctr,
> + struct op_system_config *sys, int num_ctrs)
> {
> int i, j, cpu;
> + spu_cycle_reset = 0;
> +
> + if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
> + spu_cycle_reset = ctr[0].count;
> +
> + /*
> + * Each node will need to make the rtas call to start
> + * and stop SPU profiling. Get the token once and store it.
> + */
> + spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
> +
> + if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
> + printk(KERN_ERR
> + "%s: rtas token ibm,cbe-spu-perftools unknown\n",
> + __FUNCTION__);
> + return -EIO;
> + }
> + }
>
> pm_rtas_token = rtas_token("ibm,cbe-perftools");
> - if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
> - printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
> +
> + /*
> + * For all events except PPU CYCLEs, each node will need to make
> + * the rtas cbe-perftools call to setup and reset the debug bus.
> + * Make the token lookup call once and store it in the global
> + * variable pm_rtas_token.
> + */
> + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
> + printk(KERN_ERR
> + "%s: rtas token ibm,cbe-perftools unknown\n",
> __FUNCTION__);
> - goto out;
> + return -EIO;
> }
>
> num_counters = num_ctrs;
> @@ -520,7 +598,8 @@ cell_reg_setup(struct op_counter_config
> per_cpu(pmc_values, j)[i] = 0;
> }
>
> - /* Setup the thread 1 events, map the thread 0 event to the
> + /*
> + * Setup the thread 1 events, map the thread 0 event to the
> * equivalent thread 1 event.
> */
> for (i = 0; i < num_ctrs; ++i) {
> @@ -544,9 +623,10 @@ cell_reg_setup(struct op_counter_config
> for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
> input_bus[i] = 0xff;
>
> - /* Our counters count up, and "count" refers to
> + /*
> + * Our counters count up, and "count" refers to
> * how much before the next interrupt, and we interrupt
> - * on overflow. So we calculate the starting value
> + * on overflow. So we calculate the starting value
> * which will give us "count" until overflow.
> * Then we set the events on the enabled counters.
> */
> @@ -569,28 +649,27 @@ cell_reg_setup(struct op_counter_config
> for (i = 0; i < num_counters; ++i) {
> per_cpu(pmc_values, cpu)[i] = reset_value[i];
> }
> -out:
> - ;
> +
> + return 0;
> }
>
> +
> +
> /* This function is called once for each cpu */
> -static void cell_cpu_setup(struct op_counter_config *cntr)
> +static int cell_cpu_setup(struct op_counter_config *cntr)
> {
> u32 cpu = smp_processor_id();
> u32 num_enabled = 0;
> int i;
>
> + if (spu_cycle_reset)
> + return 0;
> +
> /* There is one performance monitor per processor chip (i.e. node),
> * so we only need to perform this function once per node.
> */
> if (cbe_get_hw_thread_id(cpu))
> - goto out;
> -
> - if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
> - printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
> - __FUNCTION__);
> - goto out;
> - }
> + return 0;
>
> /* Stop all counters */
> cbe_disable_pm(cpu);
> @@ -609,16 +688,282 @@ static void cell_cpu_setup(struct op_cou
> }
> }
>
> - pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
> + /*
> + * The pm_rtas_activate_signals will return -EIO if the FW
> + * call failed.
> + */
> + return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
> +}
> +
> +#define ENTRIES 303
> +#define MAXLFSR 0xFFFFFF
> +
> +/* precomputed table of 24 bit LFSR values */
> +static int initial_lfsr[] = {
> + 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
> + 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
> + 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
> + 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
> + 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
> + 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
> + 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
> + 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
> + 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
> + 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
> + 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
> + 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
> + 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
> + 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
> + 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
> + 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
> + 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
> + 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
> + 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
> + 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
> + 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
> + 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
> + 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
> + 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
> + 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
> + 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
> + 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
> + 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
> + 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
> + 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
> + 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
> + 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
> + 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
> + 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
> + 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
> + 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
> + 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
> + 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
> +};
> +
> +/*
> + * The hardware uses an LFSR counting sequence to determine when to capture
> + * the SPU PCs. An LFSR sequence is like a pseudo random number sequence
> + * where each number occurs once in the sequence but the sequence is not in
> + * numerical order. The SPU PC capture is done when the LFSR sequence reaches
> + * the last value in the sequence. Hence the user specified value N
> + * corresponds to the LFSR number that is N from the end of the sequence.
> + *
> + * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
> + * LFSR sequence is broken into four ranges. The spacing of the precomputed
> + * values is adjusted in each range so the error between the user specified
> + * number (N) of events between samples and the actual number of events based
> + * on the precomputed value will be less than about 6.2%. Note, if the user
> + * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
> + * This is to prevent the loss of samples because the trace buffer is full.
> + *
> + * User specified N Step between Index in
> + * precomputed values precomputed
> + * table
> + * 0 to 2^16-1 ---- 0
> + * 2^16 to 2^16+2^19-1 2^12 1 to 128
> + * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256
> + * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302
> + *
> + *
> + * For example, the LFSR values in the second range are computed for 2^16,
> + * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
> + * 1, 2,..., 127, 128.
> + *
> + * The 24 bit LFSR value for the nth number in the sequence can be
> + * calculated using the following code:
> + *
> + * #define size 24
> + * int calculate_lfsr(int n)
> + * {
> + * int i;
> + * unsigned int newlfsr0;
> + * unsigned int lfsr = 0xFFFFFF;
> + * unsigned int howmany = n;
> + *
> + * for (i = 2; i < howmany + 2; i++) {
> + * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
> + * ((lfsr >> (size - 1 - 1)) & 1) ^
> + * (((lfsr >> (size - 1 - 6)) & 1) ^
> + * ((lfsr >> (size - 1 - 23)) & 1)));
> + *
> + * lfsr >>= 1;
> + * lfsr = lfsr | (newlfsr0 << (size - 1));
> + * }
> + * return lfsr;
> + * }
> + */
> +
> +#define V2_16 (0x1 <<16)
> +#define V2_19 (0x1 <<19)
> +#define V2_22 (0x1 <<22)
> +
> +static int calculate_lfsr(int n)
> +{
> + /*
> + * The ranges and steps are in powers of 2 so the calculations
> + * can be done using shifts rather then divide.
> + */
> + int index;
> +
> + if ((n >> 16) == 0)
> + index = 0;
> + else if (((n - V2_16) >> 19) == 0)
> + index = ((n - V2_16) >> 12) + 1;
> + else if (((n - V2_16 - V2_19) >> 22) == 0)
> + index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
> + else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
> + index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
> + else
> + index = ENTRIES-1;
> +
> + /* make sure index is valid */
> + if ((index > ENTRIES) || (index < 0))
> + index = ENTRIES-1;
> +
> + return initial_lfsr[index];
> +}
> +
> +static int pm_rtas_activate_spu_profiling(u32 node)
> +{
> + int ret, i;
> + struct pm_signal pm_signal_local[NR_PHYS_CTRS];
> +
> + /*
> + * Set up the rtas call to configure the debug bus to
> + * route the SPU PCs. Setup the pm_signal for each SPU
> + */
> + for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
> + pm_signal_local[i].cpu = node;
> + pm_signal_local[i].signal_group = 41;
> + /* spu i on word (i/2) */
> + pm_signal_local[i].bus_word = 1 << i / 2;
> + /* spu i */
> + pm_signal_local[i].sub_unit = i;
> + pm_signal_local[i].bit = 63;
> + }
> +
> + ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
> + PASSTHRU_ENABLE, pm_signal_local,
> + (NUM_SPUS_PER_NODE
> + * sizeof(struct pm_signal)));
> +
> + if (unlikely(ret)) {
> + printk(KERN_WARNING "%s: rtas returned: %d\n",
> + __FUNCTION__, ret);
> + return -EIO;
> + }
> +
> + return 0;
> +}
> +
> +#ifdef CONFIG_CPU_FREQ
> +static int
> +oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
> +{
> + int ret = 0;
> + struct cpufreq_freqs * frq = data;
> + if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
> + (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
> + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
> + set_profiling_frequency(frq->new, spu_cycle_reset);
> + return ret;
> +}
> +
> +static struct notifier_block cpu_freq_notifier_block = {
> + .notifier_call = oprof_cpufreq_notify
> +};
> +#endif
> +
> +static int cell_global_start_spu(struct op_counter_config *ctr)
> +{
> + int subfunc, rtn_value;
> + unsigned int lfsr_value;
> + int cpu;
> + int ret;
> + int rtas_error;
> + unsigned int cpu_khzfreq = 0;
> +
> + /* The SPU profiling uses time-based profiling based on
> + * cpu frequency, so if configured with the CPU_FREQ
> + * option, we should detect frequency changes and react
> + * accordingly.
> + */
> +#ifdef CONFIG_CPU_FREQ
> + ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
> + CPUFREQ_TRANSITION_NOTIFIER);
> + if (ret < 0)
> + /* this is not a fatal error */
> + printk(KERN_ERR "CPU freq change registration failed: %d\n",
> + ret);
> +
> + else
> + cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
> +#endif
> +
> + set_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
> +
> + for_each_online_cpu(cpu) {
> + if (cbe_get_hw_thread_id(cpu))
> + continue;
> +
> + /*
> + * Setup SPU cycle-based profiling.
> + * Set perf_mon_control bit 0 to a zero before
> + * enabling spu collection hardware.
> + */
> + cbe_write_pm(cpu, pm_control, 0);
> +
> + if (spu_cycle_reset > MAX_SPU_COUNT)
> + /* use largest possible value */
> + lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
> + else
> + lfsr_value = calculate_lfsr(spu_cycle_reset);
> +
> + /* must use a non zero value. Zero disables data collection. */
> + if (lfsr_value == 0)
> + lfsr_value = calculate_lfsr(1);
> +
> + lfsr_value = lfsr_value << 8; /* shift lfsr to correct
> + * register location
> + */
> +
> + /* debug bus setup */
> + ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
> +
> + if (unlikely(ret)) {
> + rtas_error = ret;
> + goto out;
> + }
> +
> +
> + subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */
> +
> + /* start profiling */
> + rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
> + cbe_cpu_to_node(cpu), lfsr_value);
> +
> + if (unlikely(rtn_value != 0)) {
> + printk(KERN_ERR
> + "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
> + __FUNCTION__, rtn_value);
> + rtas_error = -EIO;
> + goto out;
> + }
> + }
> +
> + start_spu_profiling(spu_cycle_reset);
> +
> + oprofile_running = 1;
> + return 0;
> +
> out:
> - ;
> + return rtas_error;
> }
>
> -static void cell_global_start(struct op_counter_config *ctr)
> +static int cell_global_start_ppu(struct op_counter_config *ctr)
> {
> - u32 cpu;
> + u32 cpu, i;
> u32 interrupt_mask = 0;
> - u32 i;
>
> /* This routine gets called once for the system.
> * There is one performance monitor per node, so we
> @@ -651,19 +996,80 @@ static void cell_global_start(struct op_
> oprofile_running = 1;
> smp_wmb();
>
> - /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
> - * executed which manipulates the PMU. We start the "virtual counter"
> + /*
> + * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
> + * executed which manipulates the PMU. We start the "virtual counter"
> * here so that we do not need to synchronize access to the PMU in
> * the above for-loop.
> */
> start_virt_cntrs();
> +
> + return 0;
> }
>
> -static void cell_global_stop(void)
> +static int cell_global_start(struct op_counter_config *ctr)
> +{
> + if (spu_cycle_reset) {
> + return cell_global_start_spu(ctr);
> + } else {
> + return cell_global_start_ppu(ctr);
> + }
> +}
> +
> +/*
> + * Note the generic OProfile stop calls do not support returning
> + * an error on stop. Hence, will not return an error if the FW
> + * calls fail on stop. Failure to reset the debug bus is not an issue.
> + * Failure to disable the SPU profiling is not an issue. The FW calls
> + * to enable the performance counters and debug bus will work even if
> + * the hardware was not cleanly reset.
> + */
> +static void cell_global_stop_spu(void)
> +{
> + int subfunc, rtn_value;
> + unsigned int lfsr_value;
> + int cpu;
> +
> + oprofile_running = 0;
> +
> +#ifdef CONFIG_CPU_FREQ
> + cpufreq_unregister_notifier(&cpu_freq_notifier_block,
> + CPUFREQ_TRANSITION_NOTIFIER);
> +#endif
> +
> + for_each_online_cpu(cpu) {
> + if (cbe_get_hw_thread_id(cpu))
> + continue;
> +
> + subfunc = 3; /*
> + * 2 - activate SPU tracing,
> + * 3 - deactivate
> + */
> + lfsr_value = 0x8f100000;
> +
> + rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
> + subfunc, cbe_cpu_to_node(cpu),
> + lfsr_value);
> +
> + if (unlikely(rtn_value != 0)) {
> + printk(KERN_ERR
> + "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
> + __FUNCTION__, rtn_value);
> + }
> +
> + /* Deactivate the signals */
> + pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
> + }
> +
> + stop_spu_profiling();
> +}
> +
> +static void cell_global_stop_ppu(void)
> {
> int cpu;
>
> - /* This routine will be called once for the system.
> + /*
> + * This routine will be called once for the system.
> * There is one performance monitor per node, so we
> * only need to perform this function once per node.
> */
> @@ -687,8 +1093,17 @@ static void cell_global_stop(void)
> }
> }
>
> -static void
> -cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
> +static void cell_global_stop(void)
> +{
> + if (spu_cycle_reset) {
> + cell_global_stop_spu();
> + } else {
> + cell_global_stop_ppu();
> + }
> +}
> +
> +static void cell_handle_interrupt(struct pt_regs *regs,
> + struct op_counter_config *ctr)
> {
> u32 cpu;
> u64 pc;
> @@ -699,13 +1114,15 @@ cell_handle_interrupt(struct pt_regs *re
>
> cpu = smp_processor_id();
>
> - /* Need to make sure the interrupt handler and the virt counter
> + /*
> + * Need to make sure the interrupt handler and the virt counter
> * routine are not running at the same time. See the
> * cell_virtual_cntr() routine for additional comments.
> */
> spin_lock_irqsave(&virt_cntr_lock, flags);
>
> - /* Need to disable and reenable the performance counters
> + /*
> + * Need to disable and reenable the performance counters
> * to get the desired behavior from the hardware. This
> * is hardware specific.
> */
> @@ -714,7 +1131,8 @@ cell_handle_interrupt(struct pt_regs *re
>
> interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
>
> - /* If the interrupt mask has been cleared, then the virt cntr
> + /*
> + * If the interrupt mask has been cleared, then the virt cntr
> * has cleared the interrupt. When the thread that generated
> * the interrupt is restored, the data count will be restored to
> * 0xffffff0 to cause the interrupt to be regenerated.
> @@ -732,18 +1150,20 @@ cell_handle_interrupt(struct pt_regs *re
> }
> }
>
> - /* The counters were frozen by the interrupt.
> + /*
> + * The counters were frozen by the interrupt.
> * Reenable the interrupt and restart the counters.
> * If there was a race between the interrupt handler and
> - * the virtual counter routine. The virutal counter
> + * the virtual counter routine. The virtual counter
> * routine may have cleared the interrupts. Hence must
> * use the virt_cntr_inter_mask to re-enable the interrupts.
> */
> cbe_enable_pm_interrupts(cpu, hdw_thread,
> virt_cntr_inter_mask);
>
> - /* The writes to the various performance counters only writes
> - * to a latch. The new values (interrupt setting bits, reset
> + /*
> + * The writes to the various performance counters only write
> + * to a latch. The new values (interrupt setting bits, reset
> * counter value etc.) are not copied to the actual registers
> * until the performance monitor is enabled. In order to get
> + * this to work as desired, the performance monitor needs to
> @@ -755,10 +1175,33 @@ cell_handle_interrupt(struct pt_regs *re
> spin_unlock_irqrestore(&virt_cntr_lock, flags);
> }
>
> +/*
> + * This function is called from the generic OProfile
> + * driver. When profiling PPUs, we need to do the
> + * generic sync start; otherwise, do spu_sync_start.
> + */
> +static int cell_sync_start(void)
> +{
> + if (spu_cycle_reset)
> + return spu_sync_start();
> + else
> + return DO_GENERIC_SYNC;
> +}
> +
> +static int cell_sync_stop(void)
> +{
> + if (spu_cycle_reset)
> + return spu_sync_stop();
> + else
> + return 1;
> +}
> +
> struct op_powerpc_model op_model_cell = {
> .reg_setup = cell_reg_setup,
> .cpu_setup = cell_cpu_setup,
> .global_start = cell_global_start,
> .global_stop = cell_global_stop,
> + .sync_start = cell_sync_start,
> + .sync_stop = cell_sync_stop,
> .handle_interrupt = cell_handle_interrupt,
> };
> Index: linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/sched.c
> +++ linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c
> @@ -149,6 +149,7 @@ static void spu_bind_context(struct spu
> ctx->spu = spu;
> ctx->ops = &spu_hw_ops;
> spu->pid = current->pid;
> + spu->tgid = current->tgid;
> spu_associate_mm(spu, ctx->owner);
> spu->ibox_callback = spufs_ibox_callback;
> spu->wbox_callback = spufs_wbox_callback;
> @@ -188,6 +189,7 @@ static void spu_unbind_context(struct sp
> spu->dma_callback = NULL;
> spu_associate_mm(spu, NULL);
> spu->pid = 0;
> + spu->tgid = 0;
> ctx->ops = &spu_backing_ops;
> ctx->spu = NULL;
> spu->flags = 0;
> @@ -421,7 +423,7 @@ void spu_deactivate(struct spu_context *
> }
>
> /**
> - * spu_yield - yield a physical spu if others are waiting
> + * spu_yield - yield a physical spu if others are waiting
> * @ctx: spu context to yield
> *
> * Check if there is a higher priority context waiting and if yes
> Index: linux-2.6/drivers/oprofile/buffer_sync.c
> ===================================================================
> --- linux-2.6.orig/drivers/oprofile/buffer_sync.c
> +++ linux-2.6/drivers/oprofile/buffer_sync.c
> @@ -26,8 +26,9 @@
> #include <linux/profile.h>
> #include <linux/module.h>
> #include <linux/fs.h>
> +#include <linux/oprofile.h>
> #include <linux/sched.h>
> -
> +
> #include "oprofile_stats.h"
> #include "event_buffer.h"
> #include "cpu_buffer.h"
> Index: linux-2.6/drivers/oprofile/event_buffer.h
> ===================================================================
> --- linux-2.6.orig/drivers/oprofile/event_buffer.h
> +++ linux-2.6/drivers/oprofile/event_buffer.h
> @@ -19,28 +19,10 @@ void free_event_buffer(void);
>
> /* wake up the process sleeping on the event file */
> void wake_up_buffer_waiter(void);
> -
> -/* Each escaped entry is prefixed by ESCAPE_CODE
> - * then one of the following codes, then the
> - * relevant data.
> - */
> -#define ESCAPE_CODE ~0UL
> -#define CTX_SWITCH_CODE 1
> -#define CPU_SWITCH_CODE 2
> -#define COOKIE_SWITCH_CODE 3
> -#define KERNEL_ENTER_SWITCH_CODE 4
> -#define KERNEL_EXIT_SWITCH_CODE 5
> -#define MODULE_LOADED_CODE 6
> -#define CTX_TGID_CODE 7
> -#define TRACE_BEGIN_CODE 8
> -#define TRACE_END_CODE 9
> -
> +
> #define INVALID_COOKIE ~0UL
> #define NO_COOKIE 0UL
>
> -/* add data to the event buffer */
> -void add_event_entry(unsigned long data);
> -
> extern const struct file_operations event_buffer_fops;
>
> /* mutex between sync_cpu_buffers() and the
> Index: linux-2.6/drivers/oprofile/oprof.c
> ===================================================================
> --- linux-2.6.orig/drivers/oprofile/oprof.c
> +++ linux-2.6/drivers/oprofile/oprof.c
> @@ -53,9 +53,23 @@ int oprofile_setup(void)
> * us missing task deaths and eventually oopsing
> * when trying to process the event buffer.
> */
> + if (oprofile_ops.sync_start) {
> + int sync_ret = oprofile_ops.sync_start();
> + switch (sync_ret) {
> + case 0: goto post_sync;
> + break;
> + case 1: goto do_generic;
> + break;
> + case -1: goto out3;
> + break;
> + default: goto out3;
This (and below) are nonstandard indentations for switch statements.
> + }
> + }
> +do_generic:
> if ((err = sync_start()))
> goto out3;
>
> +post_sync:
> is_setup = 1;
> mutex_unlock(&start_mutex);
> return 0;
> @@ -118,7 +132,19 @@ out:
> void oprofile_shutdown(void)
> {
> mutex_lock(&start_mutex);
> + if (oprofile_ops.sync_stop) {
> + int sync_ret = oprofile_ops.sync_stop();
> + switch (sync_ret) {
> + case 0: goto post_sync;
> + break;
> + case 1: goto do_generic;
> + break;
> + default: goto post_sync;
> + }
> + }
> +do_generic:
> sync_stop();
> +post_sync:
> if (oprofile_ops.shutdown)
> oprofile_ops.shutdown();
> is_setup = 0;
> Index: linux-2.6/include/asm-powerpc/oprofile_impl.h
> ===================================================================
> --- linux-2.6.orig/include/asm-powerpc/oprofile_impl.h
> +++ linux-2.6/include/asm-powerpc/oprofile_impl.h
> @@ -39,14 +39,16 @@ struct op_system_config {
>
> /* Per-arch configuration */
> struct op_powerpc_model {
> - void (*reg_setup) (struct op_counter_config *,
> + int (*reg_setup) (struct op_counter_config *,
> struct op_system_config *,
> int num_counters);
> - void (*cpu_setup) (struct op_counter_config *);
> - void (*start) (struct op_counter_config *);
> - void (*global_start) (struct op_counter_config *);
> + int (*cpu_setup) (struct op_counter_config *);
> + int (*start) (struct op_counter_config *);
> + int (*global_start) (struct op_counter_config *);
> void (*stop) (void);
> void (*global_stop) (void);
> + int (*sync_start)(void);
> + int (*sync_stop)(void);
> void (*handle_interrupt) (struct pt_regs *,
> struct op_counter_config *);
> int num_counters;
> Index: linux-2.6/include/asm-powerpc/spu.h
> ===================================================================
> --- linux-2.6.orig/include/asm-powerpc/spu.h
> +++ linux-2.6/include/asm-powerpc/spu.h
> @@ -129,6 +129,7 @@ struct spu {
> struct spu_runqueue *rq;
> unsigned long long timestamp;
> pid_t pid;
> + pid_t tgid;
> int class_0_pending;
> spinlock_t register_lock;
>
> @@ -172,6 +173,20 @@ extern void spu_associate_mm(struct spu
> struct mm_struct;
> extern void spu_flush_all_slbs(struct mm_struct *mm);
>
> +/* This interface allows a profiler (e.g., OProfile) to store a ref
> + * to spu context information that it creates. This caching technique
> + * avoids the need to recreate this information after a save/restore operation.
> + *
> + * Assumes the caller has already incremented the ref count to
> + * profile_info; then spu_context_destroy must call kref_put
> + * on prof_info_kref.
> + */
> +void spu_set_profile_private_kref(struct spu_context * ctx,
> + struct kref * prof_info_kref,
> + void (* prof_info_release) (struct kref * kref));
> +
> +void * spu_get_profile_private_kref(struct spu_context * ctx);
> +
> /* system callbacks from the SPU */
> struct spu_syscall_block {
> u64 nr_ret;
> Index: linux-2.6/include/linux/oprofile.h
> ===================================================================
> --- linux-2.6.orig/include/linux/oprofile.h
> +++ linux-2.6/include/linux/oprofile.h
> @@ -17,6 +17,26 @@
> #include <linux/spinlock.h>
> #include <asm/atomic.h>
>
> +/* Each escaped entry is prefixed by ESCAPE_CODE
> + * then one of the following codes, then the
> + * relevant data.
> + * These #defines live in this file so that arch-specific
> + * buffer sync'ing code can access them.
> + */
> +#define ESCAPE_CODE ~0UL
> +#define CTX_SWITCH_CODE 1
> +#define CPU_SWITCH_CODE 2
> +#define COOKIE_SWITCH_CODE 3
> +#define KERNEL_ENTER_SWITCH_CODE 4
> +#define KERNEL_EXIT_SWITCH_CODE 5
> +#define MODULE_LOADED_CODE 6
> +#define CTX_TGID_CODE 7
> +#define TRACE_BEGIN_CODE 8
> +#define TRACE_END_CODE 9
> +#define XEN_ENTER_SWITCH_CODE 10
> +#define SPU_PROFILING_CODE 11
> +#define SPU_CTX_SWITCH_CODE 12
> +
> struct super_block;
> struct dentry;
> struct file_operations;
> @@ -35,6 +55,14 @@ struct oprofile_operations {
> int (*start)(void);
> /* Stop delivering interrupts. */
> void (*stop)(void);
> + /* Arch-specific buffer sync functions.
> + * Return value = 0: Success
> + * Return value = -1: Failure
> + * Return value = 1: Run generic sync function
> + */
> + int (*sync_start)(void);
> + int (*sync_stop)(void);
> +
> /* Initiate a stack backtrace. Optional. */
> void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
> /* CPU identification string. */
> @@ -56,6 +84,13 @@ int oprofile_arch_init(struct oprofile_o
> void oprofile_arch_exit(void);
>
> /**
> + * Add data to the event buffer.
> + * The data passed is free-form, but typically consists of
> + * file offsets, dcookies, context information, and ESCAPE codes.
> + */
> +void add_event_entry(unsigned long data);
> +
> +/**
> * Add a sample. This may be called from any context. Pass
> * smp_processor_id() as cpu.
> */
> Index: linux-2.6/arch/powerpc/kernel/time.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/kernel/time.c
> +++ linux-2.6/arch/powerpc/kernel/time.c
> @@ -122,6 +122,7 @@ extern struct timezone sys_tz;
> static long timezone_offset;
>
> unsigned long ppc_proc_freq;
> +EXPORT_SYMBOL(ppc_proc_freq);
> unsigned long ppc_tb_freq;
>
> static u64 tb_last_jiffy __cacheline_aligned_in_smp;
> Index: linux-2.6/arch/powerpc/platforms/cell/spufs/spufs.h
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/spufs.h
> +++ linux-2.6/arch/powerpc/platforms/cell/spufs/spufs.h
> @@ -80,9 +80,11 @@ struct spu_context {
>
> struct list_head gang_list;
> struct spu_gang *gang;
> + struct kref *prof_priv_kref;
> + void (* prof_priv_release) (struct kref *kref);
>
> /* scheduler fields */
> - struct list_head rq;
> + struct list_head rq;
> struct delayed_work sched_work;
> unsigned long sched_flags;
> unsigned long rt_priority;
> Index: linux-2.6/arch/powerpc/platforms/cell/spufs/context.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/context.c
> +++ linux-2.6/arch/powerpc/platforms/cell/spufs/context.c
> @@ -22,6 +22,7 @@
>
> #include <linux/fs.h>
> #include <linux/mm.h>
> +#include <linux/module.h>
> #include <linux/slab.h>
> #include <asm/spu.h>
> #include <asm/spu_csa.h>
> @@ -75,6 +76,8 @@ void destroy_spu_context(struct kref *kr
> spu_fini_csa(&ctx->csa);
> if (ctx->gang)
> spu_gang_remove_ctx(ctx->gang, ctx);
> + if (ctx->prof_priv_kref)
> + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
> BUG_ON(!list_empty(&ctx->rq));
> kfree(ctx);
> }
> @@ -162,3 +165,20 @@ void spu_acquire_saved(struct spu_contex
> if (ctx->state != SPU_STATE_SAVED)
> spu_deactivate(ctx);
> }
> +
> +void spu_set_profile_private_kref(struct spu_context * ctx,
> + struct kref * prof_info_kref,
> + void (* prof_info_release) (struct kref * kref))
> +{
> + ctx->prof_priv_kref = prof_info_kref;
> + ctx->prof_priv_release = prof_info_release;
> +}
> +EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
> +
> +void * spu_get_profile_private_kref(struct spu_context * ctx)
> +{
> + return ctx->prof_priv_kref;
> +}
> +EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
> +
> +
> Index: linux-2.6/include/linux/dcookies.h
> ===================================================================
> --- linux-2.6.orig/include/linux/dcookies.h
> +++ linux-2.6/include/linux/dcookies.h
> @@ -12,6 +12,7 @@
>
> #ifdef CONFIG_PROFILING
>
> +#include <linux/dcache.h>
> #include <linux/types.h>
>
> struct dcookie_user;
> Index: linux-2.6/include/linux/elf-em.h
> ===================================================================
> --- linux-2.6.orig/include/linux/elf-em.h
> +++ linux-2.6/include/linux/elf-em.h
> @@ -20,7 +20,8 @@
> #define EM_PARISC 15 /* HPPA */
> #define EM_SPARC32PLUS 18 /* Sun's "v8plus" */
> #define EM_PPC 20 /* PowerPC */
> -#define EM_PPC64 21 /* PowerPC64 */
> +#define EM_PPC64 21 /* PowerPC64 */
> +#define EM_SPU 23 /* Cell BE SPU */
> #define EM_SH 42 /* SuperH */
> #define EM_SPARCV9 43 /* SPARC v9 64-bit */
> #define EM_IA_64 50 /* HP/Intel IA-64 */
> Index: linux-2.6/arch/powerpc/oprofile/op_model_rs64.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/oprofile/op_model_rs64.c
> +++ linux-2.6/arch/powerpc/oprofile/op_model_rs64.c
> @@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_
>
> static int num_counters;
>
> -static void rs64_reg_setup(struct op_counter_config *ctr,
> +static int rs64_reg_setup(struct op_counter_config *ctr,
> struct op_system_config *sys,
> int num_ctrs)
> {
> @@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_cou
> reset_value[i] = 0x80000000UL - ctr[i].count;
>
> /* XXX setup user and kernel profiling */
> + return 0;
> }
>
> -static void rs64_cpu_setup(struct op_counter_config *ctr)
> +static int rs64_cpu_setup(struct op_counter_config *ctr)
> {
> unsigned int mmcr0;
>
> @@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_cou
> mfspr(SPRN_MMCR0));
> dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
> mfspr(SPRN_MMCR1));
> +
> + return 0;
> }
>
> -static void rs64_start(struct op_counter_config *ctr)
> +static int rs64_start(struct op_counter_config *ctr)
> {
> int i;
> unsigned int mmcr0;
> @@ -155,6 +158,7 @@ static void rs64_start(struct op_counter
> mtspr(SPRN_MMCR0, mmcr0);
>
> dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
> + return 0;
> }
>
> static void rs64_stop(void)
> Index: linux-2.6/arch/powerpc/oprofile/op_model_power4.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/oprofile/op_model_power4.c
> +++ linux-2.6/arch/powerpc/oprofile/op_model_power4.c
> @@ -30,7 +30,7 @@ static u32 mmcr0_val;
> static u64 mmcr1_val;
> static u64 mmcra_val;
>
> -static void power4_reg_setup(struct op_counter_config *ctr,
> +static int power4_reg_setup(struct op_counter_config *ctr,
> struct op_system_config *sys,
> int num_ctrs)
> {
> @@ -58,6 +58,8 @@ static void power4_reg_setup(struct op_c
> mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
> else
> mmcr0_val |= MMCR0_PROBLEM_DISABLE;
> +
> + return 0;
> }
>
> extern void ppc64_enable_pmcs(void);
> @@ -82,7 +84,7 @@ static inline int mmcra_must_set_sample(
> return 0;
> }
>
> -static void power4_cpu_setup(struct op_counter_config *ctr)
> +static int power4_cpu_setup(struct op_counter_config *ctr)
> {
> unsigned int mmcr0 = mmcr0_val;
> unsigned long mmcra = mmcra_val;
> @@ -109,9 +111,11 @@ static void power4_cpu_setup(struct op_c
> mfspr(SPRN_MMCR1));
> dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
> mfspr(SPRN_MMCRA));
> +
> + return 0;
> }
>
> -static void power4_start(struct op_counter_config *ctr)
> +static int power4_start(struct op_counter_config *ctr)
> {
> int i;
> unsigned int mmcr0;
> @@ -146,6 +150,7 @@ static void power4_start(struct op_count
> oprofile_running = 1;
>
> dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
> + return 0;
> }
>
> static void power4_stop(void)
>
> --
>
> _______________________________________________
> cbe-oss-dev mailing list
> cbe-oss-dev at ozlabs.org
> https://ozlabs.org/mailman/listinfo/cbe-oss-dev
More information about the cbe-oss-dev
mailing list