[Cbe-oss-dev] [PATCH] Cell OProfile: SPU mutex lock fix
Arnd Bergmann
arnd at arndb.de
Wed Apr 2 16:21:15 EST 2008
On Tuesday 25 March 2008, Carl Love wrote:
> This patch fixes a bug in the code that records the SPU data and
> context switches. The buffer_mutex lock must be held when the
> kernel is adding data to the buffer between the kernel and the
> OProfile daemon. The lock is not being held in the current code
> base. This patch fixes the bug using work queues. The data to
> be passed to the daemon is caputured by the interrupt handler.
> The workqueue function is invoked to grab the buffer_mutex lock
> and add the data to the buffer.
So what was the exact bug you're fixing with this? There was no
buffer_mutex before, so why do you need it now? Can't this be a
spinlock so you can get it from interrupt context instead of
using a workqueue?
> Index: linux-2.6.25-rc4/arch/powerpc/oprofile/cell/spu_profiler.c
> ===================================================================
> --- linux-2.6.25-rc4.orig/arch/powerpc/oprofile/cell/spu_profiler.c
> +++ linux-2.6.25-rc4/arch/powerpc/oprofile/cell/spu_profiler.c
> @@ -16,6 +16,7 @@
> #include <linux/smp.h>
> #include <linux/slab.h>
> #include <asm/cell-pmu.h>
> +#include <linux/workqueue.h>
> #include "pr_util.h"
>
Please keep #include statements in alphabetical order, with all linux/ files
before the asm/ files.
> #define TRACE_ARRAY_SIZE 1024
> @@ -32,9 +33,19 @@ static unsigned int profiling_interval;
>
> #define SPU_PC_MASK 0xFFFF
>
> +/* The generic OProfile code uses the buffer_mutex to protect the buffer
> + * between the kernel and the daemon. The SPU code needs to use the buffer
> + * to ensure that the kernel SPU writes complete as a single block before
> + * being consumed by the daemon.
> + */
> +extern struct mutex buffer_mutex;
> +
> static DEFINE_SPINLOCK(sample_array_lock);
> unsigned long sample_array_lock_flags;
>
> +struct work_struct spu_record_wq;
> +extern struct workqueue_struct *oprofile_spu_wq;
> +
> void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
> {
> unsigned long ns_per_cyc;
Never put extern statements in the implementation, they describe the
interface between two parts of the code and should be inside of a
common header.
Why do you want to have your own workqueue instead of using the
global one?
> @@ -123,14 +134,14 @@ static int cell_spu_pc_collection(int cp
> return entry;
> }
>
> -
> -static enum hrtimer_restart profile_spus(struct hrtimer *timer)
> -{
> - ktime_t kt;
> +static void profile_spus_record_samples (struct work_struct *ws) {
> + /* This routine is called via schedule_work() to record the
> + * spu data. It must be run in a normal kernel mode to
> + * grab the OProfile mutex lock.
> + */
> int cpu, node, k, num_samples, spu_num;
>
> - if (!spu_prof_running)
> - goto stop;
> + mutex_lock(&buffer_mutex);
>
> for_each_online_cpu(cpu) {
> if (cbe_get_hw_thread_id(cpu))
> @@ -170,6 +181,20 @@ static enum hrtimer_restart profile_spus
> smp_wmb(); /* insure spu event buffer updates are written */
> /* don't want events intermingled... */
>
> + mutex_unlock(&buffer_mutex);
> +}
> +
> +static enum hrtimer_restart profile_spus(struct hrtimer *timer)
> +{
> + ktime_t kt;
> +
> +
> + if (!spu_prof_running)
> + goto stop;
> +
> + /* schedule the funtion to record the data */
> + schedule_work(&spu_record_wq);
> +
> kt = ktime_set(0, profiling_interval);
> if (!spu_prof_running)
> goto stop;
This looks like you want to use a delayed_work rather than building your
own out of hrtimer and work. Is there any point why you want to use
an hrtimer?
> -static DEFINE_SPINLOCK(buffer_lock);
> +extern struct mutex buffer_mutex;
> +extern struct workqueue_struct *oprofile_spu_wq;
> +extern int calls_to_record_switch;
> +
Again, public interfaces need to go to a header file, and should
have a name that identifies the interface. "buffer_mutex" is
certainly not a suitable name for a kernel-wide global variable!
> static DEFINE_SPINLOCK(cache_lock);
> static int num_spu_nodes;
> +
> int spu_prof_num_nodes;
> int last_guard_val[MAX_NUMNODES * 8];
> +int cnt_swtch_processed_flag[MAX_NUMNODES * 8];
> +
> +struct spus_profiling_code_data_s {
> + int num_spu_nodes;
> + struct work_struct spu_prof_code_wq;
> +} spus_profiling_code_data;
> +
> +struct spu_context_switch_data_s {
> + struct spu *spu;
> + unsigned long spu_cookie;
> + unsigned long app_dcookie;
> + unsigned int offset;
> + unsigned long objectId;
> + int valid_entry;
> +} spu_context_switch_data;
I don't understand what these variables are really doing, but
having e.g. just one spu_context_switch_data for all the SPUs
doesn't seem to make much sense. What happens when two SPUs do
a context switch at the same time?
> +int calls_to_record_switch = 0;
> +int record_spu_start_flag = 0;
> +
> +struct spus_cntxt_sw_data_s {
> + int num_spu_nodes;
> + struct spu_context_switch_data_s spu_data[MAX_NUMNODES * 8];
> + struct work_struct spu_cntxt_work;
> +} spus_cntxt_sw_data;
Something is very wrong if you need so many global variables!
> /* Container for caching information about an active SPU task. */
> struct cached_info {
> @@ -44,6 +73,8 @@ struct cached_info {
> struct kref cache_ref;
> };
>
> +struct workqueue_struct *oprofile_spu_wq;
> +
> static struct cached_info *spu_info[MAX_NUMNODES * 8];
While you're cleaning this up, I guess the cached_info should
be moved into a pointer from struct spu as well, instead of
having this global variable here.
> @@ -375,16 +457,30 @@ int spu_sync_start(void)
> int k;
> int ret = SKIP_GENERIC_SYNC;
> int register_ret;
> - unsigned long flags = 0;
>
> spu_prof_num_nodes = number_of_online_nodes();
> num_spu_nodes = spu_prof_num_nodes * 8;
>
> - spin_lock_irqsave(&buffer_lock, flags);
> - add_event_entry(ESCAPE_CODE);
> - add_event_entry(SPU_PROFILING_CODE);
> - add_event_entry(num_spu_nodes);
> - spin_unlock_irqrestore(&buffer_lock, flags);
> + /* create private work queue, execution of work is time critical */
> + oprofile_spu_wq = create_workqueue("spu_oprofile");
> +
> + /* due to a race when the spu is already running stuff, need to
> + * set a flag to tell the spu context switch to record the start
> + * before recording the context switches.
> + */
> + record_spu_start_flag = 1;
> +
> + spus_profiling_code_data.num_spu_nodes = num_spu_nodes;
> +
> + /* setup work queue functiion for recording context switch info */
> + spus_cntxt_sw_data.num_spu_nodes = num_spu_nodes;
> + for (k = 0; k<(MAX_NUMNODES * 8); k++) {
> + spus_cntxt_sw_data.spu_data[k].valid_entry = 0;
> + cnt_swtch_processed_flag[k] = 0;
> + }
> +
> + INIT_WORK(&spus_cntxt_sw_data.spu_cntxt_work,
> + record_spu_process_switch);
I would guess that you need one work struct per SPU instead of a global
one, if you want to pass the SPU pointer as an argument.
Arnd <><
More information about the cbe-oss-dev
mailing list