perf events ring buffer memory barrier on powerpc
Peter Zijlstra
peterz at infradead.org
Wed Oct 30 21:42:46 EST 2013
On Tue, Oct 29, 2013 at 03:27:05PM -0400, Vince Weaver wrote:
> On Tue, 29 Oct 2013, Peter Zijlstra wrote:
>
> > On Tue, Oct 29, 2013 at 11:21:31AM +0100, Peter Zijlstra wrote:
> > --- linux-2.6.orig/include/uapi/linux/perf_event.h
> > +++ linux-2.6/include/uapi/linux/perf_event.h
> > @@ -479,13 +479,15 @@ struct perf_event_mmap_page {
> > /*
> > * Control data for the mmap() data buffer.
> > *
> > - * User-space reading the @data_head value should issue an rmb(), on
> > - * SMP capable platforms, after reading this value -- see
> > - * perf_event_wakeup().
> > + * User-space reading the @data_head value should issue an smp_rmb(),
> > + * after reading this value.
>
> so where's the patch fixing perf to use the new recommendations?
Fair enough, thanks for reminding me about that. See below.
> Is this purely a performance thing or a correctness change?
Correctness, although I suppose on most archs you'd be hard pushed to
notice.
> A change like this a bit of a pain, especially as userspace doesn't really
> have nice access to smb_mb() defines so a lot of cut-and-pasting will
> ensue for everyone who's trying to parse the mmap buffer.
Agreed; we should maybe push for a user visible asm/barrier.h or so.
---
Subject: perf, tool: Add required memory barriers
To match patch bf378d341e48 ("perf: Fix perf ring buffer memory
ordering") change userspace to also adhere to the ordering outlined.
Most barrier implementations were gleaned from
arch/*/include/asm/barrier.h and with the exception of metag I'm fairly
sure they're correct.
Cc: James Hogan <james.hogan at imgtec.com>
Signed-off-by: Peter Zijlstra <peterz at infradead.org>
---
tools/perf/perf.h | 39 +++++++++++++++++++++++++++++++++++++--
tools/perf/util/evlist.h | 2 +-
2 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index f61c230beec4..1b8a0a2a63d4 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -4,6 +4,8 @@
#include <asm/unistd.h>
#if defined(__i386__)
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
@@ -13,6 +15,8 @@
#endif
#if defined(__x86_64__)
+#define mb() asm volatile("mfence" ::: "memory")
+#define wmb() asm volatile("sfence" ::: "memory")
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
@@ -23,20 +27,28 @@
#ifdef __powerpc__
#include "../../arch/powerpc/include/uapi/asm/unistd.h"
+#define mb() asm volatile ("sync" ::: "memory")
+#define wmb() asm volatile ("sync" ::: "memory")
#define rmb() asm volatile ("sync" ::: "memory")
#define cpu_relax() asm volatile ("" ::: "memory");
#define CPUINFO_PROC "cpu"
#endif
#ifdef __s390__
+#define mb() asm volatile("bcr 15,0" ::: "memory")
+#define wmb() asm volatile("bcr 15,0" ::: "memory")
#define rmb() asm volatile("bcr 15,0" ::: "memory")
#define cpu_relax() asm volatile("" ::: "memory");
#endif
#ifdef __sh__
#if defined(__SH4A__) || defined(__SH5__)
+# define mb() asm volatile("synco" ::: "memory")
+# define wmb() asm volatile("synco" ::: "memory")
# define rmb() asm volatile("synco" ::: "memory")
#else
+# define mb() asm volatile("" ::: "memory")
+# define wmb() asm volatile("" ::: "memory")
# define rmb() asm volatile("" ::: "memory")
#endif
#define cpu_relax() asm volatile("" ::: "memory")
@@ -44,24 +56,38 @@
#endif
#ifdef __hppa__
+#define mb() asm volatile("" ::: "memory")
+#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define cpu_relax() asm volatile("" ::: "memory");
#define CPUINFO_PROC "cpu"
#endif
#ifdef __sparc__
+#ifdef __LP64__
+#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
+ "membar #StoreLoad\n" \
+ "1:\n"":::"memory")
+#else
+#define mb() asm volatile("":::"memory")
+#endif
+#define wmb() asm volatile("":::"memory")
#define rmb() asm volatile("":::"memory")
#define cpu_relax() asm volatile("":::"memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __alpha__
+#define mb() asm volatile("mb" ::: "memory")
+#define wmb() asm volatile("wmb" ::: "memory")
#define rmb() asm volatile("mb" ::: "memory")
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __ia64__
+#define mb() asm volatile ("mf" ::: "memory")
+#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#define CPUINFO_PROC "model name"
@@ -72,35 +98,44 @@
* Use the __kuser_memory_barrier helper in the CPU helper page. See
* arch/arm/kernel/entry-armv.S in the kernel source for details.
*/
+#define mb() ((void(*)(void))0xffff0fa0)()
+#define wmb() ((void(*)(void))0xffff0fa0)()
#define rmb() ((void(*)(void))0xffff0fa0)()
#define cpu_relax() asm volatile("":::"memory")
#define CPUINFO_PROC "Processor"
#endif
#ifdef __aarch64__
-#define rmb() asm volatile("dmb ld" ::: "memory")
+#define mb() asm volatile("dmb ish" ::: "memory")
+#define wmb() asm volatile("dmb ishld" ::: "memory")
+#define rmb() asm volatile("dmb ishst" ::: "memory")
#define cpu_relax() asm volatile("yield" ::: "memory")
#endif
#ifdef __mips__
-#define rmb() asm volatile( \
+#define mb() asm volatile( \
".set mips2\n\t" \
"sync\n\t" \
".set mips0" \
: /* no output */ \
: /* no input */ \
: "memory")
+#define wmb() mb()
+#define rmb() mb()
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __arc__
+#define mb() asm volatile("" ::: "memory")
+#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define cpu_relax() rmb()
#define CPUINFO_PROC "Processor"
#endif
#ifdef __metag__
+/* XXX no clue */
#define rmb() asm volatile("" ::: "memory")
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "CPU"
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 6e8acc9abe38..8ab1b5ae4a0e 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -189,7 +189,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md,
/*
* ensure all reads are done before we write the tail out.
*/
- /* mb(); */
+ mb();
pc->data_tail = tail;
}
More information about the Linuxppc-dev
mailing list