[PATCH] powernv: Add OPAL tracepoints
Paul E. McKenney
paulmck@linux.vnet.ibm.com
Thu Jul 10 05:42:10 EST 2014
On Thu, Jul 03, 2014 at 05:20:50PM +1000, Anton Blanchard wrote:
> Knowing how long we spend in firmware calls is an important part of
> minimising OS jitter.
>
> This patch adds tracepoints to each OPAL call. If tracepoints are
> enabled we branch out to a common routine that calls an entry and exit
> tracepoint.
>
> This allows us to write tools that monitor the frequency and duration
> of OPAL calls, eg:
>
> name                   count  total(ms)  min(ms)  max(ms)  avg(ms)  period(ms)
> OPAL_HANDLE_INTERRUPT      5      0.199    0.037    0.042    0.040   12547.545
> OPAL_POLL_EVENTS         204      2.590    0.012    0.036    0.013    2264.899
> OPAL_PCI_MSI_EOI        2830      3.066    0.001    0.005    0.001      81.166
>
> We use jump labels if configured, which means we only add a single
> nop instruction to every OPAL call when the tracepoints are disabled.
>
> Signed-off-by: Anton Blanchard <anton@samba.org>
That is what I call invoking tracepoints the hard way -- from assembly!
Just one question -- can these tracepoints be invoked from the idle
loop? If so, you need to use the _rcuidle suffix, for example, as
in trace_opal_entry_rcuidle(). If not:
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
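
If the idle-loop case does apply, the change is small. Here is a minimal,
hypothetical sketch of the entry wrapper from opal-tracepoints.c below, using
the _rcuidle form that the tracepoint machinery generates alongside
trace_opal_entry(); the per-CPU recursion guard from the patch is elided for
brevity:

/* Hypothetical idle-safe variant: RCU is not watching in the idle
 * loop, so the _rcuidle tracepoint entry point must be used there. */
#include <linux/irqflags.h>
#include <asm/trace.h>

void __trace_opal_entry(unsigned long opcode, unsigned long *args)
{
	unsigned long flags;

	local_irq_save(flags);
	trace_opal_entry_rcuidle(opcode, args);
	local_irq_restore(flags);
}
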
> ---
>
> Index: b/arch/powerpc/include/asm/trace.h
> ===================================================================
> --- a/arch/powerpc/include/asm/trace.h
> +++ b/arch/powerpc/include/asm/trace.h
> @@ -99,6 +99,51 @@ TRACE_EVENT_FN(hcall_exit,
> );
> #endif
>
> +#ifdef CONFIG_PPC_POWERNV
> +extern void opal_tracepoint_regfunc(void);
> +extern void opal_tracepoint_unregfunc(void);
> +
> +TRACE_EVENT_FN(opal_entry,
> +
> + TP_PROTO(unsigned long opcode, unsigned long *args),
> +
> + TP_ARGS(opcode, args),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, opcode)
> + ),
> +
> + TP_fast_assign(
> + __entry->opcode = opcode;
> + ),
> +
> + TP_printk("opcode=%lu", __entry->opcode),
> +
> + opal_tracepoint_regfunc, opal_tracepoint_unregfunc
> +);
> +
> +TRACE_EVENT_FN(opal_exit,
> +
> + TP_PROTO(unsigned long opcode, unsigned long retval),
> +
> + TP_ARGS(opcode, retval),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, opcode)
> + __field(unsigned long, retval)
> + ),
> +
> + TP_fast_assign(
> + __entry->opcode = opcode;
> + __entry->retval = retval;
> + ),
> +
> + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
> +
> + opal_tracepoint_regfunc, opal_tracepoint_unregfunc
> +);
> +#endif
> +
> #endif /* _TRACE_POWERPC_H */
>
> #undef TRACE_INCLUDE_PATH
> Index: b/arch/powerpc/platforms/powernv/Makefile
> ===================================================================
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -8,3 +8,4 @@ obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o
> obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o
> obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
> obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
> +obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
> Index: b/arch/powerpc/platforms/powernv/opal-wrappers.S
> ===================================================================
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -13,30 +13,69 @@
> #include <asm/hvcall.h>
> #include <asm/asm-offsets.h>
> #include <asm/opal.h>
> +#include <asm/jump_label.h>
> +
> + .section ".text"
> +
> +#ifdef CONFIG_TRACEPOINTS
> +#ifdef CONFIG_JUMP_LABEL
> +#define OPAL_BRANCH(LABEL) \
> + ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
> +#else
> +
> + .section ".toc","aw"
> +
> + .globl opal_tracepoint_refcount
> +opal_tracepoint_refcount:
> + .llong 0
> +
> + .section ".text"
> +
> +/*
> + * We branch around this in early init by using an unconditional cpu
> + * feature.
> + */
> +#define OPAL_BRANCH(LABEL) \
> +BEGIN_FTR_SECTION; \
> + b 1f; \
> +END_FTR_SECTION(0, 1); \
> + ld r12,opal_tracepoint_refcount@toc(r2); \
> + std r12,32(r1); \
> + cmpdi r12,0; \
> + bne- LABEL; \
> +1:
> +
> +#endif
> +
> +#else
> +#define OPAL_BRANCH(LABEL)
> +#endif
>
> /* TODO:
> *
> * - Trace irqs in/off (needs saving/restoring all args, argh...)
> * - Get r11 feed up by Dave so I can have better register usage
> */
> +
> #define OPAL_CALL(name, token) \
> _GLOBAL(name); \
> mflr r0; \
> - mfcr r12; \
> std r0,16(r1); \
> + li r0,token; \
> + OPAL_BRANCH(opal_tracepoint_entry) \
> + mfcr r12; \
> stw r12,8(r1); \
> std r1,PACAR1(r13); \
> - li r0,0; \
> + li r11,0; \
> mfmsr r12; \
> - ori r0,r0,MSR_EE; \
> + ori r11,r11,MSR_EE; \
> std r12,PACASAVEDMSR(r13); \
> - andc r12,r12,r0; \
> + andc r12,r12,r11; \
> mtmsrd r12,1; \
> - LOAD_REG_ADDR(r0,opal_return); \
> - mtlr r0; \
> - li r0,MSR_DR|MSR_IR|MSR_LE;\
> - andc r12,r12,r0; \
> - li r0,token; \
> + LOAD_REG_ADDR(r11,opal_return); \
> + mtlr r11; \
> + li r11,MSR_DR|MSR_IR|MSR_LE;\
> + andc r12,r12,r11; \
> mtspr SPRN_HSRR1,r12; \
> LOAD_REG_ADDR(r11,opal); \
> ld r12,8(r11); \
> @@ -61,6 +100,64 @@ opal_return:
> mtcr r4;
> rfid
>
> +#ifdef CONFIG_TRACEPOINTS
> +opal_tracepoint_entry:
> + stdu r1,-STACKFRAMESIZE(r1)
> + std r0,STK_REG(R23)(r1)
> + std r3,STK_REG(R24)(r1)
> + std r4,STK_REG(R25)(r1)
> + std r5,STK_REG(R26)(r1)
> + std r6,STK_REG(R27)(r1)
> + std r7,STK_REG(R28)(r1)
> + std r8,STK_REG(R29)(r1)
> + std r9,STK_REG(R30)(r1)
> + std r10,STK_REG(R31)(r1)
> + mr r3,r0
> + addi r4,r1,STK_REG(R24)
> + bl __trace_opal_entry
> + ld r0,STK_REG(R23)(r1)
> + ld r3,STK_REG(R24)(r1)
> + ld r4,STK_REG(R25)(r1)
> + ld r5,STK_REG(R26)(r1)
> + ld r6,STK_REG(R27)(r1)
> + ld r7,STK_REG(R28)(r1)
> + ld r8,STK_REG(R29)(r1)
> + ld r9,STK_REG(R30)(r1)
> + ld r10,STK_REG(R31)(r1)
> + LOAD_REG_ADDR(r11,opal_tracepoint_return)
> + mfcr r12
> + std r11,16(r1)
> + stw r12,8(r1)
> + std r1,PACAR1(r13)
> + li r11,0
> + mfmsr r12
> + ori r11,r11,MSR_EE
> + std r12,PACASAVEDMSR(r13)
> + andc r12,r12,r11
> + mtmsrd r12,1
> + LOAD_REG_ADDR(r11,opal_return)
> + mtlr r11
> + li r11,MSR_DR|MSR_IR|MSR_LE
> + andc r12,r12,r11
> + mtspr SPRN_HSRR1,r12
> + LOAD_REG_ADDR(r11,opal)
> + ld r12,8(r11)
> + ld r2,0(r11)
> + mtspr SPRN_HSRR0,r12
> + hrfid
> +
> +opal_tracepoint_return:
> + std r3,STK_REG(R31)(r1)
> + mr r4,r3
> + ld r0,STK_REG(R23)(r1)
> + bl __trace_opal_exit
> + ld r3,STK_REG(R31)(r1)
> + addi r1,r1,STACKFRAMESIZE
> + ld r0,16(r1)
> + mtlr r0
> + blr
> +#endif
> +
> OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
> OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
> OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
> Index: b/arch/powerpc/platforms/powernv/opal-tracepoints.c
> ===================================================================
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
> @@ -0,0 +1,84 @@
> +#include <linux/percpu.h>
> +#include <linux/jump_label.h>
> +#include <asm/trace.h>
> +
> +#ifdef CONFIG_JUMP_LABEL
> +struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
> +
> +void opal_tracepoint_regfunc(void)
> +{
> + static_key_slow_inc(&opal_tracepoint_key);
> +}
> +
> +void opal_tracepoint_unregfunc(void)
> +{
> + static_key_slow_dec(&opal_tracepoint_key);
> +}
> +#else
> +/*
> + * We optimise OPAL calls by placing opal_tracepoint_refcount
> + * directly in the TOC so we can check if the opal tracepoints are
> + * enabled via a single load.
> + */
> +
> +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
> +extern long opal_tracepoint_refcount;
> +
> +void opal_tracepoint_regfunc(void)
> +{
> + opal_tracepoint_refcount++;
> +}
> +
> +void opal_tracepoint_unregfunc(void)
> +{
> + opal_tracepoint_refcount--;
> +}
> +#endif
> +
> +/*
> + * Since the tracing code might execute OPAL calls we need to guard against
> + * recursion.
> + */
> +static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
> +
> +void __trace_opal_entry(unsigned long opcode, unsigned long *args)
> +{
> + unsigned long flags;
> + unsigned int *depth;
> +
> + local_irq_save(flags);
> +
> + depth = &__get_cpu_var(opal_trace_depth);
> +
> + if (*depth)
> + goto out;
> +
> + (*depth)++;
> + preempt_disable();
> + trace_opal_entry(opcode, args);
> + (*depth)--;
> +
> +out:
> + local_irq_restore(flags);
> +}
> +
> +void __trace_opal_exit(long opcode, unsigned long retval)
> +{
> + unsigned long flags;
> + unsigned int *depth;
> +
> + local_irq_save(flags);
> +
> + depth = &__get_cpu_var(opal_trace_depth);
> +
> + if (*depth)
> + goto out;
> +
> + (*depth)++;
> + trace_opal_exit(opcode, retval);
> + preempt_enable();
> + (*depth)--;
> +
> +out:
> + local_irq_restore(flags);
> +}
>
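
As a rough illustration of the kind of monitoring the changelog mentions, here
is a small userspace sketch that enables the two new events and streams them
from trace_pipe. The paths are assumptions: the events should show up under
events/powerpc/ because asm/trace.h uses the powerpc trace system, and tracefs
is assumed to be mounted at /sys/kernel/debug/tracing. A real tool would pair
each opal_entry with the following opal_exit on the same CPU and diff the
timestamps to produce the count/min/max/avg figures shown above.

#include <stdio.h>

static const char *tracing = "/sys/kernel/debug/tracing";

static int write_str(const char *file, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", tracing, file);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char path[256];
	char line[4096];
	FILE *fp;

	/* Enable both events (paths assume TRACE_SYSTEM powerpc). */
	if (write_str("events/powerpc/opal_entry/enable", "1") ||
	    write_str("events/powerpc/opal_exit/enable", "1")) {
		perror("enable");
		return 1;
	}

	snprintf(path, sizeof(path), "%s/trace_pipe", tracing);
	fp = fopen(path, "r");
	if (!fp) {
		perror("trace_pipe");
		return 1;
	}

	/* Each opal_entry/opal_exit pair brackets one firmware call. */
	while (fgets(line, sizeof(line), fp))
		fputs(line, stdout);

	fclose(fp);
	return 0;
}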