[PATCH] powernv: Add OPAL tracepoints

Paul E. McKenney paulmck at linux.vnet.ibm.com
Thu Jul 10 05:42:10 EST 2014


On Thu, Jul 03, 2014 at 05:20:50PM +1000, Anton Blanchard wrote:
> Knowing how long we spend in firmware calls is an important part of
> minimising OS jitter.
> 
> This patch adds tracepoints to each OPAL call. If tracepoints are
> enabled we branch out to a common routine that calls an entry and exit
> tracepoint.
> 
> This allows us to write tools that monitor the frequency and duration
> of OPAL calls, eg:
> 
> name                  count  total(ms)  min(ms)  max(ms)  avg(ms)  period(ms)
> OPAL_HANDLE_INTERRUPT     5      0.199    0.037    0.042    0.040   12547.545
> OPAL_POLL_EVENTS        204      2.590    0.012    0.036    0.013    2264.899
> OPAL_PCI_MSI_EOI       2830      3.066    0.001    0.005    0.001      81.166
> 
> We use jump labels if configured, which means we only add a single
> nop instruction to every OPAL call when the tracepoints are disabled.
> 
> Signed-off-by: Anton Blanchard <anton at samba.org>

That is what I call invoking tracepoints the hard way -- from assembly!
Just one question -- can these tracepoints be invoked from the idle
loop?  If so, you need to use the _rcuidle suffix, for example, as
in trace_opal_entry_rcuidle().  If not:

Acked-by: Paul E. McKenney <paulmck at linux.vnet.ibm.com>

> ---
> 
> Index: b/arch/powerpc/include/asm/trace.h
> ===================================================================
> --- a/arch/powerpc/include/asm/trace.h
> +++ b/arch/powerpc/include/asm/trace.h
> @@ -99,6 +99,51 @@ TRACE_EVENT_FN(hcall_exit,
>  );
>  #endif
> 
> +#ifdef CONFIG_PPC_POWERNV
> +extern void opal_tracepoint_regfunc(void);
> +extern void opal_tracepoint_unregfunc(void);
> +
> +TRACE_EVENT_FN(opal_entry,
> +
> +	TP_PROTO(unsigned long opcode, unsigned long *args),
> +
> +	TP_ARGS(opcode, args),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned long, opcode)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->opcode = opcode;
> +	),
> +
> +	TP_printk("opcode=%lu", __entry->opcode),
> +
> +	opal_tracepoint_regfunc, opal_tracepoint_unregfunc
> +);
> +
> +TRACE_EVENT_FN(opal_exit,
> +
> +	TP_PROTO(unsigned long opcode, unsigned long retval),
> +
> +	TP_ARGS(opcode, retval),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned long, opcode)
> +		__field(unsigned long, retval)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->opcode = opcode;
> +		__entry->retval = retval;
> +	),
> +
> +	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
> +
> +	opal_tracepoint_regfunc, opal_tracepoint_unregfunc
> +);
> +#endif
> +
>  #endif /* _TRACE_POWERPC_H */
> 
>  #undef TRACE_INCLUDE_PATH
> Index: b/arch/powerpc/platforms/powernv/Makefile
> ===================================================================
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -8,3 +8,4 @@ obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o
>  obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
>  obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
>  obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
> +obj-$(CONFIG_TRACEPOINTS)	+= opal-tracepoints.o
> Index: b/arch/powerpc/platforms/powernv/opal-wrappers.S
> ===================================================================
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -13,30 +13,69 @@
>  #include <asm/hvcall.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/opal.h>
> +#include <asm/jump_label.h>
> +
> +	.section	".text"
> +
> +#ifdef CONFIG_TRACEPOINTS
> +#ifdef CONFIG_JUMP_LABEL
> +#define OPAL_BRANCH(LABEL)					\
> +	ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
> +#else
> +
> +	.section	".toc","aw"
> +
> +	.globl opal_tracepoint_refcount
> +opal_tracepoint_refcount:
> +	.llong	0
> +
> +	.section	".text"
> +
> +/*
> + * We branch around this in early init by using an unconditional cpu
> + * feature.
> + */
> +#define OPAL_BRANCH(LABEL)					\
> +BEGIN_FTR_SECTION;						\
> +	b	1f;						\
> +END_FTR_SECTION(0, 1);						\
> +	ld	r12,opal_tracepoint_refcount at toc(r2);		\
> +	std	r12,32(r1);					\
> +	cmpdi	r12,0;						\
> +	bne-	LABEL;						\
> +1:
> +
> +#endif
> +
> +#else
> +#define OPAL_BRANCH(LABEL)
> +#endif
> 
>  /* TODO:
>   *
>   * - Trace irqs in/off (needs saving/restoring all args, argh...)
>   * - Get r11 feed up by Dave so I can have better register usage
>   */
> +
>  #define OPAL_CALL(name, token)		\
>   _GLOBAL(name);				\
>  	mflr	r0;			\
> -	mfcr	r12;			\
>  	std	r0,16(r1);		\
> +	li	r0,token;		\
> +	OPAL_BRANCH(opal_tracepoint_entry) \
> +	mfcr	r12;			\
>  	stw	r12,8(r1);		\
>  	std	r1,PACAR1(r13);		\
> -	li	r0,0;			\
> +	li	r11,0;			\
>  	mfmsr	r12;			\
> -	ori	r0,r0,MSR_EE;		\
> +	ori	r11,r11,MSR_EE;		\
>  	std	r12,PACASAVEDMSR(r13);	\
> -	andc	r12,r12,r0;		\
> +	andc	r12,r12,r11;		\
>  	mtmsrd	r12,1;			\
> -	LOAD_REG_ADDR(r0,opal_return);	\
> -	mtlr	r0;			\
> -	li	r0,MSR_DR|MSR_IR|MSR_LE;\
> -	andc	r12,r12,r0;		\
> -	li	r0,token;		\
> +	LOAD_REG_ADDR(r11,opal_return);	\
> +	mtlr	r11;			\
> +	li	r11,MSR_DR|MSR_IR|MSR_LE;\
> +	andc	r12,r12,r11;		\
>  	mtspr	SPRN_HSRR1,r12;		\
>  	LOAD_REG_ADDR(r11,opal);	\
>  	ld	r12,8(r11);		\
> @@ -61,6 +100,64 @@ opal_return:
>  	mtcr	r4;
>  	rfid
> 
> +#ifdef CONFIG_TRACEPOINTS
> +opal_tracepoint_entry:
> +	stdu	r1,-STACKFRAMESIZE(r1)
> +	std	r0,STK_REG(R23)(r1)
> +	std	r3,STK_REG(R24)(r1)
> +	std	r4,STK_REG(R25)(r1)
> +	std	r5,STK_REG(R26)(r1)
> +	std	r6,STK_REG(R27)(r1)
> +	std	r7,STK_REG(R28)(r1)
> +	std	r8,STK_REG(R29)(r1)
> +	std	r9,STK_REG(R30)(r1)
> +	std	r10,STK_REG(R31)(r1)
> +	mr	r3,r0
> +	addi	r4,r1,STK_REG(R24)
> +	bl	__trace_opal_entry
> +	ld	r0,STK_REG(R23)(r1)
> +	ld	r3,STK_REG(R24)(r1)
> +	ld	r4,STK_REG(R25)(r1)
> +	ld	r5,STK_REG(R26)(r1)
> +	ld	r6,STK_REG(R27)(r1)
> +	ld	r7,STK_REG(R28)(r1)
> +	ld	r8,STK_REG(R29)(r1)
> +	ld	r9,STK_REG(R30)(r1)
> +	ld	r10,STK_REG(R31)(r1)
> +	LOAD_REG_ADDR(r11,opal_tracepoint_return)
> +	mfcr	r12
> +	std	r11,16(r1)
> +	stw	r12,8(r1)
> +	std	r1,PACAR1(r13)
> +	li	r11,0
> +	mfmsr	r12
> +	ori	r11,r11,MSR_EE
> +	std	r12,PACASAVEDMSR(r13)
> +	andc	r12,r12,r11
> +	mtmsrd	r12,1
> +	LOAD_REG_ADDR(r11,opal_return)
> +	mtlr	r11
> +	li	r11,MSR_DR|MSR_IR|MSR_LE
> +	andc	r12,r12,r11
> +	mtspr	SPRN_HSRR1,r12
> +	LOAD_REG_ADDR(r11,opal)
> +	ld	r12,8(r11)
> +	ld	r2,0(r11)
> +	mtspr	SPRN_HSRR0,r12
> +	hrfid
> +
> +opal_tracepoint_return:
> +	std	r3,STK_REG(R31)(r1)
> +	mr	r4,r3
> +	ld	r0,STK_REG(R23)(r1)
> +	bl	__trace_opal_exit
> +	ld	r3,STK_REG(R31)(r1)
> +	addi	r1,r1,STACKFRAMESIZE
> +	ld	r0,16(r1)
> +	mtlr	r0
> +	blr
> +#endif
> +
>  OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
>  OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
>  OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
> Index: b/arch/powerpc/platforms/powernv/opal-tracepoints.c
> ===================================================================
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
> @@ -0,0 +1,84 @@
> +#include <linux/percpu.h>
> +#include <linux/jump_label.h>
> +#include <asm/trace.h>
> +
> +#ifdef CONFIG_JUMP_LABEL
> +struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
> +
> +void opal_tracepoint_regfunc(void)
> +{
> +	static_key_slow_inc(&opal_tracepoint_key);
> +}
> +
> +void opal_tracepoint_unregfunc(void)
> +{
> +	static_key_slow_dec(&opal_tracepoint_key);
> +}
> +#else
> +/*
> + * We optimise OPAL calls by placing opal_tracepoint_refcount
> + * directly in the TOC so we can check if the opal tracepoints are
> + * enabled via a single load.
> + */
> +
> +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
> +extern long opal_tracepoint_refcount;
> +
> +void opal_tracepoint_regfunc(void)
> +{
> +	opal_tracepoint_refcount++;
> +}
> +
> +void opal_tracepoint_unregfunc(void)
> +{
> +	opal_tracepoint_refcount--;
> +}
> +#endif
> +
> +/*
> + * Since the tracing code might execute OPAL calls we need to guard against
> + * recursion.
> + */
> +static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
> +
> +void __trace_opal_entry(unsigned long opcode, unsigned long *args)
> +{
> +	unsigned long flags;
> +	unsigned int *depth;
> +
> +	local_irq_save(flags);
> +
> +	depth = &__get_cpu_var(opal_trace_depth);
> +
> +	if (*depth)
> +		goto out;
> +
> +	(*depth)++;
> +	preempt_disable();
> +	trace_opal_entry(opcode, args);
> +	(*depth)--;
> +
> +out:
> +	local_irq_restore(flags);
> +}
> +
> +void __trace_opal_exit(long opcode, unsigned long retval)
> +{
> +	unsigned long flags;
> +	unsigned int *depth;
> +
> +	local_irq_save(flags);
> +
> +	depth = &__get_cpu_var(opal_trace_depth);
> +
> +	if (*depth)
> +		goto out;
> +
> +	(*depth)++;
> +	trace_opal_exit(opcode, retval);
> +	preempt_enable();
> +	(*depth)--;
> +
> +out:
> +	local_irq_restore(flags);
> +}
> 



More information about the Linuxppc-dev mailing list