[PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10

Athira Rajeev atrajeev at linux.vnet.ibm.com
Mon Jul 12 19:31:41 AEST 2021



> On 08-Jul-2021, at 6:26 PM, Nicholas Piggin <npiggin at gmail.com> wrote:
> 
> Excerpts from Athira Rajeev's message of July 7, 2021 4:39 pm:
>> From: Athira Rajeev <atrajeev at linux.vnet.ibm.cm>
>> 
>> Power10 performance monitoring unit (PMU) driver uses performance
>> monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6)
>> for counting instructions and cycles. Event used for cycles is
>> PM_RUN_CYC and instructions is PM_RUN_INST_CMPL. But counting of these
>> events in wait state is controlled by the CC56RUN bit setting in
>> Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is not
>> set, PMC5/6 will not count when CTRL[RUN] is zero.
> 
> What's the acutal bug here, can you explain a bit more? I thought
> PM_RUN_CYC is supposed to be gated by the runlatch.
> 
> POWER9 code looks similar, it doesn't have the same problem?
> 
> Thanks,
> Nick

Hi Nick,

Thanks for the review.

In power9 and before, the default event used for counting "cycles" - PM_CYC (0x0001e) and for counting instruction - PM_INST_CMPL (0x00002) . 
These events, uses two programmable PMC to count cycles and instructions (this causes multiplexing for basic set of events supported by perf stat). 
And PM_CYC/PM_INST_CMPL both by default count irrespective of the run latch state.

So in power10, we decided to use PMC5 and PMC6 for counting instructions and cycles respectively. But PMC5 and PMC6 by default counts only when runlatch is enabled, this can cause issues in case of system wide profiling. So in order to make PMC5/6 behave as PM_CYC and PM_INST_CMPL, we are enabling MMCR0[CC56RUN]] bit.

Thanks
Athira
> 
>> 
>> Patch sets the CC56RUN bit in MMCR0 for power10 which makes PMC5 and
>> PMC6 count instructions and cycles regardless of the run bit. With this
>> change, these events are also now renamed to PM_CYC and PM_INST_CMPL
>> rather than PM_RUN_CYC and PM_RUN_INST_CMPL.
>> 
>> Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
>> Signed-off-by: Athira Rajeev <atrajeev at linux.vnet.ibm.cm>
>> Reviewed-by: Madhavan Srinivasan <maddy at linux.ibm.com>
>> ---
>> Notes on testing done for this change:
>> Tested this patch change with a kernel module that
>> turns off and turns on the runlatch. kernel module also
>> reads the counter values for PMC5 and PMC6 during the
>> period when runlatch is off.
>> - Started PMU counters via "perf stat" and loaded the
>>   test module.
>> - Checked the counter values captured from module during
>>   the runlatch off period.
>> - Verified that counters were frozen without the patch and
>>   with the patch, observed counters were incrementing.
>> 
>> arch/powerpc/perf/power10-events-list.h |  8 +++---
>> arch/powerpc/perf/power10-pmu.c         | 44 +++++++++++++++++++++++----------
>> 2 files changed, 35 insertions(+), 17 deletions(-)
>> 
>> diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
>> index 93be719..564f1409 100644
>> --- a/arch/powerpc/perf/power10-events-list.h
>> +++ b/arch/powerpc/perf/power10-events-list.h
>> @@ -9,10 +9,10 @@
>> /*
>>  * Power10 event codes.
>>  */
>> -EVENT(PM_RUN_CYC,				0x600f4);
>> +EVENT(PM_CYC,				0x600f4);
>> EVENT(PM_DISP_STALL_CYC,			0x100f8);
>> EVENT(PM_EXEC_STALL,				0x30008);
>> -EVENT(PM_RUN_INST_CMPL,				0x500fa);
>> +EVENT(PM_INST_CMPL,				0x500fa);
>> EVENT(PM_BR_CMPL,                               0x4d05e);
>> EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
>> EVENT(PM_BR_FIN,				0x2f04a);
>> @@ -50,8 +50,8 @@
>> /* ITLB Reloaded */
>> EVENT(PM_ITLB_MISS,				0x400fc);
>> 
>> -EVENT(PM_RUN_CYC_ALT,				0x0001e);
>> -EVENT(PM_RUN_INST_CMPL_ALT,			0x00002);
>> +EVENT(PM_CYC_ALT,				0x0001e);
>> +EVENT(PM_INST_CMPL_ALT,				0x00002);
>> 
>> /*
>>  * Memory Access Events
>> diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
>> index f9d64c6..9dd75f3 100644
>> --- a/arch/powerpc/perf/power10-pmu.c
>> +++ b/arch/powerpc/perf/power10-pmu.c
>> @@ -91,8 +91,8 @@
>> 
>> /* Table of alternatives, sorted by column 0 */
>> static const unsigned int power10_event_alternatives[][MAX_ALT] = {
>> -	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
>> -	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
>> +	{ PM_CYC_ALT,			PM_CYC },
>> +	{ PM_INST_CMPL_ALT,		PM_INST_CMPL },
>> };
>> 
>> static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>> @@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> 	return 0;
>> }
>> 
>> -GENERIC_EVENT_ATTR(cpu-cycles,			PM_RUN_CYC);
>> -GENERIC_EVENT_ATTR(instructions,		PM_RUN_INST_CMPL);
>> +GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
>> +GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
>> GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
>> GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
>> GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
>> @@ -148,8 +148,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
>> 
>> static struct attribute *power10_events_attr_dd1[] = {
>> -	GENERIC_EVENT_PTR(PM_RUN_CYC),
>> -	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
>> +	GENERIC_EVENT_PTR(PM_CYC),
>> +	GENERIC_EVENT_PTR(PM_INST_CMPL),
>> 	GENERIC_EVENT_PTR(PM_BR_CMPL),
>> 	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
>> 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
>> @@ -173,8 +173,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> };
>> 
>> static struct attribute *power10_events_attr[] = {
>> -	GENERIC_EVENT_PTR(PM_RUN_CYC),
>> -	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
>> +	GENERIC_EVENT_PTR(PM_CYC),
>> +	GENERIC_EVENT_PTR(PM_INST_CMPL),
>> 	GENERIC_EVENT_PTR(PM_BR_FIN),
>> 	GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
>> 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
>> @@ -271,8 +271,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> };
>> 
>> static int power10_generic_events_dd1[] = {
>> -	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
>> -	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
>> +	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
>> +	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
>> 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
>> 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
>> 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
>> @@ -280,8 +280,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> };
>> 
>> static int power10_generic_events[] = {
>> -	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
>> -	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
>> +	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
>> +	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
>> 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_FIN,
>> 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_MPRED_BR_FIN,
>> 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
>> @@ -548,6 +548,24 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>> 
>> #undef C
>> 
>> +/*
>> + * Set the MMCR0[CC56RUN] bit to enable counting for
>> + * PMC5 and PMC6 regardless of the state of CTRL[RUN],
>> + * so that we can use counters 5 and 6 as PM_INST_CMPL and
>> + * PM_CYC.
>> + */
>> +static int power10_compute_mmcr(u64 event[], int n_ev,
>> +				unsigned int hwc[], struct mmcr_regs *mmcr,
>> +				struct perf_event *pevents[], u32 flags)
>> +{
>> +	int ret;
>> +
>> +	ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
>> +	if (!ret)
>> +		mmcr->mmcr0 |= MMCR0_C56RUN;
>> +	return ret;
>> +}
>> +
>> static struct power_pmu power10_pmu = {
>> 	.name			= "POWER10",
>> 	.n_counter		= MAX_PMU_COUNTERS,
>> @@ -555,7 +573,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>> 	.test_adder		= ISA207_TEST_ADDER,
>> 	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
>> 	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
>> -	.compute_mmcr		= isa207_compute_mmcr,
>> +	.compute_mmcr		= power10_compute_mmcr,
>> 	.config_bhrb		= power10_config_bhrb,
>> 	.bhrb_filter_map	= power10_bhrb_filter_map,
>> 	.get_constraint		= isa207_get_constraint,
>> -- 
>> 1.8.3.1
>> 
>> 



More information about the Linuxppc-dev mailing list