[PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
Nicholas Piggin
npiggin at gmail.com
Thu Jul 8 22:56:57 AEST 2021
Excerpts from Athira Rajeev's message of July 7, 2021 4:39 pm:
> From: Athira Rajeev <atrajeev at linux.vnet.ibm.cm>
>
> Power10 performance monitoring unit (PMU) driver uses performance
> monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6)
> for counting instructions and cycles. Event used for cycles is
> PM_RUN_CYC and instructions is PM_RUN_INST_CMPL. But counting of these
> events in wait state is controlled by the CC56RUN bit setting in
> Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is not
> set, PMC5/6 will not count when CTRL[RUN] is zero.
What's the acutal bug here, can you explain a bit more? I thought
PM_RUN_CYC is supposed to be gated by the runlatch.
POWER9 code looks similar, it doesn't have the same problem?
Thanks,
Nick
>
> Patch sets the CC56RUN bit in MMCR0 for power10 which makes PMC5 and
> PMC6 count instructions and cycles regardless of the run bit. With this
> change, these events are also now renamed to PM_CYC and PM_INST_CMPL
> rather than PM_RUN_CYC and PM_RUN_INST_CMPL.
>
> Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
> Signed-off-by: Athira Rajeev <atrajeev at linux.vnet.ibm.cm>
> Reviewed-by: Madhavan Srinivasan <maddy at linux.ibm.com>
> ---
> Notes on testing done for this change:
> Tested this patch change with a kernel module that
> turns off and turns on the runlatch. kernel module also
> reads the counter values for PMC5 and PMC6 during the
> period when runlatch is off.
> - Started PMU counters via "perf stat" and loaded the
> test module.
> - Checked the counter values captured from module during
> the runlatch off period.
> - Verified that counters were frozen without the patch and
> with the patch, observed counters were incrementing.
>
> arch/powerpc/perf/power10-events-list.h | 8 +++---
> arch/powerpc/perf/power10-pmu.c | 44 +++++++++++++++++++++++----------
> 2 files changed, 35 insertions(+), 17 deletions(-)
>
> diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
> index 93be719..564f1409 100644
> --- a/arch/powerpc/perf/power10-events-list.h
> +++ b/arch/powerpc/perf/power10-events-list.h
> @@ -9,10 +9,10 @@
> /*
> * Power10 event codes.
> */
> -EVENT(PM_RUN_CYC, 0x600f4);
> +EVENT(PM_CYC, 0x600f4);
> EVENT(PM_DISP_STALL_CYC, 0x100f8);
> EVENT(PM_EXEC_STALL, 0x30008);
> -EVENT(PM_RUN_INST_CMPL, 0x500fa);
> +EVENT(PM_INST_CMPL, 0x500fa);
> EVENT(PM_BR_CMPL, 0x4d05e);
> EVENT(PM_BR_MPRED_CMPL, 0x400f6);
> EVENT(PM_BR_FIN, 0x2f04a);
> @@ -50,8 +50,8 @@
> /* ITLB Reloaded */
> EVENT(PM_ITLB_MISS, 0x400fc);
>
> -EVENT(PM_RUN_CYC_ALT, 0x0001e);
> -EVENT(PM_RUN_INST_CMPL_ALT, 0x00002);
> +EVENT(PM_CYC_ALT, 0x0001e);
> +EVENT(PM_INST_CMPL_ALT, 0x00002);
>
> /*
> * Memory Access Events
> diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
> index f9d64c6..9dd75f3 100644
> --- a/arch/powerpc/perf/power10-pmu.c
> +++ b/arch/powerpc/perf/power10-pmu.c
> @@ -91,8 +91,8 @@
>
> /* Table of alternatives, sorted by column 0 */
> static const unsigned int power10_event_alternatives[][MAX_ALT] = {
> - { PM_RUN_CYC_ALT, PM_RUN_CYC },
> - { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
> + { PM_CYC_ALT, PM_CYC },
> + { PM_INST_CMPL_ALT, PM_INST_CMPL },
> };
>
> static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
> @@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev)
> return 0;
> }
>
> -GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC);
> -GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL);
> +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
> +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
> GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL);
> GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
> GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
> @@ -148,8 +148,8 @@ static int power10_check_attr_config(struct perf_event *ev)
> CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
>
> static struct attribute *power10_events_attr_dd1[] = {
> - GENERIC_EVENT_PTR(PM_RUN_CYC),
> - GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
> + GENERIC_EVENT_PTR(PM_CYC),
> + GENERIC_EVENT_PTR(PM_INST_CMPL),
> GENERIC_EVENT_PTR(PM_BR_CMPL),
> GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
> GENERIC_EVENT_PTR(PM_LD_REF_L1),
> @@ -173,8 +173,8 @@ static int power10_check_attr_config(struct perf_event *ev)
> };
>
> static struct attribute *power10_events_attr[] = {
> - GENERIC_EVENT_PTR(PM_RUN_CYC),
> - GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
> + GENERIC_EVENT_PTR(PM_CYC),
> + GENERIC_EVENT_PTR(PM_INST_CMPL),
> GENERIC_EVENT_PTR(PM_BR_FIN),
> GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
> GENERIC_EVENT_PTR(PM_LD_REF_L1),
> @@ -271,8 +271,8 @@ static int power10_check_attr_config(struct perf_event *ev)
> };
>
> static int power10_generic_events_dd1[] = {
> - [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC,
> - [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL,
> + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
> + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
> [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL,
> [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
> [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
> @@ -280,8 +280,8 @@ static int power10_check_attr_config(struct perf_event *ev)
> };
>
> static int power10_generic_events[] = {
> - [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC,
> - [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL,
> + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
> + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
> [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN,
> [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN,
> [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
> @@ -548,6 +548,24 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>
> #undef C
>
> +/*
> + * Set the MMCR0[CC56RUN] bit to enable counting for
> + * PMC5 and PMC6 regardless of the state of CTRL[RUN],
> + * so that we can use counters 5 and 6 as PM_INST_CMPL and
> + * PM_CYC.
> + */
> +static int power10_compute_mmcr(u64 event[], int n_ev,
> + unsigned int hwc[], struct mmcr_regs *mmcr,
> + struct perf_event *pevents[], u32 flags)
> +{
> + int ret;
> +
> + ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
> + if (!ret)
> + mmcr->mmcr0 |= MMCR0_C56RUN;
> + return ret;
> +}
> +
> static struct power_pmu power10_pmu = {
> .name = "POWER10",
> .n_counter = MAX_PMU_COUNTERS,
> @@ -555,7 +573,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
> .test_adder = ISA207_TEST_ADDER,
> .group_constraint_mask = CNST_CACHE_PMC4_MASK,
> .group_constraint_val = CNST_CACHE_PMC4_VAL,
> - .compute_mmcr = isa207_compute_mmcr,
> + .compute_mmcr = power10_compute_mmcr,
> .config_bhrb = power10_config_bhrb,
> .bhrb_filter_map = power10_bhrb_filter_map,
> .get_constraint = isa207_get_constraint,
> --
> 1.8.3.1
>
>
More information about the Linuxppc-dev
mailing list