[PATCH v2 04/19] powerpc/perf: move perf irq/nmi handling details into traps.c

Athira Rajeev atrajeev at linux.vnet.ibm.com
Tue Nov 24 04:54:03 AEDT 2020



> On 11-Nov-2020, at 3:13 PM, Nicholas Piggin <npiggin at gmail.com> wrote:
> 
> This is required in order to allow more significant differences between
> NMI type interrupt handlers and regular asynchronous handlers.
> 
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
> arch/powerpc/kernel/traps.c      | 31 +++++++++++++++++++++++++++-
> arch/powerpc/perf/core-book3s.c  | 35 ++------------------------------
> arch/powerpc/perf/core-fsl-emb.c | 25 -----------------------
> 3 files changed, 32 insertions(+), 59 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 902fcbd1a778..7dda72eb97cc 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -1919,11 +1919,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
> }
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> 
> -void performance_monitor_exception(struct pt_regs *regs)
> +static void performance_monitor_exception_nmi(struct pt_regs *regs)
> +{
> +	nmi_enter();
> +
> +	__this_cpu_inc(irq_stat.pmu_irqs);
> +
> +	perf_irq(regs);
> +
> +	nmi_exit();
> +}
> +
> +static void performance_monitor_exception_async(struct pt_regs *regs)
> {
> +	irq_enter();
> +
> 	__this_cpu_inc(irq_stat.pmu_irqs);
> 
> 	perf_irq(regs);
> +
> +	irq_exit();
> +}
> +
> +void performance_monitor_exception(struct pt_regs *regs)
> +{
> +	/*
> +	 * On 64-bit, if perf interrupts hit in a local_irq_disable
> +	 * (soft-masked) region, we consider them as NMIs. This is required to
> +	 * prevent hash faults on user addresses when reading callchains (and
> +	 * looks better from an irq tracing perspective).
> +	 */
> +	if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
> +		performance_monitor_exception_nmi(regs);
> +	else
> +		performance_monitor_exception_async(regs);
> }
> 
> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 08643cba1494..9fd8cae09218 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -109,10 +109,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
> {
> 	regs->result = 0;
> }
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> -	return 0;
> -}
> 
> static inline int siar_valid(struct pt_regs *regs)
> {
> @@ -328,15 +324,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
> 	regs->result = use_siar;
> }
> 
> -/*
> - * If interrupts were soft-disabled when a PMU interrupt occurs, treat
> - * it as an NMI.
> - */
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> -	return (regs->softe & IRQS_DISABLED);
> -}
> -

Hi Nick,

arch_irq_disabled_regs checks whether regs->softe has IRQS_DISABLED set.
core-book3s uses the same logic in perf_intr_is_nmi to decide whether the interrupt is an NMI.
With the changes in this patch, if I understood correctly, we will do the irq/nmi handling in
traps.c rather than in the PMI interrupt handler. But can you please help me better
understand what the perf weirdness (sometimes NMI, sometimes not) mentioned in the cover
letter is, and how this change fixes it?

Thanks
Athira

> /*
>  * On processors like P7+ that have the SIAR-Valid bit, marked instructions
>  * must be sampled only if the SIAR-valid bit is set.
> @@ -2224,7 +2211,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
> 	struct perf_event *event;
> 	unsigned long val[8];
> 	int found, active;
> -	int nmi;
> 
> 	if (cpuhw->n_limited)
> 		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
> @@ -2232,18 +2218,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
> 
> 	perf_read_regs(regs);
> 
> -	/*
> -	 * If perf interrupts hit in a local_irq_disable (soft-masked) region,
> -	 * we consider them as NMIs. This is required to prevent hash faults on
> -	 * user addresses when reading callchains. See the NMI test in
> -	 * do_hash_page.
> -	 */
> -	nmi = perf_intr_is_nmi(regs);
> -	if (nmi)
> -		nmi_enter();
> -	else
> -		irq_enter();
> -
> 	/* Read all the PMCs since we'll need them a bunch of times */
> 	for (i = 0; i < ppmu->n_counter; ++i)
> 		val[i] = read_pmc(i + 1);
> @@ -2289,8 +2263,8 @@ static void __perf_event_interrupt(struct pt_regs *regs)
> 			}
> 		}
> 	}
> -	if (!found && !nmi && printk_ratelimit())
> -		printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
> +	if (unlikely(!found) && !arch_irq_disabled_regs(regs))
> +		printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
> 
> 	/*
> 	 * Reset MMCR0 to its normal value.  This will set PMXE and
> @@ -2300,11 +2274,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
> 	 * we get back out of this interrupt.
> 	 */
> 	write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
> -
> -	if (nmi)
> -		nmi_exit();
> -	else
> -		irq_exit();
> }
> 
> static void perf_event_interrupt(struct pt_regs *regs)
> diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
> index e0e7e276bfd2..ee721f420a7b 100644
> --- a/arch/powerpc/perf/core-fsl-emb.c
> +++ b/arch/powerpc/perf/core-fsl-emb.c
> @@ -31,19 +31,6 @@ static atomic_t num_events;
> /* Used to avoid races in calling reserve/release_pmc_hardware */
> static DEFINE_MUTEX(pmc_reserve_mutex);
> 
> -/*
> - * If interrupts were soft-disabled when a PMU interrupt occurs, treat
> - * it as an NMI.
> - */
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> -#ifdef __powerpc64__
> -	return (regs->softe & IRQS_DISABLED);
> -#else
> -	return 0;
> -#endif
> -}
> -
> static void perf_event_interrupt(struct pt_regs *regs);
> 
> /*
> @@ -659,13 +646,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
> 	struct perf_event *event;
> 	unsigned long val;
> 	int found = 0;
> -	int nmi;
> -
> -	nmi = perf_intr_is_nmi(regs);
> -	if (nmi)
> -		nmi_enter();
> -	else
> -		irq_enter();
> 
> 	for (i = 0; i < ppmu->n_counter; ++i) {
> 		event = cpuhw->event[i];
> @@ -690,11 +670,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
> 	mtmsr(mfmsr() | MSR_PMM);
> 	mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
> 	isync();
> -
> -	if (nmi)
> -		nmi_exit();
> -	else
> -		irq_exit();
> }
> 
> void hw_perf_event_setup(int cpu)
> -- 
> 2.23.0
> 



More information about the Linuxppc-dev mailing list