[PATCH v6 14/39] powerpc/perf: move perf irq/nmi handling details into traps.c
Athira Rajeev
atrajeev at linux.vnet.ibm.com
Tue Jan 19 21:24:34 AEDT 2021
> On 15-Jan-2021, at 10:19 PM, Nicholas Piggin <npiggin at gmail.com> wrote:
>
> This is required in order to allow more significant differences between
> NMI type interrupt handlers and regular asynchronous handlers.
>
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
> arch/powerpc/kernel/traps.c | 31 +++++++++++++++++++++++++++-
> arch/powerpc/perf/core-book3s.c | 35 ++------------------------------
> arch/powerpc/perf/core-fsl-emb.c | 25 -----------------------
> 3 files changed, 32 insertions(+), 59 deletions(-)
Hi Nick,
I reviewed this perf patch, which moves the nmi_enter/irq_enter handling into traps.c, and the code changes
for perf look fine to me. I then tried to test it by applying the whole patch series on top
of a 5.11.0-rc3 kernel, using the test scenario below.
The intention of the test case is to check whether perf NMI and asynchronous interrupts are
captured as expected. My test kernel module below triggers an overflow of one performance monitor
counter (PMC6) between local_irq_save/local_irq_restore.
[ Here interrupts are soft-disabled and regs->softe has IRQS_DISABLED set. ]
I expect the PMI to be taken as an NMI in this case. I also configure ftrace so that I
can confirm from the trace whether it arrives as an NMI or as a replayed interrupt.
Environment: one CPU online.
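The other CPUs can be taken offline via sysfs, e.g.:
# echo 0 > /sys/devices/system/cpu/cpu1/online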
Prerequisites for ftrace:
# cd /sys/kernel/debug/tracing
# echo 100 > buffer_percent
# echo 200000 > buffer_size_kb
# echo ppc-tb > trace_clock
# echo function > current_tracer
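After running the test, the trace buffer can be saved for inspection (the output file name here is just an example):
# cat trace > /tmp/trace.txt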
Relevant part of the sample kernel test module used to trigger a PMI between
local_irq_save and local_irq_restore:
<<>>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <asm/reg.h>

static ulong delay = 1;

/* Spin for the requested number of microseconds. */
static void busy_wait(ulong time)
{
	udelay(time);
}

static __always_inline void irq_test(void)
{
	unsigned long flags;

	local_irq_save(flags);
	trace_printk("IN IRQ TEST\n");
	/* Freeze all counters, then arm PMC6 to overflow after 100 events. */
	mtspr(SPRN_MMCR0, 0x80000000);
	mtspr(SPRN_PMC6, 0x80000000 - 100);
	/* Unfreeze and enable PMIs (PMXE | FCECE | PMCjCE). */
	mtspr(SPRN_MMCR0, 0x6004000);
	busy_wait(delay);
	trace_printk("IN IRQ TEST DONE\n");
	local_irq_restore(flags);
	/* Freeze counters again and clear PMC6. */
	mtspr(SPRN_MMCR0, 0x80000000);
	mtspr(SPRN_PMC6, 0);
}
<<>>
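The rest of the module is standard scaffolding along these lines (illustrative; the test is run once at module load):
<<>>
static int __init irq_test_init(void)
{
	/* Run the PMI test once when the module is loaded. */
	irq_test();
	return 0;
}

static void __exit irq_test_exit(void)
{
}

module_init(irq_test_init);
module_exit(irq_test_exit);
MODULE_LICENSE("GPL");
<<>>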
But this resulted in a soft lockup. A snippet of the call trace is below:
[ 883.900762] watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [swapper/0:0]
[ 883.901381] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G OE 5.11.0-rc3+ #34
--
[ 883.901999] NIP [c0000000000168d0] replay_soft_interrupts+0x70/0x2f0
[ 883.902032] LR [c00000000003b2b8] interrupt_exit_kernel_prepare+0x1e8/0x240
[ 883.902063] Call Trace:
[ 883.902085] [c000000001c96f50] [c00000000003b2b8] interrupt_exit_kernel_prepare+0x1e8/0x240 (unreliable)
[ 883.902139] [c000000001c96fb0] [c00000000000fd88] interrupt_return+0x158/0x200
[ 883.902185] --- interrupt: ea0 at __rb_reserve_next+0xc0/0x5b0
[ 883.902224] NIP: c0000000002d8980 LR: c0000000002d897c CTR: c0000000001aad90
[ 883.902262] REGS: c000000001c97020 TRAP: 0ea0 Tainted: G OE (5.11.0-rc3+)
[ 883.902301] MSR: 9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 28000484 XER: 20040000
[ 883.902387] CFAR: c00000000000fe00 IRQMASK: 0
--
[ 883.902757] NIP [c0000000002d8980] __rb_reserve_next+0xc0/0x5b0
[ 883.902786] LR [c0000000002d897c] __rb_reserve_next+0xbc/0x5b0
[ 883.902824] --- interrupt: ea0
[ 883.902848] [c000000001c97360] [c0000000002d8fcc] ring_buffer_lock_reserve+0x15c/0x580
[ 883.902894] [c000000001c973f0] [c0000000002e82fc] trace_function+0x4c/0x1c0
[ 883.902930] [c000000001c97440] [c0000000002f6f50] function_trace_call+0x140/0x190
[ 883.902976] [c000000001c97470] [c00000000007d6f8] ftrace_call+0x4/0x44
[ 883.903021] [c000000001c97660] [c000000000dcf70c] __do_softirq+0x15c/0x3d4
[ 883.903066] [c000000001c97750] [c00000000015fc68] irq_exit+0x198/0x1b0
[ 883.903102] [c000000001c97780] [c000000000dc1790] timer_interrupt+0x170/0x3b0
[ 883.903148] [c000000001c977e0] [c000000000016994] replay_soft_interrupts+0x134/0x2f0
[ 883.903193] [c000000001c979d0] [c00000000003b2b8] interrupt_exit_kernel_prepare+0x1e8/0x240
[ 883.903240] [c000000001c97a30] [c00000000000fd88] interrupt_return+0x158/0x200
[ 883.903276] --- interrupt: ea0 at arch_local_irq_restore+0x70/0xc0
Thanks
Athira
>
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 738370519937..bd55f201115b 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -1892,11 +1892,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
> }
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
>
> -void performance_monitor_exception(struct pt_regs *regs)
> +static void performance_monitor_exception_nmi(struct pt_regs *regs)
> +{
> + nmi_enter();
> +
> + __this_cpu_inc(irq_stat.pmu_irqs);
> +
> + perf_irq(regs);
> +
> + nmi_exit();
> +}
> +
> +static void performance_monitor_exception_async(struct pt_regs *regs)
> {
> + irq_enter();
> +
> __this_cpu_inc(irq_stat.pmu_irqs);
>
> perf_irq(regs);
> +
> + irq_exit();
> +}
> +
> +void performance_monitor_exception(struct pt_regs *regs)
> +{
> + /*
> + * On 64-bit, if perf interrupts hit in a local_irq_disable
> + * (soft-masked) region, we consider them as NMIs. This is required to
> + * prevent hash faults on user addresses when reading callchains (and
> + * looks better from an irq tracing perspective).
> + */
> + if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
> + performance_monitor_exception_nmi(regs);
> + else
> + performance_monitor_exception_async(regs);
> }
>
> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 28206b1fe172..9fd06010e8b6 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -110,10 +110,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
> {
> regs->result = 0;
> }
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> - return 0;
> -}
>
> static inline int siar_valid(struct pt_regs *regs)
> {
> @@ -353,15 +349,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
> regs->result = use_siar;
> }
>
> -/*
> - * If interrupts were soft-disabled when a PMU interrupt occurs, treat
> - * it as an NMI.
> - */
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> - return (regs->softe & IRQS_DISABLED);
> -}
> -
> /*
> * On processors like P7+ that have the SIAR-Valid bit, marked instructions
> * must be sampled only if the SIAR-valid bit is set.
> @@ -2279,7 +2266,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
> struct perf_event *event;
> unsigned long val[8];
> int found, active;
> - int nmi;
>
> if (cpuhw->n_limited)
> freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
> @@ -2287,18 +2273,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
>
> perf_read_regs(regs);
>
> - /*
> - * If perf interrupts hit in a local_irq_disable (soft-masked) region,
> - * we consider them as NMIs. This is required to prevent hash faults on
> - * user addresses when reading callchains. See the NMI test in
> - * do_hash_page.
> - */
> - nmi = perf_intr_is_nmi(regs);
> - if (nmi)
> - nmi_enter();
> - else
> - irq_enter();
> -
> /* Read all the PMCs since we'll need them a bunch of times */
> for (i = 0; i < ppmu->n_counter; ++i)
> val[i] = read_pmc(i + 1);
> @@ -2344,8 +2318,8 @@ static void __perf_event_interrupt(struct pt_regs *regs)
> }
> }
> }
> - if (!found && !nmi && printk_ratelimit())
> - printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
> + if (unlikely(!found) && !arch_irq_disabled_regs(regs))
> + printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
>
> /*
> * Reset MMCR0 to its normal value. This will set PMXE and
> @@ -2355,11 +2329,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
> * we get back out of this interrupt.
> */
> write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
> -
> - if (nmi)
> - nmi_exit();
> - else
> - irq_exit();
> }
>
> static void perf_event_interrupt(struct pt_regs *regs)
> diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
> index e0e7e276bfd2..ee721f420a7b 100644
> --- a/arch/powerpc/perf/core-fsl-emb.c
> +++ b/arch/powerpc/perf/core-fsl-emb.c
> @@ -31,19 +31,6 @@ static atomic_t num_events;
> /* Used to avoid races in calling reserve/release_pmc_hardware */
> static DEFINE_MUTEX(pmc_reserve_mutex);
>
> -/*
> - * If interrupts were soft-disabled when a PMU interrupt occurs, treat
> - * it as an NMI.
> - */
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> -#ifdef __powerpc64__
> - return (regs->softe & IRQS_DISABLED);
> -#else
> - return 0;
> -#endif
> -}
> -
> static void perf_event_interrupt(struct pt_regs *regs);
>
> /*
> @@ -659,13 +646,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
> struct perf_event *event;
> unsigned long val;
> int found = 0;
> - int nmi;
> -
> - nmi = perf_intr_is_nmi(regs);
> - if (nmi)
> - nmi_enter();
> - else
> - irq_enter();
>
> for (i = 0; i < ppmu->n_counter; ++i) {
> event = cpuhw->event[i];
> @@ -690,11 +670,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
> mtmsr(mfmsr() | MSR_PMM);
> mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
> isync();
> -
> - if (nmi)
> - nmi_exit();
> - else
> - irq_exit();
> }
>
> void hw_perf_event_setup(int cpu)
> --
> 2.23.0
>