[PATCH v5 29/48] powerpc: add set_dec_or_work API for safely updating decrementer
Nicholas Piggin
npiggin at gmail.com
Fri Apr 2 22:04:02 AEDT 2021
Excerpts from Nicholas Piggin's message of April 2, 2021 1:03 am:
> Decrementer updates must always check for new irq work to avoid an
> irq work decrementer interrupt being lost.
>
> Add an API for this in the timer code so callers don't have to care
> about details.
Oh I forgot to update the changelog for this, it's significantly
changed, so I better withdraw the Reviewed-by as well. I think this
re-arm API is the better one, there's no reason it can't all be in
the host time.c code.
This implementation also avoids what used to be inevitable double
interrupt to take a host timer from guest (first hdec to get into
the host and set dec to some -ve value, then taking that dec as
soon as we enable interrupts) by just marking the dec pending in
that case, so it gets replayed when we enable irqs.
Thanks,
Nick
>
> Reviewed-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
> arch/powerpc/include/asm/time.h | 4 ++++
> arch/powerpc/kernel/time.c | 41 +++++++++++++++++++++++++--------
> arch/powerpc/kvm/book3s_hv.c | 6 +----
> 3 files changed, 37 insertions(+), 14 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
> index 0128cd9769bc..924b2157882f 100644
> --- a/arch/powerpc/include/asm/time.h
> +++ b/arch/powerpc/include/asm/time.h
> @@ -106,6 +106,10 @@ static inline u64 timer_get_next_tb(void)
> return __this_cpu_read(decrementers_next_tb);
> }
>
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +void timer_rearm_host_dec(u64 now);
> +#endif
> +
> /* Convert timebase ticks to nanoseconds */
> unsigned long long tb_to_ns(unsigned long long tb_ticks);
>
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index 8b9b38a8ce57..8bbcc6be40c0 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -563,13 +563,43 @@ void arch_irq_work_raise(void)
> preempt_enable();
> }
>
> +static void set_dec_or_work(u64 val)
> +{
> + set_dec(val);
> + /* We may have raced with new irq work */
> + if (unlikely(test_irq_work_pending()))
> + set_dec(1);
> +}
> +
> #else /* CONFIG_IRQ_WORK */
>
> #define test_irq_work_pending() 0
> #define clear_irq_work_pending()
>
> +static void set_dec_or_work(u64 val)
> +{
> + set_dec(val);
> +}
> #endif /* CONFIG_IRQ_WORK */
>
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +void timer_rearm_host_dec(u64 now)
> +{
> + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
> +
> + WARN_ON_ONCE(!arch_irqs_disabled());
> + WARN_ON_ONCE(mfmsr() & MSR_EE);
> +
> + if (now >= *next_tb) {
> + now = *next_tb - now;
> + set_dec_or_work(now);
> + } else {
> + local_paca->irq_happened |= PACA_IRQ_DEC;
> + }
> +}
> +EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
> +#endif
> +
> /*
> * timer_interrupt - gets called when the decrementer overflows,
> * with interrupts disabled.
> @@ -630,10 +660,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
> } else {
> now = *next_tb - now;
> if (now <= decrementer_max)
> - set_dec(now);
> - /* We may have raced with new irq work */
> - if (test_irq_work_pending())
> - set_dec(1);
> + set_dec_or_work(now);
> __this_cpu_inc(irq_stat.timer_irqs_others);
> }
>
> @@ -875,11 +902,7 @@ static int decrementer_set_next_event(unsigned long evt,
> struct clock_event_device *dev)
> {
> __this_cpu_write(decrementers_next_tb, get_tb() + evt);
> - set_dec(evt);
> -
> - /* We may have raced with new irq work */
> - if (test_irq_work_pending())
> - set_dec(1);
> + set_dec_or_work(evt);
>
> return 0;
> }
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 8c8df88eec8c..287042b4afb5 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3901,11 +3901,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> vc->entry_exit_map = 0x101;
> vc->in_guest = 0;
>
> - next_timer = timer_get_next_tb();
> - set_dec(next_timer - tb);
> - /* We may have raced with new irq work */
> - if (test_irq_work_pending())
> - set_dec(1);
> + timer_rearm_host_dec(tb);
> mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
>
> kvmhv_load_host_pmu();
> --
> 2.23.0
>
>
More information about the Linuxppc-dev
mailing list