[PATCH v1 11/55] powerpc/time: add API for KVM to re-arm the host timer/decrementer
Christophe Leroy
christophe.leroy at csgroup.eu
Thu Aug 5 17:22:05 AEST 2021
Le 26/07/2021 à 05:49, Nicholas Piggin a écrit :
> Rather than have KVM look up the host timer and fiddle with the
> irq-work internal details, have the powerpc/time.c code provide a
> function for KVM to re-arm the Linux timer code when exiting a
> guest.
>
> This is implementation has an improvement over existing code of
> marking a decrementer interrupt as soft-pending if a timer has
> expired, rather than setting DEC to a -ve value, which tended to
> cause host timers to take two interrupts (first hdec to exit the
> guest, then the immediate dec).
>
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
> arch/powerpc/include/asm/time.h | 16 +++-------
> arch/powerpc/kernel/time.c | 52 +++++++++++++++++++++++++++------
> arch/powerpc/kvm/book3s_hv.c | 7 ++---
> 3 files changed, 49 insertions(+), 26 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
> index 69b6be617772..924b2157882f 100644
> --- a/arch/powerpc/include/asm/time.h
> +++ b/arch/powerpc/include/asm/time.h
> @@ -99,18 +99,6 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
> extern void secondary_cpu_time_init(void);
> extern void __init time_init(void);
>
> -#ifdef CONFIG_PPC64
> -static inline unsigned long test_irq_work_pending(void)
> -{
> - unsigned long x;
> -
> - asm volatile("lbz %0,%1(13)"
> - : "=r" (x)
> - : "i" (offsetof(struct paca_struct, irq_work_pending)));
> - return x;
> -}
> -#endif
> -
> DECLARE_PER_CPU(u64, decrementers_next_tb);
>
> static inline u64 timer_get_next_tb(void)
> @@ -118,6 +106,10 @@ static inline u64 timer_get_next_tb(void)
> return __this_cpu_read(decrementers_next_tb);
> }
>
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +void timer_rearm_host_dec(u64 now);
> +#endif
> +
> /* Convert timebase ticks to nanoseconds */
> unsigned long long tb_to_ns(unsigned long long tb_ticks);
>
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index 72d872b49167..016828b7401b 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -499,6 +499,16 @@ EXPORT_SYMBOL(profile_pc);
> * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
> */
> #ifdef CONFIG_PPC64
> +static inline unsigned long test_irq_work_pending(void)
> +{
> + unsigned long x;
> +
> + asm volatile("lbz %0,%1(13)"
> + : "=r" (x)
> + : "i" (offsetof(struct paca_struct, irq_work_pending)));
Can we just use READ_ONCE() instead of hard coding the read ?
> + return x;
> +}
> +
> static inline void set_irq_work_pending_flag(void)
> {
> asm volatile("stb %0,%1(13)" : :
> @@ -542,13 +552,44 @@ void arch_irq_work_raise(void)
> preempt_enable();
> }
>
> +static void set_dec_or_work(u64 val)
> +{
> + set_dec(val);
> + /* We may have raced with new irq work */
> + if (unlikely(test_irq_work_pending()))
> + set_dec(1);
> +}
> +
> #else /* CONFIG_IRQ_WORK */
>
> #define test_irq_work_pending() 0
> #define clear_irq_work_pending()
>
> +static void set_dec_or_work(u64 val)
> +{
> + set_dec(val);
> +}
> #endif /* CONFIG_IRQ_WORK */
>
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +void timer_rearm_host_dec(u64 now)
> +{
> + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
> +
> + WARN_ON_ONCE(!arch_irqs_disabled());
> + WARN_ON_ONCE(mfmsr() & MSR_EE);
> +
> + if (now >= *next_tb) {
> + local_paca->irq_happened |= PACA_IRQ_DEC;
> + } else {
> + now = *next_tb - now;
> + if (now <= decrementer_max)
> + set_dec_or_work(now);
> + }
> +}
> +EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
> +#endif
> +
> /*
> * timer_interrupt - gets called when the decrementer overflows,
> * with interrupts disabled.
> @@ -609,10 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
> } else {
> now = *next_tb - now;
> if (now <= decrementer_max)
> - set_dec(now);
> - /* We may have raced with new irq work */
> - if (test_irq_work_pending())
> - set_dec(1);
> + set_dec_or_work(now);
> __this_cpu_inc(irq_stat.timer_irqs_others);
> }
>
> @@ -854,11 +892,7 @@ static int decrementer_set_next_event(unsigned long evt,
> struct clock_event_device *dev)
> {
> __this_cpu_write(decrementers_next_tb, get_tb() + evt);
> - set_dec(evt);
> -
> - /* We may have raced with new irq work */
> - if (test_irq_work_pending())
> - set_dec(1);
> + set_dec_or_work(evt);
>
> return 0;
> }
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 6e6cfb10e9bb..0cef578930f9 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4018,11 +4018,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> vc->entry_exit_map = 0x101;
> vc->in_guest = 0;
>
> - next_timer = timer_get_next_tb();
> - set_dec(next_timer - tb);
> - /* We may have raced with new irq work */
> - if (test_irq_work_pending())
> - set_dec(1);
> + timer_rearm_host_dec(tb);
> +
> mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
>
> kvmhv_load_host_pmu();
>
More information about the Linuxppc-dev
mailing list