[PATCH 15/15] sched/cputime: Handle dyntick-idle steal time correctly

Shrikanth Hegde sshegde at linux.ibm.com
Tue Mar 3 22:17:45 AEDT 2026



On 2/6/26 7:52 PM, Frederic Weisbecker wrote:
> The dyntick-idle steal time is currently accounted when the tick
> restarts but the stolen idle time is not substracted from the idle time
> that was already accounted. This is to avoid observing the idle time
> going backward as the dyntick-idle cputime accessors can't reliably know
> in advance the stolen idle time.
> 
> In order to maintain a forward progressing idle cputime while
> substracting idle steal time from it, keep track of the previously
> accounted idle stolen time and substract it from _later_ idle cputime
> accounting.
> 

s/substract/subtract ?

> Signed-off-by: Frederic Weisbecker <frederic at kernel.org>
> ---
>   include/linux/kernel_stat.h |  1 +
>   kernel/sched/cputime.c      | 21 +++++++++++++++------
>   2 files changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
> index 512104b0ff49..24a54a6151ba 100644
> --- a/include/linux/kernel_stat.h
> +++ b/include/linux/kernel_stat.h
> @@ -39,6 +39,7 @@ struct kernel_cpustat {
>   	bool		idle_elapse;
>   	seqcount_t	idle_sleeptime_seq;
>   	u64		idle_entrytime;
> +	u64		idle_stealtime;
>   #endif
>   	u64		cpustat[NR_STATS];
>   };
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index 92fa2f037b6e..7e79288eb327 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -424,19 +424,25 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
>   static void kcpustat_idle_stop(struct kernel_cpustat *kc, u64 now)
>   {
>   	u64 *cpustat = kc->cpustat;
> -	u64 delta;
> +	u64 delta, steal, steal_delta;
>   
>   	if (!kc->idle_elapse)
>   		return;
>   
>   	delta = now - kc->idle_entrytime;
> +	steal = steal_account_process_time(delta);
>   
>   	write_seqcount_begin(&kc->idle_sleeptime_seq);
> +	steal_delta = min_t(u64, kc->idle_stealtime, delta);
> +	delta -= steal_delta;

I didn't get this logic. Why do we need idle_stealtime?

Let's say 10ms was steal time and 50ms was delta. But idle_stealtime is the
sum of past accumulated steal time. We only need to subtract the steal time there, no?

Shouldn't this be delta -= steal?


> +	kc->idle_stealtime -= steal_delta;
> +
>   	if (nr_iowait_cpu(smp_processor_id()) > 0)
>   		cpustat[CPUTIME_IOWAIT] += delta;
>   	else
>   		cpustat[CPUTIME_IDLE] += delta;
>   
> +	kc->idle_stealtime += steal;
>   	kc->idle_entrytime = now;
>   	kc->idle_elapse = false;
>   	write_seqcount_end(&kc->idle_sleeptime_seq);
> @@ -460,7 +466,6 @@ void kcpustat_dyntick_stop(u64 now)
>   		kc->idle_dyntick = false;
>   		irqtime_dyntick_stop();
>   		vtime_dyntick_stop();
> -		steal_account_process_time(ULONG_MAX);
>   	}
>   }
>   
> @@ -505,10 +510,14 @@ static u64 kcpustat_field_dyntick(int cpu, enum cpu_usage_stat idx,
>   	do {
>   		seq = read_seqcount_begin(&kc->idle_sleeptime_seq);
>   
> -		if (kc->idle_elapse && compute_delta)
> -			idle = cpustat[idx] + (now - kc->idle_entrytime);
> -		else
> -			idle = cpustat[idx];
> +		idle = cpustat[idx];
> +
> +		if (kc->idle_elapse && compute_delta) {
> +			u64 delta = now - kc->idle_entrytime;
> +
> +			delta -= min_t(u64, kc->idle_stealtime, delta);
> +			idle += delta;
> +		}
>   	} while (read_seqcount_retry(&kc->idle_sleeptime_seq, seq));
>   
>   	return idle;



More information about the Linuxppc-dev mailing list