[PATCH 09/17] powerpc/qspinlock: implement option to yield to previous node

Fri Aug 12 12:07:08 AEST 2022

On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Queued waiters which are not at the head of the queue don't spin on
> the lock word but their qnode lock word, waiting for the previous queued
> CPU to release them. Add an option which allows these waiters to yield
> to the previous CPU if its vCPU is preempted.
> 
> Disable this option by default for now, i.e., no logical change.
> ---
>  arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 55286ac91da5..b39f8c5b329c 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true;
>  static int HEAD_SPINS __read_mostly = (1<<8);
>  
>  static bool pv_yield_owner __read_mostly = true;
> +static bool pv_yield_prev __read_mostly = true;

Similar suggestion: maybe pv_yield_prev_enabled would read better.

Isn't this enabled by default (it is initialised to true), contrary to the commit message?

>  
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>  
> @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va
>  	cpu_relax();
>  }
>  
> +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)

yield_to_locked_owner() takes a raw val and works out the cpu to yield to.
I think, for consistency, yield_to_prev() should take the raw val and work it out too.

> +{
> +	u32 yield_count;
> +
> +	if (!paravirt)
> +		goto relax;
> +
> +	if (!pv_yield_prev)
> +		goto relax;
> +
> +	yield_count = yield_count_of(prev_cpu);
> +	if ((yield_count & 1) == 0)
> +		goto relax; /* owner vcpu is running */
> +
> +	smp_rmb(); /* See yield_to_locked_owner comment */
> +
> +	if (!node->locked) {
> +		yield_to_preempted(prev_cpu, yield_count);
> +		return;
> +	}
> +
> +relax:
> +	cpu_relax();
> +}
> +
>  
>  static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
>  {
> @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>  	 */
>  	if (old & _Q_TAIL_CPU_MASK) {
>  		struct qnode *prev = get_tail_qnode(lock, old);
> +		int prev_cpu = get_tail_cpu(old);

This could then be removed.

>  
>  		/* Link @node into the waitqueue. */
>  		WRITE_ONCE(prev->next, node);
>  
>  		/* Wait for mcs node lock to be released */
>  		while (!node->locked)
> -			cpu_relax();
> +			yield_to_prev(lock, node, prev_cpu, paravirt);

And the call here would then become:
			yield_to_prev(lock, node, old, paravirt);

>  
>  		smp_rmb(); /* acquire barrier for the mcs lock */
>  	}
> @@ -448,12 +475,29 @@ static int pv_yield_owner_get(void *data, u64 *val)
>  
>  DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
>  
> +static int pv_yield_prev_set(void *data, u64 val)
> +{
> +	pv_yield_prev = !!val;
> +
> +	return 0;
> +}
> +
> +static int pv_yield_prev_get(void *data, u64 *val)
> +{
> +	*val = pv_yield_prev;
> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
> +
>  static __init int spinlock_debugfs_init(void)
>  {
>  	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
>  	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
>  	if (is_shared_processor()) {
>  		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
> +		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
>  	}
>  
>  	return 0;