[PATCH 11/17] powerpc/qspinlock: allow propagation of yield CPU down the queue

Jordan Niethe jniethe5 at gmail.com
Fri Aug 12 14:17:17 AEST 2022


On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Having all CPUs poll the lock word for the owner CPU that should be
> yielded to defeats most of the purpose of using MCS queueing for
> scalability. Yet it may be desirable for queued waiters to yield
> to a preempted owner.
> 
> s390 addresses this problem by having queued waiters sample the lock
> word to find the owner much less frequently. In this approach, the
> waiters never sample it directly, but the queue head propagates the
> owner CPU back to the next waiter if it ever finds the owner has
> been preempted. Queued waiters then subsequently propagate the owner
> CPU back to the next waiter, and so on.
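
Just to restate the scheme to check my understanding, as a rough sketch
rather than the patch's actual code: the queue head is the only waiter that
reads the lock word, and the owner CPU it observes is handed down the queue
one node at a time.

/* Simplified sketch of the propagation idea (illustration only). */
struct waiter {
	struct waiter *next;
	int yield_cpu;		/* -1 when no preempted owner is known */
};

/* Queue head: the only waiter that samples the lock word for the owner. */
static void head_saw_preempted_owner(struct waiter *head, int owner)
{
	if (head->next)
		head->next->yield_cpu = owner;	/* tell the next waiter */
}

/* Any queued waiter: yield to the propagated owner and pass it on. */
static void waiter_check_yield_cpu(struct waiter *me)
{
	int cpu = me->yield_cpu;

	if (cpu == -1)
		return;
	if (me->next)
		me->next->yield_cpu = cpu;	/* propagate further down */
	/* ...then yield to cpu rather than spinning... */
}
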
> 
> Disable this option by default for now, i.e., no logical change.
> ---
>  arch/powerpc/lib/qspinlock.c | 85 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 84 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 94f007f66942..28c85a2d5635 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -12,6 +12,7 @@
>  struct qnode {
>  	struct qnode	*next;
>  	struct qspinlock *lock;
> +	int		yield_cpu;
>  	u8		locked; /* 1 if lock acquired */
>  };
>  
> @@ -28,6 +29,7 @@ static int HEAD_SPINS __read_mostly = (1<<8);
>  static bool pv_yield_owner __read_mostly = true;
>  static bool pv_yield_allow_steal __read_mostly = false;
>  static bool pv_yield_prev __read_mostly = true;
> +static bool pv_yield_propagate_owner __read_mostly = true;

As with the previous options, this also seems to be enabled by default
(pv_yield_propagate_owner is initialised to true), even though the commit
message says it is disabled by default.

>  
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>  
> @@ -257,13 +259,66 @@ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u
>  	__yield_to_locked_owner(lock, val, paravirt, clear_mustq);
>  }
>  
> +static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
> +{
> +	struct qnode *next;
> +	int owner;
> +
> +	if (!paravirt)
> +		return;
> +	if (!pv_yield_propagate_owner)
> +		return;
> +
> +	owner = get_owner_cpu(val);
> +	if (*set_yield_cpu == owner)
> +		return;
> +
> +	next = READ_ONCE(node->next);
> +	if (!next)
> +		return;
> +
> +	if (vcpu_is_preempted(owner)) {

Is there a difference between using vcpu_is_preempted() here and checking
bit 0 of the yield count as is done in other places?
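
To spell out the contrast I mean, a sketch only, written against the
helpers already used in this file (the two wrapper names are made up):

static __always_inline bool owner_preempted_via_yield_count(int owner)
{
	/*
	 * As in yield_to_prev() below: an odd yield count means the
	 * owner vcpu is not currently running.
	 */
	return (yield_count_of(owner) & 1) != 0;
}

static __always_inline bool owner_preempted_via_helper(int owner)
{
	/* As propagate_yield_cpu() does above. */
	return vcpu_is_preempted(owner);
}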


> +		next->yield_cpu = owner;
> +		*set_yield_cpu = owner;
> +	} else if (*set_yield_cpu != -1) {

It might be worth giving the -1 "no CPU to yield to" value a #define.
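
Something like the below, say (the name is just an example):

/* Hypothetical name for the "no CPU to yield to" sentinel. */
#define YIELD_CPU_NONE	(-1)

and then used in place of the raw -1 in propagate_yield_cpu(),
yield_to_prev() and the node initialisation.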

> +		next->yield_cpu = owner;
> +		*set_yield_cpu = owner;
> +	}
> +}

Does this need to pass set_yield_cpu by reference? Couldn't its new value
be returned instead? To me, returning it makes it clearer that the function
is used to change set_yield_cpu. I think this would work:

int set_yield_cpu = -1;

static __always_inline int propagate_yield_cpu(struct qnode *node, u32 val, int set_yield_cpu, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		goto out;
	if (!pv_yield_propagate_owner)
		goto out;

	owner = get_owner_cpu(val);
	if (set_yield_cpu == owner)
		goto out;

	next = READ_ONCE(node->next);
	if (!next)
		goto out;

	if (vcpu_is_preempted(owner)) {
		next->yield_cpu = owner;
		return owner;
	} else if (set_yield_cpu != -1) {
		next->yield_cpu = owner;
		return owner;
	}

out:
	return set_yield_cpu;
}

set_yield_cpu = propagate_yield_cpu(...  set_yield_cpu ...);



> +
>  static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
>  {
>  	u32 yield_count;
> +	int yield_cpu;
>  
>  	if (!paravirt)
>  		goto relax;
>  
> +	if (!pv_yield_propagate_owner)
> +		goto yield_prev;
> +
> +	yield_cpu = READ_ONCE(node->yield_cpu);
> +	if (yield_cpu == -1) {
> +		/* Propagate back the -1 CPU */
> +		if (node->next && node->next->yield_cpu != -1)
> +			node->next->yield_cpu = yield_cpu;
> +		goto yield_prev;
> +	}
> +
> +	yield_count = yield_count_of(yield_cpu);
> +	if ((yield_count & 1) == 0)
> +		goto yield_prev; /* owner vcpu is running */
> +
> +	smp_rmb();
> +
> +	if (yield_cpu == node->yield_cpu) {
> +		if (node->next && node->next->yield_cpu != yield_cpu)
> +			node->next->yield_cpu = yield_cpu;
> +		yield_to_preempted(yield_cpu, yield_count);
> +		return;
> +	}
> +
> +yield_prev:
>  	if (!pv_yield_prev)
>  		goto relax;
>  
> @@ -337,6 +392,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>  	node = &qnodesp->nodes[idx];
>  	node->next = NULL;
>  	node->lock = lock;
> +	node->yield_cpu = -1;
>  	node->locked = 0;
>  
>  	tail = encode_tail_cpu();
> @@ -358,13 +414,21 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>  		while (!node->locked)
>  			yield_to_prev(lock, node, prev_cpu, paravirt);
>  
> +		/* Clear out stale propagated yield_cpu */
> +		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
> +			node->yield_cpu = -1;
> +
>  		smp_rmb(); /* acquire barrier for the mcs lock */
>  	}
>  
>  	if (!MAYBE_STEALERS) {
> +		int set_yield_cpu = -1;
> +
>  		/* We're at the head of the waitqueue, wait for the lock. */
> -		while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL)
> +		while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
> +			propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
>  			yield_head_to_locked_owner(lock, val, paravirt, false);
> +		}
>  
>  		/* If we're the last queued, must clean up the tail. */
>  		if ((val & _Q_TAIL_CPU_MASK) == tail) {
> @@ -376,12 +440,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>  		/* We must be the owner, just set the lock bit and acquire */
>  		lock_set_locked(lock);
>  	} else {
> +		int set_yield_cpu = -1;
>  		int iters = 0;
>  		bool set_mustq = false;
>  
>  again:
>  		/* We're at the head of the waitqueue, wait for the lock. */
>  		while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
> +			propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
>  			yield_head_to_locked_owner(lock, val, paravirt,
>  					pv_yield_allow_steal && set_mustq);
>  
> @@ -540,6 +606,22 @@ static int pv_yield_prev_get(void *data, u64 *val)
>  
>  DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
>  
> +static int pv_yield_propagate_owner_set(void *data, u64 val)
> +{
> +	pv_yield_propagate_owner = !!val;
> +
> +	return 0;
> +}
> +
> +static int pv_yield_propagate_owner_get(void *data, u64 *val)
> +{
> +	*val = pv_yield_propagate_owner;
> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
> +
>  static __init int spinlock_debugfs_init(void)
>  {
>  	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
> @@ -548,6 +630,7 @@ static __init int spinlock_debugfs_init(void)
>  		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
>  		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
>  		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
> +		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
>  	}
>  
>  	return 0;


