[PATCH 11/17] powerpc/qspinlock: allow propagation of yield CPU down the queue
Jordan Niethe
jniethe5 at gmail.com
Fri Aug 12 14:17:17 AEST 2022
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Having all CPUs poll the lock word for the owner CPU that should be
> yielded to defeats most of the purpose of using MCS queueing for
> scalability. Yet it may be desirable for queued waiters to yield
> to a preempted owner.
>
> s390 addresses this problem by having queued waiters sample the lock
> word to find the owner much less frequently. In this approach, the
> waiters never sample it directly, but the queue head propagates the
> owner CPU back to the next waiter if it ever finds the owner has
> been preempted. Queued waiters then subsequently propagate the owner
> CPU back to the next waiter, and so on.
>
> Disable this option by default for now, i.e., no logical change.
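For my own understanding, the propagation chain described above is roughly the
following (just a paraphrase of the hunks below, eliding the -1 reset and the
barriers, not a drop-in replacement):

	/* queue head: the only waiter that samples the lock word */
	owner = get_owner_cpu(val);
	if (vcpu_is_preempted(owner))
		next->yield_cpu = owner;

	/* queued waiter: never reads the lock word, just forwards what the head told it */
	yield_cpu = READ_ONCE(node->yield_cpu);
	if (yield_cpu != -1) {
		if (node->next)
			node->next->yield_cpu = yield_cpu;
		yield_to_preempted(yield_cpu, yield_count_of(yield_cpu));
	}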
> ---
> arch/powerpc/lib/qspinlock.c | 85 +++++++++++++++++++++++++++++++++++-
> 1 file changed, 84 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 94f007f66942..28c85a2d5635 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -12,6 +12,7 @@
> struct qnode {
> struct qnode *next;
> struct qspinlock *lock;
> + int yield_cpu;
> u8 locked; /* 1 if lock acquired */
> };
>
> @@ -28,6 +29,7 @@ static int HEAD_SPINS __read_mostly = (1<<8);
> static bool pv_yield_owner __read_mostly = true;
> static bool pv_yield_allow_steal __read_mostly = false;
> static bool pv_yield_prev __read_mostly = true;
> +static bool pv_yield_propagate_owner __read_mostly = true;
This also seems to be enabled by default, even though the commit message says
the option is disabled by default for now.
>
> static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>
> @@ -257,13 +259,66 @@ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u
> __yield_to_locked_owner(lock, val, paravirt, clear_mustq);
> }
>
> +static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
> +{
> + struct qnode *next;
> + int owner;
> +
> + if (!paravirt)
> + return;
> + if (!pv_yield_propagate_owner)
> + return;
> +
> + owner = get_owner_cpu(val);
> + if (*set_yield_cpu == owner)
> + return;
> +
> + next = READ_ONCE(node->next);
> + if (!next)
> + return;
> +
> + if (vcpu_is_preempted(owner)) {
Is there a difference between using vcpu_is_preempted() here and checking
bit 0 of the yield count as is done in other places?
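For comparison, the form used in yield_to_prev() below checks the low bit of
the yield count, i.e. something like:

	yield_count = yield_count_of(owner);
	if ((yield_count & 1) == 0)
		return;	/* owner vcpu is running */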
> + next->yield_cpu = owner;
> + *set_yield_cpu = owner;
> + } else if (*set_yield_cpu != -1) {
It might be worth giving the -1 CPU a #define.
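Something along these lines, perhaps (name made up for illustration):

#define YIELD_CPU_NONE	(-1)	/* no owner CPU to propagate */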
> + next->yield_cpu = owner;
> + *set_yield_cpu = owner;
> + }
> +}
Does this need to pass set_yield_cpu by reference? Couldn't its new value be
returned instead? To me that makes it clearer that the function is used to
update set_yield_cpu. I think something like this would work:
int set_yield_cpu = -1;

static __always_inline int propagate_yield_cpu(struct qnode *node, u32 val, int set_yield_cpu, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		goto out;
	if (!pv_yield_propagate_owner)
		goto out;

	owner = get_owner_cpu(val);
	if (set_yield_cpu == owner)
		goto out;

	next = READ_ONCE(node->next);
	if (!next)
		goto out;

	if (vcpu_is_preempted(owner)) {
		next->yield_cpu = owner;
		return owner;
	} else if (set_yield_cpu != -1) {
		next->yield_cpu = owner;
		return owner;
	}

out:
	return set_yield_cpu;
}

set_yield_cpu = propagate_yield_cpu(... set_yield_cpu ...);
> +
> static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
> {
> u32 yield_count;
> + int yield_cpu;
>
> if (!paravirt)
> goto relax;
>
> + if (!pv_yield_propagate_owner)
> + goto yield_prev;
> +
> + yield_cpu = READ_ONCE(node->yield_cpu);
> + if (yield_cpu == -1) {
> + /* Propagate back the -1 CPU */
> + if (node->next && node->next->yield_cpu != -1)
> + node->next->yield_cpu = yield_cpu;
> + goto yield_prev;
> + }
> +
> + yield_count = yield_count_of(yield_cpu);
> + if ((yield_count & 1) == 0)
> + goto yield_prev; /* owner vcpu is running */
> +
> + smp_rmb();
> +
> + if (yield_cpu == node->yield_cpu) {
> + if (node->next && node->next->yield_cpu != yield_cpu)
> + node->next->yield_cpu = yield_cpu;
> + yield_to_preempted(yield_cpu, yield_count);
> + return;
> + }
> +
> +yield_prev:
> if (!pv_yield_prev)
> goto relax;
>
> @@ -337,6 +392,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
> node = &qnodesp->nodes[idx];
> node->next = NULL;
> node->lock = lock;
> + node->yield_cpu = -1;
> node->locked = 0;
>
> tail = encode_tail_cpu();
> @@ -358,13 +414,21 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
> while (!node->locked)
> yield_to_prev(lock, node, prev_cpu, paravirt);
>
> + /* Clear out stale propagated yield_cpu */
> + if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
> + node->yield_cpu = -1;
> +
> smp_rmb(); /* acquire barrier for the mcs lock */
> }
>
> if (!MAYBE_STEALERS) {
> + int set_yield_cpu = -1;
> +
> /* We're at the head of the waitqueue, wait for the lock. */
> - while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL)
> + while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
> + propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
> yield_head_to_locked_owner(lock, val, paravirt, false);
> + }
>
> /* If we're the last queued, must clean up the tail. */
> if ((val & _Q_TAIL_CPU_MASK) == tail) {
> @@ -376,12 +440,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
> /* We must be the owner, just set the lock bit and acquire */
> lock_set_locked(lock);
> } else {
> + int set_yield_cpu = -1;
> int iters = 0;
> bool set_mustq = false;
>
> again:
> /* We're at the head of the waitqueue, wait for the lock. */
> while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
> + propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
> yield_head_to_locked_owner(lock, val, paravirt,
> pv_yield_allow_steal && set_mustq);
>
> @@ -540,6 +606,22 @@ static int pv_yield_prev_get(void *data, u64 *val)
>
> DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
>
> +static int pv_yield_propagate_owner_set(void *data, u64 val)
> +{
> + pv_yield_propagate_owner = !!val;
> +
> + return 0;
> +}
> +
> +static int pv_yield_propagate_owner_get(void *data, u64 *val)
> +{
> + *val = pv_yield_propagate_owner;
> +
> + return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
> +
> static __init int spinlock_debugfs_init(void)
> {
> debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
> @@ -548,6 +630,7 @@ static __init int spinlock_debugfs_init(void)
> debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
> debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
> debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
> + debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
> }
>
> return 0;