From: Mateusz Guzik
Subject: [PATCH] convert mpl ticket lock to anderson's lock
To: mpi@grenadille.net
Cc: tech@openbsd.org, Mateusz Guzik
Date: Sat, 27 Dec 2025 06:24:36 +0100

Example microbenchmark doing fstat in 8 threads (ops/s):
before:	2305501
after:	5891611 (+155%)

Booted with WITNESS et al, survived several kernel builds no problem.
Needs more testing of course.
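For context, the core idea of Anderson's array lock: each waiter is handed
its own slot and spins only on that slot's cache line, and unlock flips the
next slot, so there is no single ticket word that all CPUs hammer. Below is
a minimal userspace sketch using C11 atomics, purely illustrative -- the
names (alock, NSLOTS) and the aligned(64) padding are mine, not taken from
the diff:

#include <stdatomic.h>
#include <stdbool.h>

#define NSLOTS 64	/* power of 2, >= max number of threads */

/* One slot per waiter; padding keeps each spin target on its own line. */
struct alock_slot {
	atomic_bool taken;
} __attribute__((aligned(64)));

struct alock {
	struct alock_slot slots[NSLOTS];
	atomic_uint next;	/* next slot to hand out */
};

static void
alock_init(struct alock *l)
{
	/* Slot 0 starts free, everything else starts taken. */
	atomic_store(&l->slots[0].taken, false);
	for (int i = 1; i < NSLOTS; i++)
		atomic_store(&l->slots[i].taken, true);
	atomic_store(&l->next, 0);
}

/* Returns the slot index; the caller passes it back to alock_unlock(). */
static unsigned
alock_lock(struct alock *l)
{
	/*
	 * Grab a slot. NSLOTS being a power of 2 keeps the modulo
	 * consistent when the counter wraps, the same argument as the
	 * CTASSERT(powerof2(MAXCPUS)) in the patch.
	 */
	unsigned i = atomic_fetch_add(&l->next, 1) % NSLOTS;

	/* Spin on a private cache line, not on a shared counter. */
	while (atomic_load_explicit(&l->slots[i].taken, memory_order_acquire))
		;
	return (i);
}

static void
alock_unlock(struct alock *l, unsigned i)
{
	/* Re-arm our slot for its next user... */
	atomic_store(&l->slots[i].taken, true);
	/* ...then pass ownership to the successor. */
	atomic_store_explicit(&l->slots[(i + 1) % NSLOTS].taken, false,
	    memory_order_release);
}

Note the unlock ordering in the sketch: the slot is re-armed for its next
user before the successor's slot is released, the same two-store sequence
as in __mp_unlock() below.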
---
 sys/kern/kern_lock.c | 60 ++++++++++++++++++++++++++++++++++++--------
 sys/sys/mplock.h     | 17 +++++++++----
 2 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index fe75b7ae895..81b8b950c05 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -109,16 +109,42 @@ _kernel_lock_held(void)
 
 #ifdef __USE_MI_MPLOCK
 
-/* Ticket lock implementation */
+/*
+ * Anderson's lock implementation
+ *
+ * This is a stop-gap primitive providing FIFO ordering while damage-controlling
+ * performance loss under contention.
+ *
+ * BUGS:
+ * - there is no awareness of virtualized environments, where the lock owner or
+ *   one of the waiters can be on a vcpu preempted by the host, leading to time
+ *   wasted spinning instead of yielding
+ * - poor NUMA performance, to be addressed after it becomes a factor -- at the
+ *   moment the kernel does not have NUMA-awareness in the first place
+ *
+ * An incomplete list of potential alternatives:
+ * - ticket lock -- centralized spinning, bad scalability
+ * - CLH -- a viable alternative, not picked since Anderson's lock was good enough
+ * - MCS -- less problematic on NUMA as CPUs retain their nodes, but this comes
+ *   at the cost of a more expensive unlock
+ */
 
 #include <machine/cpu.h>
 
+/*
+ * Making sure MAXCPUS is a power of 2 allows mplc_index to wrap around to 0
+ * and still have the expected value.
+ */
+CTASSERT(powerof2(MAXCPUS));
+
 void
 ___mp_lock_init(struct __mp_lock *mpl, const struct lock_type *type)
 {
 	memset(mpl->mpl_cpus, 0, sizeof(mpl->mpl_cpus));
-	mpl->mpl_users = 0;
-	mpl->mpl_ticket = 1;
+	mpl->mpl_locks[0].taken = false;
+	for (int i = 1; i < MAXCPUS; i++)
+		mpl->mpl_locks[i].taken = true;
+	mpl->mpl_index = 0;
 
 #ifdef WITNESS
 	mpl->mpl_lock_obj.lo_name = type->lt_name;
@@ -131,15 +157,21 @@ ___mp_lock_init(struct __mp_lock *mpl, const struct lock_type *type)
 }
 
 static __inline void
-__mp_lock_spin(struct __mp_lock *mpl, u_int me)
+__mp_lock_spin(struct __mp_lock *mpl, u_int index)
 {
+	struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
 #ifdef MP_LOCKDEBUG
 	long nticks = __mp_lock_spinout;
 #endif
 
+	if (!READ_ONCE(mpl->mpl_locks[index].taken)) {
+		cpu->mplc_owned = true;
+		return;
+	}
+
 	spc->spc_spinning++;
-	while (mpl->mpl_ticket != me) {
+	do {
 		CPU_BUSY_CYCLE();
 
 #ifdef MP_LOCKDEBUG
@@ -149,8 +181,10 @@ __mp_lock_spin(struct __mp_lock *mpl, u_int me)
 			nticks = __mp_lock_spinout;
 		}
 #endif
-	}
+	} while (READ_ONCE(mpl->mpl_locks[index].taken));
+
 	spc->spc_spinning--;
+	cpu->mplc_owned = true;
 }
 
 void
@@ -167,10 +201,10 @@ __mp_lock(struct __mp_lock *mpl)
 
 	s = intr_disable();
 	if (cpu->mplc_depth++ == 0)
-		cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users);
+		cpu->mplc_index = (atomic_inc_int_nv(&mpl->mpl_index) - 1) % MAXCPUS;
 	intr_restore(s);
 
-	__mp_lock_spin(mpl, cpu->mplc_ticket);
+	__mp_lock_spin(mpl, cpu->mplc_index);
 	membar_enter_after_atomic();
 
 	WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE);
@@ -193,8 +227,10 @@ __mp_unlock(struct __mp_lock *mpl)
 
 	s = intr_disable();
 	if (--cpu->mplc_depth == 0) {
+		WRITE_ONCE(mpl->mpl_locks[cpu->mplc_index].taken, true);
 		membar_exit();
-		mpl->mpl_ticket++;
+		WRITE_ONCE(mpl->mpl_locks[(cpu->mplc_index + 1) % MAXCPUS].taken, false);
+		cpu->mplc_owned = false;
 	}
 	intr_restore(s);
 }
@@ -216,8 +252,10 @@ __mp_release_all(struct __mp_lock *mpl)
 		WITNESS_UNLOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE);
 #endif
 	cpu->mplc_depth = 0;
+	WRITE_ONCE(mpl->mpl_locks[cpu->mplc_index].taken, true);
 	membar_exit();
-	mpl->mpl_ticket++;
+	WRITE_ONCE(mpl->mpl_locks[(cpu->mplc_index + 1) % MAXCPUS].taken, false);
+	cpu->mplc_owned = false;
 	intr_restore(s);
 
 	return (rv);
@@ -235,7 +273,7 @@ __mp_lock_held(struct __mp_lock *mpl, struct cpu_info *ci)
 {
 	struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[CPU_INFO_UNIT(ci)];
 
-	return (cpu->mplc_ticket == mpl->mpl_ticket && cpu->mplc_depth > 0);
+	return (cpu->mplc_owned);
 }
 
 #endif /* __USE_MI_MPLOCK */
diff --git a/sys/sys/mplock.h b/sys/sys/mplock.h
index b28ac240d94..ccbc257bf14 100644
--- a/sys/sys/mplock.h
+++ b/sys/sys/mplock.h
@@ -33,19 +33,26 @@
 
 #include <sys/_lock.h>
 
+#define CACHELINESIZE 64 /* FIXME */
+
 struct __mp_lock_cpu {
-	u_int mplc_ticket;
+	u_int mplc_index;
 	u_int mplc_depth;
-};
+	bool mplc_owned;
+} __aligned(CACHELINESIZE);
+
+struct __mp_lock_state {
+	bool taken;
+} __aligned(CACHELINESIZE);
 
 struct __mp_lock {
 	struct __mp_lock_cpu mpl_cpus[MAXCPUS];
-	volatile u_int mpl_ticket;
-	u_int mpl_users;
+	struct __mp_lock_state mpl_locks[MAXCPUS];
+	volatile u_int mpl_index __aligned(CACHELINESIZE);
 #ifdef WITNESS
 	struct lock_object mpl_lock_obj;
 #endif
-};
+} __aligned(CACHELINESIZE);
 
 void ___mp_lock_init(struct __mp_lock *, const struct lock_type *);
 void __mp_lock(struct __mp_lock *);
-- 
2.44.0