[PATCH] convert mpl ticket lock to Anderson's lock
Example microbenchmark doing fstat in 8 threads (ops/s):
before: 2305501
after: 5891611 (+155%)
Booted with WITNESS et al enabled; survived several kernel builds without
any problems. Needs more testing, of course.
---
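Not part of the commit message -- for reviewers unfamiliar with the primitive,
here is a minimal userspace sketch of the array lock (a.k.a. Anderson's lock)
this patch converts to. The C11 atomics, the names and the fixed slot count
are illustrative stand-ins for the kernel's atomic_inc_int_nv(),
READ_ONCE()/WRITE_ONCE() and MAXCPUS, not code from the patch:

#include <stdatomic.h>
#include <stdbool.h>

#define NSLOTS	64			/* stands in for MAXCPUS */

struct alock {
	/*
	 * One flag per slot; false means "your turn". In the real patch
	 * each flag sits on its own cache line so waiters spin locally.
	 */
	atomic_bool	taken[NSLOTS];
	atomic_uint	next;		/* free-running ticket counter */
};

static void
alock_init(struct alock *l)
{
	atomic_init(&l->taken[0], false);
	for (unsigned int i = 1; i < NSLOTS; i++)
		atomic_init(&l->taken[i], true);
	atomic_init(&l->next, 0);
}

static unsigned int
alock_lock(struct alock *l)
{
	/* Grab a slot with a single fetch-and-add, then spin on it alone. */
	unsigned int me = atomic_fetch_add(&l->next, 1) % NSLOTS;

	while (atomic_load(&l->taken[me]))
		;			/* CPU_BUSY_CYCLE() in the kernel */
	return (me);
}

static void
alock_unlock(struct alock *l, unsigned int me)
{
	/* Re-arm our slot for its next user, then pass the baton. */
	atomic_store(&l->taken[me], true);
	atomic_store(&l->taken[(me + 1) % NSLOTS], false);
}

The win over the ticket lock is that each waiter spins on its own cache line
instead of all of them polling the shared ticket word, so a release
invalidates one waiter's line instead of every waiter's.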
sys/kern/kern_lock.c | 60 ++++++++++++++++++++++++++++++++++++--------
sys/sys/mplock.h | 17 +++++++++----
2 files changed, 61 insertions(+), 16 deletions(-)
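Also not part of the commit message -- a note on the slot arithmetic in
__mp_lock(), since it may look suspicious at first glance: mpl_index is a
free-running u_int and eventually overflows, and the slots derived from it
with % MAXCPUS only keep advancing without a gap across that overflow when
MAXCPUS divides UINT_MAX + 1, hence the CTASSERT(powerof2(MAXCPUS)) below.
A standalone check of the property (nslots is a made-up stand-in for
MAXCPUS):

#include <assert.h>
#include <limits.h>

int
main(void)
{
	unsigned int nslots = 64;	/* power of 2, like MAXCPUS */
	unsigned int last = UINT_MAX;	/* final ticket before the wrap */
	unsigned int next = last + 1;	/* well-defined unsigned wrap to 0 */

	/*
	 * The slot sequence continues without a gap across the wrap.
	 * With e.g. nslots = 48 this assertion fires.
	 */
	assert(next % nslots == (last % nslots + 1) % nslots);
	return (0);
}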
diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index fe75b7ae895..81b8b950c05 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -109,16 +109,42 @@ _kernel_lock_held(void)
#ifdef __USE_MI_MPLOCK
-/* Ticket lock implementation */
+/*
+ * Anderson's lock implementation
+ *
+ * This is a stop-gap primitive: it provides FIFO ordering while limiting
+ * the performance loss under contention.
+ *
+ * BUGS:
+ * - there is no awareness of virtualized environments, where the lock owner
+ * or one of the waiters can be running on a vcpu preempted by the host,
+ * leading to time wasted spinning instead of yielding
+ * - poor NUMA performance, to be addressed after it becomes a factor -- at the
+ * moment the kernel does not have NUMA-awareness in the first place
+ *
+ * Incomplete list of potential alternatives:
+ * - ticket lock -- centralized spinning, bad scalability
+ * - CLH -- another viable choice, not picked since Anderson's lock was good enough
+ * - MCS -- less problematic on NUMA since each CPU spins on its own queue
+ * node, but this comes at the cost of a more expensive unlock
+ */
#include <machine/cpu.h>
+/*
+ * MAXCPUS must be a power of 2 (i.e. divide UINT_MAX + 1) so that slots
+ * derived from the wrapping mpl_index counter keep advancing in sequence.
+ */
+CTASSERT(powerof2(MAXCPUS));
+
void
___mp_lock_init(struct __mp_lock *mpl, const struct lock_type *type)
{
memset(mpl->mpl_cpus, 0, sizeof(mpl->mpl_cpus));
- mpl->mpl_users = 0;
- mpl->mpl_ticket = 1;
+ mpl->mpl_locks[0].taken = false;
+ for (int i = 1; i < MAXCPUS; i++)
+ mpl->mpl_locks[i].taken = true;
+ mpl->mpl_index = 0;
#ifdef WITNESS
mpl->mpl_lock_obj.lo_name = type->lt_name;
@@ -131,15 +157,21 @@ ___mp_lock_init(struct __mp_lock *mpl, const struct lock_type *type)
}
static __inline void
-__mp_lock_spin(struct __mp_lock *mpl, u_int me)
+__mp_lock_spin(struct __mp_lock *mpl, u_int index)
{
+ struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
#ifdef MP_LOCKDEBUG
long nticks = __mp_lock_spinout;
#endif
+ if (!READ_ONCE(mpl->mpl_locks[index].taken)) {
+ cpu->mplc_owned = true;
+ return;
+ }
+
spc->spc_spinning++;
- while (mpl->mpl_ticket != me) {
+ do {
CPU_BUSY_CYCLE();
#ifdef MP_LOCKDEBUG
@@ -149,8 +181,10 @@ __mp_lock_spin(struct __mp_lock *mpl, u_int me)
nticks = __mp_lock_spinout;
}
#endif
- }
+ } while (READ_ONCE(mpl->mpl_locks[index].taken));
+
spc->spc_spinning--;
+ cpu->mplc_owned = true;
}
void
@@ -167,10 +201,10 @@ __mp_lock(struct __mp_lock *mpl)
s = intr_disable();
if (cpu->mplc_depth++ == 0)
- cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users);
+ cpu->mplc_index = (atomic_inc_int_nv(&mpl->mpl_index) - 1) % MAXCPUS;
intr_restore(s);
- __mp_lock_spin(mpl, cpu->mplc_ticket);
+ __mp_lock_spin(mpl, cpu->mplc_index);
membar_enter_after_atomic();
WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE);
@@ -193,8 +227,10 @@ __mp_unlock(struct __mp_lock *mpl)
s = intr_disable();
if (--cpu->mplc_depth == 0) {
+ WRITE_ONCE(mpl->mpl_locks[cpu->mplc_index].taken, true);
membar_exit();
- mpl->mpl_ticket++;
+ WRITE_ONCE(mpl->mpl_locks[(cpu->mplc_index + 1) % MAXCPUS].taken, false);
+ cpu->mplc_owned = false;
}
intr_restore(s);
}
@@ -216,8 +252,10 @@ __mp_release_all(struct __mp_lock *mpl)
WITNESS_UNLOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE);
#endif
cpu->mplc_depth = 0;
+ WRITE_ONCE(mpl->mpl_locks[cpu->mplc_index].taken, true);
membar_exit();
- mpl->mpl_ticket++;
+ WRITE_ONCE(mpl->mpl_locks[(cpu->mplc_index + 1) % MAXCPUS].taken, false);
+ cpu->mplc_owned = false;
intr_restore(s);
return (rv);
@@ -235,7 +273,7 @@ __mp_lock_held(struct __mp_lock *mpl, struct cpu_info *ci)
{
struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[CPU_INFO_UNIT(ci)];
- return (cpu->mplc_ticket == mpl->mpl_ticket && cpu->mplc_depth > 0);
+ return (cpu->mplc_owned);
}
#endif /* __USE_MI_MPLOCK */
diff --git a/sys/sys/mplock.h b/sys/sys/mplock.h
index b28ac240d94..ccbc257bf14 100644
--- a/sys/sys/mplock.h
+++ b/sys/sys/mplock.h
@@ -33,19 +33,26 @@
#include <sys/_lock.h>
+#define CACHELINESIZE 64 /* FIXME */
+
struct __mp_lock_cpu {
- u_int mplc_ticket;
+ u_int mplc_index;
u_int mplc_depth;
-};
+ bool mplc_owned;
+} __aligned(CACHELINESIZE);
+
+struct __mp_lock_state {
+ bool taken;
+} __aligned(CACHELINESIZE);
struct __mp_lock {
struct __mp_lock_cpu mpl_cpus[MAXCPUS];
- volatile u_int mpl_ticket;
- u_int mpl_users;
+ struct __mp_lock_state mpl_locks[MAXCPUS];
+ volatile u_int mpl_index __aligned(CACHELINESIZE);
#ifdef WITNESS
struct lock_object mpl_lock_obj;
#endif
-};
+} __aligned(CACHELINESIZE);
void ___mp_lock_init(struct __mp_lock *, const struct lock_type *);
void __mp_lock(struct __mp_lock *);
--
2.44.0