Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
mbuf cluster m_extref_mtx contention
To:
tech@openbsd.org
Date:
Sun, 8 Feb 2026 10:32:29 -0500

Download raw body.

Thread
Hi,

I some of my tests, especially with socket splicing, I see contention
on the m_extref_mtx mutex.  It is a global lock to list mbufs that
point to the same external cluster.

This happens when freeing mbuf chains in the TX path of the driver
while the TCP stack still has a reference to the cluster for possible
retransmits.  This diff avoids to grab the mutex for each m_freem().
Instead the mbufs are cached in a per CPU mbuf list.  When 50 mbufs
have been collected, the mutex is taken once to free all of them.

The number 50 is arbitrary, it means at most 4 MB of memory per CPU
may be wasted.  Should I add an #ifdef MULTIPROCESSOR ?

opinions?  ok?

bluhm

Index: kern/uipc_mbuf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_mbuf.c,v
diff -u -p -r1.302 uipc_mbuf.c
--- kern/uipc_mbuf.c	6 Aug 2025 14:00:33 -0000	1.302
+++ kern/uipc_mbuf.c	30 Dec 2025 22:26:06 -0000
@@ -124,6 +124,7 @@ int max_protohdr;		/* largest protocol h
 int max_hdr;			/* largest link+protocol header */
 
 struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);
+struct	cpumem *m_extfree_cpu;
 
 void	m_extfree(struct mbuf *);
 void	m_zero(struct mbuf *);
@@ -207,6 +208,8 @@ mbcpuinit(void)
 
 	for (i = 0; i < nitems(mclsizes); i++)
 		pool_cache_init(&mclpools[i]);
+
+	m_extfree_cpu = cpumem_malloc(sizeof(struct mbuf_list), M_MBUF);
 }
 
 int
@@ -426,9 +429,28 @@ m_free(struct mbuf *m)
 		pf_mbuf_unlink_inpcb(m);
 #endif	/* NPF > 0 */
 	}
-	if (m->m_flags & M_EXT)
-		m_extfree(m);
+	if (m->m_flags & M_EXT) {
+		if (MCLISREFERENCED(m)) {
+			struct mbuf_list *ml;
+
+			ml = cpumem_enter(m_extfree_cpu);
+			s = splnet();
+			ml_enqueue(ml, m);
+			if (ml_len(ml) > 50) {
+				mtx_enter(&m_extref_mtx);
+				MBUF_LIST_FOREACH(ml, m)
+					m_extfree(m);
+				mtx_leave(&m_extref_mtx);
+				while ((m = ml_dequeue(ml)) != NULL)
+					pool_put(&mbpool, m);
+			}
+			splx(s);
+			cpumem_leave(m_extfree_cpu, ml);
 
+			return (n);
+		}
+		m_extfree(m);
+	}
 	pool_put(&mbpool, m);
 
 	return (n);
@@ -456,22 +478,15 @@ m_extref(struct mbuf *o, struct mbuf *n)
 static inline u_int
 m_extunref(struct mbuf *m)
 {
-	int refs = 0;
-
 	if (!MCLISREFERENCED(m))
 		return (0);
 
-	mtx_enter(&m_extref_mtx);
-	if (MCLISREFERENCED(m)) {
-		m->m_ext.ext_nextref->m_ext.ext_prevref =
-		    m->m_ext.ext_prevref;
-		m->m_ext.ext_prevref->m_ext.ext_nextref =
-		    m->m_ext.ext_nextref;
-		refs = 1;
-	}
-	mtx_leave(&m_extref_mtx);
+	MUTEX_ASSERT_LOCKED(&m_extref_mtx);
+
+	m->m_ext.ext_nextref->m_ext.ext_prevref = m->m_ext.ext_prevref;
+	m->m_ext.ext_prevref->m_ext.ext_nextref = m->m_ext.ext_nextref;
 
-	return (refs);
+	return (1);
 }
 
 /*
@@ -555,8 +570,15 @@ m_defrag(struct mbuf *m, int how)
 	/* free chain behind and possible ext buf on the first mbuf */
 	m_freem(m->m_next);
 	m->m_next = NULL;
-	if (m->m_flags & M_EXT)
+	if (m->m_flags & M_EXT) {
+		int lock = MCLISREFERENCED(m);
+
+		if (lock)
+			mtx_enter(&m_extref_mtx);
 		m_extfree(m);
+		if (lock)
+			mtx_leave(&m_extref_mtx);
+	}
 
 	/*
 	 * Bounce copy mbuf over to the original mbuf and set everything up.
Index: sys/malloc.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/malloc.h,v
diff -u -p -r1.127 malloc.h
--- sys/malloc.h	5 Feb 2025 18:29:17 -0000	1.127
+++ sys/malloc.h	30 Dec 2025 21:47:58 -0000
@@ -65,7 +65,7 @@
 #define	M_FREE		0	/* should be on free list */
 /* 1 - free */
 #define	M_DEVBUF	2	/* device driver memory */
-/* 3 - free */
+#define M_MBUF		3	/* mbuf */
 #define	M_PCB		4	/* protocol control blocks */
 #define	M_RTABLE	5	/* routing tables */
 #define	M_PF		6	/* packet filter structures */