Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
unlock mld6 fast timeout
To:
tech@openbsd.org
Date:
Thu, 13 Nov 2025 18:28:44 +0100

Download raw body.

Thread
  • Alexander Bluhm:

    unlock mld6 fast timeout

Hi,

I want to get rid of the exclusive net lock in multicast listener
discovery protocol.  Let's start with fast timeout.

As a first step, locking is a combination between shared net lock
and per interface mutex.  Converted code uses if_maddrlist with
mutex if_maddrmtx.  Old code still uses exclusive net lock so that
I don't have to touch everything in a single step.

The other idea is to split out sending ICMP6 packets.  We traverse
the if_maddrlist with a mutex, create a temporary list with notificaton
packets we have to send, and later send the list without mutex as
ip6_output() may sleep when grabbing rw-lock.

ok?

bluhm

Index: net/if_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
diff -u -p -r1.139 if_var.h
--- net/if_var.h	19 Jul 2025 16:40:40 -0000	1.139
+++ net/if_var.h	13 Nov 2025 17:14:12 -0000
@@ -81,6 +81,7 @@
  *	K	kernel lock
  *	N	net lock
  *	T	if_tmplist_lock
+ *	m	interface multicast mutex if_maddrmtx
  *
  *  For SRP related structures that allow lock-free reads, the write lock
  *  is indicated below.
@@ -148,8 +149,9 @@ struct ifnet {				/* and the entries */
 	TAILQ_ENTRY(ifnet) if_list;	/* [NK] all struct ifnets are chained */
 	TAILQ_ENTRY(ifnet) if_tmplist;	/* [T] temporary list */
 	TAILQ_HEAD(, ifaddr) if_addrlist; /* [N] list of addresses per if */
-	TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [N] list of multicast records */
+	TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [Nm] list of multicast records */
 	TAILQ_HEAD(, ifg_list) if_groups; /* [N] list of groups per if */
+	struct mutex if_maddrmtx;
 	struct task_list if_addrhooks;	/* [I] address change callbacks */
 	struct task_list if_linkstatehooks; /* [I] link change callbacks*/
 	struct task_list if_detachhooks; /* [I] detach callbacks */
Index: netinet6/in6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.c,v
diff -u -p -r1.274 in6.c
--- netinet6/in6.c	12 Nov 2025 11:37:08 -0000	1.274
+++ netinet6/in6.c	13 Nov 2025 17:14:12 -0000
@@ -1079,7 +1079,7 @@ in6_addmulti(const struct in6_addr *addr
 		 * Let MLD6 know that we have joined a new IP6 multicast
 		 * group.
 		 */
-		mld6_start_listening(in6m);
+		mld6_start_listening(in6m, ifp);
 	}
 
 	return (in6m);
@@ -1096,34 +1096,34 @@ in6_delmulti(struct in6_multi *in6m)
 
 	NET_ASSERT_LOCKED();
 
-	if (refcnt_rele(&in6m->in6m_refcnt) != 0) {
+	if (refcnt_rele(&in6m->in6m_refcnt) == 0)
+		return;
+
+	ifp = if_get(in6m->in6m_ifidx);
+	if (ifp != NULL) {
 		/*
 		 * No remaining claims to this record; let MLD6 know
 		 * that we are leaving the multicast group.
 		 */
-		mld6_stop_listening(in6m);
-		ifp = if_get(in6m->in6m_ifidx);
+		mld6_stop_listening(in6m, ifp);
 
 		/*
 		 * Notify the network driver to update its multicast
 		 * reception filter.
 		 */
-		if (ifp != NULL) {
-			bzero(&ifr.ifr_addr, sizeof(struct sockaddr_in6));
-			ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6);
-			ifr.ifr_addr.sin6_family = AF_INET6;
-			ifr.ifr_addr.sin6_addr = in6m->in6m_addr;
-			KERNEL_LOCK();
-			(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
-			KERNEL_UNLOCK();
-
-			TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma,
-			    ifma_list);
-		}
-		if_put(ifp);
+		bzero(&ifr.ifr_addr, sizeof(struct sockaddr_in6));
+		ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6);
+		ifr.ifr_addr.sin6_family = AF_INET6;
+		ifr.ifr_addr.sin6_addr = in6m->in6m_addr;
+		KERNEL_LOCK();
+		(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
+		KERNEL_UNLOCK();
 
-		free(in6m, M_IPMADDR, sizeof(*in6m));
+		TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma, ifma_list);
+		if_put(ifp);
 	}
+
+	free(in6m, M_IPMADDR, sizeof(*in6m));
 }
 
 /*
Index: netinet6/mld6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6.c,v
diff -u -p -r1.70 mld6.c
--- netinet6/mld6.c	12 Nov 2025 19:11:10 -0000	1.70
+++ netinet6/mld6.c	13 Nov 2025 17:16:25 -0000
@@ -68,6 +68,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
+#include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
 
@@ -82,11 +83,20 @@
 #include <netinet6/mld6.h>
 #include <netinet6/mld6_var.h>
 
+struct mld6_pktinfo {
+	STAILQ_ENTRY(mld6_pktinfo)	mpi_list;
+	struct in6_addr			mpi_addr;
+	unsigned int			mpi_rdomain;
+	unsigned int			mpi_ifidx;
+	int				mpi_type;
+};
+STAILQ_HEAD(mld6_pktlist, mld6_pktinfo);
+
 static struct ip6_pktopts ip6_opts;
 int	mld6_timers_are_running;	/* [a] shortcut for fast timer */
 
 int mld6_checktimer(struct ifnet *);
-static void mld6_sendpkt(struct in6_multi *, int, const struct in6_addr *);
+void mld6_sendpkt(const struct mld6_pktinfo *);
 
 void
 mld6_init(void)
@@ -112,7 +122,7 @@ mld6_init(void)
 }
 
 void
-mld6_start_listening(struct in6_multi *in6m)
+mld6_start_listening(struct in6_multi *in6m, struct ifnet *ifp)
 {
 	/* XXX: These are necessary for KAME's link-local hack */
 	struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
@@ -132,10 +142,15 @@ mld6_start_listening(struct in6_multi *i
 		in6m->in6m_timer = 0;
 		in6m->in6m_state = MLD_OTHERLISTENER;
 	} else {
-		mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
+		struct mld6_pktinfo pkt;
+
+		pkt.mpi_addr = in6m->in6m_addr;
+		pkt.mpi_rdomain = ifp->if_rdomain;
+		pkt.mpi_ifidx = in6m->in6m_ifidx;
+		pkt.mpi_type = MLD_LISTENER_REPORT;
+		mld6_sendpkt(&pkt);
 		in6m->in6m_timer =
-		    MLD_RANDOM_DELAY(MLD_V1_MAX_RI *
-		    PR_FASTHZ);
+		    MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ);
 		in6m->in6m_state = MLD_IREPORTEDLAST;
 		running = 1;
 	}
@@ -147,7 +162,7 @@ mld6_start_listening(struct in6_multi *i
 }
 
 void
-mld6_stop_listening(struct in6_multi *in6m)
+mld6_stop_listening(struct in6_multi *in6m, struct ifnet *ifp)
 {
 	/* XXX: These are necessary for KAME's link-local hack */
 	struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
@@ -160,8 +175,15 @@ mld6_stop_listening(struct in6_multi *in
 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_nodes)) &&
 	    __IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
-	    __IPV6_ADDR_SCOPE_INTFACELOCAL)
-		mld6_sendpkt(in6m, MLD_LISTENER_DONE, &all_routers);
+	    __IPV6_ADDR_SCOPE_INTFACELOCAL) {
+		struct mld6_pktinfo pkt;
+
+		pkt.mpi_addr = all_routers;
+		pkt.mpi_rdomain = ifp->if_rdomain;
+		pkt.mpi_ifidx = in6m->in6m_ifidx;
+		pkt.mpi_type = MLD_LISTENER_DONE;
+		mld6_sendpkt(&pkt);
+	}
 }
 
 void
@@ -269,8 +291,13 @@ mld6_input(struct mbuf *m, int off)
 			{
 				if (timer == 0) {
 					/* send a report immediately */
-					mld6_sendpkt(in6m, MLD_LISTENER_REPORT,
-					    NULL);
+					struct mld6_pktinfo pkt;
+
+					pkt.mpi_addr = in6m->in6m_addr;
+					pkt.mpi_rdomain = ifp->if_rdomain;
+					pkt.mpi_ifidx = in6m->in6m_ifidx;
+					pkt.mpi_type = MLD_LISTENER_REPORT;
+					mld6_sendpkt(&pkt);
 					in6m->in6m_timer = 0; /* reset timer */
 					in6m->in6m_state = MLD_IREPORTEDLAST;
 				} else if (in6m->in6m_timer == 0 || /* idle */
@@ -355,7 +382,7 @@ mld6_fasttimo(void)
 		return;
 	membar_consumer();
 
-	NET_LOCK();
+	NET_LOCK_SHARED();
 
 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
 		if (mld6_checktimer(ifp))
@@ -365,7 +392,7 @@ mld6_fasttimo(void)
 	membar_producer();
 	atomic_store_int(&mld6_timers_are_running, running);
 
-	NET_UNLOCK();
+	NET_UNLOCK_SHARED();
 }
 
 int
@@ -373,10 +400,11 @@ mld6_checktimer(struct ifnet *ifp)
 {
 	struct in6_multi *in6m;
 	struct ifmaddr *ifma;
+	struct mld6_pktlist pktlist;
 	int running = 0;
 
-	NET_ASSERT_LOCKED();
-
+	mtx_enter(&ifp->if_maddrmtx);
+	STAILQ_INIT(&pktlist);
 	TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) {
 		if (ifma->ifma_addr->sa_family != AF_INET6)
 			continue;
@@ -384,18 +412,37 @@ mld6_checktimer(struct ifnet *ifp)
 		if (in6m->in6m_timer == 0) {
 			/* do nothing */
 		} else if (--in6m->in6m_timer == 0) {
-			mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
+			struct mld6_pktinfo *pkt;
+
 			in6m->in6m_state = MLD_IREPORTEDLAST;
+			pkt = malloc(sizeof(*pkt), M_MRTABLE, M_NOWAIT);
+			if (pkt == NULL)
+				continue;
+			pkt->mpi_addr = in6m->in6m_addr;
+			pkt->mpi_rdomain = ifp->if_rdomain;
+			pkt->mpi_ifidx = in6m->in6m_ifidx;
+			pkt->mpi_type = MLD_LISTENER_REPORT;
+			STAILQ_INSERT_TAIL(&pktlist, pkt, mpi_list);
 		} else {
 			running = 1;
 		}
 	}
+	mtx_leave(&ifp->if_maddrmtx);
+
+	while (!STAILQ_EMPTY(&pktlist)) {
+		struct mld6_pktinfo *pkt;
+ 
+		pkt = STAILQ_FIRST(&pktlist);
+		STAILQ_REMOVE_HEAD(&pktlist, mpi_list);
+		mld6_sendpkt(pkt);
+		free(pkt, M_MRTABLE, sizeof(*pkt));
+	}
 
 	return (running);
 }
 
-static void
-mld6_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
+void
+mld6_sendpkt(const struct mld6_pktinfo *pkt)
 {
 	struct mbuf *mh, *md;
 	struct mld_hdr *mldh;
@@ -405,7 +452,7 @@ mld6_sendpkt(struct in6_multi *in6m, int
 	struct ifnet *ifp;
 	int ignflags;
 
-	ifp = if_get(in6m->in6m_ifidx);
+	ifp = if_get(pkt->mpi_ifidx);
 	if (ifp == NULL)
 		return;
 
@@ -441,8 +488,7 @@ mld6_sendpkt(struct in6_multi *in6m, int
 	}
 	mh->m_next = md;
 
-	mh->m_pkthdr.ph_ifidx = 0;
-	mh->m_pkthdr.ph_rtableid = ifp->if_rdomain;
+	mh->m_pkthdr.ph_rtableid = pkt->mpi_rdomain;
 	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
 	mh->m_len = sizeof(struct ip6_hdr);
 	m_align(mh, sizeof(struct ip6_hdr));
@@ -456,25 +502,25 @@ mld6_sendpkt(struct in6_multi *in6m, int
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	/* ip6_hlim will be set by im6o.im6o_hlim */
 	ip6->ip6_src = ia6 ? ia6->ia_addr.sin6_addr : in6addr_any;
-	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
+	ip6->ip6_dst = pkt->mpi_addr;
 
 	/* fill in the MLD header */
 	md->m_len = sizeof(struct mld_hdr);
 	mldh = mtod(md, struct mld_hdr *);
-	mldh->mld_type = type;
+	mldh->mld_type = pkt->mpi_type;
 	mldh->mld_code = 0;
 	mldh->mld_cksum = 0;
 	/* XXX: we assume the function will not be called for query messages */
 	mldh->mld_maxdelay = 0;
 	mldh->mld_reserved = 0;
-	mldh->mld_addr = in6m->in6m_addr;
+	mldh->mld_addr = pkt->mpi_addr;
 	if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld_addr))
 		mldh->mld_addr.s6_addr16[1] = 0; /* XXX */
 	mh->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
 
 	/* construct multicast option */
 	bzero(&im6o, sizeof(im6o));
-	im6o.im6o_ifidx = ifp->if_index;
+	im6o.im6o_ifidx = pkt->mpi_ifidx;
 	im6o.im6o_hlim = 1;
 
 	/*
@@ -482,10 +528,10 @@ mld6_sendpkt(struct in6_multi *in6m, int
 	 * router, so that the process-level routing daemon can hear it.
 	 */
 #ifdef MROUTING
-	im6o.im6o_loop = (ip6_mrouter[ifp->if_rdomain] != NULL);
+	im6o.im6o_loop = (ip6_mrouter[pkt->mpi_rdomain] != NULL);
 #endif
 	if_put(ifp);
 
-	icmp6stat_inc(icp6s_outhist + type);
+	icmp6stat_inc(icp6s_outhist + pkt->mpi_type);
 	ip6_output(mh, &ip6_opts, NULL, ia6 ? 0 : IPV6_UNSPECSRC, &im6o, NULL);
 }
Index: netinet6/mld6_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6_var.h,v
diff -u -p -r1.7 mld6_var.h
--- netinet6/mld6_var.h	12 Nov 2025 19:11:10 -0000	1.7
+++ netinet6/mld6_var.h	13 Nov 2025 17:14:12 -0000
@@ -45,8 +45,8 @@
 
 void	mld6_init(void);
 void	mld6_input(struct mbuf *, int);
-void	mld6_start_listening(struct in6_multi *);
-void	mld6_stop_listening(struct in6_multi *);
+void	mld6_start_listening(struct in6_multi *, struct ifnet *ifp);
+void	mld6_stop_listening(struct in6_multi *, struct ifnet *ifp);
 void	mld6_fasttimo(void);
 #endif /* _KERNEL */