Download raw body.
unlock mld6 fast timeout
Hi,
I want to get rid of the exclusive net lock in multicast listener
discovery protocol. Let's start with fast timeout.
As a first step, locking is a combination between shared net lock
and per interface mutex. Converted code uses if_maddrlist with
mutex if_maddrmtx. Old code still uses exclusive net lock so that
I don't have to touch everything in a single step.
The other idea is to split out sending ICMP6 packets. We traverse
the if_maddrlist with a mutex, create a temporary list with notificaton
packets we have to send, and later send the list without mutex as
ip6_output() may sleep when grabbing rw-lock.
ok?
bluhm
Index: net/if_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
diff -u -p -r1.139 if_var.h
--- net/if_var.h 19 Jul 2025 16:40:40 -0000 1.139
+++ net/if_var.h 13 Nov 2025 17:14:12 -0000
@@ -81,6 +81,7 @@
* K kernel lock
* N net lock
* T if_tmplist_lock
+ * m interface multicast mutex if_maddrmtx
*
* For SRP related structures that allow lock-free reads, the write lock
* is indicated below.
@@ -148,8 +149,9 @@ struct ifnet { /* and the entries */
TAILQ_ENTRY(ifnet) if_list; /* [NK] all struct ifnets are chained */
TAILQ_ENTRY(ifnet) if_tmplist; /* [T] temporary list */
TAILQ_HEAD(, ifaddr) if_addrlist; /* [N] list of addresses per if */
- TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [N] list of multicast records */
+ TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [Nm] list of multicast records */
TAILQ_HEAD(, ifg_list) if_groups; /* [N] list of groups per if */
+ struct mutex if_maddrmtx;
struct task_list if_addrhooks; /* [I] address change callbacks */
struct task_list if_linkstatehooks; /* [I] link change callbacks*/
struct task_list if_detachhooks; /* [I] detach callbacks */
Index: netinet6/in6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.c,v
diff -u -p -r1.274 in6.c
--- netinet6/in6.c 12 Nov 2025 11:37:08 -0000 1.274
+++ netinet6/in6.c 13 Nov 2025 17:14:12 -0000
@@ -1079,7 +1079,7 @@ in6_addmulti(const struct in6_addr *addr
* Let MLD6 know that we have joined a new IP6 multicast
* group.
*/
- mld6_start_listening(in6m);
+ mld6_start_listening(in6m, ifp);
}
return (in6m);
@@ -1096,34 +1096,34 @@ in6_delmulti(struct in6_multi *in6m)
NET_ASSERT_LOCKED();
- if (refcnt_rele(&in6m->in6m_refcnt) != 0) {
+ if (refcnt_rele(&in6m->in6m_refcnt) == 0)
+ return;
+
+ ifp = if_get(in6m->in6m_ifidx);
+ if (ifp != NULL) {
/*
* No remaining claims to this record; let MLD6 know
* that we are leaving the multicast group.
*/
- mld6_stop_listening(in6m);
- ifp = if_get(in6m->in6m_ifidx);
+ mld6_stop_listening(in6m, ifp);
/*
* Notify the network driver to update its multicast
* reception filter.
*/
- if (ifp != NULL) {
- bzero(&ifr.ifr_addr, sizeof(struct sockaddr_in6));
- ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6);
- ifr.ifr_addr.sin6_family = AF_INET6;
- ifr.ifr_addr.sin6_addr = in6m->in6m_addr;
- KERNEL_LOCK();
- (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
- KERNEL_UNLOCK();
-
- TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma,
- ifma_list);
- }
- if_put(ifp);
+ bzero(&ifr.ifr_addr, sizeof(struct sockaddr_in6));
+ ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6);
+ ifr.ifr_addr.sin6_family = AF_INET6;
+ ifr.ifr_addr.sin6_addr = in6m->in6m_addr;
+ KERNEL_LOCK();
+ (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
+ KERNEL_UNLOCK();
- free(in6m, M_IPMADDR, sizeof(*in6m));
+ TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma, ifma_list);
+ if_put(ifp);
}
+
+ free(in6m, M_IPMADDR, sizeof(*in6m));
}
/*
Index: netinet6/mld6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6.c,v
diff -u -p -r1.70 mld6.c
--- netinet6/mld6.c 12 Nov 2025 19:11:10 -0000 1.70
+++ netinet6/mld6.c 13 Nov 2025 17:16:25 -0000
@@ -68,6 +68,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/protosw.h>
@@ -82,11 +83,20 @@
#include <netinet6/mld6.h>
#include <netinet6/mld6_var.h>
+struct mld6_pktinfo {
+ STAILQ_ENTRY(mld6_pktinfo) mpi_list;
+ struct in6_addr mpi_addr;
+ unsigned int mpi_rdomain;
+ unsigned int mpi_ifidx;
+ int mpi_type;
+};
+STAILQ_HEAD(mld6_pktlist, mld6_pktinfo);
+
static struct ip6_pktopts ip6_opts;
int mld6_timers_are_running; /* [a] shortcut for fast timer */
int mld6_checktimer(struct ifnet *);
-static void mld6_sendpkt(struct in6_multi *, int, const struct in6_addr *);
+void mld6_sendpkt(const struct mld6_pktinfo *);
void
mld6_init(void)
@@ -112,7 +122,7 @@ mld6_init(void)
}
void
-mld6_start_listening(struct in6_multi *in6m)
+mld6_start_listening(struct in6_multi *in6m, struct ifnet *ifp)
{
/* XXX: These are necessary for KAME's link-local hack */
struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
@@ -132,10 +142,15 @@ mld6_start_listening(struct in6_multi *i
in6m->in6m_timer = 0;
in6m->in6m_state = MLD_OTHERLISTENER;
} else {
- mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
+ struct mld6_pktinfo pkt;
+
+ pkt.mpi_addr = in6m->in6m_addr;
+ pkt.mpi_rdomain = ifp->if_rdomain;
+ pkt.mpi_ifidx = in6m->in6m_ifidx;
+ pkt.mpi_type = MLD_LISTENER_REPORT;
+ mld6_sendpkt(&pkt);
in6m->in6m_timer =
- MLD_RANDOM_DELAY(MLD_V1_MAX_RI *
- PR_FASTHZ);
+ MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ);
in6m->in6m_state = MLD_IREPORTEDLAST;
running = 1;
}
@@ -147,7 +162,7 @@ mld6_start_listening(struct in6_multi *i
}
void
-mld6_stop_listening(struct in6_multi *in6m)
+mld6_stop_listening(struct in6_multi *in6m, struct ifnet *ifp)
{
/* XXX: These are necessary for KAME's link-local hack */
struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
@@ -160,8 +175,15 @@ mld6_stop_listening(struct in6_multi *in
if (in6m->in6m_state == MLD_IREPORTEDLAST &&
(!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_nodes)) &&
__IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
- __IPV6_ADDR_SCOPE_INTFACELOCAL)
- mld6_sendpkt(in6m, MLD_LISTENER_DONE, &all_routers);
+ __IPV6_ADDR_SCOPE_INTFACELOCAL) {
+ struct mld6_pktinfo pkt;
+
+ pkt.mpi_addr = all_routers;
+ pkt.mpi_rdomain = ifp->if_rdomain;
+ pkt.mpi_ifidx = in6m->in6m_ifidx;
+ pkt.mpi_type = MLD_LISTENER_DONE;
+ mld6_sendpkt(&pkt);
+ }
}
void
@@ -269,8 +291,13 @@ mld6_input(struct mbuf *m, int off)
{
if (timer == 0) {
/* send a report immediately */
- mld6_sendpkt(in6m, MLD_LISTENER_REPORT,
- NULL);
+ struct mld6_pktinfo pkt;
+
+ pkt.mpi_addr = in6m->in6m_addr;
+ pkt.mpi_rdomain = ifp->if_rdomain;
+ pkt.mpi_ifidx = in6m->in6m_ifidx;
+ pkt.mpi_type = MLD_LISTENER_REPORT;
+ mld6_sendpkt(&pkt);
in6m->in6m_timer = 0; /* reset timer */
in6m->in6m_state = MLD_IREPORTEDLAST;
} else if (in6m->in6m_timer == 0 || /* idle */
@@ -355,7 +382,7 @@ mld6_fasttimo(void)
return;
membar_consumer();
- NET_LOCK();
+ NET_LOCK_SHARED();
TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
if (mld6_checktimer(ifp))
@@ -365,7 +392,7 @@ mld6_fasttimo(void)
membar_producer();
atomic_store_int(&mld6_timers_are_running, running);
- NET_UNLOCK();
+ NET_UNLOCK_SHARED();
}
int
@@ -373,10 +400,11 @@ mld6_checktimer(struct ifnet *ifp)
{
struct in6_multi *in6m;
struct ifmaddr *ifma;
+ struct mld6_pktlist pktlist;
int running = 0;
- NET_ASSERT_LOCKED();
-
+ mtx_enter(&ifp->if_maddrmtx);
+ STAILQ_INIT(&pktlist);
TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) {
if (ifma->ifma_addr->sa_family != AF_INET6)
continue;
@@ -384,18 +412,37 @@ mld6_checktimer(struct ifnet *ifp)
if (in6m->in6m_timer == 0) {
/* do nothing */
} else if (--in6m->in6m_timer == 0) {
- mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
+ struct mld6_pktinfo *pkt;
+
in6m->in6m_state = MLD_IREPORTEDLAST;
+ pkt = malloc(sizeof(*pkt), M_MRTABLE, M_NOWAIT);
+ if (pkt == NULL)
+ continue;
+ pkt->mpi_addr = in6m->in6m_addr;
+ pkt->mpi_rdomain = ifp->if_rdomain;
+ pkt->mpi_ifidx = in6m->in6m_ifidx;
+ pkt->mpi_type = MLD_LISTENER_REPORT;
+ STAILQ_INSERT_TAIL(&pktlist, pkt, mpi_list);
} else {
running = 1;
}
}
+ mtx_leave(&ifp->if_maddrmtx);
+
+ while (!STAILQ_EMPTY(&pktlist)) {
+ struct mld6_pktinfo *pkt;
+
+ pkt = STAILQ_FIRST(&pktlist);
+ STAILQ_REMOVE_HEAD(&pktlist, mpi_list);
+ mld6_sendpkt(pkt);
+ free(pkt, M_MRTABLE, sizeof(*pkt));
+ }
return (running);
}
-static void
-mld6_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
+void
+mld6_sendpkt(const struct mld6_pktinfo *pkt)
{
struct mbuf *mh, *md;
struct mld_hdr *mldh;
@@ -405,7 +452,7 @@ mld6_sendpkt(struct in6_multi *in6m, int
struct ifnet *ifp;
int ignflags;
- ifp = if_get(in6m->in6m_ifidx);
+ ifp = if_get(pkt->mpi_ifidx);
if (ifp == NULL)
return;
@@ -441,8 +488,7 @@ mld6_sendpkt(struct in6_multi *in6m, int
}
mh->m_next = md;
- mh->m_pkthdr.ph_ifidx = 0;
- mh->m_pkthdr.ph_rtableid = ifp->if_rdomain;
+ mh->m_pkthdr.ph_rtableid = pkt->mpi_rdomain;
mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
mh->m_len = sizeof(struct ip6_hdr);
m_align(mh, sizeof(struct ip6_hdr));
@@ -456,25 +502,25 @@ mld6_sendpkt(struct in6_multi *in6m, int
ip6->ip6_nxt = IPPROTO_ICMPV6;
/* ip6_hlim will be set by im6o.im6o_hlim */
ip6->ip6_src = ia6 ? ia6->ia_addr.sin6_addr : in6addr_any;
- ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
+ ip6->ip6_dst = pkt->mpi_addr;
/* fill in the MLD header */
md->m_len = sizeof(struct mld_hdr);
mldh = mtod(md, struct mld_hdr *);
- mldh->mld_type = type;
+ mldh->mld_type = pkt->mpi_type;
mldh->mld_code = 0;
mldh->mld_cksum = 0;
/* XXX: we assume the function will not be called for query messages */
mldh->mld_maxdelay = 0;
mldh->mld_reserved = 0;
- mldh->mld_addr = in6m->in6m_addr;
+ mldh->mld_addr = pkt->mpi_addr;
if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld_addr))
mldh->mld_addr.s6_addr16[1] = 0; /* XXX */
mh->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
/* construct multicast option */
bzero(&im6o, sizeof(im6o));
- im6o.im6o_ifidx = ifp->if_index;
+ im6o.im6o_ifidx = pkt->mpi_ifidx;
im6o.im6o_hlim = 1;
/*
@@ -482,10 +528,10 @@ mld6_sendpkt(struct in6_multi *in6m, int
* router, so that the process-level routing daemon can hear it.
*/
#ifdef MROUTING
- im6o.im6o_loop = (ip6_mrouter[ifp->if_rdomain] != NULL);
+ im6o.im6o_loop = (ip6_mrouter[pkt->mpi_rdomain] != NULL);
#endif
if_put(ifp);
- icmp6stat_inc(icp6s_outhist + type);
+ icmp6stat_inc(icp6s_outhist + pkt->mpi_type);
ip6_output(mh, &ip6_opts, NULL, ia6 ? 0 : IPV6_UNSPECSRC, &im6o, NULL);
}
Index: netinet6/mld6_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6_var.h,v
diff -u -p -r1.7 mld6_var.h
--- netinet6/mld6_var.h 12 Nov 2025 19:11:10 -0000 1.7
+++ netinet6/mld6_var.h 13 Nov 2025 17:14:12 -0000
@@ -45,8 +45,8 @@
void mld6_init(void);
void mld6_input(struct mbuf *, int);
-void mld6_start_listening(struct in6_multi *);
-void mld6_stop_listening(struct in6_multi *);
+void mld6_start_listening(struct in6_multi *, struct ifnet *ifp);
+void mld6_stop_listening(struct in6_multi *, struct ifnet *ifp);
void mld6_fasttimo(void);
#endif /* _KERNEL */
unlock mld6 fast timeout