From: Alexander Bluhm Subject: unlock mld6 fast timeout To: tech@openbsd.org Date: Thu, 13 Nov 2025 18:28:44 +0100 Hi, I want to get rid of the exclusive net lock in multicast listener discovery protocol. Let's start with fast timeout. As a first step, locking is a combination between shared net lock and per interface mutex. Converted code uses if_maddrlist with mutex if_maddrmtx. Old code still uses exclusive net lock so that I don't have to touch everything in a single step. The other idea is to split out sending ICMP6 packets. We traverse the if_maddrlist with a mutex, create a temporary list with notificaton packets we have to send, and later send the list without mutex as ip6_output() may sleep when grabbing rw-lock. ok? bluhm Index: net/if_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v diff -u -p -r1.139 if_var.h --- net/if_var.h 19 Jul 2025 16:40:40 -0000 1.139 +++ net/if_var.h 13 Nov 2025 17:14:12 -0000 @@ -81,6 +81,7 @@ * K kernel lock * N net lock * T if_tmplist_lock + * m interface multicast mutex if_maddrmtx * * For SRP related structures that allow lock-free reads, the write lock * is indicated below. @@ -148,8 +149,9 @@ struct ifnet { /* and the entries */ TAILQ_ENTRY(ifnet) if_list; /* [NK] all struct ifnets are chained */ TAILQ_ENTRY(ifnet) if_tmplist; /* [T] temporary list */ TAILQ_HEAD(, ifaddr) if_addrlist; /* [N] list of addresses per if */ - TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [N] list of multicast records */ + TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [Nm] list of multicast records */ TAILQ_HEAD(, ifg_list) if_groups; /* [N] list of groups per if */ + struct mutex if_maddrmtx; struct task_list if_addrhooks; /* [I] address change callbacks */ struct task_list if_linkstatehooks; /* [I] link change callbacks*/ struct task_list if_detachhooks; /* [I] detach callbacks */ Index: netinet6/in6.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.c,v diff -u -p -r1.274 in6.c --- netinet6/in6.c 12 Nov 2025 11:37:08 -0000 1.274 +++ netinet6/in6.c 13 Nov 2025 17:14:12 -0000 @@ -1079,7 +1079,7 @@ in6_addmulti(const struct in6_addr *addr * Let MLD6 know that we have joined a new IP6 multicast * group. */ - mld6_start_listening(in6m); + mld6_start_listening(in6m, ifp); } return (in6m); @@ -1096,34 +1096,34 @@ in6_delmulti(struct in6_multi *in6m) NET_ASSERT_LOCKED(); - if (refcnt_rele(&in6m->in6m_refcnt) != 0) { + if (refcnt_rele(&in6m->in6m_refcnt) == 0) + return; + + ifp = if_get(in6m->in6m_ifidx); + if (ifp != NULL) { /* * No remaining claims to this record; let MLD6 know * that we are leaving the multicast group. */ - mld6_stop_listening(in6m); - ifp = if_get(in6m->in6m_ifidx); + mld6_stop_listening(in6m, ifp); /* * Notify the network driver to update its multicast * reception filter. */ - if (ifp != NULL) { - bzero(&ifr.ifr_addr, sizeof(struct sockaddr_in6)); - ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6); - ifr.ifr_addr.sin6_family = AF_INET6; - ifr.ifr_addr.sin6_addr = in6m->in6m_addr; - KERNEL_LOCK(); - (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); - KERNEL_UNLOCK(); - - TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma, - ifma_list); - } - if_put(ifp); + bzero(&ifr.ifr_addr, sizeof(struct sockaddr_in6)); + ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6); + ifr.ifr_addr.sin6_family = AF_INET6; + ifr.ifr_addr.sin6_addr = in6m->in6m_addr; + KERNEL_LOCK(); + (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); + KERNEL_UNLOCK(); - free(in6m, M_IPMADDR, sizeof(*in6m)); + TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma, ifma_list); + if_put(ifp); } + + free(in6m, M_IPMADDR, sizeof(*in6m)); } /* Index: netinet6/mld6.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6.c,v diff -u -p -r1.70 mld6.c --- netinet6/mld6.c 12 Nov 2025 19:11:10 -0000 1.70 +++ netinet6/mld6.c 13 Nov 2025 17:16:25 -0000 @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -82,11 +83,20 @@ #include #include +struct mld6_pktinfo { + STAILQ_ENTRY(mld6_pktinfo) mpi_list; + struct in6_addr mpi_addr; + unsigned int mpi_rdomain; + unsigned int mpi_ifidx; + int mpi_type; +}; +STAILQ_HEAD(mld6_pktlist, mld6_pktinfo); + static struct ip6_pktopts ip6_opts; int mld6_timers_are_running; /* [a] shortcut for fast timer */ int mld6_checktimer(struct ifnet *); -static void mld6_sendpkt(struct in6_multi *, int, const struct in6_addr *); +void mld6_sendpkt(const struct mld6_pktinfo *); void mld6_init(void) @@ -112,7 +122,7 @@ mld6_init(void) } void -mld6_start_listening(struct in6_multi *in6m) +mld6_start_listening(struct in6_multi *in6m, struct ifnet *ifp) { /* XXX: These are necessary for KAME's link-local hack */ struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; @@ -132,10 +142,15 @@ mld6_start_listening(struct in6_multi *i in6m->in6m_timer = 0; in6m->in6m_state = MLD_OTHERLISTENER; } else { - mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL); + struct mld6_pktinfo pkt; + + pkt.mpi_addr = in6m->in6m_addr; + pkt.mpi_rdomain = ifp->if_rdomain; + pkt.mpi_ifidx = in6m->in6m_ifidx; + pkt.mpi_type = MLD_LISTENER_REPORT; + mld6_sendpkt(&pkt); in6m->in6m_timer = - MLD_RANDOM_DELAY(MLD_V1_MAX_RI * - PR_FASTHZ); + MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ); in6m->in6m_state = MLD_IREPORTEDLAST; running = 1; } @@ -147,7 +162,7 @@ mld6_start_listening(struct in6_multi *i } void -mld6_stop_listening(struct in6_multi *in6m) +mld6_stop_listening(struct in6_multi *in6m, struct ifnet *ifp) { /* XXX: These are necessary for KAME's link-local hack */ struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; @@ -160,8 +175,15 @@ mld6_stop_listening(struct in6_multi *in if (in6m->in6m_state == MLD_IREPORTEDLAST && (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_nodes)) && __IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) > - __IPV6_ADDR_SCOPE_INTFACELOCAL) - mld6_sendpkt(in6m, MLD_LISTENER_DONE, &all_routers); + __IPV6_ADDR_SCOPE_INTFACELOCAL) { + struct mld6_pktinfo pkt; + + pkt.mpi_addr = all_routers; + pkt.mpi_rdomain = ifp->if_rdomain; + pkt.mpi_ifidx = in6m->in6m_ifidx; + pkt.mpi_type = MLD_LISTENER_DONE; + mld6_sendpkt(&pkt); + } } void @@ -269,8 +291,13 @@ mld6_input(struct mbuf *m, int off) { if (timer == 0) { /* send a report immediately */ - mld6_sendpkt(in6m, MLD_LISTENER_REPORT, - NULL); + struct mld6_pktinfo pkt; + + pkt.mpi_addr = in6m->in6m_addr; + pkt.mpi_rdomain = ifp->if_rdomain; + pkt.mpi_ifidx = in6m->in6m_ifidx; + pkt.mpi_type = MLD_LISTENER_REPORT; + mld6_sendpkt(&pkt); in6m->in6m_timer = 0; /* reset timer */ in6m->in6m_state = MLD_IREPORTEDLAST; } else if (in6m->in6m_timer == 0 || /* idle */ @@ -355,7 +382,7 @@ mld6_fasttimo(void) return; membar_consumer(); - NET_LOCK(); + NET_LOCK_SHARED(); TAILQ_FOREACH(ifp, &ifnetlist, if_list) { if (mld6_checktimer(ifp)) @@ -365,7 +392,7 @@ mld6_fasttimo(void) membar_producer(); atomic_store_int(&mld6_timers_are_running, running); - NET_UNLOCK(); + NET_UNLOCK_SHARED(); } int @@ -373,10 +400,11 @@ mld6_checktimer(struct ifnet *ifp) { struct in6_multi *in6m; struct ifmaddr *ifma; + struct mld6_pktlist pktlist; int running = 0; - NET_ASSERT_LOCKED(); - + mtx_enter(&ifp->if_maddrmtx); + STAILQ_INIT(&pktlist); TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) { if (ifma->ifma_addr->sa_family != AF_INET6) continue; @@ -384,18 +412,37 @@ mld6_checktimer(struct ifnet *ifp) if (in6m->in6m_timer == 0) { /* do nothing */ } else if (--in6m->in6m_timer == 0) { - mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL); + struct mld6_pktinfo *pkt; + in6m->in6m_state = MLD_IREPORTEDLAST; + pkt = malloc(sizeof(*pkt), M_MRTABLE, M_NOWAIT); + if (pkt == NULL) + continue; + pkt->mpi_addr = in6m->in6m_addr; + pkt->mpi_rdomain = ifp->if_rdomain; + pkt->mpi_ifidx = in6m->in6m_ifidx; + pkt->mpi_type = MLD_LISTENER_REPORT; + STAILQ_INSERT_TAIL(&pktlist, pkt, mpi_list); } else { running = 1; } } + mtx_leave(&ifp->if_maddrmtx); + + while (!STAILQ_EMPTY(&pktlist)) { + struct mld6_pktinfo *pkt; + + pkt = STAILQ_FIRST(&pktlist); + STAILQ_REMOVE_HEAD(&pktlist, mpi_list); + mld6_sendpkt(pkt); + free(pkt, M_MRTABLE, sizeof(*pkt)); + } return (running); } -static void -mld6_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst) +void +mld6_sendpkt(const struct mld6_pktinfo *pkt) { struct mbuf *mh, *md; struct mld_hdr *mldh; @@ -405,7 +452,7 @@ mld6_sendpkt(struct in6_multi *in6m, int struct ifnet *ifp; int ignflags; - ifp = if_get(in6m->in6m_ifidx); + ifp = if_get(pkt->mpi_ifidx); if (ifp == NULL) return; @@ -441,8 +488,7 @@ mld6_sendpkt(struct in6_multi *in6m, int } mh->m_next = md; - mh->m_pkthdr.ph_ifidx = 0; - mh->m_pkthdr.ph_rtableid = ifp->if_rdomain; + mh->m_pkthdr.ph_rtableid = pkt->mpi_rdomain; mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr); mh->m_len = sizeof(struct ip6_hdr); m_align(mh, sizeof(struct ip6_hdr)); @@ -456,25 +502,25 @@ mld6_sendpkt(struct in6_multi *in6m, int ip6->ip6_nxt = IPPROTO_ICMPV6; /* ip6_hlim will be set by im6o.im6o_hlim */ ip6->ip6_src = ia6 ? ia6->ia_addr.sin6_addr : in6addr_any; - ip6->ip6_dst = dst ? *dst : in6m->in6m_addr; + ip6->ip6_dst = pkt->mpi_addr; /* fill in the MLD header */ md->m_len = sizeof(struct mld_hdr); mldh = mtod(md, struct mld_hdr *); - mldh->mld_type = type; + mldh->mld_type = pkt->mpi_type; mldh->mld_code = 0; mldh->mld_cksum = 0; /* XXX: we assume the function will not be called for query messages */ mldh->mld_maxdelay = 0; mldh->mld_reserved = 0; - mldh->mld_addr = in6m->in6m_addr; + mldh->mld_addr = pkt->mpi_addr; if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld_addr)) mldh->mld_addr.s6_addr16[1] = 0; /* XXX */ mh->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT; /* construct multicast option */ bzero(&im6o, sizeof(im6o)); - im6o.im6o_ifidx = ifp->if_index; + im6o.im6o_ifidx = pkt->mpi_ifidx; im6o.im6o_hlim = 1; /* @@ -482,10 +528,10 @@ mld6_sendpkt(struct in6_multi *in6m, int * router, so that the process-level routing daemon can hear it. */ #ifdef MROUTING - im6o.im6o_loop = (ip6_mrouter[ifp->if_rdomain] != NULL); + im6o.im6o_loop = (ip6_mrouter[pkt->mpi_rdomain] != NULL); #endif if_put(ifp); - icmp6stat_inc(icp6s_outhist + type); + icmp6stat_inc(icp6s_outhist + pkt->mpi_type); ip6_output(mh, &ip6_opts, NULL, ia6 ? 0 : IPV6_UNSPECSRC, &im6o, NULL); } Index: netinet6/mld6_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6_var.h,v diff -u -p -r1.7 mld6_var.h --- netinet6/mld6_var.h 12 Nov 2025 19:11:10 -0000 1.7 +++ netinet6/mld6_var.h 13 Nov 2025 17:14:12 -0000 @@ -45,8 +45,8 @@ void mld6_init(void); void mld6_input(struct mbuf *, int); -void mld6_start_listening(struct in6_multi *); -void mld6_stop_listening(struct in6_multi *); +void mld6_start_listening(struct in6_multi *, struct ifnet *ifp); +void mld6_stop_listening(struct in6_multi *, struct ifnet *ifp); void mld6_fasttimo(void); #endif /* _KERNEL */