From: Alexander Bluhm Subject: Re: unlock igmp and mld6 fast timer To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Sat, 3 Jan 2026 23:43:09 +0100 On Fri, Jan 02, 2026 at 01:58:09PM +0100, Alexander Bluhm wrote: > On Fri, Jan 02, 2026 at 06:32:31AM +0000, Vitaliy Makkoveev wrote: > > On Mon, Dec 29, 2025 at 01:20:05PM +0100, Alexander Bluhm wrote: > > > On Tue, Dec 02, 2025 at 11:01:36PM +0100, Alexander Bluhm wrote: > > > > Hi, > > > > > > > > After moving around code for a while, I think a rwlock to protect > > > > if_maddrlist is the next step. The malloc(M_WAITOK) in rti_fill() > > > > prevents us from using a mutex here. > > > > > > > > So protect the TAILQ if_maddrlist with rwlock if_maddrlock. Also > > > > struct in_multi and in6_multi use this lock for their state and > > > > timer. Sleeps in malloc and IP output are possible. > > > > > > > > This allows to run IGMP and MLD6 fast timeout with shared net lock. > > > > > > I would like to proceed with MP multicast. > > > > > > OK? objections? > > > > > > > Just for curiosity, why have you replaced mutex with rwlock? > > There is this call path, I found the sleeping point during testing. > > in_addmulti() > rw_enter_write(&ifp->if_maddrlock) > igmp_joingroup() > rti_fill() > malloc(M_WAITOK) > > Maybe I can pull the M_WAITOK out of the lock later. Sleeping for > memory with a rwlock held is not optimal. But moving the malloc > and replacing the lock with a mutex requires more refacotring. Not > sure if it is worth it. > > IPv6 is different, it uses M_NOWAIT. But that means a system call > can fail due to low memory situations. Also not good, sleeping > would be better. > > > in{,6}_var.h have missing locks description. Can you add something > > like of 'if_maddrlock rwlock of parent interface' for [m]? And > > description for [I] too. > > done > > > The rest looks good to me. The diff was not quite right. syzkaller found that in_addmulti -> igmp_joingroup -> igmp_sendpkt -> ip_output -> in_hasmulti is a bad idea. The lock was taken twice. I have backed it out. https://syzkaller.appspot.com/bug?extid=de6bcf8e746b8a631885 Fortunately I was working on a follow up diff. Idea is that igmp_sendpkt() is called after the lock has been released. Struct igmp_pktinfo contains everything that is needed to send an IGMP packet later. In case of timeout loops a bunch of such packets are queued. Diff below contains the previous locking diff with igmp_sendpkt() on top. MLD6 is analog to IGMP. If it helps review, I can also split it up. ok? bluhm Index: net/if.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v diff -u -p -r1.762 if.c --- net/if.c 3 Jan 2026 14:10:04 -0000 1.762 +++ net/if.c 3 Jan 2026 14:22:23 -0000 @@ -662,6 +662,7 @@ if_attach_common(struct ifnet *ifp) TAILQ_INIT(&ifp->if_addrlist); TAILQ_INIT(&ifp->if_maddrlist); TAILQ_INIT(&ifp->if_groups); + rw_init(&ifp->if_maddrlock, "maddr"); if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) { KASSERTMSG(ifp->if_qstart == NULL, Index: net/if_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v diff -u -p -r1.147 if_var.h --- net/if_var.h 3 Jan 2026 14:10:04 -0000 1.147 +++ net/if_var.h 3 Jan 2026 14:22:23 -0000 @@ -81,6 +81,7 @@ * K kernel lock * N net lock * T if_tmplist_lock + * m interface multicast rwlock if_maddrlock * * For SRP related structures that allow lock-free reads, the write lock * is indicated below. @@ -153,8 +154,9 @@ struct ifnet { /* and the entries */ TAILQ_ENTRY(ifnet) if_list; /* [NK] all struct ifnets are chained */ TAILQ_ENTRY(ifnet) if_tmplist; /* [T] temporary list */ TAILQ_HEAD(, ifaddr) if_addrlist; /* [N] list of addresses per if */ - TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [N] list of multicast records */ + TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [m] list of multicast records */ TAILQ_HEAD(, ifg_list) if_groups; /* [N] list of groups per if */ + struct rwlock if_maddrlock; struct task_list if_addrhooks; /* [I] address change callbacks */ struct task_list if_linkstatehooks; /* [I] link change callbacks*/ struct task_list if_detachhooks; /* [I] detach callbacks */ @@ -273,10 +275,10 @@ struct ifaddr { * Interface multicast address. */ struct ifmaddr { - struct sockaddr *ifma_addr; /* Protocol address */ - unsigned int ifma_ifidx; /* Index of the interface */ + TAILQ_ENTRY(ifmaddr) ifma_list; /* [m] Per-interface list */ + struct sockaddr *ifma_addr; /* [I] Protocol address */ struct refcnt ifma_refcnt; /* Count of references */ - TAILQ_ENTRY(ifmaddr) ifma_list; /* Per-interface list */ + unsigned int ifma_ifidx; /* [I] Index of the interface */ }; /* Index: netinet/igmp.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp.c,v diff -u -p -r1.95 igmp.c --- netinet/igmp.c 3 Jan 2026 14:10:04 -0000 1.95 +++ netinet/igmp.c 3 Jan 2026 14:31:34 -0000 @@ -118,8 +118,7 @@ static LIST_HEAD(, router_info) rti_head static struct mbuf *router_alert; struct cpumem *igmpcounters; -int igmp_checktimer(struct ifnet *); -void igmp_sendpkt(struct ifnet *, struct in_multi *, int, in_addr_t); +int igmp_checktimer(struct ifnet *, struct igmp_pktlist *); int rti_fill(struct in_multi *); int rti_reset(struct ifnet *); int igmp_input_if(struct ifnet *, struct mbuf **, int *, int, int, @@ -341,6 +340,7 @@ igmp_input_if(struct ifnet *ifp, struct * except those that are already running and those * that belong to a "local" group (224.0.0.X). */ + rw_enter_write(&ifp->if_maddrlock); TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) { if (ifma->ifma_addr->sa_family != AF_INET) continue; @@ -353,6 +353,7 @@ igmp_input_if(struct ifnet *ifp, struct running = 1; } } + rw_exit_write(&ifp->if_maddrlock); } else { if (!IN_MULTICAST(ip->ip_dst.s_addr)) { igmpstat_inc(igps_rcv_badqueries); @@ -372,6 +373,7 @@ igmp_input_if(struct ifnet *ifp, struct * timers already running, check if they need to be * reset. */ + rw_enter_write(&ifp->if_maddrlock); TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) { if (ifma->ifma_addr->sa_family != AF_INET) continue; @@ -400,6 +402,7 @@ igmp_input_if(struct ifnet *ifp, struct } } } + rw_exit_write(&ifp->if_maddrlock); } break; @@ -436,6 +439,7 @@ igmp_input_if(struct ifnet *ifp, struct * If we belong to the group being reported, stop * our timer for that group. */ + rw_enter_write(&ifp->if_maddrlock); inm = in_lookupmulti(&igmp->igmp_group, ifp); if (inm != NULL) { inm->inm_timer = 0; @@ -456,6 +460,7 @@ igmp_input_if(struct ifnet *ifp, struct break; } } + rw_exit_write(&ifp->if_maddrlock); break; @@ -504,6 +509,7 @@ igmp_input_if(struct ifnet *ifp, struct * If we belong to the group being reported, stop * our timer for that group. */ + rw_enter_write(&ifp->if_maddrlock); inm = in_lookupmulti(&igmp->igmp_group, ifp); if (inm != NULL) { inm->inm_timer = 0; @@ -520,6 +526,7 @@ igmp_input_if(struct ifnet *ifp, struct break; } } + rw_exit_write(&ifp->if_maddrlock); break; @@ -538,16 +545,22 @@ igmp_input_if(struct ifnet *ifp, struct } void -igmp_joingroup(struct in_multi *inm, struct ifnet *ifp) +igmp_joingroup(struct in_multi *inm, struct ifnet *ifp, + struct igmp_pktinfo *pkt) { - int i, running = 0; + int running = 0; + + rw_assert_wrlock(&ifp->if_maddrlock); inm->inm_state = IGMP_IDLE_MEMBER; if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) && (ifp->if_flags & IFF_LOOPBACK) == 0) { - i = rti_fill(inm); - igmp_sendpkt(ifp, inm, i, 0); + pkt->ipi_addr = inm->inm_addr; + pkt->ipi_rdomain = ifp->if_rdomain; + pkt->ipi_ifidx = inm->inm_ifidx; + pkt->ipi_type = rti_fill(inm); + inm->inm_state = IGMP_DELAYING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ); @@ -562,17 +575,22 @@ igmp_joingroup(struct in_multi *inm, str } void -igmp_leavegroup(struct in_multi *inm, struct ifnet *ifp) +igmp_leavegroup(struct in_multi *inm, struct ifnet *ifp, + struct igmp_pktinfo *pkt) { + rw_assert_anylock(&ifp->if_maddrlock); + switch (inm->inm_state) { case IGMP_DELAYING_MEMBER: case IGMP_IDLE_MEMBER: if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) && (ifp->if_flags & IFF_LOOPBACK) == 0) - if (inm->inm_rti->rti_type != IGMP_v1_ROUTER) - igmp_sendpkt(ifp, inm, - IGMP_HOST_LEAVE_MESSAGE, - INADDR_ALLROUTERS_GROUP); + if (inm->inm_rti->rti_type != IGMP_v1_ROUTER) { + pkt->ipi_addr.s_addr = INADDR_ALLROUTERS_GROUP; + pkt->ipi_rdomain = ifp->if_rdomain; + pkt->ipi_ifidx = inm->inm_ifidx; + pkt->ipi_type = IGMP_HOST_LEAVE_MESSAGE; + } break; case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: @@ -584,6 +602,7 @@ igmp_leavegroup(struct in_multi *inm, st void igmp_fasttimo(void) { + struct igmp_pktlist pktlist; struct ifnet *ifp; int running = 0; @@ -598,28 +617,37 @@ igmp_fasttimo(void) return; membar_consumer(); - NET_LOCK(); + NET_LOCK_SHARED(); + STAILQ_INIT(&pktlist); TAILQ_FOREACH(ifp, &ifnetlist, if_list) { - if (igmp_checktimer(ifp)) + if (igmp_checktimer(ifp, &pktlist)) running = 1; } membar_producer(); atomic_store_int(&igmp_timers_are_running, running); - NET_UNLOCK(); + while (!STAILQ_EMPTY(&pktlist)) { + struct igmp_pktinfo *pkt; + + pkt = STAILQ_FIRST(&pktlist); + STAILQ_REMOVE_HEAD(&pktlist, ipi_list); + igmp_sendpkt(pkt); + free(pkt, M_MRTABLE, sizeof(*pkt)); + } + + NET_UNLOCK_SHARED(); } int -igmp_checktimer(struct ifnet *ifp) +igmp_checktimer(struct ifnet *ifp, struct igmp_pktlist *pktlist) { struct in_multi *inm; struct ifmaddr *ifma; int running = 0; - NET_ASSERT_LOCKED(); - + rw_enter_write(&ifp->if_maddrlock); TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) { if (ifma->ifma_addr->sa_family != AF_INET) continue; @@ -628,18 +656,26 @@ igmp_checktimer(struct ifnet *ifp) /* do nothing */ } else if (--inm->inm_timer == 0) { if (inm->inm_state == IGMP_DELAYING_MEMBER) { - if (inm->inm_rti->rti_type == IGMP_v1_ROUTER) - igmp_sendpkt(ifp, inm, - IGMP_v1_HOST_MEMBERSHIP_REPORT, 0); - else - igmp_sendpkt(ifp, inm, - IGMP_v2_HOST_MEMBERSHIP_REPORT, 0); + struct igmp_pktinfo *pkt; + inm->inm_state = IGMP_IDLE_MEMBER; + pkt = malloc(sizeof(*pkt), M_MRTABLE, M_NOWAIT); + if (pkt == NULL) + continue; + pkt->ipi_addr = inm->inm_addr; + pkt->ipi_rdomain = ifp->if_rdomain; + pkt->ipi_ifidx = inm->inm_ifidx; + pkt->ipi_type = + inm->inm_rti->rti_type == IGMP_v1_ROUTER ? + IGMP_v1_HOST_MEMBERSHIP_REPORT : + IGMP_v2_HOST_MEMBERSHIP_REPORT; + STAILQ_INSERT_TAIL(pktlist, pkt, ipi_list); } } else { running = 1; } } + rw_exit_write(&ifp->if_maddrlock); return (running); } @@ -660,8 +696,7 @@ igmp_slowtimo(void) } void -igmp_sendpkt(struct ifnet *ifp, struct in_multi *inm, int type, - in_addr_t addr) +igmp_sendpkt(struct igmp_pktinfo *pkt) { struct mbuf *m; struct igmp *igmp; @@ -686,25 +721,21 @@ igmp_sendpkt(struct ifnet *ifp, struct i ip->ip_off = 0; ip->ip_p = IPPROTO_IGMP; ip->ip_src.s_addr = INADDR_ANY; - if (addr) { - ip->ip_dst.s_addr = addr; - } else { - ip->ip_dst = inm->inm_addr; - } + ip->ip_dst = pkt->ipi_addr; m->m_data += sizeof(struct ip); m->m_len -= sizeof(struct ip); igmp = mtod(m, struct igmp *); - igmp->igmp_type = type; + igmp->igmp_type = pkt->ipi_type; igmp->igmp_code = 0; - igmp->igmp_group = inm->inm_addr; + igmp->igmp_group = pkt->ipi_addr; igmp->igmp_cksum = 0; igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN); m->m_data -= sizeof(struct ip); m->m_len += sizeof(struct ip); - m->m_pkthdr.ph_rtableid = ifp->if_rdomain; - imo.imo_ifidx = inm->inm_ifidx; + m->m_pkthdr.ph_rtableid = pkt->ipi_rdomain; + imo.imo_ifidx = pkt->ipi_ifidx; imo.imo_ttl = 1; /* @@ -712,7 +743,7 @@ igmp_sendpkt(struct ifnet *ifp, struct i * router, so that the process-level routing daemon can hear it. */ #ifdef MROUTING - imo.imo_loop = (ip_mrouter[ifp->if_rdomain] != NULL); + imo.imo_loop = (ip_mrouter[pkt->ipi_rdomain] != NULL); #else imo.imo_loop = 0; #endif /* MROUTING */ Index: netinet/igmp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp_var.h,v diff -u -p -r1.16 igmp_var.h --- netinet/igmp_var.h 2 Mar 2025 21:28:32 -0000 1.16 +++ netinet/igmp_var.h 3 Jan 2026 14:31:34 -0000 @@ -105,12 +105,26 @@ igmpstat_inc(enum igmpstat_counters c) */ #define IGMP_RANDOM_DELAY(X) (arc4random_uniform(X) + 1) +struct igmp_pktinfo { + STAILQ_ENTRY(igmp_pktinfo) ipi_list; + struct in_addr ipi_addr; + unsigned int ipi_rdomain; + unsigned int ipi_ifidx; + int ipi_type; +}; +STAILQ_HEAD(igmp_pktlist, igmp_pktinfo); + void igmp_init(void); int igmp_input(struct mbuf **, int *, int, int, struct netstack *); -void igmp_joingroup(struct in_multi *, struct ifnet *); -void igmp_leavegroup(struct in_multi *, struct ifnet *); +void igmp_joingroup(struct in_multi *, struct ifnet *, + struct igmp_pktinfo *); +void igmp_leavegroup(struct in_multi *, struct ifnet *, + struct igmp_pktinfo *); void igmp_fasttimo(void); void igmp_slowtimo(void); int igmp_sysctl(int *, u_int, void *, size_t *, void *, size_t); +void igmp_sendpkt(struct igmp_pktinfo *); + #endif /* _KERNEL */ + #endif /* _NETINET_IGMP_VAR_H_ */ Index: netinet/in.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in.c,v diff -u -p -r1.192 in.c --- netinet/in.c 3 Jan 2026 14:10:04 -0000 1.192 +++ netinet/in.c 3 Jan 2026 14:31:34 -0000 @@ -853,7 +853,7 @@ in_lookupmulti(const struct in_addr *add struct in_multi *inm = NULL; struct ifmaddr *ifma; - NET_ASSERT_LOCKED(); + rw_assert_anylock(&ifp->if_maddrlock); TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) { if (ifma->ifma_addr->sa_family == AF_INET && @@ -871,54 +871,70 @@ in_lookupmulti(const struct in_addr *add struct in_multi * in_addmulti(const struct in_addr *addr, struct ifnet *ifp) { - struct in_multi *inm; + struct in_multi *inm, *new_inm = NULL; + struct igmp_pktinfo pkt; struct ifreq ifr; /* * See if address already in list. */ + rw_enter_write(&ifp->if_maddrlock); inm = in_lookupmulti(addr, ifp); - if (inm != NULL) { - /* - * Found it; just increment the reference count. - */ - refcnt_take(&inm->inm_refcnt); - } else { - /* - * New address; allocate a new multicast record - * and link it into the interface's multicast list. - */ - inm = malloc(sizeof(*inm), M_IPMADDR, M_WAITOK | M_ZERO); - inm->inm_sin.sin_len = sizeof(struct sockaddr_in); - inm->inm_sin.sin_family = AF_INET; - inm->inm_sin.sin_addr = *addr; - refcnt_init_trace(&inm->inm_refcnt, DT_REFCNT_IDX_IFMADDR); - inm->inm_ifidx = ifp->if_index; - inm->inm_ifma.ifma_addr = sintosa(&inm->inm_sin); + if (inm != NULL) + goto found; + rw_exit_write(&ifp->if_maddrlock); - /* - * Ask the network driver to update its multicast reception - * filter appropriately for the new address. - */ - memset(&ifr, 0, sizeof(ifr)); - memcpy(&ifr.ifr_addr, &inm->inm_sin, sizeof(inm->inm_sin)); - KERNEL_LOCK(); - if ((*ifp->if_ioctl)(ifp, SIOCADDMULTI,(caddr_t)&ifr) != 0) { - KERNEL_UNLOCK(); - free(inm, M_IPMADDR, sizeof(*inm)); - return (NULL); - } + /* + * New address; allocate a new multicast record + * and link it into the interface's multicast list. + */ + new_inm = malloc(sizeof(*inm), M_IPMADDR, M_WAITOK | M_ZERO); + + /* + * Ask the network driver to update its multicast reception + * filter appropriately for the new address. + */ + memset(&ifr, 0, sizeof(ifr)); + satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in); + satosin(&ifr.ifr_addr)->sin_family = AF_INET; + satosin(&ifr.ifr_addr)->sin_addr = *addr; + KERNEL_LOCK(); + if ((*ifp->if_ioctl)(ifp, SIOCADDMULTI,(caddr_t)&ifr) != 0) { KERNEL_UNLOCK(); + goto out; + } + KERNEL_UNLOCK(); - TAILQ_INSERT_HEAD(&ifp->if_maddrlist, &inm->inm_ifma, - ifma_list); + rw_enter_write(&ifp->if_maddrlock); + /* check again after unlock and lock */ + inm = in_lookupmulti(addr, ifp); + if (inm != NULL) + goto found; + inm = new_inm; + inm->inm_sin.sin_len = sizeof(struct sockaddr_in); + inm->inm_sin.sin_family = AF_INET; + inm->inm_sin.sin_addr = *addr; + refcnt_init_trace(&inm->inm_refcnt, DT_REFCNT_IDX_IFMADDR); + inm->inm_ifidx = ifp->if_index; + inm->inm_ifma.ifma_addr = sintosa(&inm->inm_sin); - /* - * Let IGMP know that we have joined a new IP multicast group. - */ - igmp_joingroup(inm, ifp); - } + /* + * Let IGMP know that we have joined a new IP multicast group. + */ + TAILQ_INSERT_HEAD(&ifp->if_maddrlist, &inm->inm_ifma, ifma_list); + pkt.ipi_ifidx = 0; + igmp_joingroup(inm, ifp, &pkt); + rw_exit_write(&ifp->if_maddrlock); + + if (pkt.ipi_ifidx) + igmp_sendpkt(&pkt); + return (inm); + found: + refcnt_take(&inm->inm_refcnt); + rw_exit_write(&ifp->if_maddrlock); + out: + free(new_inm, M_IPMADDR, sizeof(*inm)); return (inm); } @@ -928,21 +944,27 @@ in_addmulti(const struct in_addr *addr, void in_delmulti(struct in_multi *inm) { + struct igmp_pktinfo pkt; struct ifreq ifr; struct ifnet *ifp; - NET_ASSERT_LOCKED(); - if (refcnt_rele(&inm->inm_refcnt) == 0) return; ifp = if_get(inm->inm_ifidx); if (ifp != NULL) { + rw_enter_write(&ifp->if_maddrlock); /* * No remaining claims to this record; let IGMP know that * we are leaving the multicast group. */ - igmp_leavegroup(inm, ifp); + pkt.ipi_ifidx = 0; + igmp_leavegroup(inm, ifp, &pkt); + TAILQ_REMOVE(&ifp->if_maddrlist, &inm->inm_ifma, ifma_list); + rw_exit_write(&ifp->if_maddrlock); + + if (pkt.ipi_ifidx) + igmp_sendpkt(&pkt); /* * Notify the network driver to update its multicast @@ -956,9 +978,8 @@ in_delmulti(struct in_multi *inm) (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); KERNEL_UNLOCK(); - TAILQ_REMOVE(&ifp->if_maddrlist, &inm->inm_ifma, ifma_list); + if_put(ifp); } - if_put(ifp); free(inm, M_IPMADDR, sizeof(*inm)); } @@ -973,8 +994,10 @@ in_hasmulti(const struct in_addr *addr, struct in_multi *inm; int joined; + rw_enter_read(&ifp->if_maddrlock); inm = in_lookupmulti(addr, ifp); joined = (inm != NULL); + rw_exit_read(&ifp->if_maddrlock); return (joined); } Index: netinet/in_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_var.h,v diff -u -p -r1.46 in_var.h --- netinet/in_var.h 3 Jan 2026 14:10:04 -0000 1.46 +++ netinet/in_var.h 3 Jan 2026 14:22:23 -0000 @@ -35,6 +35,12 @@ #ifndef _NETINET_IN_VAR_H_ #define _NETINET_IN_VAR_H_ +/* + * Locks used to protect struct members in this file: + * I immutable after creation + * m multicast if_maddrlock rwlock of parent interface + */ + #include #ifdef _KERNEL @@ -87,11 +93,11 @@ struct in_multi { #define inm_refcnt inm_ifma.ifma_refcnt #define inm_ifidx inm_ifma.ifma_ifidx - struct sockaddr_in inm_sin; /* IPv4 multicast address */ + struct sockaddr_in inm_sin; /* [I] IPv4 multicast address */ #define inm_addr inm_sin.sin_addr - u_int inm_state; /* state of membership */ - u_int inm_timer; /* IGMP membership report timer */ + u_int inm_state; /* [m] state of membership */ + u_int inm_timer; /* [m] IGMP membership report */ struct router_info *inm_rti; /* router version info */ }; Index: netinet6/in6.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.c,v diff -u -p -r1.277 in6.c --- netinet6/in6.c 3 Jan 2026 14:10:04 -0000 1.277 +++ netinet6/in6.c 3 Jan 2026 14:54:27 -0000 @@ -1007,7 +1007,7 @@ in6_lookupmulti(const struct in6_addr *a struct in6_multi *in6m = NULL; struct ifmaddr *ifma; - NET_ASSERT_LOCKED(); + rw_assert_anylock(&ifp->if_maddrlock); TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) { if (ifma->ifma_addr->sa_family == AF_INET6 && @@ -1026,62 +1026,75 @@ in6_lookupmulti(const struct in6_addr *a struct in6_multi * in6_addmulti(const struct in6_addr *addr, struct ifnet *ifp, int *errorp) { + struct in6_multi *in6m, *new_in6m = NULL; + struct mld6_pktinfo pkt; struct in6_ifreq ifr; - struct in6_multi *in6m; - - NET_ASSERT_LOCKED(); *errorp = 0; /* * See if address already in list. */ + rw_enter_write(&ifp->if_maddrlock); in6m = in6_lookupmulti(addr, ifp); - if (in6m != NULL) { - /* - * Found it; just increment the reference count. - */ - refcnt_take(&in6m->in6m_refcnt); - } else { - /* - * New address; allocate a new multicast record - * and link it into the interface's multicast list. - */ - in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT | M_ZERO); - if (in6m == NULL) { - *errorp = ENOBUFS; - return (NULL); - } + if (in6m != NULL) + goto found; + rw_exit_write(&ifp->if_maddrlock); - in6m->in6m_sin.sin6_len = sizeof(struct sockaddr_in6); - in6m->in6m_sin.sin6_family = AF_INET6; - in6m->in6m_sin.sin6_addr = *addr; - refcnt_init_trace(&in6m->in6m_refcnt, DT_REFCNT_IDX_IFMADDR); - in6m->in6m_ifidx = ifp->if_index; - in6m->in6m_ifma.ifma_addr = sin6tosa(&in6m->in6m_sin); + /* + * New address; allocate a new multicast record + * and link it into the interface's multicast list. + */ + new_in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT | M_ZERO); + if (new_in6m == NULL) { + *errorp = ENOBUFS; + return (NULL); + } - /* - * Ask the network driver to update its multicast reception - * filter appropriately for the new address. - */ - memcpy(&ifr.ifr_addr, &in6m->in6m_sin, sizeof(in6m->in6m_sin)); - KERNEL_LOCK(); - *errorp = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); - KERNEL_UNLOCK(); - if (*errorp) { - free(in6m, M_IPMADDR, sizeof(*in6m)); - return (NULL); - } + /* + * Ask the network driver to update its multicast reception + * filter appropriately for the new address. + */ + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6); + ifr.ifr_addr.sin6_family = AF_INET6; + ifr.ifr_addr.sin6_addr = *addr; + KERNEL_LOCK(); + *errorp = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); + KERNEL_UNLOCK(); + if (*errorp) + goto out; + + rw_enter_write(&ifp->if_maddrlock); + /* check again after unlock and lock */ + in6m = in6_lookupmulti(addr, ifp); + if (in6m != NULL) + goto found; + in6m = new_in6m; + in6m->in6m_sin.sin6_len = sizeof(struct sockaddr_in6); + in6m->in6m_sin.sin6_family = AF_INET6; + in6m->in6m_sin.sin6_addr = *addr; + refcnt_init_trace(&in6m->in6m_refcnt, DT_REFCNT_IDX_IFMADDR); + in6m->in6m_ifidx = ifp->if_index; + in6m->in6m_ifma.ifma_addr = sin6tosa(&in6m->in6m_sin); + + /* + * Let MLD6 know that we have joined a new IP6 multicast group. + */ + TAILQ_INSERT_HEAD(&ifp->if_maddrlist, &in6m->in6m_ifma, ifma_list); + pkt.mpi_ifidx = 0; + mld6_start_listening(in6m, ifp, &pkt); + rw_exit_write(&ifp->if_maddrlock); - TAILQ_INSERT_HEAD(&ifp->if_maddrlist, &in6m->in6m_ifma, - ifma_list); + if (pkt.mpi_ifidx) + mld6_sendpkt(&pkt); - /* - * Let MLD6 know that we have joined a new IP6 multicast - * group. - */ - mld6_start_listening(in6m); - } + return (in6m); + found: + refcnt_take(&in6m->in6m_refcnt); + rw_exit_write(&ifp->if_maddrlock); + out: + free(new_in6m, M_IPMADDR, sizeof(*in6m)); return (in6m); } @@ -1091,39 +1104,44 @@ in6_addmulti(const struct in6_addr *addr void in6_delmulti(struct in6_multi *in6m) { + struct mld6_pktinfo pkt; struct in6_ifreq ifr; struct ifnet *ifp; - NET_ASSERT_LOCKED(); + if (refcnt_rele(&in6m->in6m_refcnt) == 0) + return; - if (refcnt_rele(&in6m->in6m_refcnt) != 0) { + ifp = if_get(in6m->in6m_ifidx); + if (ifp != NULL) { + rw_enter_write(&ifp->if_maddrlock); /* * No remaining claims to this record; let MLD6 know * that we are leaving the multicast group. */ - mld6_stop_listening(in6m); - ifp = if_get(in6m->in6m_ifidx); + pkt.mpi_ifidx = 0; + mld6_stop_listening(in6m, ifp, &pkt); + TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma, ifma_list); + rw_exit_write(&ifp->if_maddrlock); + + if (pkt.mpi_ifidx) + mld6_sendpkt(&pkt); /* * Notify the network driver to update its multicast * reception filter. */ - if (ifp != NULL) { - bzero(&ifr.ifr_addr, sizeof(struct sockaddr_in6)); - ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6); - ifr.ifr_addr.sin6_family = AF_INET6; - ifr.ifr_addr.sin6_addr = in6m->in6m_addr; - KERNEL_LOCK(); - (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); - KERNEL_UNLOCK(); + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6); + ifr.ifr_addr.sin6_family = AF_INET6; + ifr.ifr_addr.sin6_addr = in6m->in6m_addr; + KERNEL_LOCK(); + (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); + KERNEL_UNLOCK(); - TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma, - ifma_list); - } if_put(ifp); - - free(in6m, M_IPMADDR, sizeof(*in6m)); } + + free(in6m, M_IPMADDR, sizeof(*in6m)); } /* @@ -1136,8 +1154,10 @@ in6_hasmulti(const struct in6_addr *addr struct in6_multi *in6m; int joined; + rw_enter_read(&ifp->if_maddrlock); in6m = in6_lookupmulti(addr, ifp); joined = (in6m != NULL); + rw_exit_read(&ifp->if_maddrlock); return (joined); } Index: netinet6/in6_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_var.h,v diff -u -p -r1.84 in6_var.h --- netinet6/in6_var.h 3 Jan 2026 14:10:04 -0000 1.84 +++ netinet6/in6_var.h 3 Jan 2026 14:22:23 -0000 @@ -65,6 +65,12 @@ #define _NETINET6_IN6_VAR_H_ /* + * Locks used to protect struct members in this file: + * I immutable after creation + * m multicast if_maddrlock rwlock of parent interface + */ + +/* * Interface address, Internet version. One of these structures * is allocated for each interface with an Internet address. * The ifaddr structure contains the protocol-independent part @@ -316,11 +322,11 @@ struct in6_multi { #define in6m_refcnt in6m_ifma.ifma_refcnt #define in6m_ifidx in6m_ifma.ifma_ifidx - struct sockaddr_in6 in6m_sin; /* IPv6 multicast address */ + struct sockaddr_in6 in6m_sin; /* [I] IPv6 multicast address */ #define in6m_addr in6m_sin.sin6_addr - u_int in6m_state; /* state of membership */ - u_int in6m_timer; /* MLD6 membership report timer */ + u_int in6m_state; /* [m] state of membership */ + u_int in6m_timer; /* [m] MLD6 membership report */ }; static __inline struct in6_multi * Index: netinet6/mld6.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6.c,v diff -u -p -r1.72 mld6.c --- netinet6/mld6.c 3 Jan 2026 14:10:04 -0000 1.72 +++ netinet6/mld6.c 3 Jan 2026 14:57:38 -0000 @@ -86,7 +86,6 @@ static struct ip6_pktopts ip6_opts; int mld6_timers_are_running; /* [a] shortcut for fast timer */ int mld6_checktimer(struct ifnet *); -static void mld6_sendpkt(struct in6_multi *, int, const struct in6_addr *); void mld6_init(void) @@ -112,12 +111,15 @@ mld6_init(void) } void -mld6_start_listening(struct in6_multi *in6m) +mld6_start_listening(struct in6_multi *in6m, struct ifnet *ifp, + struct mld6_pktinfo *pkt) { /* XXX: These are necessary for KAME's link-local hack */ struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; int running = 0; + rw_assert_wrlock(&ifp->if_maddrlock); + /* * RFC2710 page 10: * The node never sends a Report or Done for the link-scope all-nodes @@ -132,10 +134,12 @@ mld6_start_listening(struct in6_multi *i in6m->in6m_timer = 0; in6m->in6m_state = MLD_OTHERLISTENER; } else { - mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL); + pkt->mpi_addr = in6m->in6m_addr; + pkt->mpi_rdomain = ifp->if_rdomain; + pkt->mpi_ifidx = in6m->in6m_ifidx; + pkt->mpi_type = MLD_LISTENER_REPORT; in6m->in6m_timer = - MLD_RANDOM_DELAY(MLD_V1_MAX_RI * - PR_FASTHZ); + MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ); in6m->in6m_state = MLD_IREPORTEDLAST; running = 1; } @@ -147,12 +151,15 @@ mld6_start_listening(struct in6_multi *i } void -mld6_stop_listening(struct in6_multi *in6m) +mld6_stop_listening(struct in6_multi *in6m, struct ifnet *ifp, + struct mld6_pktinfo *pkt) { /* XXX: These are necessary for KAME's link-local hack */ struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; struct in6_addr all_routers = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; + rw_assert_anylock(&ifp->if_maddrlock); + all_nodes.s6_addr16[1] = htons(in6m->in6m_ifidx); /* XXX: necessary when mrouting */ all_routers.s6_addr16[1] = htons(in6m->in6m_ifidx); @@ -160,8 +167,12 @@ mld6_stop_listening(struct in6_multi *in if (in6m->in6m_state == MLD_IREPORTEDLAST && (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_nodes)) && __IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) > - __IPV6_ADDR_SCOPE_INTFACELOCAL) - mld6_sendpkt(in6m, MLD_LISTENER_DONE, &all_routers); + __IPV6_ADDR_SCOPE_INTFACELOCAL) { + pkt->mpi_addr = all_routers; + pkt->mpi_rdomain = ifp->if_rdomain; + pkt->mpi_ifidx = in6m->in6m_ifidx; + pkt->mpi_type = MLD_LISTENER_DONE; + } } void @@ -254,6 +265,7 @@ mld6_input(struct mbuf *m, int off) timer = 1; all_nodes.s6_addr16[1] = htons(ifp->if_index); + rw_enter_write(&ifp->if_maddrlock); TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) { if (ifma->ifma_addr->sa_family != AF_INET6) continue; @@ -269,8 +281,13 @@ mld6_input(struct mbuf *m, int off) { if (timer == 0) { /* send a report immediately */ - mld6_sendpkt(in6m, MLD_LISTENER_REPORT, - NULL); + struct mld6_pktinfo pkt; + + pkt.mpi_addr = in6m->in6m_addr; + pkt.mpi_rdomain = ifp->if_rdomain; + pkt.mpi_ifidx = in6m->in6m_ifidx; + pkt.mpi_type = MLD_LISTENER_REPORT; + mld6_sendpkt(&pkt); in6m->in6m_timer = 0; /* reset timer */ in6m->in6m_state = MLD_IREPORTEDLAST; } else if (in6m->in6m_timer == 0 || /* idle */ @@ -281,6 +298,7 @@ mld6_input(struct mbuf *m, int off) } } } + rw_exit_write(&ifp->if_maddrlock); if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld_addr)) mldh->mld_addr.s6_addr16[1] = 0; /* XXX */ @@ -308,11 +326,13 @@ mld6_input(struct mbuf *m, int off) * If we belong to the group being reported, stop * our timer for that group. */ + rw_enter_write(&ifp->if_maddrlock); in6m = in6_lookupmulti(&mldh->mld_addr, ifp); if (in6m) { in6m->in6m_timer = 0; /* transit to idle state */ in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */ } + rw_exit_write(&ifp->if_maddrlock); if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld_addr)) mldh->mld_addr.s6_addr16[1] = 0; /* XXX */ @@ -355,7 +375,7 @@ mld6_fasttimo(void) return; membar_consumer(); - NET_LOCK(); + NET_LOCK_SHARED(); TAILQ_FOREACH(ifp, &ifnetlist, if_list) { if (mld6_checktimer(ifp)) @@ -365,7 +385,7 @@ mld6_fasttimo(void) membar_producer(); atomic_store_int(&mld6_timers_are_running, running); - NET_UNLOCK(); + NET_UNLOCK_SHARED(); } int @@ -373,10 +393,11 @@ mld6_checktimer(struct ifnet *ifp) { struct in6_multi *in6m; struct ifmaddr *ifma; + struct mld6_pktlist pktlist; int running = 0; - NET_ASSERT_LOCKED(); - + rw_enter_write(&ifp->if_maddrlock); + STAILQ_INIT(&pktlist); TAILQ_FOREACH(ifma, &ifp->if_maddrlist, ifma_list) { if (ifma->ifma_addr->sa_family != AF_INET6) continue; @@ -384,18 +405,37 @@ mld6_checktimer(struct ifnet *ifp) if (in6m->in6m_timer == 0) { /* do nothing */ } else if (--in6m->in6m_timer == 0) { - mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL); + struct mld6_pktinfo *pkt; + in6m->in6m_state = MLD_IREPORTEDLAST; + pkt = malloc(sizeof(*pkt), M_MRTABLE, M_NOWAIT); + if (pkt == NULL) + continue; + pkt->mpi_addr = in6m->in6m_addr; + pkt->mpi_rdomain = ifp->if_rdomain; + pkt->mpi_ifidx = in6m->in6m_ifidx; + pkt->mpi_type = MLD_LISTENER_REPORT; + STAILQ_INSERT_TAIL(&pktlist, pkt, mpi_list); } else { running = 1; } } + rw_exit_write(&ifp->if_maddrlock); + + while (!STAILQ_EMPTY(&pktlist)) { + struct mld6_pktinfo *pkt; + + pkt = STAILQ_FIRST(&pktlist); + STAILQ_REMOVE_HEAD(&pktlist, mpi_list); + mld6_sendpkt(pkt); + free(pkt, M_MRTABLE, sizeof(*pkt)); + } return (running); } -static void -mld6_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst) +void +mld6_sendpkt(const struct mld6_pktinfo *pkt) { struct mbuf *mh, *md; struct mld_hdr *mldh; @@ -405,7 +445,7 @@ mld6_sendpkt(struct in6_multi *in6m, int struct ifnet *ifp; int ignflags; - ifp = if_get(in6m->in6m_ifidx); + ifp = if_get(pkt->mpi_ifidx); if (ifp == NULL) return; @@ -441,8 +481,7 @@ mld6_sendpkt(struct in6_multi *in6m, int } mh->m_next = md; - mh->m_pkthdr.ph_ifidx = 0; - mh->m_pkthdr.ph_rtableid = ifp->if_rdomain; + mh->m_pkthdr.ph_rtableid = pkt->mpi_rdomain; mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr); mh->m_len = sizeof(struct ip6_hdr); m_align(mh, sizeof(struct ip6_hdr)); @@ -456,25 +495,25 @@ mld6_sendpkt(struct in6_multi *in6m, int ip6->ip6_nxt = IPPROTO_ICMPV6; /* ip6_hlim will be set by im6o.im6o_hlim */ ip6->ip6_src = ia6 ? ia6->ia_addr.sin6_addr : in6addr_any; - ip6->ip6_dst = dst ? *dst : in6m->in6m_addr; + ip6->ip6_dst = pkt->mpi_addr; /* fill in the MLD header */ md->m_len = sizeof(struct mld_hdr); mldh = mtod(md, struct mld_hdr *); - mldh->mld_type = type; + mldh->mld_type = pkt->mpi_type; mldh->mld_code = 0; mldh->mld_cksum = 0; /* XXX: we assume the function will not be called for query messages */ mldh->mld_maxdelay = 0; mldh->mld_reserved = 0; - mldh->mld_addr = in6m->in6m_addr; + mldh->mld_addr = pkt->mpi_addr; if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld_addr)) mldh->mld_addr.s6_addr16[1] = 0; /* XXX */ mh->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT; /* construct multicast option */ bzero(&im6o, sizeof(im6o)); - im6o.im6o_ifidx = ifp->if_index; + im6o.im6o_ifidx = pkt->mpi_ifidx; im6o.im6o_hlim = 1; /* @@ -482,10 +521,10 @@ mld6_sendpkt(struct in6_multi *in6m, int * router, so that the process-level routing daemon can hear it. */ #ifdef MROUTING - im6o.im6o_loop = (ip6_mrouter[ifp->if_rdomain] != NULL); + im6o.im6o_loop = (ip6_mrouter[pkt->mpi_rdomain] != NULL); #endif if_put(ifp); - icmp6stat_inc(icp6s_outhist + type); + icmp6stat_inc(icp6s_outhist + pkt->mpi_type); ip6_output(mh, &ip6_opts, NULL, ia6 ? 0 : IPV6_UNSPECSRC, &im6o, NULL); } Index: netinet6/mld6_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6_var.h,v diff -u -p -r1.9 mld6_var.h --- netinet6/mld6_var.h 3 Jan 2026 14:10:04 -0000 1.9 +++ netinet6/mld6_var.h 3 Jan 2026 14:51:39 -0000 @@ -43,11 +43,24 @@ #define MLD_OTHERLISTENER 0 #define MLD_IREPORTEDLAST 1 +struct mld6_pktinfo { + STAILQ_ENTRY(mld6_pktinfo) mpi_list; + struct in6_addr mpi_addr; + unsigned int mpi_rdomain; + unsigned int mpi_ifidx; + int mpi_type; +}; +STAILQ_HEAD(mld6_pktlist, mld6_pktinfo); + void mld6_init(void); void mld6_input(struct mbuf *, int); -void mld6_start_listening(struct in6_multi *); -void mld6_stop_listening(struct in6_multi *); +void mld6_start_listening(struct in6_multi *, struct ifnet *, + struct mld6_pktinfo *); +void mld6_stop_listening(struct in6_multi *, struct ifnet *, + struct mld6_pktinfo *); void mld6_fasttimo(void); +void mld6_sendpkt(const struct mld6_pktinfo *); + #endif /* _KERNEL */ #endif /* _NETINET6_MLD6_VAR_H_ */