From: Alexander Bluhm Subject: multicast router lock To: tech@openbsd.org Date: Thu, 25 Jun 2026 21:14:11 +0200 Hi, When we want to go MP for multicast forward, we have to proect the multicast router structure. I have introduced a rwlock for mrt_mrouter and mrt_api_config. Since we support one router per route table, we should also have an api version per router. The existing code was inconsistent. Ref count the sockets in mrt_mrouter. Also fixed the name space a bit by using mrt_ prefix. ok? bluhm Index: netinet/igmp.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp.c,v diff -u -p -r1.98 igmp.c --- netinet/igmp.c 29 Mar 2026 18:08:07 -0000 1.98 +++ netinet/igmp.c 25 Jun 2026 12:32:14 -0000 @@ -741,7 +741,7 @@ igmp_sendpkt(struct igmp_pktinfo *pkt) * router, so that the process-level routing daemon can hear it. */ #ifdef MROUTING - imo.imo_loop = (ip_mrouter[pkt->ipi_rdomain] != NULL); + imo.imo_loop = ip_mrouter_active(pkt->ipi_rdomain); #else imo.imo_loop = 0; #endif /* MROUTING */ Index: netinet/ip_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v diff -u -p -r1.430 ip_input.c --- netinet/ip_input.c 22 Jun 2026 10:58:34 -0000 1.430 +++ netinet/ip_input.c 25 Jun 2026 12:32:14 -0000 @@ -532,7 +532,7 @@ ip_input_if(struct mbuf **mp, int *offp, #ifdef MROUTING if (atomic_load_int(&ipmforwarding) && - ip_mrouter[ifp->if_rdomain]) { + ip_mrouter_active(ifp->if_rdomain)) { int error; if (m->m_flags & M_EXT) { Index: netinet/ip_mroute.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.c,v diff -u -p -r1.153 ip_mroute.c --- netinet/ip_mroute.c 24 Jun 2026 12:33:49 -0000 1.153 +++ netinet/ip_mroute.c 25 Jun 2026 12:41:45 -0000 @@ -79,6 +79,7 @@ /* * Locks used to protect data: * I immutable after creation + * R multicast router lock */ /* #define MCAST_DEBUG */ @@ -96,14 +97,21 @@ int mcast_debug = 1; do { } while (0) #endif +struct rwlock mrt_routerlock = RWLOCK_INITIALIZER("mrouter"); + /* - * Globals. All but ip_mrouter and ip_mrtproto could be static, - * except for netstat or debugging purposes. + * Kernel multicast routing API capabilities and setup. + * If more API capabilities are added to the kernel, they should be + * recorded in `mrt_api_support'. */ -struct socket *ip_mrouter[RT_TABLEID_MAX + 1]; -struct rttimer_queue ip_mrouterq; +static const u_int32_t mrt_api_support = + MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_RP; + +struct rttimer_queue mrt_timer; +struct socket *mrt_mrouter[RT_TABLEID_MAX + 1]; /* [R] */ +uint32_t mrt_api_config[RT_TABLEID_MAX + 1]; /* [R] */ uint64_t mrt_count[RT_TABLEID_MAX + 1]; -int ip_mrtproto = IGMP_DVMRP; /* [I] for netstat only */ +int ip_mrtproto = IGMP_DVMRP; /* [I] for netstat only */ struct cpumem *mrtcounters; @@ -124,9 +132,8 @@ int add_mfc(struct socket *, struct mbuf int del_mfc(struct socket *, struct mbuf *); int set_api_config(struct socket *, struct mbuf *); /* chose API capabilities */ int get_api_support(struct mbuf *); -int get_api_config(struct mbuf *); -int socket_send(struct socket *, struct mbuf *, - struct sockaddr_in *); +int get_api_config(struct socket *, struct mbuf *); +int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); int ip_mdq(struct mbuf *, struct ifnet *, struct rtentry *, int); struct ifnet *if_lookupbyvif(vifi_t, unsigned int); struct rtentry *rt_mcast_add(struct ifnet *, struct sockaddr *, @@ -134,15 +141,6 @@ struct rtentry *rt_mcast_add(struct ifne void mrt_mcast_del(struct rtentry *, unsigned int); /* - * Kernel multicast routing API capabilities and setup. - * If more API capabilities are added to the kernel, they should be - * recorded in `mrt_api_support'. - */ -static const u_int32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | - MRT_MFC_RP); -static u_int32_t mrt_api_config = 0; - -/* * Find a route for a given Multicast group address. * Type of service parameter to be added in the future!!! * Statistics are updated by the caller if needed (mrts_mfc_lookups and @@ -185,39 +183,34 @@ mfc_find(struct ifnet *ifp, struct in_ad int ip_mrouter_set(struct socket *so, int optname, struct mbuf *m) { - struct inpcb *inp = sotoinpcb(so); int error; - if (optname != MRT_INIT && - so != ip_mrouter[inp->inp_rtableid]) + switch (optname) { + case MRT_INIT: + error = ip_mrouter_init(so, m); + break; + case MRT_DONE: + error = ip_mrouter_done(so); + break; + case MRT_ADD_VIF: + error = add_vif(so, m); + break; + case MRT_DEL_VIF: + error = del_vif(so, m); + break; + case MRT_ADD_MFC: + error = add_mfc(so, m); + break; + case MRT_DEL_MFC: + error = del_mfc(so, m); + break; + case MRT_API_CONFIG: + error = set_api_config(so, m); + break; + default: error = ENOPROTOOPT; - else - switch (optname) { - case MRT_INIT: - error = ip_mrouter_init(so, m); - break; - case MRT_DONE: - error = ip_mrouter_done(so); - break; - case MRT_ADD_VIF: - error = add_vif(so, m); - break; - case MRT_DEL_VIF: - error = del_vif(so, m); - break; - case MRT_ADD_MFC: - error = add_mfc(so, m); - break; - case MRT_DEL_MFC: - error = del_mfc(so, m); - break; - case MRT_API_CONFIG: - error = set_api_config(so, m); - break; - default: - error = ENOPROTOOPT; - break; - } + break; + } return (error); } @@ -228,37 +221,38 @@ ip_mrouter_set(struct socket *so, int op int ip_mrouter_get(struct socket *so, int optname, struct mbuf *m) { - struct inpcb *inp = sotoinpcb(so); int error; - if (so != ip_mrouter[inp->inp_rtableid]) + switch (optname) { + case MRT_VERSION: + error = get_version(m); + break; + case MRT_API_SUPPORT: + error = get_api_support(m); + break; + case MRT_API_CONFIG: + error = get_api_config(so, m); + break; + default: error = ENOPROTOOPT; - else { - switch (optname) { - case MRT_VERSION: - error = get_version(m); - break; - case MRT_API_SUPPORT: - error = get_api_support(m); - break; - case MRT_API_CONFIG: - error = get_api_config(m); - break; - default: - error = ENOPROTOOPT; - break; - } + break; } return (error); } +int +ip_mrouter_active(u_int rtableid) +{ + return (READ_ONCE(mrt_mrouter[rtableid]) != NULL); +} + void mrt_init(void) { mrtcounters = counters_alloc(mrts_ncounters); - rt_timer_queue_init(&ip_mrouterq, MCAST_EXPIRE_FREQUENCY, + rt_timer_queue_init(&mrt_timer, MCAST_EXPIRE_FREQUENCY, &mfc_expire_route); } @@ -274,30 +268,37 @@ mrt_ioctl(struct socket *so, u_long cmd, if (inp == NULL) return (ENOTCONN); - KERNEL_LOCK(); - if (so != ip_mrouter[inp->inp_rtableid]) - error = EINVAL; - else - switch (cmd) { - case SIOCGETVIFCNT: - NET_LOCK_SHARED(); - error = get_vif_cnt(inp->inp_rtableid, - (struct sioc_vif_req *)data); - NET_UNLOCK_SHARED(); - break; - case SIOCGETSGCNT: - NET_LOCK_SHARED(); - error = get_sg_cnt(inp->inp_rtableid, - (struct sioc_sg_req *)data); - NET_UNLOCK_SHARED(); - break; - default: - error = ENOTTY; - break; - } + NET_LOCK_SHARED(); + rw_enter_read(&mrt_routerlock); + + if (so != mrt_mrouter[inp->inp_rtableid]) { + error = EPROTONOSUPPORT; + goto out; + } + + switch (cmd) { + case SIOCGETVIFCNT: + KERNEL_LOCK(); + error = get_vif_cnt(inp->inp_rtableid, + (struct sioc_vif_req *)data); + KERNEL_UNLOCK(); + break; + case SIOCGETSGCNT: + KERNEL_LOCK(); + error = get_sg_cnt(inp->inp_rtableid, + (struct sioc_sg_req *)data); + KERNEL_UNLOCK(); + break; + default: + error = ENOTTY; + break; + } + + out: + rw_exit_read(&mrt_routerlock); + NET_UNLOCK_SHARED(); - KERNEL_UNLOCK(); return (error); } @@ -593,10 +594,15 @@ ip_mrouter_init(struct socket *so, struc if (*v != 1) return (EINVAL); - if (ip_mrouter[rtableid] != NULL) + rw_enter_write(&mrt_routerlock); + + if (mrt_mrouter[rtableid] != NULL) { + rw_exit_write(&mrt_routerlock); return (EADDRINUSE); + } + mrt_mrouter[rtableid] = soref(so); - ip_mrouter[rtableid] = so; + rw_exit_write(&mrt_routerlock); return (0); } @@ -625,6 +631,13 @@ ip_mrouter_done(struct socket *so) NET_ASSERT_LOCKED(); + rw_enter_write(&mrt_routerlock); + + if (so != mrt_mrouter[inp->inp_rtableid]) { + rw_exit_write(&mrt_routerlock); + return (EPROTONOSUPPORT); + } + /* Delete all remaining installed multicast routes. */ do { struct rtentry *rt = NULL; @@ -638,6 +651,8 @@ ip_mrouter_done(struct socket *so) rtfree(rt); } while (error == EAGAIN); + KASSERT(mrt_count[rtableid] == 0); + /* Unregister all interfaces in the domain. */ TAILQ_FOREACH(ifp, &ifnetlist, if_list) { if (ifp->if_rdomain != rtableid) @@ -646,10 +661,11 @@ ip_mrouter_done(struct socket *so) vif_delete(ifp); } - mrt_api_config = 0; + mrt_api_config[rtableid] = 0; + mrt_mrouter[rtableid] = NULL; + sorele(so); - ip_mrouter[rtableid] = NULL; - mrt_count[rtableid] = 0; + rw_exit_write(&mrt_routerlock); return (0); } @@ -672,7 +688,7 @@ set_api_config(struct socket *so, struct { struct inpcb *inp = sotoinpcb(so); struct ifnet *ifp; - u_int32_t *apival; + uint32_t *apival; unsigned int rtableid = inp->inp_rtableid; if (m == NULL || m->m_len < sizeof(u_int32_t)) @@ -680,6 +696,13 @@ set_api_config(struct socket *so, struct apival = mtod(m, u_int32_t *); + rw_enter_write(&mrt_routerlock); + + if (so != mrt_mrouter[inp->inp_rtableid]) { + rw_exit_write(&mrt_routerlock); + return (EPROTONOSUPPORT); + } + /* * We can set the API capabilities only if it is the first operation * after MRT_INIT. I.e.: @@ -692,16 +715,20 @@ set_api_config(struct socket *so, struct if (ifp->if_mcast == NULL) continue; + rw_exit_write(&mrt_routerlock); *apival = 0; return (EPERM); } if (mrt_count[rtableid] > 0) { + rw_exit_write(&mrt_routerlock); *apival = 0; return (EPERM); } - mrt_api_config = *apival & mrt_api_support; - *apival = mrt_api_config; + *apival &= mrt_api_support; + mrt_api_config[rtableid] = *apival; + + rw_exit_write(&mrt_routerlock); return (0); } @@ -712,12 +739,12 @@ set_api_config(struct socket *so, struct int get_api_support(struct mbuf *m) { - u_int32_t *apival; + uint32_t *apival; if (m == NULL || m->m_len < sizeof(u_int32_t)) return (EINVAL); - apival = mtod(m, u_int32_t *); + apival = mtod(m, uint32_t *); *apival = mrt_api_support; @@ -728,16 +755,24 @@ get_api_support(struct mbuf *m) * Get API configured capabilities */ int -get_api_config(struct mbuf *m) +get_api_config(struct socket *so, struct mbuf *m) { - u_int32_t *apival; + struct inpcb *inp = sotoinpcb(so); + uint32_t *apival; + unsigned int rtableid = inp->inp_rtableid; if (m == NULL || m->m_len < sizeof(u_int32_t)) return (EINVAL); - apival = mtod(m, u_int32_t *); + apival = mtod(m, uint32_t *); - *apival = mrt_api_config; + rw_enter_read(&mrt_routerlock); + if (so != mrt_mrouter[inp->inp_rtableid]) { + rw_exit_read(&mrt_routerlock); + return (EPROTONOSUPPORT); + } + *apival = mrt_api_config[rtableid]; + rw_exit_read(&mrt_routerlock); return (0); } @@ -759,6 +794,13 @@ add_vif(struct socket *so, struct mbuf * if (m == NULL || m->m_len < sizeof(struct vifctl)) return (EINVAL); + rw_enter_read(&mrt_routerlock); + if (so != mrt_mrouter[inp->inp_rtableid]) { + rw_exit_read(&mrt_routerlock); + return (EPROTONOSUPPORT); + } + rw_exit_read(&mrt_routerlock); + vifcp = mtod(m, struct vifctl *); if (vifcp->vifc_vifi >= MAXVIFS) return (EINVAL); @@ -826,6 +868,13 @@ del_vif(struct socket *so, struct mbuf * if (m == NULL || m->m_len < sizeof(vifi_t)) return (EINVAL); + rw_enter_read(&mrt_routerlock); + if (so != mrt_mrouter[inp->inp_rtableid]) { + rw_exit_read(&mrt_routerlock); + return (EPROTONOSUPPORT); + } + rw_exit_read(&mrt_routerlock); + vifip = mtod(m, vifi_t *); if ((ifp = if_lookupbyvif(*vifip, rtableid)) == NULL) return (EADDRNOTAVAIL); @@ -873,7 +922,7 @@ mfc_expire_route(struct rtentry *rt, u_i /* Not expired, add it back to the queue. */ if (mfc->mfc_expire == 0) { mfc->mfc_expire = 1; - rt_timer_add(rt, &ip_mrouterq, rtableid); + rt_timer_add(rt, &mrt_timer, rtableid); return; } @@ -887,6 +936,7 @@ mfc_add_route(struct ifnet *ifp, struct struct vif *v = ifp->if_mcast; struct rtentry *rt; struct mfc *mfc; + uint32_t api_config; unsigned int rtableid = ifp->if_rdomain; rt = rt_mcast_add(ifp, origin, group); @@ -907,19 +957,23 @@ mfc_add_route(struct ifnet *ifp, struct rt->rt_llinfo = (caddr_t)mfc; - rt_timer_add(rt, &ip_mrouterq, rtableid); + rt_timer_add(rt, &mrt_timer, rtableid); + + rw_enter_read(&mrt_routerlock); + api_config = mrt_api_config[rtableid]; + rw_exit_read(&mrt_routerlock); mfc->mfc_parent = mfccp->mfcc_parent; mfc->mfc_pkt_cnt = 0; mfc->mfc_byte_cnt = 0; mfc->mfc_wrong_if = 0; mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id]; - mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config & + mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & api_config & MRT_MFC_FLAGS_ALL; mfc->mfc_expire = 0; /* set the RP address */ - if (mrt_api_config & MRT_MFC_RP) + if (api_config & MRT_MFC_RP) mfc->mfc_rp = mfccp->mfcc_rp; else mfc->mfc_rp = zeroin_addr; @@ -1053,11 +1107,20 @@ add_mfc(struct socket *so, struct mbuf * struct inpcb *inp = sotoinpcb(so); struct mfcctl2 mfcctl2; int mfcctl_size = sizeof(struct mfcctl); + uint32_t api_config; unsigned int rtableid = inp->inp_rtableid; NET_ASSERT_LOCKED(); - if (mrt_api_config & MRT_API_FLAGS_ALL) + rw_enter_read(&mrt_routerlock); + if (so != mrt_mrouter[inp->inp_rtableid]) { + rw_exit_read(&mrt_routerlock); + return (EPROTONOSUPPORT); + } + api_config = mrt_api_config[rtableid]; + rw_exit_read(&mrt_routerlock); + + if (api_config & MRT_API_FLAGS_ALL) mfcctl_size = sizeof(struct mfcctl2); if (m == NULL || m->m_len < mfcctl_size) @@ -1066,7 +1129,7 @@ add_mfc(struct socket *so, struct mbuf * /* * select data size depending on API version. */ - if (mrt_api_config & MRT_API_FLAGS_ALL) { + if (api_config & MRT_API_FLAGS_ALL) { struct mfcctl2 *mp2 = mtod(m, struct mfcctl2 *); memcpy((caddr_t)&mfcctl2, mp2, sizeof(*mp2)); } else { @@ -1103,6 +1166,13 @@ del_mfc(struct socket *so, struct mbuf * if (m == NULL || m->m_len < mfcctl_size) return (EINVAL); + rw_enter_read(&mrt_routerlock); + if (so != mrt_mrouter[inp->inp_rtableid]) { + rw_exit_read(&mrt_routerlock); + return (EPROTONOSUPPORT); + } + rw_exit_read(&mrt_routerlock); + mp = mtod(m, struct mfcctl *); memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp)); @@ -1238,12 +1308,15 @@ ip_mforward(struct mbuf *m, struct ifnet mrtstat_inc(mrts_upcalls); sin.sin_addr = ip->ip_src; - if (socket_send(ip_mrouter[rtableid], mm, &sin) < 0) { + rw_enter_read(&mrt_routerlock); + if (socket_send(mrt_mrouter[rtableid], mm, &sin) < 0) { + rw_exit_read(&mrt_routerlock); log(LOG_WARNING, "ip_mforward: ip_mrouter " "socket queue full\n"); mrtstat_inc(mrts_upq_sockfull); return (ENOBUFS); } + rw_exit_read(&mrt_routerlock); mfc_add(NULL, &ip->ip_src, &ip->ip_dst, v->v_id, rtableid, M_NOWAIT); Index: netinet/ip_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v diff -u -p -r1.417 ip_output.c --- netinet/ip_output.c 6 May 2026 11:36:13 -0000 1.417 +++ netinet/ip_output.c 25 Jun 2026 12:32:14 -0000 @@ -325,7 +325,7 @@ reroute: * if necessary. */ if (atomic_load_int(&ipmforwarding) && - ip_mrouter[ifp->if_rdomain] && + ip_mrouter_active(ifp->if_rdomain) && (flags & IP_FORWARDING) == 0) { int rv; Index: netinet/ip_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v diff -u -p -r1.124 ip_var.h --- netinet/ip_var.h 21 Jun 2026 21:17:07 -0000 1.124 +++ netinet/ip_var.h 25 Jun 2026 12:32:14 -0000 @@ -247,6 +247,7 @@ int ip_getmoptions(int, struct ip_mopti void ip_init(void); struct mbuf* ip_insertoptions(struct mbuf *, struct mbuf *, int *); +int ip_mrouter_active(u_int); int ip_mforward(struct mbuf *, struct ifnet *, int); int ip_optcopy(struct ip *, struct ip *); int ip_output(struct mbuf *, struct mbuf *, struct route *, int, @@ -282,9 +283,6 @@ int rip_disconnect(struct socket *); int rip_shutdown(struct socket *); int rip_send(struct socket *, struct mbuf *, struct mbuf *, struct mbuf *); -#ifdef MROUTING -extern struct socket *ip_mrouter[]; /* multicast routing daemon */ -#endif #endif /* _KERNEL */ #endif /* _NETINET_IP_VAR_H_ */ Index: netinet/raw_ip.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v diff -u -p -r1.167 raw_ip.c --- netinet/raw_ip.c 8 Jul 2025 00:47:41 -0000 1.167 +++ netinet/raw_ip.c 25 Jun 2026 12:32:14 -0000 @@ -504,8 +504,7 @@ rip_detach(struct socket *so) return (EINVAL); #ifdef MROUTING - if (so == ip_mrouter[inp->inp_rtableid]) - ip_mrouter_done(so); + ip_mrouter_done(so); #endif in_pcbdetach(inp);