Download raw body.
multicast router lock
Hi,
When we want to go MP for multicast forward, we have to proect the
multicast router structure.
I have introduced a rwlock for mrt_mrouter and mrt_api_config.
Since we support one router per route table, we should also have
an api version per router. The existing code was inconsistent.
Ref count the sockets in mrt_mrouter.
Also fixed the name space a bit by using mrt_ prefix.
ok?
bluhm
Index: netinet/igmp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp.c,v
diff -u -p -r1.98 igmp.c
--- netinet/igmp.c 29 Mar 2026 18:08:07 -0000 1.98
+++ netinet/igmp.c 25 Jun 2026 12:32:14 -0000
@@ -741,7 +741,7 @@ igmp_sendpkt(struct igmp_pktinfo *pkt)
* router, so that the process-level routing daemon can hear it.
*/
#ifdef MROUTING
- imo.imo_loop = (ip_mrouter[pkt->ipi_rdomain] != NULL);
+ imo.imo_loop = ip_mrouter_active(pkt->ipi_rdomain);
#else
imo.imo_loop = 0;
#endif /* MROUTING */
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.430 ip_input.c
--- netinet/ip_input.c 22 Jun 2026 10:58:34 -0000 1.430
+++ netinet/ip_input.c 25 Jun 2026 12:32:14 -0000
@@ -532,7 +532,7 @@ ip_input_if(struct mbuf **mp, int *offp,
#ifdef MROUTING
if (atomic_load_int(&ipmforwarding) &&
- ip_mrouter[ifp->if_rdomain]) {
+ ip_mrouter_active(ifp->if_rdomain)) {
int error;
if (m->m_flags & M_EXT) {
Index: netinet/ip_mroute.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.c,v
diff -u -p -r1.153 ip_mroute.c
--- netinet/ip_mroute.c 24 Jun 2026 12:33:49 -0000 1.153
+++ netinet/ip_mroute.c 25 Jun 2026 12:41:45 -0000
@@ -79,6 +79,7 @@
/*
* Locks used to protect data:
* I immutable after creation
+ * R multicast router lock
*/
/* #define MCAST_DEBUG */
@@ -96,14 +97,21 @@ int mcast_debug = 1;
do { } while (0)
#endif
+struct rwlock mrt_routerlock = RWLOCK_INITIALIZER("mrouter");
+
/*
- * Globals. All but ip_mrouter and ip_mrtproto could be static,
- * except for netstat or debugging purposes.
+ * Kernel multicast routing API capabilities and setup.
+ * If more API capabilities are added to the kernel, they should be
+ * recorded in `mrt_api_support'.
*/
-struct socket *ip_mrouter[RT_TABLEID_MAX + 1];
-struct rttimer_queue ip_mrouterq;
+static const u_int32_t mrt_api_support =
+ MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_RP;
+
+struct rttimer_queue mrt_timer;
+struct socket *mrt_mrouter[RT_TABLEID_MAX + 1]; /* [R] */
+uint32_t mrt_api_config[RT_TABLEID_MAX + 1]; /* [R] */
uint64_t mrt_count[RT_TABLEID_MAX + 1];
-int ip_mrtproto = IGMP_DVMRP; /* [I] for netstat only */
+int ip_mrtproto = IGMP_DVMRP; /* [I] for netstat only */
struct cpumem *mrtcounters;
@@ -124,9 +132,8 @@ int add_mfc(struct socket *, struct mbuf
int del_mfc(struct socket *, struct mbuf *);
int set_api_config(struct socket *, struct mbuf *); /* chose API capabilities */
int get_api_support(struct mbuf *);
-int get_api_config(struct mbuf *);
-int socket_send(struct socket *, struct mbuf *,
- struct sockaddr_in *);
+int get_api_config(struct socket *, struct mbuf *);
+int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
int ip_mdq(struct mbuf *, struct ifnet *, struct rtentry *, int);
struct ifnet *if_lookupbyvif(vifi_t, unsigned int);
struct rtentry *rt_mcast_add(struct ifnet *, struct sockaddr *,
@@ -134,15 +141,6 @@ struct rtentry *rt_mcast_add(struct ifne
void mrt_mcast_del(struct rtentry *, unsigned int);
/*
- * Kernel multicast routing API capabilities and setup.
- * If more API capabilities are added to the kernel, they should be
- * recorded in `mrt_api_support'.
- */
-static const u_int32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
- MRT_MFC_RP);
-static u_int32_t mrt_api_config = 0;
-
-/*
* Find a route for a given Multicast group address.
* Type of service parameter to be added in the future!!!
* Statistics are updated by the caller if needed (mrts_mfc_lookups and
@@ -185,39 +183,34 @@ mfc_find(struct ifnet *ifp, struct in_ad
int
ip_mrouter_set(struct socket *so, int optname, struct mbuf *m)
{
- struct inpcb *inp = sotoinpcb(so);
int error;
- if (optname != MRT_INIT &&
- so != ip_mrouter[inp->inp_rtableid])
+ switch (optname) {
+ case MRT_INIT:
+ error = ip_mrouter_init(so, m);
+ break;
+ case MRT_DONE:
+ error = ip_mrouter_done(so);
+ break;
+ case MRT_ADD_VIF:
+ error = add_vif(so, m);
+ break;
+ case MRT_DEL_VIF:
+ error = del_vif(so, m);
+ break;
+ case MRT_ADD_MFC:
+ error = add_mfc(so, m);
+ break;
+ case MRT_DEL_MFC:
+ error = del_mfc(so, m);
+ break;
+ case MRT_API_CONFIG:
+ error = set_api_config(so, m);
+ break;
+ default:
error = ENOPROTOOPT;
- else
- switch (optname) {
- case MRT_INIT:
- error = ip_mrouter_init(so, m);
- break;
- case MRT_DONE:
- error = ip_mrouter_done(so);
- break;
- case MRT_ADD_VIF:
- error = add_vif(so, m);
- break;
- case MRT_DEL_VIF:
- error = del_vif(so, m);
- break;
- case MRT_ADD_MFC:
- error = add_mfc(so, m);
- break;
- case MRT_DEL_MFC:
- error = del_mfc(so, m);
- break;
- case MRT_API_CONFIG:
- error = set_api_config(so, m);
- break;
- default:
- error = ENOPROTOOPT;
- break;
- }
+ break;
+ }
return (error);
}
@@ -228,37 +221,38 @@ ip_mrouter_set(struct socket *so, int op
int
ip_mrouter_get(struct socket *so, int optname, struct mbuf *m)
{
- struct inpcb *inp = sotoinpcb(so);
int error;
- if (so != ip_mrouter[inp->inp_rtableid])
+ switch (optname) {
+ case MRT_VERSION:
+ error = get_version(m);
+ break;
+ case MRT_API_SUPPORT:
+ error = get_api_support(m);
+ break;
+ case MRT_API_CONFIG:
+ error = get_api_config(so, m);
+ break;
+ default:
error = ENOPROTOOPT;
- else {
- switch (optname) {
- case MRT_VERSION:
- error = get_version(m);
- break;
- case MRT_API_SUPPORT:
- error = get_api_support(m);
- break;
- case MRT_API_CONFIG:
- error = get_api_config(m);
- break;
- default:
- error = ENOPROTOOPT;
- break;
- }
+ break;
}
return (error);
}
+int
+ip_mrouter_active(u_int rtableid)
+{
+ return (READ_ONCE(mrt_mrouter[rtableid]) != NULL);
+}
+
void
mrt_init(void)
{
mrtcounters = counters_alloc(mrts_ncounters);
- rt_timer_queue_init(&ip_mrouterq, MCAST_EXPIRE_FREQUENCY,
+ rt_timer_queue_init(&mrt_timer, MCAST_EXPIRE_FREQUENCY,
&mfc_expire_route);
}
@@ -274,30 +268,37 @@ mrt_ioctl(struct socket *so, u_long cmd,
if (inp == NULL)
return (ENOTCONN);
- KERNEL_LOCK();
- if (so != ip_mrouter[inp->inp_rtableid])
- error = EINVAL;
- else
- switch (cmd) {
- case SIOCGETVIFCNT:
- NET_LOCK_SHARED();
- error = get_vif_cnt(inp->inp_rtableid,
- (struct sioc_vif_req *)data);
- NET_UNLOCK_SHARED();
- break;
- case SIOCGETSGCNT:
- NET_LOCK_SHARED();
- error = get_sg_cnt(inp->inp_rtableid,
- (struct sioc_sg_req *)data);
- NET_UNLOCK_SHARED();
- break;
- default:
- error = ENOTTY;
- break;
- }
+ NET_LOCK_SHARED();
+ rw_enter_read(&mrt_routerlock);
+
+ if (so != mrt_mrouter[inp->inp_rtableid]) {
+ error = EPROTONOSUPPORT;
+ goto out;
+ }
+
+ switch (cmd) {
+ case SIOCGETVIFCNT:
+ KERNEL_LOCK();
+ error = get_vif_cnt(inp->inp_rtableid,
+ (struct sioc_vif_req *)data);
+ KERNEL_UNLOCK();
+ break;
+ case SIOCGETSGCNT:
+ KERNEL_LOCK();
+ error = get_sg_cnt(inp->inp_rtableid,
+ (struct sioc_sg_req *)data);
+ KERNEL_UNLOCK();
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+ out:
+ rw_exit_read(&mrt_routerlock);
+ NET_UNLOCK_SHARED();
- KERNEL_UNLOCK();
return (error);
}
@@ -593,10 +594,15 @@ ip_mrouter_init(struct socket *so, struc
if (*v != 1)
return (EINVAL);
- if (ip_mrouter[rtableid] != NULL)
+ rw_enter_write(&mrt_routerlock);
+
+ if (mrt_mrouter[rtableid] != NULL) {
+ rw_exit_write(&mrt_routerlock);
return (EADDRINUSE);
+ }
+ mrt_mrouter[rtableid] = soref(so);
- ip_mrouter[rtableid] = so;
+ rw_exit_write(&mrt_routerlock);
return (0);
}
@@ -625,6 +631,13 @@ ip_mrouter_done(struct socket *so)
NET_ASSERT_LOCKED();
+ rw_enter_write(&mrt_routerlock);
+
+ if (so != mrt_mrouter[inp->inp_rtableid]) {
+ rw_exit_write(&mrt_routerlock);
+ return (EPROTONOSUPPORT);
+ }
+
/* Delete all remaining installed multicast routes. */
do {
struct rtentry *rt = NULL;
@@ -638,6 +651,8 @@ ip_mrouter_done(struct socket *so)
rtfree(rt);
} while (error == EAGAIN);
+ KASSERT(mrt_count[rtableid] == 0);
+
/* Unregister all interfaces in the domain. */
TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
if (ifp->if_rdomain != rtableid)
@@ -646,10 +661,11 @@ ip_mrouter_done(struct socket *so)
vif_delete(ifp);
}
- mrt_api_config = 0;
+ mrt_api_config[rtableid] = 0;
+ mrt_mrouter[rtableid] = NULL;
+ sorele(so);
- ip_mrouter[rtableid] = NULL;
- mrt_count[rtableid] = 0;
+ rw_exit_write(&mrt_routerlock);
return (0);
}
@@ -672,7 +688,7 @@ set_api_config(struct socket *so, struct
{
struct inpcb *inp = sotoinpcb(so);
struct ifnet *ifp;
- u_int32_t *apival;
+ uint32_t *apival;
unsigned int rtableid = inp->inp_rtableid;
if (m == NULL || m->m_len < sizeof(u_int32_t))
@@ -680,6 +696,13 @@ set_api_config(struct socket *so, struct
apival = mtod(m, u_int32_t *);
+ rw_enter_write(&mrt_routerlock);
+
+ if (so != mrt_mrouter[inp->inp_rtableid]) {
+ rw_exit_write(&mrt_routerlock);
+ return (EPROTONOSUPPORT);
+ }
+
/*
* We can set the API capabilities only if it is the first operation
* after MRT_INIT. I.e.:
@@ -692,16 +715,20 @@ set_api_config(struct socket *so, struct
if (ifp->if_mcast == NULL)
continue;
+ rw_exit_write(&mrt_routerlock);
*apival = 0;
return (EPERM);
}
if (mrt_count[rtableid] > 0) {
+ rw_exit_write(&mrt_routerlock);
*apival = 0;
return (EPERM);
}
- mrt_api_config = *apival & mrt_api_support;
- *apival = mrt_api_config;
+ *apival &= mrt_api_support;
+ mrt_api_config[rtableid] = *apival;
+
+ rw_exit_write(&mrt_routerlock);
return (0);
}
@@ -712,12 +739,12 @@ set_api_config(struct socket *so, struct
int
get_api_support(struct mbuf *m)
{
- u_int32_t *apival;
+ uint32_t *apival;
if (m == NULL || m->m_len < sizeof(u_int32_t))
return (EINVAL);
- apival = mtod(m, u_int32_t *);
+ apival = mtod(m, uint32_t *);
*apival = mrt_api_support;
@@ -728,16 +755,24 @@ get_api_support(struct mbuf *m)
* Get API configured capabilities
*/
int
-get_api_config(struct mbuf *m)
+get_api_config(struct socket *so, struct mbuf *m)
{
- u_int32_t *apival;
+ struct inpcb *inp = sotoinpcb(so);
+ uint32_t *apival;
+ unsigned int rtableid = inp->inp_rtableid;
if (m == NULL || m->m_len < sizeof(u_int32_t))
return (EINVAL);
- apival = mtod(m, u_int32_t *);
+ apival = mtod(m, uint32_t *);
- *apival = mrt_api_config;
+ rw_enter_read(&mrt_routerlock);
+ if (so != mrt_mrouter[inp->inp_rtableid]) {
+ rw_exit_read(&mrt_routerlock);
+ return (EPROTONOSUPPORT);
+ }
+ *apival = mrt_api_config[rtableid];
+ rw_exit_read(&mrt_routerlock);
return (0);
}
@@ -759,6 +794,13 @@ add_vif(struct socket *so, struct mbuf *
if (m == NULL || m->m_len < sizeof(struct vifctl))
return (EINVAL);
+ rw_enter_read(&mrt_routerlock);
+ if (so != mrt_mrouter[inp->inp_rtableid]) {
+ rw_exit_read(&mrt_routerlock);
+ return (EPROTONOSUPPORT);
+ }
+ rw_exit_read(&mrt_routerlock);
+
vifcp = mtod(m, struct vifctl *);
if (vifcp->vifc_vifi >= MAXVIFS)
return (EINVAL);
@@ -826,6 +868,13 @@ del_vif(struct socket *so, struct mbuf *
if (m == NULL || m->m_len < sizeof(vifi_t))
return (EINVAL);
+ rw_enter_read(&mrt_routerlock);
+ if (so != mrt_mrouter[inp->inp_rtableid]) {
+ rw_exit_read(&mrt_routerlock);
+ return (EPROTONOSUPPORT);
+ }
+ rw_exit_read(&mrt_routerlock);
+
vifip = mtod(m, vifi_t *);
if ((ifp = if_lookupbyvif(*vifip, rtableid)) == NULL)
return (EADDRNOTAVAIL);
@@ -873,7 +922,7 @@ mfc_expire_route(struct rtentry *rt, u_i
/* Not expired, add it back to the queue. */
if (mfc->mfc_expire == 0) {
mfc->mfc_expire = 1;
- rt_timer_add(rt, &ip_mrouterq, rtableid);
+ rt_timer_add(rt, &mrt_timer, rtableid);
return;
}
@@ -887,6 +936,7 @@ mfc_add_route(struct ifnet *ifp, struct
struct vif *v = ifp->if_mcast;
struct rtentry *rt;
struct mfc *mfc;
+ uint32_t api_config;
unsigned int rtableid = ifp->if_rdomain;
rt = rt_mcast_add(ifp, origin, group);
@@ -907,19 +957,23 @@ mfc_add_route(struct ifnet *ifp, struct
rt->rt_llinfo = (caddr_t)mfc;
- rt_timer_add(rt, &ip_mrouterq, rtableid);
+ rt_timer_add(rt, &mrt_timer, rtableid);
+
+ rw_enter_read(&mrt_routerlock);
+ api_config = mrt_api_config[rtableid];
+ rw_exit_read(&mrt_routerlock);
mfc->mfc_parent = mfccp->mfcc_parent;
mfc->mfc_pkt_cnt = 0;
mfc->mfc_byte_cnt = 0;
mfc->mfc_wrong_if = 0;
mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id];
- mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config &
+ mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & api_config &
MRT_MFC_FLAGS_ALL;
mfc->mfc_expire = 0;
/* set the RP address */
- if (mrt_api_config & MRT_MFC_RP)
+ if (api_config & MRT_MFC_RP)
mfc->mfc_rp = mfccp->mfcc_rp;
else
mfc->mfc_rp = zeroin_addr;
@@ -1053,11 +1107,20 @@ add_mfc(struct socket *so, struct mbuf *
struct inpcb *inp = sotoinpcb(so);
struct mfcctl2 mfcctl2;
int mfcctl_size = sizeof(struct mfcctl);
+ uint32_t api_config;
unsigned int rtableid = inp->inp_rtableid;
NET_ASSERT_LOCKED();
- if (mrt_api_config & MRT_API_FLAGS_ALL)
+ rw_enter_read(&mrt_routerlock);
+ if (so != mrt_mrouter[inp->inp_rtableid]) {
+ rw_exit_read(&mrt_routerlock);
+ return (EPROTONOSUPPORT);
+ }
+ api_config = mrt_api_config[rtableid];
+ rw_exit_read(&mrt_routerlock);
+
+ if (api_config & MRT_API_FLAGS_ALL)
mfcctl_size = sizeof(struct mfcctl2);
if (m == NULL || m->m_len < mfcctl_size)
@@ -1066,7 +1129,7 @@ add_mfc(struct socket *so, struct mbuf *
/*
* select data size depending on API version.
*/
- if (mrt_api_config & MRT_API_FLAGS_ALL) {
+ if (api_config & MRT_API_FLAGS_ALL) {
struct mfcctl2 *mp2 = mtod(m, struct mfcctl2 *);
memcpy((caddr_t)&mfcctl2, mp2, sizeof(*mp2));
} else {
@@ -1103,6 +1166,13 @@ del_mfc(struct socket *so, struct mbuf *
if (m == NULL || m->m_len < mfcctl_size)
return (EINVAL);
+ rw_enter_read(&mrt_routerlock);
+ if (so != mrt_mrouter[inp->inp_rtableid]) {
+ rw_exit_read(&mrt_routerlock);
+ return (EPROTONOSUPPORT);
+ }
+ rw_exit_read(&mrt_routerlock);
+
mp = mtod(m, struct mfcctl *);
memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp));
@@ -1238,12 +1308,15 @@ ip_mforward(struct mbuf *m, struct ifnet
mrtstat_inc(mrts_upcalls);
sin.sin_addr = ip->ip_src;
- if (socket_send(ip_mrouter[rtableid], mm, &sin) < 0) {
+ rw_enter_read(&mrt_routerlock);
+ if (socket_send(mrt_mrouter[rtableid], mm, &sin) < 0) {
+ rw_exit_read(&mrt_routerlock);
log(LOG_WARNING, "ip_mforward: ip_mrouter "
"socket queue full\n");
mrtstat_inc(mrts_upq_sockfull);
return (ENOBUFS);
}
+ rw_exit_read(&mrt_routerlock);
mfc_add(NULL, &ip->ip_src, &ip->ip_dst, v->v_id,
rtableid, M_NOWAIT);
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.417 ip_output.c
--- netinet/ip_output.c 6 May 2026 11:36:13 -0000 1.417
+++ netinet/ip_output.c 25 Jun 2026 12:32:14 -0000
@@ -325,7 +325,7 @@ reroute:
* if necessary.
*/
if (atomic_load_int(&ipmforwarding) &&
- ip_mrouter[ifp->if_rdomain] &&
+ ip_mrouter_active(ifp->if_rdomain) &&
(flags & IP_FORWARDING) == 0) {
int rv;
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
diff -u -p -r1.124 ip_var.h
--- netinet/ip_var.h 21 Jun 2026 21:17:07 -0000 1.124
+++ netinet/ip_var.h 25 Jun 2026 12:32:14 -0000
@@ -247,6 +247,7 @@ int ip_getmoptions(int, struct ip_mopti
void ip_init(void);
struct mbuf*
ip_insertoptions(struct mbuf *, struct mbuf *, int *);
+int ip_mrouter_active(u_int);
int ip_mforward(struct mbuf *, struct ifnet *, int);
int ip_optcopy(struct ip *, struct ip *);
int ip_output(struct mbuf *, struct mbuf *, struct route *, int,
@@ -282,9 +283,6 @@ int rip_disconnect(struct socket *);
int rip_shutdown(struct socket *);
int rip_send(struct socket *, struct mbuf *, struct mbuf *,
struct mbuf *);
-#ifdef MROUTING
-extern struct socket *ip_mrouter[]; /* multicast routing daemon */
-#endif
#endif /* _KERNEL */
#endif /* _NETINET_IP_VAR_H_ */
Index: netinet/raw_ip.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
diff -u -p -r1.167 raw_ip.c
--- netinet/raw_ip.c 8 Jul 2025 00:47:41 -0000 1.167
+++ netinet/raw_ip.c 25 Jun 2026 12:32:14 -0000
@@ -504,8 +504,7 @@ rip_detach(struct socket *so)
return (EINVAL);
#ifdef MROUTING
- if (so == ip_mrouter[inp->inp_rtableid])
- ip_mrouter_done(so);
+ ip_mrouter_done(so);
#endif
in_pcbdetach(inp);
multicast router lock