Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
multicast router lock
To:
tech@openbsd.org
Date:
Thu, 25 Jun 2026 21:14:11 +0200

Download raw body.

Thread
  • Alexander Bluhm:

    multicast router lock

Hi,

When we want to go MP for multicast forward, we have to proect the
multicast router structure.

I have introduced a rwlock for mrt_mrouter and mrt_api_config.
Since we support one router per route table, we should also have
an api version per router.  The existing code was inconsistent.
Ref count the sockets in mrt_mrouter.

Also fixed the name space a bit by using mrt_ prefix.

ok?

bluhm

Index: netinet/igmp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp.c,v
diff -u -p -r1.98 igmp.c
--- netinet/igmp.c	29 Mar 2026 18:08:07 -0000	1.98
+++ netinet/igmp.c	25 Jun 2026 12:32:14 -0000
@@ -741,7 +741,7 @@ igmp_sendpkt(struct igmp_pktinfo *pkt)
 	 * router, so that the process-level routing daemon can hear it.
 	 */
 #ifdef MROUTING
-	imo.imo_loop = (ip_mrouter[pkt->ipi_rdomain] != NULL);
+	imo.imo_loop = ip_mrouter_active(pkt->ipi_rdomain);
 #else
 	imo.imo_loop = 0;
 #endif /* MROUTING */
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.430 ip_input.c
--- netinet/ip_input.c	22 Jun 2026 10:58:34 -0000	1.430
+++ netinet/ip_input.c	25 Jun 2026 12:32:14 -0000
@@ -532,7 +532,7 @@ ip_input_if(struct mbuf **mp, int *offp,
 
 #ifdef MROUTING
 		if (atomic_load_int(&ipmforwarding) &&
-		    ip_mrouter[ifp->if_rdomain]) {
+		    ip_mrouter_active(ifp->if_rdomain)) {
 			int error;
 
 			if (m->m_flags & M_EXT) {
Index: netinet/ip_mroute.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.c,v
diff -u -p -r1.153 ip_mroute.c
--- netinet/ip_mroute.c	24 Jun 2026 12:33:49 -0000	1.153
+++ netinet/ip_mroute.c	25 Jun 2026 12:41:45 -0000
@@ -79,6 +79,7 @@
 /*
  * Locks used to protect data:
  *	I	immutable after creation
+ *	R	multicast router lock
  */
 
 /* #define MCAST_DEBUG */
@@ -96,14 +97,21 @@ int mcast_debug = 1;
 	do { } while (0)
 #endif
 
+struct rwlock mrt_routerlock = RWLOCK_INITIALIZER("mrouter");
+
 /*
- * Globals.  All but ip_mrouter and ip_mrtproto could be static,
- * except for netstat or debugging purposes.
+ * Kernel multicast routing API capabilities and setup.
+ * If more API capabilities are added to the kernel, they should be
+ * recorded in `mrt_api_support'.
  */
-struct socket	*ip_mrouter[RT_TABLEID_MAX + 1];
-struct rttimer_queue ip_mrouterq;
+static const u_int32_t mrt_api_support =
+    MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_RP;
+
+struct rttimer_queue mrt_timer;
+struct socket	*mrt_mrouter[RT_TABLEID_MAX + 1];	/* [R] */
+uint32_t	 mrt_api_config[RT_TABLEID_MAX + 1];	/* [R] */
 uint64_t	 mrt_count[RT_TABLEID_MAX + 1];
-int		ip_mrtproto = IGMP_DVMRP;    /* [I] for netstat only */
+int		 ip_mrtproto = IGMP_DVMRP;	/* [I] for netstat only */
 
 struct cpumem *mrtcounters;
 
@@ -124,9 +132,8 @@ int add_mfc(struct socket *, struct mbuf
 int del_mfc(struct socket *, struct mbuf *);
 int set_api_config(struct socket *, struct mbuf *); /* chose API capabilities */
 int get_api_support(struct mbuf *);
-int get_api_config(struct mbuf *);
-int socket_send(struct socket *, struct mbuf *,
-			    struct sockaddr_in *);
+int get_api_config(struct socket *, struct mbuf *);
+int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
 int ip_mdq(struct mbuf *, struct ifnet *, struct rtentry *, int);
 struct ifnet *if_lookupbyvif(vifi_t, unsigned int);
 struct rtentry *rt_mcast_add(struct ifnet *, struct sockaddr *,
@@ -134,15 +141,6 @@ struct rtentry *rt_mcast_add(struct ifne
 void mrt_mcast_del(struct rtentry *, unsigned int);
 
 /*
- * Kernel multicast routing API capabilities and setup.
- * If more API capabilities are added to the kernel, they should be
- * recorded in `mrt_api_support'.
- */
-static const u_int32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
-					  MRT_MFC_RP);
-static u_int32_t mrt_api_config = 0;
-
-/*
  * Find a route for a given Multicast group address.
  * Type of service parameter to be added in the future!!!
  * Statistics are updated by the caller if needed (mrts_mfc_lookups and
@@ -185,39 +183,34 @@ mfc_find(struct ifnet *ifp, struct in_ad
 int
 ip_mrouter_set(struct socket *so, int optname, struct mbuf *m)
 {
-	struct inpcb *inp = sotoinpcb(so);
 	int error;
 
-	if (optname != MRT_INIT &&
-	    so != ip_mrouter[inp->inp_rtableid])
+	switch (optname) {
+	case MRT_INIT:
+		error = ip_mrouter_init(so, m);
+		break;
+	case MRT_DONE:
+		error = ip_mrouter_done(so);
+		break;
+	case MRT_ADD_VIF:
+		error = add_vif(so, m);
+		break;
+	case MRT_DEL_VIF:
+		error = del_vif(so, m);
+		break;
+	case MRT_ADD_MFC:
+		error = add_mfc(so, m);
+		break;
+	case MRT_DEL_MFC:
+		error = del_mfc(so, m);
+		break;
+	case MRT_API_CONFIG:
+		error = set_api_config(so, m);
+		break;
+	default:
 		error = ENOPROTOOPT;
-	else
-		switch (optname) {
-		case MRT_INIT:
-			error = ip_mrouter_init(so, m);
-			break;
-		case MRT_DONE:
-			error = ip_mrouter_done(so);
-			break;
-		case MRT_ADD_VIF:
-			error = add_vif(so, m);
-			break;
-		case MRT_DEL_VIF:
-			error = del_vif(so, m);
-			break;
-		case MRT_ADD_MFC:
-			error = add_mfc(so, m);
-			break;
-		case MRT_DEL_MFC:
-			error = del_mfc(so, m);
-			break;
-		case MRT_API_CONFIG:
-			error = set_api_config(so, m);
-			break;
-		default:
-			error = ENOPROTOOPT;
-			break;
-		}
+		break;
+	}
 
 	return (error);
 }
@@ -228,37 +221,38 @@ ip_mrouter_set(struct socket *so, int op
 int
 ip_mrouter_get(struct socket *so, int optname, struct mbuf *m)
 {
-	struct inpcb *inp = sotoinpcb(so);
 	int error;
 
-	if (so != ip_mrouter[inp->inp_rtableid])
+	switch (optname) {
+	case MRT_VERSION:
+		error = get_version(m);
+		break;
+	case MRT_API_SUPPORT:
+		error = get_api_support(m);
+		break;
+	case MRT_API_CONFIG:
+		error = get_api_config(so, m);
+		break;
+	default:
 		error = ENOPROTOOPT;
-	else {
-		switch (optname) {
-		case MRT_VERSION:
-			error = get_version(m);
-			break;
-		case MRT_API_SUPPORT:
-			error = get_api_support(m);
-			break;
-		case MRT_API_CONFIG:
-			error = get_api_config(m);
-			break;
-		default:
-			error = ENOPROTOOPT;
-			break;
-		}
+		break;
 	}
 
 	return (error);
 }
 
+int
+ip_mrouter_active(u_int rtableid)
+{
+	return (READ_ONCE(mrt_mrouter[rtableid]) != NULL);
+}
+
 void
 mrt_init(void)
 {
 	mrtcounters = counters_alloc(mrts_ncounters);
 
-	rt_timer_queue_init(&ip_mrouterq, MCAST_EXPIRE_FREQUENCY,
+	rt_timer_queue_init(&mrt_timer, MCAST_EXPIRE_FREQUENCY,
 	    &mfc_expire_route);
 }
 
@@ -274,30 +268,37 @@ mrt_ioctl(struct socket *so, u_long cmd,
 	if (inp == NULL)
 		return (ENOTCONN);
 
-	KERNEL_LOCK();
 
-	if (so != ip_mrouter[inp->inp_rtableid])
-		error = EINVAL;
-	else
-		switch (cmd) {
-		case SIOCGETVIFCNT:
-			NET_LOCK_SHARED();
-			error = get_vif_cnt(inp->inp_rtableid,
-			    (struct sioc_vif_req *)data);
-			NET_UNLOCK_SHARED();
-			break;
-		case SIOCGETSGCNT:
-			NET_LOCK_SHARED();
-			error = get_sg_cnt(inp->inp_rtableid,
-			    (struct sioc_sg_req *)data);
-			NET_UNLOCK_SHARED();
-			break;
-		default:
-			error = ENOTTY;
-			break;
-		}
+	NET_LOCK_SHARED();
+	rw_enter_read(&mrt_routerlock);
+
+	if (so != mrt_mrouter[inp->inp_rtableid]) {
+		error =  EPROTONOSUPPORT;
+		goto out;
+	}
+
+	switch (cmd) {
+	case SIOCGETVIFCNT:
+		KERNEL_LOCK();
+		error = get_vif_cnt(inp->inp_rtableid,
+		    (struct sioc_vif_req *)data);
+		KERNEL_UNLOCK();
+		break;
+	case SIOCGETSGCNT:
+		KERNEL_LOCK();
+		error = get_sg_cnt(inp->inp_rtableid,
+		    (struct sioc_sg_req *)data);
+		KERNEL_UNLOCK();
+		break;
+	default:
+		error = ENOTTY;
+		break;
+	}
+
+ out:
+	rw_exit_read(&mrt_routerlock);
+	NET_UNLOCK_SHARED();
 
-	KERNEL_UNLOCK();
 	return (error);
 }
 
@@ -593,10 +594,15 @@ ip_mrouter_init(struct socket *so, struc
 	if (*v != 1)
 		return (EINVAL);
 
-	if (ip_mrouter[rtableid] != NULL)
+	rw_enter_write(&mrt_routerlock);
+
+	if (mrt_mrouter[rtableid] != NULL) {
+		rw_exit_write(&mrt_routerlock);
 		return (EADDRINUSE);
+	}
+	mrt_mrouter[rtableid] = soref(so);
 
-	ip_mrouter[rtableid] = so;
+	rw_exit_write(&mrt_routerlock);
 
 	return (0);
 }
@@ -625,6 +631,13 @@ ip_mrouter_done(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 
+	rw_enter_write(&mrt_routerlock);
+
+	if (so != mrt_mrouter[inp->inp_rtableid]) {
+		rw_exit_write(&mrt_routerlock);
+		return (EPROTONOSUPPORT);
+	}
+
 	/* Delete all remaining installed multicast routes. */
 	do {
 		struct rtentry *rt = NULL;
@@ -638,6 +651,8 @@ ip_mrouter_done(struct socket *so)
 		rtfree(rt);
 	} while (error == EAGAIN);
 
+	KASSERT(mrt_count[rtableid] == 0);
+
 	/* Unregister all interfaces in the domain. */
 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
 		if (ifp->if_rdomain != rtableid)
@@ -646,10 +661,11 @@ ip_mrouter_done(struct socket *so)
 		vif_delete(ifp);
 	}
 
-	mrt_api_config = 0;
+	mrt_api_config[rtableid] = 0;
+	mrt_mrouter[rtableid] = NULL;
+	sorele(so);
 
-	ip_mrouter[rtableid] = NULL;
-	mrt_count[rtableid] = 0;
+	rw_exit_write(&mrt_routerlock);
 
 	return (0);
 }
@@ -672,7 +688,7 @@ set_api_config(struct socket *so, struct
 {
 	struct inpcb *inp = sotoinpcb(so);
 	struct ifnet *ifp;
-	u_int32_t *apival;
+	uint32_t *apival;
 	unsigned int rtableid = inp->inp_rtableid;
 
 	if (m == NULL || m->m_len < sizeof(u_int32_t))
@@ -680,6 +696,13 @@ set_api_config(struct socket *so, struct
 
 	apival = mtod(m, u_int32_t *);
 
+	rw_enter_write(&mrt_routerlock);
+
+	if (so != mrt_mrouter[inp->inp_rtableid]) {
+		rw_exit_write(&mrt_routerlock);
+		return (EPROTONOSUPPORT);
+	}
+
 	/*
 	 * We can set the API capabilities only if it is the first operation
 	 * after MRT_INIT. I.e.:
@@ -692,16 +715,20 @@ set_api_config(struct socket *so, struct
 		if (ifp->if_mcast == NULL)
 			continue;
 
+		rw_exit_write(&mrt_routerlock);
 		*apival = 0;
 		return (EPERM);
 	}
 	if (mrt_count[rtableid] > 0) {
+		rw_exit_write(&mrt_routerlock);
 		*apival = 0;
 		return (EPERM);
 	}
 
-	mrt_api_config = *apival & mrt_api_support;
-	*apival = mrt_api_config;
+	*apival &= mrt_api_support;
+	mrt_api_config[rtableid] = *apival;
+
+	rw_exit_write(&mrt_routerlock);
 
 	return (0);
 }
@@ -712,12 +739,12 @@ set_api_config(struct socket *so, struct
 int
 get_api_support(struct mbuf *m)
 {
-	u_int32_t *apival;
+	uint32_t *apival;
 
 	if (m == NULL || m->m_len < sizeof(u_int32_t))
 		return (EINVAL);
 
-	apival = mtod(m, u_int32_t *);
+	apival = mtod(m, uint32_t *);
 
 	*apival = mrt_api_support;
 
@@ -728,16 +755,24 @@ get_api_support(struct mbuf *m)
  * Get API configured capabilities
  */
 int
-get_api_config(struct mbuf *m)
+get_api_config(struct socket *so, struct mbuf *m)
 {
-	u_int32_t *apival;
+	struct inpcb *inp = sotoinpcb(so);
+	uint32_t *apival;
+	unsigned int rtableid = inp->inp_rtableid;
 
 	if (m == NULL || m->m_len < sizeof(u_int32_t))
 		return (EINVAL);
 
-	apival = mtod(m, u_int32_t *);
+	apival = mtod(m, uint32_t *);
 
-	*apival = mrt_api_config;
+	rw_enter_read(&mrt_routerlock);
+	if (so != mrt_mrouter[inp->inp_rtableid]) {
+		rw_exit_read(&mrt_routerlock);
+		return (EPROTONOSUPPORT);
+	}
+	*apival = mrt_api_config[rtableid];
+	rw_exit_read(&mrt_routerlock);
 
 	return (0);
 }
@@ -759,6 +794,13 @@ add_vif(struct socket *so, struct mbuf *
 	if (m == NULL || m->m_len < sizeof(struct vifctl))
 		return (EINVAL);
 
+	rw_enter_read(&mrt_routerlock);
+	if (so != mrt_mrouter[inp->inp_rtableid]) {
+		rw_exit_read(&mrt_routerlock);
+		return (EPROTONOSUPPORT);
+	}
+	rw_exit_read(&mrt_routerlock);
+
 	vifcp = mtod(m, struct vifctl *);
 	if (vifcp->vifc_vifi >= MAXVIFS)
 		return (EINVAL);
@@ -826,6 +868,13 @@ del_vif(struct socket *so, struct mbuf *
 	if (m == NULL || m->m_len < sizeof(vifi_t))
 		return (EINVAL);
 
+	rw_enter_read(&mrt_routerlock);
+	if (so != mrt_mrouter[inp->inp_rtableid]) {
+		rw_exit_read(&mrt_routerlock);
+		return (EPROTONOSUPPORT);
+	}
+	rw_exit_read(&mrt_routerlock);
+
 	vifip = mtod(m, vifi_t *);
 	if ((ifp = if_lookupbyvif(*vifip, rtableid)) == NULL)
 		return (EADDRNOTAVAIL);
@@ -873,7 +922,7 @@ mfc_expire_route(struct rtentry *rt, u_i
 	/* Not expired, add it back to the queue. */
 	if (mfc->mfc_expire == 0) {
 		mfc->mfc_expire = 1;
-		rt_timer_add(rt, &ip_mrouterq, rtableid);
+		rt_timer_add(rt, &mrt_timer, rtableid);
 		return;
 	}
 
@@ -887,6 +936,7 @@ mfc_add_route(struct ifnet *ifp, struct 
 	struct vif		*v = ifp->if_mcast;
 	struct rtentry		*rt;
 	struct mfc		*mfc;
+	uint32_t		 api_config;
 	unsigned int		 rtableid = ifp->if_rdomain;
 
 	rt = rt_mcast_add(ifp, origin, group);
@@ -907,19 +957,23 @@ mfc_add_route(struct ifnet *ifp, struct 
 
 	rt->rt_llinfo = (caddr_t)mfc;
 
-	rt_timer_add(rt, &ip_mrouterq, rtableid);
+	rt_timer_add(rt, &mrt_timer, rtableid);
+
+	rw_enter_read(&mrt_routerlock);
+	api_config = mrt_api_config[rtableid];
+	rw_exit_read(&mrt_routerlock);
 
 	mfc->mfc_parent = mfccp->mfcc_parent;
 	mfc->mfc_pkt_cnt = 0;
 	mfc->mfc_byte_cnt = 0;
 	mfc->mfc_wrong_if = 0;
 	mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id];
-	mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config &
+	mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & api_config &
 	    MRT_MFC_FLAGS_ALL;
 	mfc->mfc_expire = 0;
 
 	/* set the RP address */
-	if (mrt_api_config & MRT_MFC_RP)
+	if (api_config & MRT_MFC_RP)
 		mfc->mfc_rp = mfccp->mfcc_rp;
 	else
 		mfc->mfc_rp = zeroin_addr;
@@ -1053,11 +1107,20 @@ add_mfc(struct socket *so, struct mbuf *
 	struct inpcb *inp = sotoinpcb(so);
 	struct mfcctl2 mfcctl2;
 	int mfcctl_size = sizeof(struct mfcctl);
+	uint32_t api_config;
 	unsigned int rtableid = inp->inp_rtableid;
 
 	NET_ASSERT_LOCKED();
 
-	if (mrt_api_config & MRT_API_FLAGS_ALL)
+	rw_enter_read(&mrt_routerlock);
+	if (so != mrt_mrouter[inp->inp_rtableid]) {
+		rw_exit_read(&mrt_routerlock);
+		return (EPROTONOSUPPORT);
+	}
+	api_config = mrt_api_config[rtableid];
+	rw_exit_read(&mrt_routerlock);
+
+	if (api_config & MRT_API_FLAGS_ALL)
 		mfcctl_size = sizeof(struct mfcctl2);
 
 	if (m == NULL || m->m_len < mfcctl_size)
@@ -1066,7 +1129,7 @@ add_mfc(struct socket *so, struct mbuf *
 	/*
 	 * select data size depending on API version.
 	 */
-	if (mrt_api_config & MRT_API_FLAGS_ALL) {
+	if (api_config & MRT_API_FLAGS_ALL) {
 		struct mfcctl2 *mp2 = mtod(m, struct mfcctl2 *);
 		memcpy((caddr_t)&mfcctl2, mp2, sizeof(*mp2));
 	} else {
@@ -1103,6 +1166,13 @@ del_mfc(struct socket *so, struct mbuf *
 	if (m == NULL || m->m_len < mfcctl_size)
 		return (EINVAL);
 
+	rw_enter_read(&mrt_routerlock);
+	if (so != mrt_mrouter[inp->inp_rtableid]) {
+		rw_exit_read(&mrt_routerlock);
+		return (EPROTONOSUPPORT);
+	}
+	rw_exit_read(&mrt_routerlock);
+
 	mp = mtod(m, struct mfcctl *);
 
 	memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp));
@@ -1238,12 +1308,15 @@ ip_mforward(struct mbuf *m, struct ifnet
 			mrtstat_inc(mrts_upcalls);
 
 			sin.sin_addr = ip->ip_src;
-			if (socket_send(ip_mrouter[rtableid], mm, &sin) < 0) {
+			rw_enter_read(&mrt_routerlock);
+			if (socket_send(mrt_mrouter[rtableid], mm, &sin) < 0) {
+				rw_exit_read(&mrt_routerlock);
 				log(LOG_WARNING, "ip_mforward: ip_mrouter "
 				    "socket queue full\n");
 				mrtstat_inc(mrts_upq_sockfull);
 				return (ENOBUFS);
 			}
+			rw_exit_read(&mrt_routerlock);
 
 			mfc_add(NULL, &ip->ip_src, &ip->ip_dst, v->v_id,
 			    rtableid, M_NOWAIT);
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.417 ip_output.c
--- netinet/ip_output.c	6 May 2026 11:36:13 -0000	1.417
+++ netinet/ip_output.c	25 Jun 2026 12:32:14 -0000
@@ -325,7 +325,7 @@ reroute:
 			 * if necessary.
 			 */
 			if (atomic_load_int(&ipmforwarding) &&
-			    ip_mrouter[ifp->if_rdomain] &&
+			    ip_mrouter_active(ifp->if_rdomain) &&
 			    (flags & IP_FORWARDING) == 0) {
 				int rv;
 
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
diff -u -p -r1.124 ip_var.h
--- netinet/ip_var.h	21 Jun 2026 21:17:07 -0000	1.124
+++ netinet/ip_var.h	25 Jun 2026 12:32:14 -0000
@@ -247,6 +247,7 @@ int	 ip_getmoptions(int, struct ip_mopti
 void	 ip_init(void);
 struct mbuf*
 	 ip_insertoptions(struct mbuf *, struct mbuf *, int *);
+int	 ip_mrouter_active(u_int);
 int	 ip_mforward(struct mbuf *, struct ifnet *, int);
 int	 ip_optcopy(struct ip *, struct ip *);
 int	 ip_output(struct mbuf *, struct mbuf *, struct route *, int,
@@ -282,9 +283,6 @@ int	 rip_disconnect(struct socket *);
 int	 rip_shutdown(struct socket *);
 int	 rip_send(struct socket *, struct mbuf *, struct mbuf *,
 	     struct mbuf *);
-#ifdef MROUTING
-extern struct socket *ip_mrouter[];	/* multicast routing daemon */
-#endif
 
 #endif /* _KERNEL */
 #endif /* _NETINET_IP_VAR_H_ */
Index: netinet/raw_ip.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
diff -u -p -r1.167 raw_ip.c
--- netinet/raw_ip.c	8 Jul 2025 00:47:41 -0000	1.167
+++ netinet/raw_ip.c	25 Jun 2026 12:32:14 -0000
@@ -504,8 +504,7 @@ rip_detach(struct socket *so)
 		return (EINVAL);
 
 #ifdef MROUTING
-	if (so == ip_mrouter[inp->inp_rtableid])
-		ip_mrouter_done(so);
+	ip_mrouter_done(so);
 #endif
 	in_pcbdetach(inp);