Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Push netlock down to mrt_sysctl_vif()
To:
Alexander Bluhm <bluhm@openbsd.org>, tech@openbsd.org
Date:
Thu, 17 Jul 2025 21:32:12 +0300

Download raw body.

Thread
Link the network interfaces to the temporary `if_tmplist' list protected
with `if_tmplist_lock' rwlock(9). This moves copyuot() out of exclusive
netlock and allows to relax it to shared netlock.

ip_sysctl() became mp-safe, so mark it as PR_MPSYSCTL.

Note, this time we have a small amount of places where we use
`if_tmplist' to userland delivery. I could rework them to use iterator,
so the copyout(9) will be done lockless. The null interface index is
invalid, so the special iterator does not require to add something new
to the ifnet structure.

Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.737
diff -u -p -r1.737 if.c
--- sys/net/if.c	7 Jul 2025 02:28:50 -0000	1.737
+++ sys/net/if.c	17 Jul 2025 17:58:26 -0000
@@ -187,8 +187,6 @@ struct softnet *
  * if_get(0) returns NULL.
  */
 
-struct ifnet *if_ref(struct ifnet *);
-
 /*
  * struct if_idxmap
  *
Index: sys/net/if.h
===================================================================
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.219
diff -u -p -r1.219 if.h
--- sys/net/if.h	9 May 2025 03:12:36 -0000	1.219
+++ sys/net/if.h	17 Jul 2025 17:58:26 -0000
@@ -554,6 +554,7 @@ int	if_delgroup(struct ifnet *, const ch
 void	if_group_routechange(const struct sockaddr *, const struct sockaddr *);
 struct	ifnet *if_unit(const char *);
 struct	ifnet *if_get(unsigned int);
+struct	ifnet *if_ref(struct ifnet *);
 void	if_put(struct ifnet *);
 void	ifnewlladdr(struct ifnet *);
 void	if_congestion(void);
Index: sys/net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.138
diff -u -p -r1.138 if_var.h
--- sys/net/if_var.h	25 Jun 2025 20:26:32 -0000	1.138
+++ sys/net/if_var.h	17 Jul 2025 17:58:26 -0000
@@ -328,6 +328,7 @@ int		niq_enqueue(struct niqueue *, struc
 #define sysctl_niq(_n, _l, _op, _olp, _np, _nl, _niq) \
     sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q)
 
+extern struct rwlock if_tmplist_lock;
 extern struct ifnet_head ifnetlist;
 
 void	if_start(struct ifnet *);
Index: sys/netinet/in_proto.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.126
diff -u -p -r1.126 in_proto.c
--- sys/netinet/in_proto.c	8 Jul 2025 00:47:41 -0000	1.126
+++ sys/netinet/in_proto.c	17 Jul 2025 17:58:26 -0000
@@ -168,6 +168,7 @@ const struct protosw inetsw[] = {
   .pr_init	= ip_init,
   .pr_slowtimo	= ip_slowtimo,
 #ifndef SMALL_KERNEL
+  .pr_flags	= PR_MPSYSCTL,
   .pr_sysctl	= ip_sysctl
 #endif /* SMALL_KERNEL */
 },
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.421
diff -u -p -r1.421 ip_input.c
--- sys/netinet/ip_input.c	17 Jul 2025 17:31:45 -0000	1.421
+++ sys/netinet/ip_input.c	17 Jul 2025 17:58:26 -0000
@@ -1807,10 +1807,7 @@ ip_sysctl(int *name, u_int namelen, void
 	case IPCTL_MRTVIF:
 		if (newp)
 			return (EPERM);
-		NET_LOCK();
-		error = mrt_sysctl_vif(oldp, oldlenp);
-		NET_UNLOCK();
-		return (error);
+		return (mrt_sysctl_vif(oldp, oldlenp));
 #else
 	case IPCTL_MRTPROTO:
 	case IPCTL_MRTSTATS:
Index: sys/netinet/ip_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.149
diff -u -p -r1.149 ip_mroute.c
--- sys/netinet/ip_mroute.c	8 Jul 2025 00:47:41 -0000	1.149
+++ sys/netinet/ip_mroute.c	17 Jul 2025 17:58:26 -0000
@@ -360,6 +360,8 @@ get_vif_cnt(unsigned int rtableid, struc
 int
 mrt_sysctl_vif(void *oldp, size_t *oldlenp)
 {
+	TAILQ_HEAD(, ifnet) if_tmplist =
+	    TAILQ_HEAD_INITIALIZER(if_tmplist);
 	caddr_t where = oldp;
 	size_t needed, given;
 	struct ifnet *ifp;
@@ -369,9 +371,24 @@ mrt_sysctl_vif(void *oldp, size_t *oldle
 	given = *oldlenp;
 	needed = 0;
 	memset(&vinfo, 0, sizeof vinfo);
+
+	rw_enter_write(&if_tmplist_lock);
+	NET_LOCK_SHARED();
+
 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
-		if ((vifp = (struct vif *)ifp->if_mcast) == NULL)
+		if (ifp->if_mcast != NULL) {
+			if_ref(ifp);
+			TAILQ_INSERT_TAIL(&if_tmplist, ifp, if_tmplist);
+		}
+	}
+	NET_UNLOCK_SHARED();
+
+	TAILQ_FOREACH (ifp, &if_tmplist, if_tmplist) {
+		NET_LOCK_SHARED();
+		if ((vifp = (struct vif *)ifp->if_mcast) == NULL) {
+			NET_UNLOCK_SHARED();
 			continue;
+		}
 
 		vinfo.v_vifi = vifp->v_id;
 		vinfo.v_flags = vifp->v_flags;
@@ -382,6 +399,7 @@ mrt_sysctl_vif(void *oldp, size_t *oldle
 		vinfo.v_pkt_out = vifp->v_pkt_out;
 		vinfo.v_bytes_in = vifp->v_bytes_in;
 		vinfo.v_bytes_out = vifp->v_bytes_out;
+		NET_UNLOCK_SHARED();
 
 		needed += sizeof(vinfo);
 		if (where && needed <= given) {
@@ -389,10 +407,18 @@ mrt_sysctl_vif(void *oldp, size_t *oldle
 
 			error = copyout(&vinfo, where, sizeof(vinfo));
 			if (error)
-				return (error);
+				break;
 			where += sizeof(vinfo);
 		}
 	}
+
+	while ((ifp = TAILQ_FIRST(&if_tmplist))) {
+		TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist);
+		if_put(ifp);
+	}
+
+	rw_exit_write(&if_tmplist_lock);
+
 	if (where) {
 		*oldlenp = needed;
 		if (given < needed)