From: Vitaliy Makkoveev Subject: Push netlock down to mrt_sysctl_vif() To: Alexander Bluhm , tech@openbsd.org Date: Thu, 17 Jul 2025 21:32:12 +0300 Link the network interfaces to the temporary `if_tmplist' list protected with `if_tmplist_lock' rwlock(9). This moves copyuot() out of exclusive netlock and allows to relax it to shared netlock. ip_sysctl() became mp-safe, so mark it as PR_MPSYSCTL. Note, this time we have a small amount of places where we use `if_tmplist' to userland delivery. I could rework them to use iterator, so the copyout(9) will be done lockless. The null interface index is invalid, so the special iterator does not require to add something new to the ifnet structure. Index: sys/net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.737 diff -u -p -r1.737 if.c --- sys/net/if.c 7 Jul 2025 02:28:50 -0000 1.737 +++ sys/net/if.c 17 Jul 2025 17:58:26 -0000 @@ -187,8 +187,6 @@ struct softnet * * if_get(0) returns NULL. */ -struct ifnet *if_ref(struct ifnet *); - /* * struct if_idxmap * Index: sys/net/if.h =================================================================== RCS file: /cvs/src/sys/net/if.h,v retrieving revision 1.219 diff -u -p -r1.219 if.h --- sys/net/if.h 9 May 2025 03:12:36 -0000 1.219 +++ sys/net/if.h 17 Jul 2025 17:58:26 -0000 @@ -554,6 +554,7 @@ int if_delgroup(struct ifnet *, const ch void if_group_routechange(const struct sockaddr *, const struct sockaddr *); struct ifnet *if_unit(const char *); struct ifnet *if_get(unsigned int); +struct ifnet *if_ref(struct ifnet *); void if_put(struct ifnet *); void ifnewlladdr(struct ifnet *); void if_congestion(void); Index: sys/net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.138 diff -u -p -r1.138 if_var.h --- sys/net/if_var.h 25 Jun 2025 20:26:32 -0000 1.138 +++ sys/net/if_var.h 17 Jul 2025 17:58:26 -0000 @@ -328,6 +328,7 @@ int niq_enqueue(struct niqueue *, struc #define sysctl_niq(_n, _l, _op, _olp, _np, _nl, _niq) \ sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q) +extern struct rwlock if_tmplist_lock; extern struct ifnet_head ifnetlist; void if_start(struct ifnet *); Index: sys/netinet/in_proto.c =================================================================== RCS file: /cvs/src/sys/netinet/in_proto.c,v retrieving revision 1.126 diff -u -p -r1.126 in_proto.c --- sys/netinet/in_proto.c 8 Jul 2025 00:47:41 -0000 1.126 +++ sys/netinet/in_proto.c 17 Jul 2025 17:58:26 -0000 @@ -168,6 +168,7 @@ const struct protosw inetsw[] = { .pr_init = ip_init, .pr_slowtimo = ip_slowtimo, #ifndef SMALL_KERNEL + .pr_flags = PR_MPSYSCTL, .pr_sysctl = ip_sysctl #endif /* SMALL_KERNEL */ }, Index: sys/netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.421 diff -u -p -r1.421 ip_input.c --- sys/netinet/ip_input.c 17 Jul 2025 17:31:45 -0000 1.421 +++ sys/netinet/ip_input.c 17 Jul 2025 17:58:26 -0000 @@ -1807,10 +1807,7 @@ ip_sysctl(int *name, u_int namelen, void case IPCTL_MRTVIF: if (newp) return (EPERM); - NET_LOCK(); - error = mrt_sysctl_vif(oldp, oldlenp); - NET_UNLOCK(); - return (error); + return (mrt_sysctl_vif(oldp, oldlenp)); #else case IPCTL_MRTPROTO: case IPCTL_MRTSTATS: Index: sys/netinet/ip_mroute.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_mroute.c,v retrieving revision 1.149 diff -u -p -r1.149 ip_mroute.c --- sys/netinet/ip_mroute.c 8 Jul 2025 00:47:41 -0000 1.149 +++ sys/netinet/ip_mroute.c 17 Jul 2025 17:58:26 -0000 @@ -360,6 +360,8 @@ get_vif_cnt(unsigned int rtableid, struc int mrt_sysctl_vif(void *oldp, size_t *oldlenp) { + TAILQ_HEAD(, ifnet) if_tmplist = + TAILQ_HEAD_INITIALIZER(if_tmplist); caddr_t where = oldp; size_t needed, given; struct ifnet *ifp; @@ -369,9 +371,24 @@ mrt_sysctl_vif(void *oldp, size_t *oldle given = *oldlenp; needed = 0; memset(&vinfo, 0, sizeof vinfo); + + rw_enter_write(&if_tmplist_lock); + NET_LOCK_SHARED(); + TAILQ_FOREACH(ifp, &ifnetlist, if_list) { - if ((vifp = (struct vif *)ifp->if_mcast) == NULL) + if (ifp->if_mcast != NULL) { + if_ref(ifp); + TAILQ_INSERT_TAIL(&if_tmplist, ifp, if_tmplist); + } + } + NET_UNLOCK_SHARED(); + + TAILQ_FOREACH (ifp, &if_tmplist, if_tmplist) { + NET_LOCK_SHARED(); + if ((vifp = (struct vif *)ifp->if_mcast) == NULL) { + NET_UNLOCK_SHARED(); continue; + } vinfo.v_vifi = vifp->v_id; vinfo.v_flags = vifp->v_flags; @@ -382,6 +399,7 @@ mrt_sysctl_vif(void *oldp, size_t *oldle vinfo.v_pkt_out = vifp->v_pkt_out; vinfo.v_bytes_in = vifp->v_bytes_in; vinfo.v_bytes_out = vifp->v_bytes_out; + NET_UNLOCK_SHARED(); needed += sizeof(vinfo); if (where && needed <= given) { @@ -389,10 +407,18 @@ mrt_sysctl_vif(void *oldp, size_t *oldle error = copyout(&vinfo, where, sizeof(vinfo)); if (error) - return (error); + break; where += sizeof(vinfo); } } + + while ((ifp = TAILQ_FIRST(&if_tmplist))) { + TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist); + if_put(ifp); + } + + rw_exit_write(&if_tmplist_lock); + if (where) { *oldlenp = needed; if (given < needed)