From: Vitaliy Makkoveev Subject: Re: Push netlock down to mrt_sysctl_vif() To: Alexander Bluhm Cc: tech@openbsd.org Date: Fri, 18 Jul 2025 21:06:04 +0300 On Fri, Jul 18, 2025 at 04:06:31PM +0200, Alexander Bluhm wrote: > On Thu, Jul 17, 2025 at 09:32:12PM +0300, Vitaliy Makkoveev wrote: > > Link the network interfaces to the temporary `if_tmplist' list protected > > with `if_tmplist_lock' rwlock(9). This moves copyuot() out of exclusive > > netlock and allows to relax it to shared netlock. > > > > ip_sysctl() became mp-safe, so mark it as PR_MPSYSCTL. > > > > Note, this time we have a small amount of places where we use > > `if_tmplist' to userland delivery. I could rework them to use iterator, > > so the copyout(9) will be done lockless. The null interface index is > > invalid, so the special iterator does not require to add something new > > to the ifnet structure. > > A return (error) is missing, see below. > Thanks for pointing. Index: sys/net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.738 diff -u -p -r1.738 if.c --- sys/net/if.c 18 Jul 2025 15:44:44 -0000 1.738 +++ sys/net/if.c 18 Jul 2025 18:04:07 -0000 @@ -187,8 +187,6 @@ struct softnet * * if_get(0) returns NULL. */ -struct ifnet *if_ref(struct ifnet *); - /* * struct if_idxmap * Index: sys/net/if.h =================================================================== RCS file: /cvs/src/sys/net/if.h,v retrieving revision 1.219 diff -u -p -r1.219 if.h --- sys/net/if.h 9 May 2025 03:12:36 -0000 1.219 +++ sys/net/if.h 18 Jul 2025 18:04:07 -0000 @@ -554,6 +554,7 @@ int if_delgroup(struct ifnet *, const ch void if_group_routechange(const struct sockaddr *, const struct sockaddr *); struct ifnet *if_unit(const char *); struct ifnet *if_get(unsigned int); +struct ifnet *if_ref(struct ifnet *); void if_put(struct ifnet *); void ifnewlladdr(struct ifnet *); void if_congestion(void); Index: sys/net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.138 diff -u -p -r1.138 if_var.h --- sys/net/if_var.h 25 Jun 2025 20:26:32 -0000 1.138 +++ sys/net/if_var.h 18 Jul 2025 18:04:07 -0000 @@ -328,6 +328,7 @@ int niq_enqueue(struct niqueue *, struc #define sysctl_niq(_n, _l, _op, _olp, _np, _nl, _niq) \ sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q) +extern struct rwlock if_tmplist_lock; extern struct ifnet_head ifnetlist; void if_start(struct ifnet *); Index: sys/netinet/in_proto.c =================================================================== RCS file: /cvs/src/sys/netinet/in_proto.c,v retrieving revision 1.126 diff -u -p -r1.126 in_proto.c --- sys/netinet/in_proto.c 8 Jul 2025 00:47:41 -0000 1.126 +++ sys/netinet/in_proto.c 18 Jul 2025 18:04:07 -0000 @@ -168,6 +168,7 @@ const struct protosw inetsw[] = { .pr_init = ip_init, .pr_slowtimo = ip_slowtimo, #ifndef SMALL_KERNEL + .pr_flags = PR_MPSYSCTL, .pr_sysctl = ip_sysctl #endif /* SMALL_KERNEL */ }, Index: sys/netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.421 diff -u -p -r1.421 ip_input.c --- sys/netinet/ip_input.c 17 Jul 2025 17:31:45 -0000 1.421 +++ sys/netinet/ip_input.c 18 Jul 2025 18:04:07 -0000 @@ -1807,10 +1807,7 @@ ip_sysctl(int *name, u_int namelen, void case IPCTL_MRTVIF: if (newp) return (EPERM); - NET_LOCK(); - error = mrt_sysctl_vif(oldp, oldlenp); - NET_UNLOCK(); - return (error); + return (mrt_sysctl_vif(oldp, oldlenp)); #else case IPCTL_MRTPROTO: case IPCTL_MRTSTATS: Index: sys/netinet/ip_mroute.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_mroute.c,v retrieving revision 1.149 diff -u -p -r1.149 ip_mroute.c --- sys/netinet/ip_mroute.c 8 Jul 2025 00:47:41 -0000 1.149 +++ sys/netinet/ip_mroute.c 18 Jul 2025 18:04:07 -0000 @@ -360,18 +360,36 @@ get_vif_cnt(unsigned int rtableid, struc int mrt_sysctl_vif(void *oldp, size_t *oldlenp) { + TAILQ_HEAD(, ifnet) if_tmplist = + TAILQ_HEAD_INITIALIZER(if_tmplist); caddr_t where = oldp; size_t needed, given; struct ifnet *ifp; struct vif *vifp; struct vifinfo vinfo; + int error = 0; given = *oldlenp; needed = 0; memset(&vinfo, 0, sizeof vinfo); + + rw_enter_write(&if_tmplist_lock); + NET_LOCK_SHARED(); + TAILQ_FOREACH(ifp, &ifnetlist, if_list) { - if ((vifp = (struct vif *)ifp->if_mcast) == NULL) + if (ifp->if_mcast != NULL) { + if_ref(ifp); + TAILQ_INSERT_TAIL(&if_tmplist, ifp, if_tmplist); + } + } + NET_UNLOCK_SHARED(); + + TAILQ_FOREACH (ifp, &if_tmplist, if_tmplist) { + NET_LOCK_SHARED(); + if ((vifp = (struct vif *)ifp->if_mcast) == NULL) { + NET_UNLOCK_SHARED(); continue; + } vinfo.v_vifi = vifp->v_id; vinfo.v_flags = vifp->v_flags; @@ -382,17 +400,27 @@ mrt_sysctl_vif(void *oldp, size_t *oldle vinfo.v_pkt_out = vifp->v_pkt_out; vinfo.v_bytes_in = vifp->v_bytes_in; vinfo.v_bytes_out = vifp->v_bytes_out; + NET_UNLOCK_SHARED(); needed += sizeof(vinfo); if (where && needed <= given) { - int error; - error = copyout(&vinfo, where, sizeof(vinfo)); if (error) - return (error); + break; where += sizeof(vinfo); } } + + while ((ifp = TAILQ_FIRST(&if_tmplist))) { + TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist); + if_put(ifp); + } + + rw_exit_write(&if_tmplist_lock); + + if (error) + return (error); + if (where) { *oldlenp = needed; if (given < needed)