From: Alexander Bluhm Subject: Re: Push netlock down to mrt_sysctl_vif() To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Fri, 18 Jul 2025 16:06:31 +0200 On Thu, Jul 17, 2025 at 09:32:12PM +0300, Vitaliy Makkoveev wrote: > Link the network interfaces to the temporary `if_tmplist' list protected > with `if_tmplist_lock' rwlock(9). This moves copyuot() out of exclusive > netlock and allows to relax it to shared netlock. > > ip_sysctl() became mp-safe, so mark it as PR_MPSYSCTL. > > Note, this time we have a small amount of places where we use > `if_tmplist' to userland delivery. I could rework them to use iterator, > so the copyout(9) will be done lockless. The null interface index is > invalid, so the special iterator does not require to add something new > to the ifnet structure. A return (error) is missing, see below. > Index: sys/net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.737 > diff -u -p -r1.737 if.c > --- sys/net/if.c 7 Jul 2025 02:28:50 -0000 1.737 > +++ sys/net/if.c 17 Jul 2025 17:58:26 -0000 > @@ -187,8 +187,6 @@ struct softnet * > * if_get(0) returns NULL. > */ > > -struct ifnet *if_ref(struct ifnet *); > - > /* > * struct if_idxmap > * > Index: sys/net/if.h > =================================================================== > RCS file: /cvs/src/sys/net/if.h,v > retrieving revision 1.219 > diff -u -p -r1.219 if.h > --- sys/net/if.h 9 May 2025 03:12:36 -0000 1.219 > +++ sys/net/if.h 17 Jul 2025 17:58:26 -0000 > @@ -554,6 +554,7 @@ int if_delgroup(struct ifnet *, const ch > void if_group_routechange(const struct sockaddr *, const struct sockaddr *); > struct ifnet *if_unit(const char *); > struct ifnet *if_get(unsigned int); > +struct ifnet *if_ref(struct ifnet *); > void if_put(struct ifnet *); > void ifnewlladdr(struct ifnet *); > void if_congestion(void); > Index: sys/net/if_var.h > =================================================================== > RCS file: /cvs/src/sys/net/if_var.h,v > retrieving revision 1.138 > diff -u -p -r1.138 if_var.h > --- sys/net/if_var.h 25 Jun 2025 20:26:32 -0000 1.138 > +++ sys/net/if_var.h 17 Jul 2025 17:58:26 -0000 > @@ -328,6 +328,7 @@ int niq_enqueue(struct niqueue *, struc > #define sysctl_niq(_n, _l, _op, _olp, _np, _nl, _niq) \ > sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q) > > +extern struct rwlock if_tmplist_lock; > extern struct ifnet_head ifnetlist; > > void if_start(struct ifnet *); > Index: sys/netinet/in_proto.c > =================================================================== > RCS file: /cvs/src/sys/netinet/in_proto.c,v > retrieving revision 1.126 > diff -u -p -r1.126 in_proto.c > --- sys/netinet/in_proto.c 8 Jul 2025 00:47:41 -0000 1.126 > +++ sys/netinet/in_proto.c 17 Jul 2025 17:58:26 -0000 > @@ -168,6 +168,7 @@ const struct protosw inetsw[] = { > .pr_init = ip_init, > .pr_slowtimo = ip_slowtimo, > #ifndef SMALL_KERNEL > + .pr_flags = PR_MPSYSCTL, > .pr_sysctl = ip_sysctl > #endif /* SMALL_KERNEL */ > }, > Index: sys/netinet/ip_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_input.c,v > retrieving revision 1.421 > diff -u -p -r1.421 ip_input.c > --- sys/netinet/ip_input.c 17 Jul 2025 17:31:45 -0000 1.421 > +++ sys/netinet/ip_input.c 17 Jul 2025 17:58:26 -0000 > @@ -1807,10 +1807,7 @@ ip_sysctl(int *name, u_int namelen, void > case IPCTL_MRTVIF: > if (newp) > return (EPERM); > - NET_LOCK(); > - error = mrt_sysctl_vif(oldp, oldlenp); > - NET_UNLOCK(); > - return (error); > + return (mrt_sysctl_vif(oldp, oldlenp)); > #else > case IPCTL_MRTPROTO: > case IPCTL_MRTSTATS: > Index: sys/netinet/ip_mroute.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_mroute.c,v > retrieving revision 1.149 > diff -u -p -r1.149 ip_mroute.c > --- sys/netinet/ip_mroute.c 8 Jul 2025 00:47:41 -0000 1.149 > +++ sys/netinet/ip_mroute.c 17 Jul 2025 17:58:26 -0000 > @@ -360,6 +360,8 @@ get_vif_cnt(unsigned int rtableid, struc > int > mrt_sysctl_vif(void *oldp, size_t *oldlenp) > { > + TAILQ_HEAD(, ifnet) if_tmplist = > + TAILQ_HEAD_INITIALIZER(if_tmplist); > caddr_t where = oldp; > size_t needed, given; > struct ifnet *ifp; > @@ -369,9 +371,24 @@ mrt_sysctl_vif(void *oldp, size_t *oldle > given = *oldlenp; > needed = 0; > memset(&vinfo, 0, sizeof vinfo); > + > + rw_enter_write(&if_tmplist_lock); > + NET_LOCK_SHARED(); > + > TAILQ_FOREACH(ifp, &ifnetlist, if_list) { > - if ((vifp = (struct vif *)ifp->if_mcast) == NULL) > + if (ifp->if_mcast != NULL) { > + if_ref(ifp); > + TAILQ_INSERT_TAIL(&if_tmplist, ifp, if_tmplist); > + } > + } > + NET_UNLOCK_SHARED(); > + > + TAILQ_FOREACH (ifp, &if_tmplist, if_tmplist) { > + NET_LOCK_SHARED(); > + if ((vifp = (struct vif *)ifp->if_mcast) == NULL) { > + NET_UNLOCK_SHARED(); > continue; > + } > > vinfo.v_vifi = vifp->v_id; > vinfo.v_flags = vifp->v_flags; > @@ -382,6 +399,7 @@ mrt_sysctl_vif(void *oldp, size_t *oldle > vinfo.v_pkt_out = vifp->v_pkt_out; > vinfo.v_bytes_in = vifp->v_bytes_in; > vinfo.v_bytes_out = vifp->v_bytes_out; > + NET_UNLOCK_SHARED(); > > needed += sizeof(vinfo); > if (where && needed <= given) { > @@ -389,10 +407,18 @@ mrt_sysctl_vif(void *oldp, size_t *oldle > > error = copyout(&vinfo, where, sizeof(vinfo)); > if (error) > - return (error); > + break; > where += sizeof(vinfo); > } > } > + > + while ((ifp = TAILQ_FIRST(&if_tmplist))) { > + TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist); > + if_put(ifp); > + } > + > + rw_exit_write(&if_tmplist_lock); I copyout() failed, we should not update oldlenp but return the error. At least this was the old behavior. Add something like this here. if (error) return (error); > + > if (where) { > *oldlenp = needed; > if (given < needed)