Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: Push netlock down to mrt_sysctl_vif()
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
tech@openbsd.org
Date:
Sat, 19 Jul 2025 01:58:10 +0200

Download raw body.

Thread
On Fri, Jul 18, 2025 at 09:06:04PM +0300, Vitaliy Makkoveev wrote:
> On Fri, Jul 18, 2025 at 04:06:31PM +0200, Alexander Bluhm wrote:
> > On Thu, Jul 17, 2025 at 09:32:12PM +0300, Vitaliy Makkoveev wrote:
> > > Link the network interfaces to the temporary `if_tmplist' list protected
> > > with `if_tmplist_lock' rwlock(9). This moves copyuot() out of exclusive
> > > netlock and allows to relax it to shared netlock.
> > > 
> > > ip_sysctl() became mp-safe, so mark it as PR_MPSYSCTL.
> > > 
> > > Note, this time we have a small amount of places where we use
> > > `if_tmplist' to userland delivery. I could rework them to use iterator,
> > > so the copyout(9) will be done lockless. The null interface index is
> > > invalid, so the special iterator does not require to add something new
> > > to the ifnet structure.
> > 
> > A return (error) is missing, see below.
> >
> 
> Thanks for pointing.

OK bluhm@

> Index: sys/net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.738
> diff -u -p -r1.738 if.c
> --- sys/net/if.c	18 Jul 2025 15:44:44 -0000	1.738
> +++ sys/net/if.c	18 Jul 2025 18:04:07 -0000
> @@ -187,8 +187,6 @@ struct softnet *
>   * if_get(0) returns NULL.
>   */
>  
> -struct ifnet *if_ref(struct ifnet *);
> -
>  /*
>   * struct if_idxmap
>   *
> Index: sys/net/if.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if.h,v
> retrieving revision 1.219
> diff -u -p -r1.219 if.h
> --- sys/net/if.h	9 May 2025 03:12:36 -0000	1.219
> +++ sys/net/if.h	18 Jul 2025 18:04:07 -0000
> @@ -554,6 +554,7 @@ int	if_delgroup(struct ifnet *, const ch
>  void	if_group_routechange(const struct sockaddr *, const struct sockaddr *);
>  struct	ifnet *if_unit(const char *);
>  struct	ifnet *if_get(unsigned int);
> +struct	ifnet *if_ref(struct ifnet *);
>  void	if_put(struct ifnet *);
>  void	ifnewlladdr(struct ifnet *);
>  void	if_congestion(void);
> Index: sys/net/if_var.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_var.h,v
> retrieving revision 1.138
> diff -u -p -r1.138 if_var.h
> --- sys/net/if_var.h	25 Jun 2025 20:26:32 -0000	1.138
> +++ sys/net/if_var.h	18 Jul 2025 18:04:07 -0000
> @@ -328,6 +328,7 @@ int		niq_enqueue(struct niqueue *, struc
>  #define sysctl_niq(_n, _l, _op, _olp, _np, _nl, _niq) \
>      sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q)
>  
> +extern struct rwlock if_tmplist_lock;
>  extern struct ifnet_head ifnetlist;
>  
>  void	if_start(struct ifnet *);
> Index: sys/netinet/in_proto.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/in_proto.c,v
> retrieving revision 1.126
> diff -u -p -r1.126 in_proto.c
> --- sys/netinet/in_proto.c	8 Jul 2025 00:47:41 -0000	1.126
> +++ sys/netinet/in_proto.c	18 Jul 2025 18:04:07 -0000
> @@ -168,6 +168,7 @@ const struct protosw inetsw[] = {
>    .pr_init	= ip_init,
>    .pr_slowtimo	= ip_slowtimo,
>  #ifndef SMALL_KERNEL
> +  .pr_flags	= PR_MPSYSCTL,
>    .pr_sysctl	= ip_sysctl
>  #endif /* SMALL_KERNEL */
>  },
> Index: sys/netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.421
> diff -u -p -r1.421 ip_input.c
> --- sys/netinet/ip_input.c	17 Jul 2025 17:31:45 -0000	1.421
> +++ sys/netinet/ip_input.c	18 Jul 2025 18:04:07 -0000
> @@ -1807,10 +1807,7 @@ ip_sysctl(int *name, u_int namelen, void
>  	case IPCTL_MRTVIF:
>  		if (newp)
>  			return (EPERM);
> -		NET_LOCK();
> -		error = mrt_sysctl_vif(oldp, oldlenp);
> -		NET_UNLOCK();
> -		return (error);
> +		return (mrt_sysctl_vif(oldp, oldlenp));
>  #else
>  	case IPCTL_MRTPROTO:
>  	case IPCTL_MRTSTATS:
> Index: sys/netinet/ip_mroute.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_mroute.c,v
> retrieving revision 1.149
> diff -u -p -r1.149 ip_mroute.c
> --- sys/netinet/ip_mroute.c	8 Jul 2025 00:47:41 -0000	1.149
> +++ sys/netinet/ip_mroute.c	18 Jul 2025 18:04:07 -0000
> @@ -360,18 +360,36 @@ get_vif_cnt(unsigned int rtableid, struc
>  int
>  mrt_sysctl_vif(void *oldp, size_t *oldlenp)
>  {
> +	TAILQ_HEAD(, ifnet) if_tmplist =
> +	    TAILQ_HEAD_INITIALIZER(if_tmplist);
>  	caddr_t where = oldp;
>  	size_t needed, given;
>  	struct ifnet *ifp;
>  	struct vif *vifp;
>  	struct vifinfo vinfo;
> +	int error = 0;
>  
>  	given = *oldlenp;
>  	needed = 0;
>  	memset(&vinfo, 0, sizeof vinfo);
> +
> +	rw_enter_write(&if_tmplist_lock);
> +	NET_LOCK_SHARED();
> +
>  	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
> -		if ((vifp = (struct vif *)ifp->if_mcast) == NULL)
> +		if (ifp->if_mcast != NULL) {
> +			if_ref(ifp);
> +			TAILQ_INSERT_TAIL(&if_tmplist, ifp, if_tmplist);
> +		}
> +	}
> +	NET_UNLOCK_SHARED();
> +
> +	TAILQ_FOREACH (ifp, &if_tmplist, if_tmplist) {
> +		NET_LOCK_SHARED();
> +		if ((vifp = (struct vif *)ifp->if_mcast) == NULL) {
> +			NET_UNLOCK_SHARED();
>  			continue;
> +		}
>  
>  		vinfo.v_vifi = vifp->v_id;
>  		vinfo.v_flags = vifp->v_flags;
> @@ -382,17 +400,27 @@ mrt_sysctl_vif(void *oldp, size_t *oldle
>  		vinfo.v_pkt_out = vifp->v_pkt_out;
>  		vinfo.v_bytes_in = vifp->v_bytes_in;
>  		vinfo.v_bytes_out = vifp->v_bytes_out;
> +		NET_UNLOCK_SHARED();
>  
>  		needed += sizeof(vinfo);
>  		if (where && needed <= given) {
> -			int error;
> -
>  			error = copyout(&vinfo, where, sizeof(vinfo));
>  			if (error)
> -				return (error);
> +				break;
>  			where += sizeof(vinfo);
>  		}
>  	}
> +
> +	while ((ifp = TAILQ_FIRST(&if_tmplist))) {
> +		TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist);
> +		if_put(ifp);
> +	}
> +
> +	rw_exit_write(&if_tmplist_lock);
> +
> +	if (error)
> +		return (error);
> +
>  	if (where) {
>  		*oldlenp = needed;
>  		if (given < needed)
>