
From: Vitaliy Makkoveev <mvs@openbsd.org>
Subject: Re: ARP list iterator
To: Alexander Bluhm <alexander.bluhm@gmx.net>
Cc: tech@openbsd.org
Date: Tue, 2 Sep 2025 12:51:15 +0300

On Mon, Sep 01, 2025 at 09:37:25PM +0200, Alexander Bluhm wrote:
> On Sat, Aug 30, 2025 at 08:22:41PM +0300, Vitaliy Makkoveev wrote:
> > I'd like to move the refcounting and the `arp_mtx' release down into
> > the 'if' block.  You don't need to do this every time.  With this fix
> > the diff is OK mvs.
> 
> Yes, that is more efficient.
> 
> While porting this to ND6, I triggered a crash.  Although not seen
> with ARP tests, the same race might exist here, too.  Between releasing
> the mutex and grabbing the net lock another thread may free the ARP
> entry.  So we should check RTF_LLINFO in arptfree() while holding
> the exclusive net lock.
> 
> still ok?
> 

Sure.
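
For the archive, the marker trick in arpiterator() below boils down to
the following minimal userland sketch.  It is not the committed code:
`entry', `next_entry' and the plain `e_refs' counter stand in for
struct llinfo_arp, arpiterator() and the kernel refcnt API, and a
marker here is simply an entry with e_data == NULL instead of a
prefix-compatible struct llinfo_arp_iterator.

	#include <sys/queue.h>
	#include <stdlib.h>

	struct entry {
		LIST_ENTRY(entry)	 e_list;
		void			*e_data;	/* NULL identifies a marker */
		unsigned int		 e_refs;	/* caller holds the list lock */
	};

	LIST_HEAD(entry_head, entry) head = LIST_HEAD_INITIALIZER(head);

	struct entry *
	next_entry(struct entry *cur, struct entry *marker)
	{
		struct entry *e;

		/* Resume behind our marker, or start at the list head. */
		if (cur != NULL)
			e = LIST_NEXT(marker, e_list);
		else
			e = LIST_FIRST(&head);

		/* Skip markers parked in the list by other iterators. */
		while (e != NULL && e->e_data == NULL)
			e = LIST_NEXT(e, e_list);

		if (cur != NULL) {
			/* Unhook our marker and drop the reference that
			 * kept the previous entry allocated. */
			LIST_REMOVE(marker, e_list);
			if (--cur->e_refs == 0)
				free(cur);
		}
		if (e != NULL) {
			/* Park the marker behind e and pin e, so the lock
			 * can be dropped while the caller works on it. */
			LIST_INSERT_AFTER(e, marker, e_list);
			e->e_refs++;
		}
		return e;
	}

A caller initializes marker with e_data == NULL and loops
while ((e = next_entry(e, &marker)) != NULL), free to drop and re-take
the list lock inside the body; that is exactly the shape of the new
arptimer() loop.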

> bluhm
> 
> Index: netinet/if_ether.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> diff -u -p -r1.276 if_ether.c
> --- netinet/if_ether.c	17 Jul 2025 17:31:45 -0000	1.276
> +++ netinet/if_ether.c	1 Sep 2025 09:41:33 -0000
> @@ -73,8 +73,10 @@
>   */
>  
>  struct llinfo_arp {
> -	LIST_ENTRY(llinfo_arp)	 la_list;	/* [mN] global arp_list */
> +	LIST_ENTRY(llinfo_arp)	 la_list;	/* [m] global arp_list */
>  	struct rtentry		*la_rt;		/* [I] backpointer to rtentry */
> +	/* keep fields above in sync with struct llinfo_arp_iterator */
> +	struct refcnt		 la_refcnt;	/* entry referenced by list */
>  	struct mbuf_queue	 la_mq;		/* packet hold queue */
>  	time_t			 la_refreshed;	/* when was refresh sent */
>  	int			 la_asked;	/* number of queries sent */
> @@ -82,6 +84,12 @@ struct llinfo_arp {
>  #define LA_HOLD_QUEUE 10
>  #define LA_HOLD_TOTAL 100
>  
> +struct llinfo_arp_iterator {
> +	LIST_ENTRY(llinfo_arp)	 la_list;	/* [m] global arp_list */
> +	struct rtentry		*la_rt;		/* [I] always NULL */
> +	/* keep fields above in sync with struct llinfo_arp */
> +};
> +
>  /* timer values */
>  int	arpt_prune = (5 * 60);	/* [I] walk list every 5 minutes */
>  int	arpt_keep = (20 * 60);	/* [a] once resolved, cache for 20 minutes */
> @@ -103,8 +111,8 @@ struct niqueue arpinq = NIQUEUE_INITIALI
>  /* llinfo_arp live time, rt_llinfo and RTF_LLINFO are protected by arp_mtx */
>  struct mutex arp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
>  
> -LIST_HEAD(, llinfo_arp) arp_list =
> -    LIST_HEAD_INITIALIZER(arp_list);	/* [mN] list of llinfo_arp structures */
> +LIST_HEAD(, llinfo_arp) arp_list = LIST_HEAD_INITIALIZER(arp_list);
> +				/* [m] list of llinfo_arp structures */
>  struct	pool arp_pool;		/* [I] pool for llinfo_arp structures */
>  int	arp_maxtries = 5;	/* [I] arp requests before set to rejected */
>  unsigned int	la_hold_total;	/* [a] packets currently in the arp queue */
> @@ -116,27 +124,63 @@ int revarp_finished;
>  unsigned int revarp_ifidx;
>  #endif /* NFSCLIENT */
>  
> +static struct llinfo_arp *
> +arpiterator(struct llinfo_arp *la, struct llinfo_arp_iterator *iter)
> +{
> +	struct llinfo_arp *tmp;
> +
> +	MUTEX_ASSERT_LOCKED(&arp_mtx);
> +
> +	if (la)
> +		tmp = LIST_NEXT((struct llinfo_arp *)iter, la_list);
> +	else
> +		tmp = LIST_FIRST(&arp_list);
> +
> +	while (tmp && tmp->la_rt == NULL)
> +		tmp = LIST_NEXT(tmp, la_list);
> +
> +	if (la) {
> +		LIST_REMOVE((struct llinfo_arp *)iter, la_list);
> +		if (refcnt_rele(&la->la_refcnt))
> +			pool_put(&arp_pool, la);
> +	}
> +	if (tmp) {
> +		LIST_INSERT_AFTER(tmp, (struct llinfo_arp *)iter, la_list);
> +		refcnt_take(&tmp->la_refcnt);
> +	}
> +
> +	return tmp;
> +}
> +
>  /*
> - * Timeout routine.  Age arp_tab entries periodically.
> + * Timeout routine.  Age arp table entries periodically.
>   */
>  void
>  arptimer(void *arg)
>  {
>  	struct timeout *to = arg;
> -	struct llinfo_arp *la, *nla;
> +	struct llinfo_arp_iterator iter = { .la_rt = NULL };
> +	struct llinfo_arp *la = NULL;
>  	time_t uptime;
>  
> -	NET_LOCK();
>  	uptime = getuptime();
>  	timeout_add_sec(to, arpt_prune);
> -	/* Net lock is exclusive, no arp mutex needed for arp_list here. */
> -	LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
> +
> +	mtx_enter(&arp_mtx);
> +	while ((la = arpiterator(la, &iter)) != NULL) {
>  		struct rtentry *rt = la->la_rt;
>  
> -		if (rt->rt_expire && rt->rt_expire < uptime)
> +		if (rt->rt_expire && rt->rt_expire < uptime) {
> +			rtref(rt);
> +			mtx_leave(&arp_mtx);
> +			NET_LOCK();
>  			arptfree(rt); /* timer has expired; clear */
> +			NET_UNLOCK();
> +			rtfree(rt);
> +			mtx_enter(&arp_mtx);
> +		}
>  	}
> -	NET_UNLOCK();
> +	mtx_leave(&arp_mtx);
>  }
>  
>  void
> @@ -210,6 +254,7 @@ arp_rtrequest(struct ifnet *ifp, int req
>  			pool_put(&arp_pool, la);
>  			break;
>  		}
> +		refcnt_init(&la->la_refcnt);
>  		mq_init(&la->la_mq, LA_HOLD_QUEUE, IPL_SOFTNET);
>  		rt->rt_llinfo = (caddr_t)la;
>  		la->la_rt = rt;
> @@ -235,7 +280,8 @@ arp_rtrequest(struct ifnet *ifp, int req
>  		atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
>  		mtx_leave(&arp_mtx);
>  
> -		pool_put(&arp_pool, la);
> +		if (refcnt_rele(&la->la_refcnt))
> +			pool_put(&arp_pool, la);
>  		break;
>  
>  	case RTM_INVALIDATE:
> @@ -749,6 +795,12 @@ void
>  arptfree(struct rtentry *rt)
>  {
>  	struct ifnet *ifp;
> +
> +	NET_ASSERT_LOCKED_EXCLUSIVE();
> +
> +	/* might have been freed between leave arp_mtx and enter net lock */
> +	if (!ISSET(rt->rt_flags, RTF_LLINFO))
> +		return;
>  
>  	KASSERT(!ISSET(rt->rt_flags, RTF_LOCAL));
>  	arpinvalidate(rt);
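
To make the race window concrete: between mtx_leave(&arp_mtx) and
NET_LOCK() in the new arptimer(), a concurrent RTM_DELETE can tear the
llinfo off the route.  Schematically (illustrative interleaving, not
code):

    arptimer thread                     concurrent thread
    ---------------                     -----------------
    mtx_enter(&arp_mtx);
    la = arpiterator(la, &iter);        (iterator holds a ref on la)
    rtref(rt);
    mtx_leave(&arp_mtx);
                                        NET_LOCK();
                                        arp_rtrequest(RTM_DELETE):
                                            clears RTF_LLINFO,
                                            detaches rt_llinfo
                                        NET_UNLOCK();
    NET_LOCK();
    arptfree(rt);                       (rt itself is still valid via
                                        rtref(), but its llinfo is
                                        detached; the new RTF_LLINFO
                                        check returns early)
    NET_UNLOCK();
    rtfree(rt);

Since route requests run under the net lock, RTF_LLINFO cannot change
while arptimer() holds it exclusively, so the single test at the top of
arptfree() is enough.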