Index | Thread | Search

From:
David Gwynne <david@gwynne.id.au>
Subject:
Re: multicast route counter
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Sat, 27 Jun 2026 10:10:09 +1000

Download raw body.

Thread
On Fri, Jun 26, 2026 at 10:42:44PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> I want to use per CPU counter for the multicast routes.

why?

the memory overhead of per cpu counters is non-trivial. are concurrent
accesses to these structs common enough to justify the memory
overhead compared to just using a per struct mutex or prod/cons
lock here?

>  Note that
> I have to call counters_alloc() with M_NOWAIT, so I extended the
> API.

this introduces failure points that you don't handle.

> 
> ok?
> 
> bluhm
> 
> Index: kern/subr_percpu.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_percpu.c,v
> diff -u -p -r1.11 subr_percpu.c
> --- kern/subr_percpu.c	16 Sep 2023 09:33:27 -0000	1.11
> +++ kern/subr_percpu.c	26 Jun 2026 11:54:40 -0000
> @@ -59,17 +59,17 @@ cpumem_put(struct pool *pp, struct cpume
>  }
>  
>  struct cpumem *
> -cpumem_malloc(size_t sz, int type)
> +cpumem_malloc_wait(size_t sz, int type, int wait)
>  {
>  	struct cpumem *cm;
>  	unsigned int cpu;
>  
>  	sz = roundup(sz, CACHELINESIZE);
>  
> -	cm = pool_get(&cpumem_pl, PR_WAITOK);
> +	cm = pool_get(&cpumem_pl, wait == M_WAITOK ? PR_WAITOK : PR_NOWAIT);
>  
>  	for (cpu = 0; cpu < ncpusfound; cpu++)
> -		cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
> +		cm[cpu].mem = malloc(sz, type, wait | M_ZERO);
>  
>  	return (cm);
>  }
> @@ -124,7 +124,7 @@ cpumem_next(struct cpumem_iter *i, struc
>  }
>  
>  struct cpumem *
> -counters_alloc(unsigned int n)
> +counters_alloc_wait(unsigned int n, int wait)
>  {
>  	struct cpumem *cm;
>  	struct cpumem_iter cmi;
> @@ -134,7 +134,7 @@ counters_alloc(unsigned int n)
>  	KASSERT(n > 0);
>  
>  	n++; /* add space for a generation number */
> -	cm = cpumem_malloc(n * sizeof(uint64_t), M_COUNTERS);
> +	cm = cpumem_malloc_wait(n * sizeof(uint64_t), M_COUNTERS, wait);
>  
>  	CPUMEM_FOREACH(counters, &cmi, cm) {
>  		for (i = 0; i < n; i++)
> @@ -257,9 +257,9 @@ cpumem_put(struct pool *pp, struct cpume
>  }
>  
>  struct cpumem *
> -cpumem_malloc(size_t sz, int type)
> +cpumem_malloc_wait(size_t sz, int type, int wait)
>  {
> -	return (malloc(sz, type, M_WAITOK | M_ZERO));
> +	return (malloc(sz, type, wait | M_ZERO));
>  }
>  
>  struct cpumem *
> @@ -287,11 +287,11 @@ cpumem_next(struct cpumem_iter *i, struc
>  }
>  
>  struct cpumem *
> -counters_alloc(unsigned int n)
> +counters_alloc_wait(unsigned int n, int wait)
>  {
>  	KASSERT(n > 0);
>  
> -	return (cpumem_malloc(n * sizeof(uint64_t), M_COUNTERS));
> +	return (cpumem_malloc_wait(n * sizeof(uint64_t), M_COUNTERS, wait));
>  }
>  
>  struct cpumem *
> @@ -339,3 +339,15 @@ counters_zero(struct cpumem *cm, unsigne
>  }
>  
>  #endif /* MULTIPROCESSOR */
> +
> +struct cpumem *
> +cpumem_malloc(size_t sz, int type)
> +{
> +	return (cpumem_malloc_wait(sz, type, M_WAITOK));
> +}
> +
> +struct cpumem *
> +counters_alloc(unsigned int n)
> +{
> +	return (counters_alloc_wait(n, M_WAITOK));
> +}
> Index: netinet/ip_mroute.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.c,v
> diff -u -p -r1.153 ip_mroute.c
> --- netinet/ip_mroute.c	24 Jun 2026 12:33:49 -0000	1.153
> +++ netinet/ip_mroute.c	26 Jun 2026 11:54:40 -0000
> @@ -318,6 +318,8 @@ get_sg_cnt(unsigned int rtableid, struct
>  
>  	req->pktcnt = req->bytecnt = req->wrong_if = 0;
>  	do {
> +		uint64_t count[mfc_ncounters], scratch[mfc_ncounters];
> +
>  		/* Don't consider non multicast routes. */
>  		if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
>  		    (RTF_HOST | RTF_MULTICAST))
> @@ -327,9 +329,10 @@ get_sg_cnt(unsigned int rtableid, struct
>  		if (mfc == NULL)
>  			continue;
>  
> -		req->pktcnt += mfc->mfc_pkt_cnt;
> -		req->bytecnt += mfc->mfc_byte_cnt;
> -		req->wrong_if += mfc->mfc_wrong_if;
> +		counters_read(mfc->mfc_counter, count, mfc_ncounters, scratch);
> +		req->pktcnt += count[mfc_packets];
> +		req->bytecnt += count[mfc_bytes];
> +		req->wrong_if += count[mfc_wrong_if];
>  	} while ((rt = rtable_iterate(rt)) != NULL);
>  
>  	return (0);
> @@ -474,6 +477,8 @@ mrt_rtwalk_mfcsysctl(struct rtentry *rt,
>  	    (uint8_t *)(minfo + 1) <=
>  	    (uint8_t *)msa->msa_minfos + msa->msa_len;
>  	    minfo++) {
> +		uint64_t count[mfc_ncounters], scratch[mfc_ncounters];
> +
>  		/* Find a new entry or update old entry. */
>  		if (minfo->mfc_origin.s_addr !=
>  		    satosin(rt->rt_gateway)->sin_addr.s_addr ||
> @@ -489,8 +494,9 @@ mrt_rtwalk_mfcsysctl(struct rtentry *rt,
>  		minfo->mfc_origin = satosin(rt->rt_gateway)->sin_addr;
>  		minfo->mfc_mcastgrp = satosin(rt_key(rt))->sin_addr;
>  		minfo->mfc_parent = mfc->mfc_parent;
> -		minfo->mfc_pkt_cnt += mfc->mfc_pkt_cnt;
> -		minfo->mfc_byte_cnt += mfc->mfc_byte_cnt;
> +		counters_read(mfc->mfc_counter, count, mfc_ncounters, scratch);
> +		minfo->mfc_pkt_cnt += count[mfc_packets];
> +		minfo->mfc_byte_cnt += count[mfc_bytes];
>  		minfo->mfc_ttls[v->v_id] = mfc->mfc_ttl;
>  		break;
>  	}
> @@ -895,11 +901,13 @@ mfc_add_route(struct ifnet *ifp, struct 
>  
>  	mfc = malloc(sizeof(*mfc), M_MRTABLE, wait | M_ZERO);
>  	if (mfc == NULL) {
> -		DPRINTF("origin %#08X group %#08X parent %d (%s) "
> -		    "malloc failed",
> -		    satosin(origin)->sin_addr.s_addr,
> -		    satosin(group)->sin_addr.s_addr,
> -		    mfccp->mfcc_parent, ifp->if_xname);
> +		mrt_mcast_del(rt, rtableid);
> +		rtfree(rt);
> +		return (ENOMEM);
> +	}
> +	mfc->mfc_counter = counters_alloc_wait(mfc_ncounters, wait);
> +	if (mfc->mfc_counter == NULL) {
> +		free(mfc, M_MRTABLE, sizeof(struct mfc));
>  		mrt_mcast_del(rt, rtableid);
>  		rtfree(rt);
>  		return (ENOMEM);
> @@ -910,9 +918,6 @@ mfc_add_route(struct ifnet *ifp, struct 
>  	rt_timer_add(rt, &ip_mrouterq, rtableid);
>  
>  	mfc->mfc_parent = mfccp->mfcc_parent;
> -	mfc->mfc_pkt_cnt = 0;
> -	mfc->mfc_byte_cnt = 0;
> -	mfc->mfc_wrong_if = 0;
>  	mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id];
>  	mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config &
>  	    MRT_MFC_FLAGS_ALL;
> @@ -1278,7 +1283,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp
>  	if (mfc->mfc_parent != v->v_id) {
>  		/* came in the wrong interface */
>  		mrtstat_inc(mrts_wrong_if);
> -		mfc->mfc_wrong_if++;
> +		counters_inc(mfc->mfc_counter, mfc_wrong_if);
>  		rtfree(rt);
>  		return (0);
>  	}
> @@ -1308,8 +1313,8 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp
>  		if (mfc == NULL)
>  			continue;
>  
> -		mfc->mfc_pkt_cnt++;
> -		mfc->mfc_byte_cnt += m->m_pkthdr.len;
> +		counters_pkt(mfc->mfc_counter, mfc_packets, mfc_bytes,
> +		    m->m_pkthdr.len);
>  
>  		/* Don't let this route expire. */
>  		mfc->mfc_expire = 0;
> @@ -1414,13 +1419,16 @@ void
>  mrt_mcast_del(struct rtentry *rt, unsigned int rtableid)
>  {
>  	struct ifnet		*ifp;
> +	struct mfc		*mfc;
>  	int			 error;
>  
>  	/* Remove all timers related to this route. */
>  	rt_timer_remove_all(rt);
>  
> -	free(rt->rt_llinfo, M_MRTABLE, sizeof(struct mfc));
> +	mfc = (struct mfc *)rt->rt_llinfo;
>  	rt->rt_llinfo = NULL;
> +	counters_free(mfc->mfc_counter, mfc_ncounters);
> +	free(mfc, M_MRTABLE, sizeof(struct mfc));
>  
>  	ifp = if_get(rt->rt_ifidx);
>  	if (ifp == NULL)
> Index: netinet/ip_mroute.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.h,v
> diff -u -p -r1.35 ip_mroute.h
> --- netinet/ip_mroute.h	24 Jun 2026 12:33:49 -0000	1.35
> +++ netinet/ip_mroute.h	26 Jun 2026 11:54:40 -0000
> @@ -224,14 +224,19 @@ struct vif {
>   * at a future point.)
>   */
>  struct mfc {
> +	struct	 cpumem *mfc_counter;		/* counters for src-grp */
> +	u_long	 mfc_expire;			/* expire timer */
> +	struct	 in_addr mfc_rp;		/* the RP address */
>  	vifi_t	 mfc_parent;			/* incoming vif */
> -	u_long	 mfc_pkt_cnt;			/* pkt count for src-grp */
> -	u_long	 mfc_byte_cnt;			/* byte count for src-grp */
> -	u_long	 mfc_wrong_if;			/* wrong if for src-grp	*/
>  	uint8_t	 mfc_ttl;			/* route interface ttl */
>  	uint8_t  mfc_flags;			/* MRT_MFC_FLAGS_* flags */
> -	struct in_addr	mfc_rp;			/* the RP address	     */
> -	u_long	 mfc_expire;			/* expire timer */
> +};
> +
> +enum mfc_counters {
> +	mfc_packets,			/* packet count for src-grp */
> +	mfc_bytes,			/* byte count for src-grp */
> +	mfc_wrong_if,			/* wrong if for src-grp	*/
> +	mfc_ncounters
>  };
>  
>  /*
> Index: sys/percpu.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/percpu.h,v
> diff -u -p -r1.9 percpu.h
> --- sys/percpu.h	16 Sep 2023 09:33:27 -0000	1.9
> +++ sys/percpu.h	26 Jun 2026 11:54:40 -0000
> @@ -54,6 +54,7 @@ struct cpumem	*cpumem_get(struct pool *)
>  void		 cpumem_put(struct pool *, struct cpumem *);
>  
>  struct cpumem	*cpumem_malloc(size_t, int);
> +struct cpumem	*cpumem_malloc_wait(size_t, int, int);
>  struct cpumem	*cpumem_malloc_ncpus(struct cpumem *, size_t, int);
>  void		 cpumem_free(struct cpumem *, int, size_t);
>  
> @@ -111,6 +112,7 @@ static struct {								\
>   */
>  
>  struct cpumem	*counters_alloc(unsigned int);
> +struct cpumem	*counters_alloc_wait(unsigned int, int);
>  struct cpumem	*counters_alloc_ncpus(struct cpumem *, unsigned int);
>  void		 counters_free(struct cpumem *, unsigned int);
>  void		 counters_read(struct cpumem *, uint64_t *, unsigned int,
>