Download raw body.
multicast route counter
On Fri, Jun 26, 2026 at 10:42:44PM +0200, Alexander Bluhm wrote:
> Hi,
>
> I want to use per CPU counter for the multicast routes.
why?
the memory overhead of per cpu counters is non-trivial. are concurrent
accesses to these structs common enough to justify the memory
overhead compared to just using a per struct mutex or prod/cons
lock here?
> Note that
> I have to call counters_alloc() with M_NOWAIT, so I extended the
> API.
this introduces failure points that you don't handle.
>
> ok?
>
> bluhm
>
> Index: kern/subr_percpu.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_percpu.c,v
> diff -u -p -r1.11 subr_percpu.c
> --- kern/subr_percpu.c 16 Sep 2023 09:33:27 -0000 1.11
> +++ kern/subr_percpu.c 26 Jun 2026 11:54:40 -0000
> @@ -59,17 +59,17 @@ cpumem_put(struct pool *pp, struct cpume
> }
>
> struct cpumem *
> -cpumem_malloc(size_t sz, int type)
> +cpumem_malloc_wait(size_t sz, int type, int wait)
> {
> struct cpumem *cm;
> unsigned int cpu;
>
> sz = roundup(sz, CACHELINESIZE);
>
> - cm = pool_get(&cpumem_pl, PR_WAITOK);
> + cm = pool_get(&cpumem_pl, wait == M_WAITOK ? PR_WAITOK : PR_NOWAIT);
>
> for (cpu = 0; cpu < ncpusfound; cpu++)
> - cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
> + cm[cpu].mem = malloc(sz, type, wait | M_ZERO);
>
> return (cm);
> }
> @@ -124,7 +124,7 @@ cpumem_next(struct cpumem_iter *i, struc
> }
>
> struct cpumem *
> -counters_alloc(unsigned int n)
> +counters_alloc_wait(unsigned int n, int wait)
> {
> struct cpumem *cm;
> struct cpumem_iter cmi;
> @@ -134,7 +134,7 @@ counters_alloc(unsigned int n)
> KASSERT(n > 0);
>
> n++; /* add space for a generation number */
> - cm = cpumem_malloc(n * sizeof(uint64_t), M_COUNTERS);
> + cm = cpumem_malloc_wait(n * sizeof(uint64_t), M_COUNTERS, wait);
>
> CPUMEM_FOREACH(counters, &cmi, cm) {
> for (i = 0; i < n; i++)
> @@ -257,9 +257,9 @@ cpumem_put(struct pool *pp, struct cpume
> }
>
> struct cpumem *
> -cpumem_malloc(size_t sz, int type)
> +cpumem_malloc_wait(size_t sz, int type, int wait)
> {
> - return (malloc(sz, type, M_WAITOK | M_ZERO));
> + return (malloc(sz, type, wait | M_ZERO));
> }
>
> struct cpumem *
> @@ -287,11 +287,11 @@ cpumem_next(struct cpumem_iter *i, struc
> }
>
> struct cpumem *
> -counters_alloc(unsigned int n)
> +counters_alloc_wait(unsigned int n, int wait)
> {
> KASSERT(n > 0);
>
> - return (cpumem_malloc(n * sizeof(uint64_t), M_COUNTERS));
> + return (cpumem_malloc_wait(n * sizeof(uint64_t), M_COUNTERS, wait));
> }
>
> struct cpumem *
> @@ -339,3 +339,15 @@ counters_zero(struct cpumem *cm, unsigne
> }
>
> #endif /* MULTIPROCESSOR */
> +
> +struct cpumem *
> +cpumem_malloc(size_t sz, int type)
> +{
> + return (cpumem_malloc_wait(sz, type, M_WAITOK));
> +}
> +
> +struct cpumem *
> +counters_alloc(unsigned int n)
> +{
> + return (counters_alloc_wait(n, M_WAITOK));
> +}
> Index: netinet/ip_mroute.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.c,v
> diff -u -p -r1.153 ip_mroute.c
> --- netinet/ip_mroute.c 24 Jun 2026 12:33:49 -0000 1.153
> +++ netinet/ip_mroute.c 26 Jun 2026 11:54:40 -0000
> @@ -318,6 +318,8 @@ get_sg_cnt(unsigned int rtableid, struct
>
> req->pktcnt = req->bytecnt = req->wrong_if = 0;
> do {
> + uint64_t count[mfc_ncounters], scratch[mfc_ncounters];
> +
> /* Don't consider non multicast routes. */
> if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
> (RTF_HOST | RTF_MULTICAST))
> @@ -327,9 +329,10 @@ get_sg_cnt(unsigned int rtableid, struct
> if (mfc == NULL)
> continue;
>
> - req->pktcnt += mfc->mfc_pkt_cnt;
> - req->bytecnt += mfc->mfc_byte_cnt;
> - req->wrong_if += mfc->mfc_wrong_if;
> + counters_read(mfc->mfc_counter, count, mfc_ncounters, scratch);
> + req->pktcnt += count[mfc_packets];
> + req->bytecnt += count[mfc_bytes];
> + req->wrong_if += count[mfc_wrong_if];
> } while ((rt = rtable_iterate(rt)) != NULL);
>
> return (0);
> @@ -474,6 +477,8 @@ mrt_rtwalk_mfcsysctl(struct rtentry *rt,
> (uint8_t *)(minfo + 1) <=
> (uint8_t *)msa->msa_minfos + msa->msa_len;
> minfo++) {
> + uint64_t count[mfc_ncounters], scratch[mfc_ncounters];
> +
> /* Find a new entry or update old entry. */
> if (minfo->mfc_origin.s_addr !=
> satosin(rt->rt_gateway)->sin_addr.s_addr ||
> @@ -489,8 +494,9 @@ mrt_rtwalk_mfcsysctl(struct rtentry *rt,
> minfo->mfc_origin = satosin(rt->rt_gateway)->sin_addr;
> minfo->mfc_mcastgrp = satosin(rt_key(rt))->sin_addr;
> minfo->mfc_parent = mfc->mfc_parent;
> - minfo->mfc_pkt_cnt += mfc->mfc_pkt_cnt;
> - minfo->mfc_byte_cnt += mfc->mfc_byte_cnt;
> + counters_read(mfc->mfc_counter, count, mfc_ncounters, scratch);
> + minfo->mfc_pkt_cnt += count[mfc_packets];
> + minfo->mfc_byte_cnt += count[mfc_bytes];
> minfo->mfc_ttls[v->v_id] = mfc->mfc_ttl;
> break;
> }
> @@ -895,11 +901,13 @@ mfc_add_route(struct ifnet *ifp, struct
>
> mfc = malloc(sizeof(*mfc), M_MRTABLE, wait | M_ZERO);
> if (mfc == NULL) {
> - DPRINTF("origin %#08X group %#08X parent %d (%s) "
> - "malloc failed",
> - satosin(origin)->sin_addr.s_addr,
> - satosin(group)->sin_addr.s_addr,
> - mfccp->mfcc_parent, ifp->if_xname);
> + mrt_mcast_del(rt, rtableid);
> + rtfree(rt);
> + return (ENOMEM);
> + }
> + mfc->mfc_counter = counters_alloc_wait(mfc_ncounters, wait);
> + if (mfc->mfc_counter == NULL) {
> + free(mfc, M_MRTABLE, sizeof(struct mfc));
> mrt_mcast_del(rt, rtableid);
> rtfree(rt);
> return (ENOMEM);
> @@ -910,9 +918,6 @@ mfc_add_route(struct ifnet *ifp, struct
> rt_timer_add(rt, &ip_mrouterq, rtableid);
>
> mfc->mfc_parent = mfccp->mfcc_parent;
> - mfc->mfc_pkt_cnt = 0;
> - mfc->mfc_byte_cnt = 0;
> - mfc->mfc_wrong_if = 0;
> mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id];
> mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config &
> MRT_MFC_FLAGS_ALL;
> @@ -1278,7 +1283,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp
> if (mfc->mfc_parent != v->v_id) {
> /* came in the wrong interface */
> mrtstat_inc(mrts_wrong_if);
> - mfc->mfc_wrong_if++;
> + counters_inc(mfc->mfc_counter, mfc_wrong_if);
> rtfree(rt);
> return (0);
> }
> @@ -1308,8 +1313,8 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp
> if (mfc == NULL)
> continue;
>
> - mfc->mfc_pkt_cnt++;
> - mfc->mfc_byte_cnt += m->m_pkthdr.len;
> + counters_pkt(mfc->mfc_counter, mfc_packets, mfc_bytes,
> + m->m_pkthdr.len);
>
> /* Don't let this route expire. */
> mfc->mfc_expire = 0;
> @@ -1414,13 +1419,16 @@ void
> mrt_mcast_del(struct rtentry *rt, unsigned int rtableid)
> {
> struct ifnet *ifp;
> + struct mfc *mfc;
> int error;
>
> /* Remove all timers related to this route. */
> rt_timer_remove_all(rt);
>
> - free(rt->rt_llinfo, M_MRTABLE, sizeof(struct mfc));
> + mfc = (struct mfc *)rt->rt_llinfo;
> rt->rt_llinfo = NULL;
> + counters_free(mfc->mfc_counter, mfc_ncounters);
> + free(mfc, M_MRTABLE, sizeof(struct mfc));
>
> ifp = if_get(rt->rt_ifidx);
> if (ifp == NULL)
> Index: netinet/ip_mroute.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.h,v
> diff -u -p -r1.35 ip_mroute.h
> --- netinet/ip_mroute.h 24 Jun 2026 12:33:49 -0000 1.35
> +++ netinet/ip_mroute.h 26 Jun 2026 11:54:40 -0000
> @@ -224,14 +224,19 @@ struct vif {
> * at a future point.)
> */
> struct mfc {
> + struct cpumem *mfc_counter; /* counters for src-grp */
> + u_long mfc_expire; /* expire timer */
> + struct in_addr mfc_rp; /* the RP address */
> vifi_t mfc_parent; /* incoming vif */
> - u_long mfc_pkt_cnt; /* pkt count for src-grp */
> - u_long mfc_byte_cnt; /* byte count for src-grp */
> - u_long mfc_wrong_if; /* wrong if for src-grp */
> uint8_t mfc_ttl; /* route interface ttl */
> uint8_t mfc_flags; /* MRT_MFC_FLAGS_* flags */
> - struct in_addr mfc_rp; /* the RP address */
> - u_long mfc_expire; /* expire timer */
> +};
> +
> +enum mfc_counters {
> + mfc_packets, /* packet count for src-grp */
> + mfc_bytes, /* byte count for src-grp */
> + mfc_wrong_if, /* wrong if for src-grp */
> + mfc_ncounters
> };
>
> /*
> Index: sys/percpu.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/percpu.h,v
> diff -u -p -r1.9 percpu.h
> --- sys/percpu.h 16 Sep 2023 09:33:27 -0000 1.9
> +++ sys/percpu.h 26 Jun 2026 11:54:40 -0000
> @@ -54,6 +54,7 @@ struct cpumem *cpumem_get(struct pool *)
> void cpumem_put(struct pool *, struct cpumem *);
>
> struct cpumem *cpumem_malloc(size_t, int);
> +struct cpumem *cpumem_malloc_wait(size_t, int, int);
> struct cpumem *cpumem_malloc_ncpus(struct cpumem *, size_t, int);
> void cpumem_free(struct cpumem *, int, size_t);
>
> @@ -111,6 +112,7 @@ static struct { \
> */
>
> struct cpumem *counters_alloc(unsigned int);
> +struct cpumem *counters_alloc_wait(unsigned int, int);
> struct cpumem *counters_alloc_ncpus(struct cpumem *, unsigned int);
> void counters_free(struct cpumem *, unsigned int);
> void counters_read(struct cpumem *, uint64_t *, unsigned int,
>
multicast route counter