From: David Gwynne Subject: Re: multicast route counter To: Alexander Bluhm Cc: tech@openbsd.org Date: Sat, 27 Jun 2026 10:10:09 +1000 On Fri, Jun 26, 2026 at 10:42:44PM +0200, Alexander Bluhm wrote: > Hi, > > I want to use per CPU counter for the multicast routes. why? the memory overhead of per cpu counters is non-trivial. are concurrent accesses to these structs common enough to justify the memory overhead compared to just using a per struct mutex or prod/cons lock here? > Note that > I have to call counters_alloc() with M_NOWAIT, so I extended the > API. this introduces failure points that you don't handle. > > ok? > > bluhm > > Index: kern/subr_percpu.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_percpu.c,v > diff -u -p -r1.11 subr_percpu.c > --- kern/subr_percpu.c 16 Sep 2023 09:33:27 -0000 1.11 > +++ kern/subr_percpu.c 26 Jun 2026 11:54:40 -0000 > @@ -59,17 +59,17 @@ cpumem_put(struct pool *pp, struct cpume > } > > struct cpumem * > -cpumem_malloc(size_t sz, int type) > +cpumem_malloc_wait(size_t sz, int type, int wait) > { > struct cpumem *cm; > unsigned int cpu; > > sz = roundup(sz, CACHELINESIZE); > > - cm = pool_get(&cpumem_pl, PR_WAITOK); > + cm = pool_get(&cpumem_pl, wait == M_WAITOK ? PR_WAITOK : PR_NOWAIT); > > for (cpu = 0; cpu < ncpusfound; cpu++) > - cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO); > + cm[cpu].mem = malloc(sz, type, wait | M_ZERO); > > return (cm); > } > @@ -124,7 +124,7 @@ cpumem_next(struct cpumem_iter *i, struc > } > > struct cpumem * > -counters_alloc(unsigned int n) > +counters_alloc_wait(unsigned int n, int wait) > { > struct cpumem *cm; > struct cpumem_iter cmi; > @@ -134,7 +134,7 @@ counters_alloc(unsigned int n) > KASSERT(n > 0); > > n++; /* add space for a generation number */ > - cm = cpumem_malloc(n * sizeof(uint64_t), M_COUNTERS); > + cm = cpumem_malloc_wait(n * sizeof(uint64_t), M_COUNTERS, wait); > > CPUMEM_FOREACH(counters, &cmi, cm) { > for (i = 0; i < n; i++) > @@ -257,9 +257,9 @@ cpumem_put(struct pool *pp, struct cpume > } > > struct cpumem * > -cpumem_malloc(size_t sz, int type) > +cpumem_malloc_wait(size_t sz, int type, int wait) > { > - return (malloc(sz, type, M_WAITOK | M_ZERO)); > + return (malloc(sz, type, wait | M_ZERO)); > } > > struct cpumem * > @@ -287,11 +287,11 @@ cpumem_next(struct cpumem_iter *i, struc > } > > struct cpumem * > -counters_alloc(unsigned int n) > +counters_alloc_wait(unsigned int n, int wait) > { > KASSERT(n > 0); > > - return (cpumem_malloc(n * sizeof(uint64_t), M_COUNTERS)); > + return (cpumem_malloc_wait(n * sizeof(uint64_t), M_COUNTERS, wait)); > } > > struct cpumem * > @@ -339,3 +339,15 @@ counters_zero(struct cpumem *cm, unsigne > } > > #endif /* MULTIPROCESSOR */ > + > +struct cpumem * > +cpumem_malloc(size_t sz, int type) > +{ > + return (cpumem_malloc_wait(sz, type, M_WAITOK)); > +} > + > +struct cpumem * > +counters_alloc(unsigned int n) > +{ > + return (counters_alloc_wait(n, M_WAITOK)); > +} > Index: netinet/ip_mroute.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.c,v > diff -u -p -r1.153 ip_mroute.c > --- netinet/ip_mroute.c 24 Jun 2026 12:33:49 -0000 1.153 > +++ netinet/ip_mroute.c 26 Jun 2026 11:54:40 -0000 > @@ -318,6 +318,8 @@ get_sg_cnt(unsigned int rtableid, struct > > req->pktcnt = req->bytecnt = req->wrong_if = 0; > do { > + uint64_t count[mfc_ncounters], scratch[mfc_ncounters]; > + > /* Don't consider non multicast routes. */ > if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) != > (RTF_HOST | RTF_MULTICAST)) > @@ -327,9 +329,10 @@ get_sg_cnt(unsigned int rtableid, struct > if (mfc == NULL) > continue; > > - req->pktcnt += mfc->mfc_pkt_cnt; > - req->bytecnt += mfc->mfc_byte_cnt; > - req->wrong_if += mfc->mfc_wrong_if; > + counters_read(mfc->mfc_counter, count, mfc_ncounters, scratch); > + req->pktcnt += count[mfc_packets]; > + req->bytecnt += count[mfc_bytes]; > + req->wrong_if += count[mfc_wrong_if]; > } while ((rt = rtable_iterate(rt)) != NULL); > > return (0); > @@ -474,6 +477,8 @@ mrt_rtwalk_mfcsysctl(struct rtentry *rt, > (uint8_t *)(minfo + 1) <= > (uint8_t *)msa->msa_minfos + msa->msa_len; > minfo++) { > + uint64_t count[mfc_ncounters], scratch[mfc_ncounters]; > + > /* Find a new entry or update old entry. */ > if (minfo->mfc_origin.s_addr != > satosin(rt->rt_gateway)->sin_addr.s_addr || > @@ -489,8 +494,9 @@ mrt_rtwalk_mfcsysctl(struct rtentry *rt, > minfo->mfc_origin = satosin(rt->rt_gateway)->sin_addr; > minfo->mfc_mcastgrp = satosin(rt_key(rt))->sin_addr; > minfo->mfc_parent = mfc->mfc_parent; > - minfo->mfc_pkt_cnt += mfc->mfc_pkt_cnt; > - minfo->mfc_byte_cnt += mfc->mfc_byte_cnt; > + counters_read(mfc->mfc_counter, count, mfc_ncounters, scratch); > + minfo->mfc_pkt_cnt += count[mfc_packets]; > + minfo->mfc_byte_cnt += count[mfc_bytes]; > minfo->mfc_ttls[v->v_id] = mfc->mfc_ttl; > break; > } > @@ -895,11 +901,13 @@ mfc_add_route(struct ifnet *ifp, struct > > mfc = malloc(sizeof(*mfc), M_MRTABLE, wait | M_ZERO); > if (mfc == NULL) { > - DPRINTF("origin %#08X group %#08X parent %d (%s) " > - "malloc failed", > - satosin(origin)->sin_addr.s_addr, > - satosin(group)->sin_addr.s_addr, > - mfccp->mfcc_parent, ifp->if_xname); > + mrt_mcast_del(rt, rtableid); > + rtfree(rt); > + return (ENOMEM); > + } > + mfc->mfc_counter = counters_alloc_wait(mfc_ncounters, wait); > + if (mfc->mfc_counter == NULL) { > + free(mfc, M_MRTABLE, sizeof(struct mfc)); > mrt_mcast_del(rt, rtableid); > rtfree(rt); > return (ENOMEM); > @@ -910,9 +918,6 @@ mfc_add_route(struct ifnet *ifp, struct > rt_timer_add(rt, &ip_mrouterq, rtableid); > > mfc->mfc_parent = mfccp->mfcc_parent; > - mfc->mfc_pkt_cnt = 0; > - mfc->mfc_byte_cnt = 0; > - mfc->mfc_wrong_if = 0; > mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id]; > mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config & > MRT_MFC_FLAGS_ALL; > @@ -1278,7 +1283,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp > if (mfc->mfc_parent != v->v_id) { > /* came in the wrong interface */ > mrtstat_inc(mrts_wrong_if); > - mfc->mfc_wrong_if++; > + counters_inc(mfc->mfc_counter, mfc_wrong_if); > rtfree(rt); > return (0); > } > @@ -1308,8 +1313,8 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp > if (mfc == NULL) > continue; > > - mfc->mfc_pkt_cnt++; > - mfc->mfc_byte_cnt += m->m_pkthdr.len; > + counters_pkt(mfc->mfc_counter, mfc_packets, mfc_bytes, > + m->m_pkthdr.len); > > /* Don't let this route expire. */ > mfc->mfc_expire = 0; > @@ -1414,13 +1419,16 @@ void > mrt_mcast_del(struct rtentry *rt, unsigned int rtableid) > { > struct ifnet *ifp; > + struct mfc *mfc; > int error; > > /* Remove all timers related to this route. */ > rt_timer_remove_all(rt); > > - free(rt->rt_llinfo, M_MRTABLE, sizeof(struct mfc)); > + mfc = (struct mfc *)rt->rt_llinfo; > rt->rt_llinfo = NULL; > + counters_free(mfc->mfc_counter, mfc_ncounters); > + free(mfc, M_MRTABLE, sizeof(struct mfc)); > > ifp = if_get(rt->rt_ifidx); > if (ifp == NULL) > Index: netinet/ip_mroute.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.h,v > diff -u -p -r1.35 ip_mroute.h > --- netinet/ip_mroute.h 24 Jun 2026 12:33:49 -0000 1.35 > +++ netinet/ip_mroute.h 26 Jun 2026 11:54:40 -0000 > @@ -224,14 +224,19 @@ struct vif { > * at a future point.) > */ > struct mfc { > + struct cpumem *mfc_counter; /* counters for src-grp */ > + u_long mfc_expire; /* expire timer */ > + struct in_addr mfc_rp; /* the RP address */ > vifi_t mfc_parent; /* incoming vif */ > - u_long mfc_pkt_cnt; /* pkt count for src-grp */ > - u_long mfc_byte_cnt; /* byte count for src-grp */ > - u_long mfc_wrong_if; /* wrong if for src-grp */ > uint8_t mfc_ttl; /* route interface ttl */ > uint8_t mfc_flags; /* MRT_MFC_FLAGS_* flags */ > - struct in_addr mfc_rp; /* the RP address */ > - u_long mfc_expire; /* expire timer */ > +}; > + > +enum mfc_counters { > + mfc_packets, /* packet count for src-grp */ > + mfc_bytes, /* byte count for src-grp */ > + mfc_wrong_if, /* wrong if for src-grp */ > + mfc_ncounters > }; > > /* > Index: sys/percpu.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/sys/percpu.h,v > diff -u -p -r1.9 percpu.h > --- sys/percpu.h 16 Sep 2023 09:33:27 -0000 1.9 > +++ sys/percpu.h 26 Jun 2026 11:54:40 -0000 > @@ -54,6 +54,7 @@ struct cpumem *cpumem_get(struct pool *) > void cpumem_put(struct pool *, struct cpumem *); > > struct cpumem *cpumem_malloc(size_t, int); > +struct cpumem *cpumem_malloc_wait(size_t, int, int); > struct cpumem *cpumem_malloc_ncpus(struct cpumem *, size_t, int); > void cpumem_free(struct cpumem *, int, size_t); > > @@ -111,6 +112,7 @@ static struct { \ > */ > > struct cpumem *counters_alloc(unsigned int); > +struct cpumem *counters_alloc_wait(unsigned int, int); > struct cpumem *counters_alloc_ncpus(struct cpumem *, unsigned int); > void counters_free(struct cpumem *, unsigned int); > void counters_read(struct cpumem *, uint64_t *, unsigned int, >