Download raw body.
route cache per softnet thread
On Mon, Feb 17, 2025 at 05:37:01PM +0100, Claudio Jeker wrote:
> On Fri, Feb 14, 2025 at 04:19:36PM +0100, Alexander Bluhm wrote:
> > Hi,
> >
> > When experimenting with parallel TCP, I realized that we need local
> > storage per softnet thread. That allows effective caching.
> >
> > The same is true for IP forwarding route. In one of my benchmarks,
> > which is forwarding 10 parallel TCP streams over OpenBSD, thoughput
> > increases by 30%.
> >
> > As you can see here, route6_mpath() uses 10.5% CPU time basically
> > for rtable_match().
> > http://bluhm.genua.de/perform/results/2025-02-09T21%3A10%3A25Z/2025-02-09T00%3A00%3A00Z/btrace/ssh_perform%40lt13_iperf3_-6_-cfdd7%3Ae83e%3A66bc%3A0346%3A%3A36_-P10_-t10-btrace-kstack.0.svg?s=route6_mpath
> >
> > With the patch below patch only 2.6% CPU are used in route6_mpath().
> > http://bluhm.genua.de/perform/results/2025-02-09T21%3A10%3A25Z/patch-sys-forward-route-cache.0/btrace/ssh_perform%40lt13_iperf3_-6_-cfdd7%3Ae83e%3A66bc%3A0346%3A%3A36_-P10_-t10-btrace-kstack.0.svg?s=route6_mpath
> >
> > Idea is that every softnet task has its own storage. It is passed
> > down to IP input in mbuf cookie. If the cookie is set, use the
> > route cache in struct softnet. Otherwise use route cache in stack
> > memory of IP input. From there the route cache is passed to IP
> > forward and IP output as usual.
> >
> > When I sent a similar diff with cache per CPU memory, there were
> > concerns with sleeping and switching CPU. Per softnet memory should
> > be safe.
> >
> > There was also the question when the cache is invalidated. I have
> > implemented a route generation number two releases ago. The cache
> > contains the this number when the route was stored. After a change
> > of the routing table, the cache is filled with a new route during
> > next lookup.
> >
> > ok?
>
> In general I dislike how ph_cookie is used in more and more places.
> That field has no validity checks and so code just dereferences the
> returned value. This is a ticking bomb.
> I would prefer to have a better concept of per softnet / task storage.
> Maybe dlg@ has a great idea on how to do that.
I am also not a big fan of ph_cookie. One alternative is to store
softnet index in mbuf. But we want to keep size of mbuf small.
Interface index is at most 16 bit. So we can steal some storage
there.
> Also if you use an array of struct softnet to back this then you should
> make sure that the objects end up on different cache lines so you don't
> constantly invalidate each others caches.
I added an __aligned(64).
Is the diff below better?
bluhm
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
diff -u -p -r1.726 if.c
--- net/if.c 3 Feb 2025 08:58:52 -0000 1.726
+++ net/if.c 17 Feb 2025 22:20:49 -0000
@@ -241,19 +241,10 @@ struct rwlock if_tmplist_lock = RWLOCK_I
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void if_hooks_run(struct task_list *);
-int ifq_congestion;
-
-int netisr;
-
-struct softnet {
- char sn_name[16];
- struct taskq *sn_taskq;
-};
-
-#define NET_TASKQ 4
+int ifq_congestion;
+int netisr;
struct softnet softnets[NET_TASKQ];
-
-struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
+struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
/*
* Serialize socket operations to ensure no new sleeping points
@@ -979,7 +970,7 @@ if_output_local(struct ifnet *ifp, struc
}
void
-if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
+if_input_process(struct ifnet *ifp, struct mbuf_list *ml, unsigned int idx)
{
struct mbuf *m;
@@ -996,9 +987,14 @@ if_input_process(struct ifnet *ifp, stru
* read only or MP safe. Usually they hold the exclusive net lock.
*/
+ idx = net_idx(idx);
+ KASSERT(idx < 256 - 1);
+
NET_LOCK_SHARED();
- while ((m = ml_dequeue(ml)) != NULL)
+ while ((m = ml_dequeue(ml)) != NULL) {
+ m->m_pkthdr.ph_softidx = idx + 1;
(*ifp->if_input)(ifp, m);
+ }
NET_UNLOCK_SHARED();
}
@@ -3672,16 +3668,23 @@ unhandled_af(int af)
panic("unhandled af %d", af);
}
-struct taskq *
-net_tq(unsigned int ifindex)
+unsigned int
+net_idx(unsigned int ifindex)
{
- struct softnet *sn;
static int nettaskqs;
if (nettaskqs == 0)
nettaskqs = min(NET_TASKQ, ncpus);
- sn = &softnets[ifindex % nettaskqs];
+ return (ifindex % nettaskqs);
+}
+
+struct taskq *
+net_tq(unsigned int ifindex)
+{
+ struct softnet *sn;
+
+ sn = &softnets[net_idx(ifindex)];
return (sn->sn_taskq);
}
Index: net/if.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.h,v
diff -u -p -r1.217 if.h
--- net/if.h 9 Jun 2024 16:25:28 -0000 1.217
+++ net/if.h 17 Feb 2025 22:15:40 -0000
@@ -560,7 +560,10 @@ void if_congestion(void);
int if_congested(void);
__dead void unhandled_af(int);
int if_setlladdr(struct ifnet *, const uint8_t *);
-struct taskq * net_tq(unsigned int);
+unsigned int
+ net_idx(unsigned int);
+struct taskq *
+ net_tq(unsigned int);
void net_tq_barriers(const char *);
#endif /* _KERNEL */
Index: net/if_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
diff -u -p -r1.135 if_var.h
--- net/if_var.h 24 Jan 2025 09:19:07 -0000 1.135
+++ net/if_var.h 17 Feb 2025 22:05:13 -0000
@@ -46,6 +46,7 @@
#include <sys/timeout.h>
#include <net/ifq.h>
+#include <net/route.h>
/*
* Structures defining a network interface, providing a packet
@@ -301,6 +302,14 @@ struct ifg_list {
#define IF_WWAN_DEFAULT_PRIORITY 6
#define IF_CARP_DEFAULT_PRIORITY 15
+struct softnet {
+ char sn_name[16];
+ struct taskq *sn_taskq;
+ struct route sn_route;
+} __aligned(64);
+#define NET_TASKQ 4
+extern struct softnet softnets[NET_TASKQ];
+
/*
* Network stack input queues.
*/
@@ -331,7 +340,7 @@ int if_enqueue(struct ifnet *, struct mb
int if_enqueue_ifq(struct ifnet *, struct mbuf *);
void if_input(struct ifnet *, struct mbuf_list *);
void if_vinput(struct ifnet *, struct mbuf *);
-void if_input_process(struct ifnet *, struct mbuf_list *);
+void if_input_process(struct ifnet *, struct mbuf_list *, unsigned int);
int if_input_local(struct ifnet *, struct mbuf *, sa_family_t);
int if_output_ml(struct ifnet *, struct mbuf_list *,
struct sockaddr *, struct rtentry *);
Index: net/ifq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
diff -u -p -r1.56 ifq.c
--- net/ifq.c 3 Feb 2025 08:58:52 -0000 1.56
+++ net/ifq.c 17 Feb 2025 21:03:07 -0000
@@ -862,7 +862,7 @@ ifiq_process(void *arg)
ml_init(&ifiq->ifiq_ml);
mtx_leave(&ifiq->ifiq_mtx);
- if_input_process(ifiq->ifiq_if, &ml);
+ if_input_process(ifiq->ifiq_if, &ml, ifiq->ifiq_idx);
}
int
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.403 ip_input.c
--- netinet/ip_input.c 3 Jan 2025 21:27:40 -0000 1.403
+++ netinet/ip_input.c 17 Feb 2025 22:17:09 -0000
@@ -441,7 +441,7 @@ bad:
int
ip_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp)
{
- struct route ro;
+ struct route iproute, *ro = NULL;
struct mbuf *m;
struct ip *ip;
int hlen;
@@ -452,7 +452,6 @@ ip_input_if(struct mbuf **mp, int *offp,
KASSERT(*offp == 0);
- ro.ro_rt = NULL;
ipstat_inc(ips_total);
m = *mp = ipv4_check(ifp, *mp);
if (m == NULL)
@@ -512,7 +511,16 @@ ip_input_if(struct mbuf **mp, int *offp,
goto out;
}
- switch(in_ouraddr(m, ifp, &ro, flags)) {
+ if ((*mp)->m_pkthdr.ph_softidx == 0) {
+ ro = &iproute;
+ ro->ro_rt = NULL;
+ } else {
+ struct softnet *sn;
+
+ sn = &softnets[m->m_pkthdr.ph_softidx - 1];
+ ro = &sn->sn_route;
+ }
+ switch(in_ouraddr(m, ifp, ro, flags)) {
case 2:
goto bad;
case 1:
@@ -614,15 +622,17 @@ ip_input_if(struct mbuf **mp, int *offp,
}
#endif /* IPSEC */
- ip_forward(m, ifp, &ro, flags);
+ ip_forward(m, ifp, ro, flags);
*mp = NULL;
- rtfree(ro.ro_rt);
+ if (ro == &iproute)
+ rtfree(ro->ro_rt);
return IPPROTO_DONE;
bad:
nxt = IPPROTO_DONE;
m_freemp(mp);
out:
- rtfree(ro.ro_rt);
+ if (ro == &iproute)
+ rtfree(ro->ro_rt);
return nxt;
}
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
diff -u -p -r1.267 ip6_input.c
--- netinet6/ip6_input.c 21 Nov 2024 20:15:44 -0000 1.267
+++ netinet6/ip6_input.c 17 Feb 2025 21:03:22 -0000
@@ -362,7 +362,7 @@ bad:
int
ip6_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp)
{
- struct route ro;
+ struct route iproute, *ro = NULL;
struct mbuf *m;
struct ip6_hdr *ip6;
struct rtentry *rt;
@@ -375,7 +375,6 @@ ip6_input_if(struct mbuf **mp, int *offp
KASSERT(*offp == 0);
- ro.ro_rt = NULL;
ip6stat_inc(ip6s_total);
m = *mp = ipv6_check(ifp, *mp);
if (m == NULL)
@@ -533,7 +532,18 @@ ip6_input_if(struct mbuf **mp, int *offp
/*
* Unicast check
*/
- rt = route6_mpath(&ro, &ip6->ip6_dst, &ip6->ip6_src,
+ if ((*mp)->m_pkthdr.ph_cookie == NULL) {
+ ro = &iproute;
+ ro->ro_rt = NULL;
+ } else {
+ struct softnet *sn;
+
+ sn = (*mp)->m_pkthdr.ph_cookie;
+ /* sanity check that noone else uses mbuf cookie */
+ KASSERT(sn >= softnets && sn < softnets + sizeof(softnets));
+ ro = &sn->sn_route;
+ }
+ rt = route6_mpath(ro, &ip6->ip6_dst, &ip6->ip6_src,
m->m_pkthdr.ph_rtableid);
/*
@@ -631,15 +641,17 @@ ip6_input_if(struct mbuf **mp, int *offp
}
#endif /* IPSEC */
- ip6_forward(m, &ro, flags);
+ ip6_forward(m, ro, flags);
*mp = NULL;
- rtfree(ro.ro_rt);
+ if (ro == &iproute)
+ rtfree(ro->ro_rt);
return IPPROTO_DONE;
bad:
nxt = IPPROTO_DONE;
m_freemp(mp);
out:
- rtfree(ro.ro_rt);
+ if (ro == &iproute)
+ rtfree(ro->ro_rt);
return nxt;
}
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
diff -u -p -r1.265 mbuf.h
--- sys/mbuf.h 5 Nov 2024 13:15:13 -0000 1.265
+++ sys/mbuf.h 17 Feb 2025 22:06:09 -0000
@@ -122,13 +122,13 @@ struct pkthdr_pf {
#endif
/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
-struct pkthdr {
+struct pkthdr {
void *ph_cookie; /* additional data */
SLIST_HEAD(, m_tag) ph_tags; /* list of packet tags */
int64_t ph_timestamp; /* packet timestamp */
int len; /* total packet length */
u_int ph_rtableid; /* routing table id */
- u_int ph_ifidx; /* rcv interface index */
+ u_int16_t ph_ifidx; /* rcv interface index */
u_int16_t ph_tagsset; /* mtags attached */
u_int16_t ph_flowid; /* pseudo unique flow id */
u_int16_t csum_flags; /* checksum flags */
@@ -136,6 +136,8 @@ struct pkthdr {
u_int16_t ph_mss; /* TCP max segment size */
u_int8_t ph_loopcnt; /* mbuf is looping in kernel */
u_int8_t ph_family; /* af, used when queueing */
+ u_int8_t ph_softidx; /* index of softnet thread */
+ u_int8_t ph_pad[1];
struct pkthdr_pf pf;
};
route cache per softnet thread