Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
route cache per softnet thread
To:
tech@openbsd.org
Date:
Fri, 14 Feb 2025 16:19:36 +0100

Download raw body.

Thread
Hi,

When experimenting with parallel TCP, I realized that we need local
storage per softnet thread.  That allows effective caching.

The same is true for the IP forwarding route.  In one of my benchmarks,
which is forwarding 10 parallel TCP streams over OpenBSD, throughput
increases by 30%.

As you can see here, route6_mpath() uses 10.5% CPU time basically
for rtable_match().
http://bluhm.genua.de/perform/results/2025-02-09T21%3A10%3A25Z/2025-02-09T00%3A00%3A00Z/btrace/ssh_perform%40lt13_iperf3_-6_-cfdd7%3Ae83e%3A66bc%3A0346%3A%3A36_-P10_-t10-btrace-kstack.0.svg?s=route6_mpath

With the patch below, only 2.6% CPU is used in route6_mpath().
http://bluhm.genua.de/perform/results/2025-02-09T21%3A10%3A25Z/patch-sys-forward-route-cache.0/btrace/ssh_perform%40lt13_iperf3_-6_-cfdd7%3Ae83e%3A66bc%3A0346%3A%3A36_-P10_-t10-btrace-kstack.0.svg?s=route6_mpath

The idea is that every softnet task has its own storage.  It is passed
down to IP input in the mbuf cookie.  If the cookie is set, use the
route cache in struct softnet.  Otherwise use the route cache in stack
memory of IP input.  From there the route cache is passed to IP
forward and IP output as usual.

When I sent a similar diff with cache per CPU memory, there were
concerns with sleeping and switching CPU.  Per softnet memory should
be safe.

There was also the question when the cache is invalidated.  I have
implemented a route generation number two releases ago.  The cache
contains the value of this number when the route was stored.  After a change
of the routing table, the cache is filled with a new route during
next lookup.

ok?

bluhm

Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
diff -u -p -r1.726 if.c
--- net/if.c	3 Feb 2025 08:58:52 -0000	1.726
+++ net/if.c	14 Feb 2025 14:31:40 -0000
@@ -241,19 +241,10 @@ struct rwlock if_tmplist_lock = RWLOCK_I
 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
 void	if_hooks_run(struct task_list *);
 
-int	ifq_congestion;
-
-int		 netisr;
-
-struct softnet {
-	char		 sn_name[16];
-	struct taskq	*sn_taskq;
-};
-
-#define	NET_TASKQ	4
+int		ifq_congestion;
+int		netisr;
 struct softnet	softnets[NET_TASKQ];
-
-struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
+struct task	if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
 
 /*
  * Serialize socket operations to ensure no new sleeping points
@@ -979,9 +970,10 @@ if_output_local(struct ifnet *ifp, struc
 }
 
 void
-if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
+if_input_process(struct ifnet *ifp, struct mbuf_list *ml, unsigned int idx)
 {
 	struct mbuf *m;
+	struct softnet *sn;
 
 	if (ml_empty(ml))
 		return;
@@ -996,9 +988,13 @@ if_input_process(struct ifnet *ifp, stru
 	 * read only or MP safe.  Usually they hold the exclusive net lock.
 	 */
 
+	sn = net_sn(idx);
+
 	NET_LOCK_SHARED();
-	while ((m = ml_dequeue(ml)) != NULL)
+	while ((m = ml_dequeue(ml)) != NULL) {
+		m->m_pkthdr.ph_cookie = sn;
 		(*ifp->if_input)(ifp, m);
+	}
 	NET_UNLOCK_SHARED();
 }
 
@@ -3672,18 +3668,21 @@ unhandled_af(int af)
 	panic("unhandled af %d", af);
 }
 
-struct taskq *
-net_tq(unsigned int ifindex)
+struct softnet *
+net_sn(unsigned int ifindex)
 {
-	struct softnet *sn;
 	static int nettaskqs;
 
 	if (nettaskqs == 0)
 		nettaskqs = min(NET_TASKQ, ncpus);
 
-	sn = &softnets[ifindex % nettaskqs];
+	return (&softnets[ifindex % nettaskqs]);
+}
 
-	return (sn->sn_taskq);
+struct taskq *
+net_tq(unsigned int ifindex)
+{
+	return (net_sn(ifindex)->sn_taskq);
 }
 
 void
Index: net/if.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.h,v
diff -u -p -r1.217 if.h
--- net/if.h	9 Jun 2024 16:25:28 -0000	1.217
+++ net/if.h	14 Feb 2025 14:31:40 -0000
@@ -560,7 +560,10 @@ void	if_congestion(void);
 int	if_congested(void);
 __dead void	unhandled_af(int);
 int	if_setlladdr(struct ifnet *, const uint8_t *);
-struct taskq * net_tq(unsigned int);
+struct softnet *
+	net_sn(unsigned int);
+struct taskq *
+	net_tq(unsigned int);
 void	net_tq_barriers(const char *);
 
 #endif /* _KERNEL */
Index: net/if_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
diff -u -p -r1.135 if_var.h
--- net/if_var.h	24 Jan 2025 09:19:07 -0000	1.135
+++ net/if_var.h	14 Feb 2025 14:31:40 -0000
@@ -46,6 +46,7 @@
 #include <sys/timeout.h>
 
 #include <net/ifq.h>
+#include <net/route.h>
 
 /*
  * Structures defining a network interface, providing a packet
@@ -301,6 +302,14 @@ struct ifg_list {
 #define IF_WWAN_DEFAULT_PRIORITY	6
 #define IF_CARP_DEFAULT_PRIORITY	15
 
+struct softnet {
+	char			 sn_name[16];
+	struct taskq		*sn_taskq;
+	struct route		 sn_route;
+};
+#define NET_TASKQ	4
+extern struct softnet	softnets[NET_TASKQ];
+
 /*
  * Network stack input queues.
  */
@@ -331,7 +340,7 @@ int	if_enqueue(struct ifnet *, struct mb
 int	if_enqueue_ifq(struct ifnet *, struct mbuf *);
 void	if_input(struct ifnet *, struct mbuf_list *);
 void	if_vinput(struct ifnet *, struct mbuf *);
-void	if_input_process(struct ifnet *, struct mbuf_list *);
+void	if_input_process(struct ifnet *, struct mbuf_list *, unsigned int);
 int	if_input_local(struct ifnet *, struct mbuf *, sa_family_t);
 int	if_output_ml(struct ifnet *, struct mbuf_list *,
 	    struct sockaddr *, struct rtentry *);
Index: net/ifq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
diff -u -p -r1.56 ifq.c
--- net/ifq.c	3 Feb 2025 08:58:52 -0000	1.56
+++ net/ifq.c	14 Feb 2025 14:31:40 -0000
@@ -862,7 +862,7 @@ ifiq_process(void *arg)
 	ml_init(&ifiq->ifiq_ml);
 	mtx_leave(&ifiq->ifiq_mtx);
 
-	if_input_process(ifiq->ifiq_if, &ml);
+	if_input_process(ifiq->ifiq_if, &ml, ifiq->ifiq_idx);
 }
 
 int
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.403 ip_input.c
--- netinet/ip_input.c	3 Jan 2025 21:27:40 -0000	1.403
+++ netinet/ip_input.c	14 Feb 2025 14:31:40 -0000
@@ -441,7 +441,7 @@ bad:
 int
 ip_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp)
 {
-	struct route ro;
+	struct route iproute, *ro = NULL;
 	struct mbuf *m;
 	struct ip *ip;
 	int hlen;
@@ -452,7 +452,6 @@ ip_input_if(struct mbuf **mp, int *offp,
 
 	KASSERT(*offp == 0);
 
-	ro.ro_rt = NULL;
 	ipstat_inc(ips_total);
 	m = *mp = ipv4_check(ifp, *mp);
 	if (m == NULL)
@@ -512,7 +511,18 @@ ip_input_if(struct mbuf **mp, int *offp,
 		goto out;
 	}
 
-	switch(in_ouraddr(m, ifp, &ro, flags)) {
+	if ((*mp)->m_pkthdr.ph_cookie == NULL) {
+		ro = &iproute;
+		ro->ro_rt = NULL;
+	} else {
+		struct softnet *sn;
+
+		sn = (*mp)->m_pkthdr.ph_cookie;
+		/* sanity check that no one else uses the mbuf cookie */
+		KASSERT(sn >= softnets && sn < softnets + nitems(softnets));
+		ro = &sn->sn_route;
+	}
+	switch(in_ouraddr(m, ifp, ro, flags)) {
 	case 2:
 		goto bad;
 	case 1:
@@ -614,15 +624,17 @@ ip_input_if(struct mbuf **mp, int *offp,
 	}
 #endif /* IPSEC */
 
-	ip_forward(m, ifp, &ro, flags);
+	ip_forward(m, ifp, ro, flags);
 	*mp = NULL;
-	rtfree(ro.ro_rt);
+	if (ro == &iproute)
+		rtfree(ro->ro_rt);
 	return IPPROTO_DONE;
  bad:
 	nxt = IPPROTO_DONE;
 	m_freemp(mp);
  out:
-	rtfree(ro.ro_rt);
+	if (ro == &iproute)
+		rtfree(ro->ro_rt);
 	return nxt;
 }
 
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
diff -u -p -r1.267 ip6_input.c
--- netinet6/ip6_input.c	21 Nov 2024 20:15:44 -0000	1.267
+++ netinet6/ip6_input.c	14 Feb 2025 14:31:40 -0000
@@ -362,7 +362,7 @@ bad:
 int
 ip6_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp)
 {
-	struct route ro;
+	struct route iproute, *ro = NULL;
 	struct mbuf *m;
 	struct ip6_hdr *ip6;
 	struct rtentry *rt;
@@ -375,7 +375,6 @@ ip6_input_if(struct mbuf **mp, int *offp
 
 	KASSERT(*offp == 0);
 
-	ro.ro_rt = NULL;
 	ip6stat_inc(ip6s_total);
 	m = *mp = ipv6_check(ifp, *mp);
 	if (m == NULL)
@@ -533,7 +532,18 @@ ip6_input_if(struct mbuf **mp, int *offp
 	/*
 	 *  Unicast check
 	 */
-	rt = route6_mpath(&ro, &ip6->ip6_dst, &ip6->ip6_src,
+	if ((*mp)->m_pkthdr.ph_cookie == NULL) {
+		ro = &iproute;
+		ro->ro_rt = NULL;
+	} else {
+		struct softnet *sn;
+
+		sn = (*mp)->m_pkthdr.ph_cookie;
+		/* sanity check that no one else uses the mbuf cookie */
+		KASSERT(sn >= softnets && sn < softnets + nitems(softnets));
+		ro = &sn->sn_route;
+	}
+	rt = route6_mpath(ro, &ip6->ip6_dst, &ip6->ip6_src,
 	    m->m_pkthdr.ph_rtableid);
 
 	/*
@@ -631,15 +641,17 @@ ip6_input_if(struct mbuf **mp, int *offp
 	}
 #endif /* IPSEC */
 
-	ip6_forward(m, &ro, flags);
+	ip6_forward(m, ro, flags);
 	*mp = NULL;
-	rtfree(ro.ro_rt);
+	if (ro == &iproute)
+		rtfree(ro->ro_rt);
 	return IPPROTO_DONE;
  bad:
 	nxt = IPPROTO_DONE;
 	m_freemp(mp);
  out:
-	rtfree(ro.ro_rt);
+	if (ro == &iproute)
+		rtfree(ro->ro_rt);
 	return nxt;
 }