Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
cache route at pf state
To:
tech@openbsd.org
Date:
Sat, 20 Jul 2024 19:20:32 +0200

Download raw body.

Thread
Hi,

When forwarding packets, rtable_match takes quite some time.

http://bluhm.genua.de/perform/results/2024-07-19T07:25:47Z/2024-07-19T00%3A00%3A00Z/btrace/ssh_perform%40lt13_iperf3_-6_-cfdd7%3Ae83e%3A66bc%3A0346%3A%3A36_-P10_-t10-btrace-kstack.0.svg?s=rtable_match

This can be avoided by caching the route at the pf state.

Comments, tests?

bluhm

Index: net/if_bridge.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
diff -u -p -r1.370 if_bridge.c
--- net/if_bridge.c	14 Apr 2024 20:46:27 -0000	1.370
+++ net/if_bridge.c	20 Jul 2024 14:52:40 -0000
@@ -1619,7 +1619,7 @@ bridge_ipsec(struct ifnet *ifp, struct e
 #if NPF > 0
 			if ((encif = enc_getif(tdb->tdb_rdomain,
 			    tdb->tdb_tap)) == NULL ||
-			    pf_test(af, dir, encif, &m) != PF_PASS) {
+			    pf_test(af, dir, encif, &m, NULL) != PF_PASS) {
 				m_freem(m);
 				tdb_unref(tdb);
 				return (1);
@@ -1725,7 +1725,7 @@ bridge_ip(struct ifnet *brifp, int dir, 
 #endif /* IPSEC */
 #if NPF > 0
 		/* Finally, we get to filter the packet! */
-		if (pf_test(AF_INET, dir, ifp, &m) != PF_PASS)
+		if (pf_test(AF_INET, dir, ifp, &m, NULL) != PF_PASS)
 			goto dropit;
 		if (m == NULL)
 			goto dropit;
@@ -1768,7 +1768,7 @@ bridge_ip(struct ifnet *brifp, int dir, 
 #endif /* IPSEC */
 
 #if NPF > 0
-		if (pf_test(AF_INET6, dir, ifp, &m) != PF_PASS)
+		if (pf_test(AF_INET6, dir, ifp, &m, NULL) != PF_PASS)
 			goto dropit;
 		if (m == NULL)
 			return (NULL);
Index: net/if_tpmr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_tpmr.c,v
diff -u -p -r1.35 if_tpmr.c
--- net/if_tpmr.c	23 Dec 2023 10:52:54 -0000	1.35
+++ net/if_tpmr.c	20 Jul 2024 14:52:40 -0000
@@ -295,7 +295,7 @@ tpmr_pf(struct ifnet *ifp0, int dir, str
 			return (NULL);
 	}
 
-	if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) {
+	if (pf_test(fam->af, dir, ifp0, &m, NULL) != PF_PASS) {
 		m_freem(m);
 		return (NULL);
 	}
Index: net/if_veb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_veb.c,v
diff -u -p -r1.35 if_veb.c
--- net/if_veb.c	13 Feb 2024 12:22:09 -0000	1.35
+++ net/if_veb.c	20 Jul 2024 14:52:40 -0000
@@ -644,7 +644,7 @@ veb_pf(struct ifnet *ifp0, int dir, stru
 	if (m == NULL)
 		return (NULL);
 
-	if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) {
+	if (pf_test(fam->af, dir, ifp0, &m, NULL) != PF_PASS) {
 		m_freem(m);
 		return (NULL);
 	}
@@ -839,7 +839,7 @@ veb_ipsec_proto_out(struct mbuf *m, sa_f
 #if NPF > 0
 	encifp = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap);
 	if (encifp != NULL) {
-		if (pf_test(af, PF_OUT, encifp, &m) != PF_PASS) {
+		if (pf_test(af, PF_OUT, encifp, &m, NULL) != PF_PASS) {
 			m_freem(m);
 			return (NULL);
 		}
Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
diff -u -p -r1.1203 pf.c
--- net/pf.c	14 Jul 2024 18:53:39 -0000	1.1203
+++ net/pf.c	20 Jul 2024 15:13:53 -0000
@@ -113,6 +113,7 @@ struct pf_queuehead	*pf_queues_inactive;
 struct pf_status	 pf_status;
 
 struct mutex		 pf_inp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
+struct mutex		 pf_st_ro_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
 
 int			 pf_hdr_limit = 20;  /* arbitrary limit, tune in ddb */
 
@@ -6638,7 +6639,7 @@ pf_route(struct pf_pdesc *pd, struct pf_
 		ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
 
 	if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
-		if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
+		if (pf_test(AF_INET, PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
 		else if (m0 == NULL)
 			goto done;
@@ -6758,7 +6759,7 @@ pf_route6(struct pf_pdesc *pd, struct pf
 		ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
 
 	if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
-		if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
+		if (pf_test(AF_INET6, PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
 		else if (m0 == NULL)
 			goto done;
@@ -7533,7 +7534,8 @@ pf_counters_inc(int action, struct pf_pd
 }
 
 int
-pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
+pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0,
+    struct route *ro)
 {
 #if NCARP > 0
 	struct ifnet		*ifp0;
@@ -7761,7 +7763,7 @@ pf_test(sa_family_t af, int fwdir, struc
 		    pf_syncookie_validate(&pd)) {
 			struct mbuf	*msyn = pf_syncookie_recreate_syn(&pd);
 			if (msyn) {
-				action = pf_test(af, fwdir, ifp, &msyn);
+				action = pf_test(af, fwdir, ifp, &msyn, NULL);
 				m_freem(msyn);
 				if (action == PF_PASS || action == PF_AFRT) {
 					PF_STATE_ENTER_READ();
@@ -8039,12 +8041,47 @@ done:
 			action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
 	}
 #endif	/* INET6 */
+	/* update interface index for pflow(4) */
 	if (st && action != PF_DROP) {
 		if (!st->if_index_in && dir == PF_IN)
 			st->if_index_in = ifp->if_index;
 		else if (!st->if_index_out && dir == PF_OUT)
 			st->if_index_out = ifp->if_index;
 	}
+	/* the pf state keeps cache of route */
+	if (ro && st && pd.m && action == PF_PASS && pf_ouraddr(pd.m) != 1) {
+		/* check cache without lock, worst case is additional lookup */
+		if (memcmp(ro, &st->route, sizeof(*ro)) != 0) {
+			mtx_enter(&pf_st_ro_mtx);
+			if (st->route.ro_rt != NULL) {
+				rtfree(ro->ro_rt);
+				*ro = st->route;
+				rtref(ro->ro_rt);
+			}
+			mtx_leave(&pf_st_ro_mtx);
+		}
+		switch (pd.naf) {
+		case AF_INET:
+			route_mpath(ro, &pd.ndaddr.v4, &pd.nsaddr.v4,
+			    pd.m->m_pkthdr.ph_rtableid);
+			break;
+#ifdef INET6
+		case AF_INET6:
+			route6_mpath(ro, &pd.ndaddr.v6, &pd.nsaddr.v6,
+			    pd.m->m_pkthdr.ph_rtableid);
+			break;
+#endif /* INET6 */
+		}
+		if (ro->ro_rt != NULL) {
+			if (memcmp(&st->route, ro, sizeof(*ro)) != 0) {
+				mtx_enter(&pf_st_ro_mtx);
+				rtfree(st->route.ro_rt);
+				st->route = *ro;
+				rtref(st->route.ro_rt);
+				mtx_leave(&pf_st_ro_mtx);
+			}
+		}
+	}
 
  out:
 	*m0 = pd.m;
@@ -8327,6 +8364,7 @@ pf_state_unref(struct pf_state *st)
 
 		pf_state_key_unref(st->key[PF_SK_WIRE]);
 		pf_state_key_unref(st->key[PF_SK_STACK]);
+		rtfree(st->route.ro_rt);
 
 		pool_put(&pf_state_pl, st);
 	}
Index: net/pfvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v
diff -u -p -r1.538 pfvar.h
--- net/pfvar.h	13 May 2024 01:15:53 -0000	1.538
+++ net/pfvar.h	20 Jul 2024 14:52:40 -0000
@@ -1640,7 +1640,8 @@ int				 pf_setup_pdesc(struct pf_pdesc *
 				    int, struct pfi_kif *, struct mbuf *,
 				    u_short *);
 
-int	pf_test(sa_family_t, int, struct ifnet *, struct mbuf **);
+int	pf_test(sa_family_t, int, struct ifnet *, struct mbuf **,
+	    struct route *);
 
 void	pf_poolmask(struct pf_addr *, struct pf_addr*,
 	    struct pf_addr *, struct pf_addr *, sa_family_t);
Index: net/pfvar_priv.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar_priv.h,v
diff -u -p -r1.37 pfvar_priv.h
--- net/pfvar_priv.h	21 Jun 2024 12:51:29 -0000	1.37
+++ net/pfvar_priv.h	20 Jul 2024 14:52:40 -0000
@@ -97,6 +97,7 @@ RBT_PROTOTYPE(pf_state_tree, pf_state_ke
  *	S	pfsync
  *	L	pf_state_list
  *	g	pf_purge gc
+ *	R	pf_st_ro_mtx
  */
 
 struct pf_state {
@@ -120,6 +121,7 @@ struct pf_state {
 	struct pf_sn_head	 src_nodes;	/* [I] */
 	struct pf_state_key	*key[2];	/* [I] stack and wire */
 	struct pfi_kif		*kif;		/* [I] */
+	struct route		 route;		/* [R] */
 	struct mutex		 mtx;
 	pf_refcnt_t		 refcnt;
 	u_int64_t		 packets[2];
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.400 ip_input.c
--- netinet/ip_input.c	19 Jul 2024 16:58:31 -0000	1.400
+++ netinet/ip_input.c	20 Jul 2024 14:52:40 -0000
@@ -464,7 +464,7 @@ ip_input_if(struct mbuf **mp, int *offp,
 	 * Packet filter
 	 */
 	odst = ip->ip_dst;
-	if (pf_test(AF_INET, PF_IN, ifp, mp) != PF_PASS)
+	if (pf_test(AF_INET, PF_IN, ifp, mp, &ro) != PF_PASS)
 		goto bad;
 	m = *mp;
 	if (m == NULL)
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.401 ip_output.c
--- netinet/ip_output.c	2 Jul 2024 18:33:47 -0000	1.401
+++ netinet/ip_output.c	20 Jul 2024 14:52:40 -0000
@@ -403,7 +403,7 @@ sendit:
 	 */
 #if NPF > 0
 	if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT,
-	    ifp, &m) != PF_PASS) {
+	    ifp, &m, NULL) != PF_PASS) {
 		error = EACCES;
 		goto bad;
 	}
@@ -584,7 +584,8 @@ ip_output_ipsec_send(struct tdb *tdb, st
 	 * Packet filter
 	 */
 	if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
-	    pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) {
+	    pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m, NULL)
+	    != PF_PASS) {
 		m_freem(m);
 		return EACCES;
 	}
Index: netinet/ipsec_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
diff -u -p -r1.206 ipsec_input.c
--- netinet/ipsec_input.c	16 Sep 2023 09:33:27 -0000	1.206
+++ netinet/ipsec_input.c	20 Jul 2024 14:52:40 -0000
@@ -580,7 +580,7 @@ ipsec_common_input_cb(struct mbuf **mp, 
 		if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
 			goto baddone;
 		}
-		if (pf_test(af, PF_IN, ifp, mp) != PF_PASS) {
+		if (pf_test(af, PF_IN, ifp, mp, NULL) != PF_PASS) {
 			if_put(ifp);
 			goto baddone;
 		}
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
diff -u -p -r1.124 ip6_forward.c
--- netinet6/ip6_forward.c	19 Jul 2024 16:58:32 -0000	1.124
+++ netinet6/ip6_forward.c	20 Jul 2024 14:52:40 -0000
@@ -322,7 +322,7 @@ reroute:
 		ip6->ip6_dst.s6_addr16[1] = 0;
 
 #if NPF > 0
-	if (pf_test(AF_INET6, PF_FWD, ifp, &m) != PF_PASS) {
+	if (pf_test(AF_INET6, PF_FWD, ifp, &m, NULL) != PF_PASS) {
 		m_freem(m);
 		goto senderr;
 	}
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
diff -u -p -r1.266 ip6_input.c
--- netinet6/ip6_input.c	19 Jul 2024 16:58:32 -0000	1.266
+++ netinet6/ip6_input.c	20 Jul 2024 14:52:40 -0000
@@ -405,7 +405,7 @@ ip6_input_if(struct mbuf **mp, int *offp
 	 * Packet filter
 	 */
 	odst = ip6->ip6_dst;
-	if (pf_test(AF_INET6, PF_IN, ifp, mp) != PF_PASS)
+	if (pf_test(AF_INET6, PF_IN, ifp, mp, &ro) != PF_PASS)
 		goto bad;
 	m = *mp;
 	if (m == NULL)
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
diff -u -p -r1.292 ip6_output.c
--- netinet6/ip6_output.c	4 Jul 2024 12:50:08 -0000	1.292
+++ netinet6/ip6_output.c	20 Jul 2024 14:52:40 -0000
@@ -619,7 +619,7 @@ reroute:
 	}
 
 #if NPF > 0
-	if (pf_test(AF_INET6, PF_OUT, ifp, &m) != PF_PASS) {
+	if (pf_test(AF_INET6, PF_OUT, ifp, &m, NULL) != PF_PASS) {
 		error = EACCES;
 		m_freem(m);
 		goto done;
@@ -2840,7 +2840,8 @@ ip6_output_ipsec_send(struct tdb *tdb, s
 	 * Packet filter
 	 */
 	if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
-	    pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) {
+	    pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m, NULL)
+	    != PF_PASS) {
 		m_freem(m);
 		return EACCES;
 	}