Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: cache route at pf state
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Sun, 21 Jul 2024 09:48:15 +0200

Download raw body.

Thread
On Sat, Jul 20, 2024 at 07:20:32PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> When forwarding packets, rtable_match takes quite some time.
> 
> http://bluhm.genua.de/perform/results/2024-07-19T07:25:47Z/2024-07-19T00%3A00%3A00Z/btrace/ssh_perform%40lt13_iperf3_-6_-cfdd7%3Ae83e%3A66bc%3A0346%3A%3A36_-P10_-t10-btrace-kstack.0.svg?s=rtable_match
> 
> This can be avoided by caching the route at the pf state.
> 
> Comments, tests?

I think this is a bad idea.
Caching this into a pf state will have negative consequences in various
scenarios. This will cache route entries for possible a long time for
sessions that are idle. A remote actor may use this to attack the local
system.

In my opinion I would prefer if you looked into the reason why
rtable_match() takes so much time. Unless you run with large routing
tables the lookups should be cheap. Is there a lock or mutex that causes
this?
 
> bluhm
> 
> Index: net/if_bridge.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
> diff -u -p -r1.370 if_bridge.c
> --- net/if_bridge.c	14 Apr 2024 20:46:27 -0000	1.370
> +++ net/if_bridge.c	20 Jul 2024 14:52:40 -0000
> @@ -1619,7 +1619,7 @@ bridge_ipsec(struct ifnet *ifp, struct e
>  #if NPF > 0
>  			if ((encif = enc_getif(tdb->tdb_rdomain,
>  			    tdb->tdb_tap)) == NULL ||
> -			    pf_test(af, dir, encif, &m) != PF_PASS) {
> +			    pf_test(af, dir, encif, &m, NULL) != PF_PASS) {
>  				m_freem(m);
>  				tdb_unref(tdb);
>  				return (1);
> @@ -1725,7 +1725,7 @@ bridge_ip(struct ifnet *brifp, int dir, 
>  #endif /* IPSEC */
>  #if NPF > 0
>  		/* Finally, we get to filter the packet! */
> -		if (pf_test(AF_INET, dir, ifp, &m) != PF_PASS)
> +		if (pf_test(AF_INET, dir, ifp, &m, NULL) != PF_PASS)
>  			goto dropit;
>  		if (m == NULL)
>  			goto dropit;
> @@ -1768,7 +1768,7 @@ bridge_ip(struct ifnet *brifp, int dir, 
>  #endif /* IPSEC */
>  
>  #if NPF > 0
> -		if (pf_test(AF_INET6, dir, ifp, &m) != PF_PASS)
> +		if (pf_test(AF_INET6, dir, ifp, &m, NULL) != PF_PASS)
>  			goto dropit;
>  		if (m == NULL)
>  			return (NULL);
> Index: net/if_tpmr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_tpmr.c,v
> diff -u -p -r1.35 if_tpmr.c
> --- net/if_tpmr.c	23 Dec 2023 10:52:54 -0000	1.35
> +++ net/if_tpmr.c	20 Jul 2024 14:52:40 -0000
> @@ -295,7 +295,7 @@ tpmr_pf(struct ifnet *ifp0, int dir, str
>  			return (NULL);
>  	}
>  
> -	if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) {
> +	if (pf_test(fam->af, dir, ifp0, &m, NULL) != PF_PASS) {
>  		m_freem(m);
>  		return (NULL);
>  	}
> Index: net/if_veb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_veb.c,v
> diff -u -p -r1.35 if_veb.c
> --- net/if_veb.c	13 Feb 2024 12:22:09 -0000	1.35
> +++ net/if_veb.c	20 Jul 2024 14:52:40 -0000
> @@ -644,7 +644,7 @@ veb_pf(struct ifnet *ifp0, int dir, stru
>  	if (m == NULL)
>  		return (NULL);
>  
> -	if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) {
> +	if (pf_test(fam->af, dir, ifp0, &m, NULL) != PF_PASS) {
>  		m_freem(m);
>  		return (NULL);
>  	}
> @@ -839,7 +839,7 @@ veb_ipsec_proto_out(struct mbuf *m, sa_f
>  #if NPF > 0
>  	encifp = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap);
>  	if (encifp != NULL) {
> -		if (pf_test(af, PF_OUT, encifp, &m) != PF_PASS) {
> +		if (pf_test(af, PF_OUT, encifp, &m, NULL) != PF_PASS) {
>  			m_freem(m);
>  			return (NULL);
>  		}
> Index: net/pf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> diff -u -p -r1.1203 pf.c
> --- net/pf.c	14 Jul 2024 18:53:39 -0000	1.1203
> +++ net/pf.c	20 Jul 2024 15:13:53 -0000
> @@ -113,6 +113,7 @@ struct pf_queuehead	*pf_queues_inactive;
>  struct pf_status	 pf_status;
>  
>  struct mutex		 pf_inp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> +struct mutex		 pf_st_ro_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
>  
>  int			 pf_hdr_limit = 20;  /* arbitrary limit, tune in ddb */
>  
> @@ -6638,7 +6639,7 @@ pf_route(struct pf_pdesc *pd, struct pf_
>  		ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
>  
>  	if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
> -		if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
> +		if (pf_test(AF_INET, PF_OUT, ifp, &m0, NULL) != PF_PASS)
>  			goto bad;
>  		else if (m0 == NULL)
>  			goto done;
> @@ -6758,7 +6759,7 @@ pf_route6(struct pf_pdesc *pd, struct pf
>  		ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
>  
>  	if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
> -		if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
> +		if (pf_test(AF_INET6, PF_OUT, ifp, &m0, NULL) != PF_PASS)
>  			goto bad;
>  		else if (m0 == NULL)
>  			goto done;
> @@ -7533,7 +7534,8 @@ pf_counters_inc(int action, struct pf_pd
>  }
>  
>  int
> -pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
> +pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0,
> +    struct route *ro)
>  {
>  #if NCARP > 0
>  	struct ifnet		*ifp0;
> @@ -7761,7 +7763,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  		    pf_syncookie_validate(&pd)) {
>  			struct mbuf	*msyn = pf_syncookie_recreate_syn(&pd);
>  			if (msyn) {
> -				action = pf_test(af, fwdir, ifp, &msyn);
> +				action = pf_test(af, fwdir, ifp, &msyn, NULL);
>  				m_freem(msyn);
>  				if (action == PF_PASS || action == PF_AFRT) {
>  					PF_STATE_ENTER_READ();
> @@ -8039,12 +8041,47 @@ done:
>  			action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
>  	}
>  #endif	/* INET6 */
> +	/* update interface index for pflow(4) */
>  	if (st && action != PF_DROP) {
>  		if (!st->if_index_in && dir == PF_IN)
>  			st->if_index_in = ifp->if_index;
>  		else if (!st->if_index_out && dir == PF_OUT)
>  			st->if_index_out = ifp->if_index;
>  	}
> +	/* the pf state keeps cache of route */
> +	if (ro && st && pd.m && action == PF_PASS && pf_ouraddr(pd.m) != 1) {
> +		/* check cache without lock, worst case is additional lookup */
> +		if (memcmp(ro, &st->route, sizeof(*ro)) != 0) {
> +			mtx_enter(&pf_st_ro_mtx);
> +			if (st->route.ro_rt != NULL) {
> +				rtfree(ro->ro_rt);
> +				*ro = st->route;
> +				rtref(ro->ro_rt);
> +			}
> +			mtx_leave(&pf_st_ro_mtx);
> +		}
> +		switch (pd.naf) {
> +		case AF_INET:
> +			route_mpath(ro, &pd.ndaddr.v4, &pd.nsaddr.v4,
> +			    pd.m->m_pkthdr.ph_rtableid);
> +			break;
> +#ifdef INET6
> +		case AF_INET6:
> +			route6_mpath(ro, &pd.ndaddr.v6, &pd.nsaddr.v6,
> +			    pd.m->m_pkthdr.ph_rtableid);
> +			break;
> +#endif /* INET6 */
> +		}
> +		if (ro->ro_rt != NULL) {
> +			if (memcmp(&st->route, ro, sizeof(*ro)) != 0) {
> +				mtx_enter(&pf_st_ro_mtx);
> +				rtfree(st->route.ro_rt);
> +				st->route = *ro;
> +				rtref(st->route.ro_rt);
> +				mtx_leave(&pf_st_ro_mtx);
> +			}
> +		}
> +	}
>  
>   out:
>  	*m0 = pd.m;
> @@ -8327,6 +8364,7 @@ pf_state_unref(struct pf_state *st)
>  
>  		pf_state_key_unref(st->key[PF_SK_WIRE]);
>  		pf_state_key_unref(st->key[PF_SK_STACK]);
> +		rtfree(st->route.ro_rt);
>  
>  		pool_put(&pf_state_pl, st);
>  	}
> Index: net/pfvar.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v
> diff -u -p -r1.538 pfvar.h
> --- net/pfvar.h	13 May 2024 01:15:53 -0000	1.538
> +++ net/pfvar.h	20 Jul 2024 14:52:40 -0000
> @@ -1640,7 +1640,8 @@ int				 pf_setup_pdesc(struct pf_pdesc *
>  				    int, struct pfi_kif *, struct mbuf *,
>  				    u_short *);
>  
> -int	pf_test(sa_family_t, int, struct ifnet *, struct mbuf **);
> +int	pf_test(sa_family_t, int, struct ifnet *, struct mbuf **,
> +	    struct route *);
>  
>  void	pf_poolmask(struct pf_addr *, struct pf_addr*,
>  	    struct pf_addr *, struct pf_addr *, sa_family_t);
> Index: net/pfvar_priv.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar_priv.h,v
> diff -u -p -r1.37 pfvar_priv.h
> --- net/pfvar_priv.h	21 Jun 2024 12:51:29 -0000	1.37
> +++ net/pfvar_priv.h	20 Jul 2024 14:52:40 -0000
> @@ -97,6 +97,7 @@ RBT_PROTOTYPE(pf_state_tree, pf_state_ke
>   *	S	pfsync
>   *	L	pf_state_list
>   *	g	pf_purge gc
> + *	R	pf_st_ro_mtx
>   */
>  
>  struct pf_state {
> @@ -120,6 +121,7 @@ struct pf_state {
>  	struct pf_sn_head	 src_nodes;	/* [I] */
>  	struct pf_state_key	*key[2];	/* [I] stack and wire */
>  	struct pfi_kif		*kif;		/* [I] */
> +	struct route		 route;		/* [R] */
>  	struct mutex		 mtx;
>  	pf_refcnt_t		 refcnt;
>  	u_int64_t		 packets[2];
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> diff -u -p -r1.400 ip_input.c
> --- netinet/ip_input.c	19 Jul 2024 16:58:31 -0000	1.400
> +++ netinet/ip_input.c	20 Jul 2024 14:52:40 -0000
> @@ -464,7 +464,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>  	 * Packet filter
>  	 */
>  	odst = ip->ip_dst;
> -	if (pf_test(AF_INET, PF_IN, ifp, mp) != PF_PASS)
> +	if (pf_test(AF_INET, PF_IN, ifp, mp, &ro) != PF_PASS)
>  		goto bad;
>  	m = *mp;
>  	if (m == NULL)
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> diff -u -p -r1.401 ip_output.c
> --- netinet/ip_output.c	2 Jul 2024 18:33:47 -0000	1.401
> +++ netinet/ip_output.c	20 Jul 2024 14:52:40 -0000
> @@ -403,7 +403,7 @@ sendit:
>  	 */
>  #if NPF > 0
>  	if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT,
> -	    ifp, &m) != PF_PASS) {
> +	    ifp, &m, NULL) != PF_PASS) {
>  		error = EACCES;
>  		goto bad;
>  	}
> @@ -584,7 +584,8 @@ ip_output_ipsec_send(struct tdb *tdb, st
>  	 * Packet filter
>  	 */
>  	if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
> -	    pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) {
> +	    pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m, NULL)
> +	    != PF_PASS) {
>  		m_freem(m);
>  		return EACCES;
>  	}
> Index: netinet/ipsec_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
> diff -u -p -r1.206 ipsec_input.c
> --- netinet/ipsec_input.c	16 Sep 2023 09:33:27 -0000	1.206
> +++ netinet/ipsec_input.c	20 Jul 2024 14:52:40 -0000
> @@ -580,7 +580,7 @@ ipsec_common_input_cb(struct mbuf **mp, 
>  		if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
>  			goto baddone;
>  		}
> -		if (pf_test(af, PF_IN, ifp, mp) != PF_PASS) {
> +		if (pf_test(af, PF_IN, ifp, mp, NULL) != PF_PASS) {
>  			if_put(ifp);
>  			goto baddone;
>  		}
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
> diff -u -p -r1.124 ip6_forward.c
> --- netinet6/ip6_forward.c	19 Jul 2024 16:58:32 -0000	1.124
> +++ netinet6/ip6_forward.c	20 Jul 2024 14:52:40 -0000
> @@ -322,7 +322,7 @@ reroute:
>  		ip6->ip6_dst.s6_addr16[1] = 0;
>  
>  #if NPF > 0
> -	if (pf_test(AF_INET6, PF_FWD, ifp, &m) != PF_PASS) {
> +	if (pf_test(AF_INET6, PF_FWD, ifp, &m, NULL) != PF_PASS) {
>  		m_freem(m);
>  		goto senderr;
>  	}
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> diff -u -p -r1.266 ip6_input.c
> --- netinet6/ip6_input.c	19 Jul 2024 16:58:32 -0000	1.266
> +++ netinet6/ip6_input.c	20 Jul 2024 14:52:40 -0000
> @@ -405,7 +405,7 @@ ip6_input_if(struct mbuf **mp, int *offp
>  	 * Packet filter
>  	 */
>  	odst = ip6->ip6_dst;
> -	if (pf_test(AF_INET6, PF_IN, ifp, mp) != PF_PASS)
> +	if (pf_test(AF_INET6, PF_IN, ifp, mp, &ro) != PF_PASS)
>  		goto bad;
>  	m = *mp;
>  	if (m == NULL)
> Index: netinet6/ip6_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
> diff -u -p -r1.292 ip6_output.c
> --- netinet6/ip6_output.c	4 Jul 2024 12:50:08 -0000	1.292
> +++ netinet6/ip6_output.c	20 Jul 2024 14:52:40 -0000
> @@ -619,7 +619,7 @@ reroute:
>  	}
>  
>  #if NPF > 0
> -	if (pf_test(AF_INET6, PF_OUT, ifp, &m) != PF_PASS) {
> +	if (pf_test(AF_INET6, PF_OUT, ifp, &m, NULL) != PF_PASS) {
>  		error = EACCES;
>  		m_freem(m);
>  		goto done;
> @@ -2840,7 +2840,8 @@ ip6_output_ipsec_send(struct tdb *tdb, s
>  	 * Packet filter
>  	 */
>  	if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
> -	    pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) {
> +	    pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m, NULL)
> +	    != PF_PASS) {
>  		m_freem(m);
>  		return EACCES;
>  	}
> 

-- 
:wq Claudio