Download raw body.
cache route at pf state
On Sat, Jul 20, 2024 at 07:20:32PM +0200, Alexander Bluhm wrote:
> Hi,
>
> When forwarding packets, rtable_match takes quite some time.
>
> http://bluhm.genua.de/perform/results/2024-07-19T07:25:47Z/2024-07-19T00%3A00%3A00Z/btrace/ssh_perform%40lt13_iperf3_-6_-cfdd7%3Ae83e%3A66bc%3A0346%3A%3A36_-P10_-t10-btrace-kstack.0.svg?s=rtable_match
>
> This can be avoided by caching the route at the pf state.
>
> Comments, tests?
I think this is a bad idea.
Caching this into a pf state will have negative consequences in various
scenarios. This will cache route entries for possible a long time for
sessions that are idle. A remote actor may use this to attack the local
system.
In my opinion I would prefer if you looked into the reason why
rtable_match() takes so much time. Unless you run with large routing
tables the lookups should be cheap. Is there a lock or mutex that causes
this?
> bluhm
>
> Index: net/if_bridge.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
> diff -u -p -r1.370 if_bridge.c
> --- net/if_bridge.c 14 Apr 2024 20:46:27 -0000 1.370
> +++ net/if_bridge.c 20 Jul 2024 14:52:40 -0000
> @@ -1619,7 +1619,7 @@ bridge_ipsec(struct ifnet *ifp, struct e
> #if NPF > 0
> if ((encif = enc_getif(tdb->tdb_rdomain,
> tdb->tdb_tap)) == NULL ||
> - pf_test(af, dir, encif, &m) != PF_PASS) {
> + pf_test(af, dir, encif, &m, NULL) != PF_PASS) {
> m_freem(m);
> tdb_unref(tdb);
> return (1);
> @@ -1725,7 +1725,7 @@ bridge_ip(struct ifnet *brifp, int dir,
> #endif /* IPSEC */
> #if NPF > 0
> /* Finally, we get to filter the packet! */
> - if (pf_test(AF_INET, dir, ifp, &m) != PF_PASS)
> + if (pf_test(AF_INET, dir, ifp, &m, NULL) != PF_PASS)
> goto dropit;
> if (m == NULL)
> goto dropit;
> @@ -1768,7 +1768,7 @@ bridge_ip(struct ifnet *brifp, int dir,
> #endif /* IPSEC */
>
> #if NPF > 0
> - if (pf_test(AF_INET6, dir, ifp, &m) != PF_PASS)
> + if (pf_test(AF_INET6, dir, ifp, &m, NULL) != PF_PASS)
> goto dropit;
> if (m == NULL)
> return (NULL);
> Index: net/if_tpmr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_tpmr.c,v
> diff -u -p -r1.35 if_tpmr.c
> --- net/if_tpmr.c 23 Dec 2023 10:52:54 -0000 1.35
> +++ net/if_tpmr.c 20 Jul 2024 14:52:40 -0000
> @@ -295,7 +295,7 @@ tpmr_pf(struct ifnet *ifp0, int dir, str
> return (NULL);
> }
>
> - if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) {
> + if (pf_test(fam->af, dir, ifp0, &m, NULL) != PF_PASS) {
> m_freem(m);
> return (NULL);
> }
> Index: net/if_veb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_veb.c,v
> diff -u -p -r1.35 if_veb.c
> --- net/if_veb.c 13 Feb 2024 12:22:09 -0000 1.35
> +++ net/if_veb.c 20 Jul 2024 14:52:40 -0000
> @@ -644,7 +644,7 @@ veb_pf(struct ifnet *ifp0, int dir, stru
> if (m == NULL)
> return (NULL);
>
> - if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) {
> + if (pf_test(fam->af, dir, ifp0, &m, NULL) != PF_PASS) {
> m_freem(m);
> return (NULL);
> }
> @@ -839,7 +839,7 @@ veb_ipsec_proto_out(struct mbuf *m, sa_f
> #if NPF > 0
> encifp = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap);
> if (encifp != NULL) {
> - if (pf_test(af, PF_OUT, encifp, &m) != PF_PASS) {
> + if (pf_test(af, PF_OUT, encifp, &m, NULL) != PF_PASS) {
> m_freem(m);
> return (NULL);
> }
> Index: net/pf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> diff -u -p -r1.1203 pf.c
> --- net/pf.c 14 Jul 2024 18:53:39 -0000 1.1203
> +++ net/pf.c 20 Jul 2024 15:13:53 -0000
> @@ -113,6 +113,7 @@ struct pf_queuehead *pf_queues_inactive;
> struct pf_status pf_status;
>
> struct mutex pf_inp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> +struct mutex pf_st_ro_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
>
> int pf_hdr_limit = 20; /* arbitrary limit, tune in ddb */
>
> @@ -6638,7 +6639,7 @@ pf_route(struct pf_pdesc *pd, struct pf_
> ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
>
> if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
> - if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
> + if (pf_test(AF_INET, PF_OUT, ifp, &m0, NULL) != PF_PASS)
> goto bad;
> else if (m0 == NULL)
> goto done;
> @@ -6758,7 +6759,7 @@ pf_route6(struct pf_pdesc *pd, struct pf
> ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
>
> if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
> - if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
> + if (pf_test(AF_INET6, PF_OUT, ifp, &m0, NULL) != PF_PASS)
> goto bad;
> else if (m0 == NULL)
> goto done;
> @@ -7533,7 +7534,8 @@ pf_counters_inc(int action, struct pf_pd
> }
>
> int
> -pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
> +pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0,
> + struct route *ro)
> {
> #if NCARP > 0
> struct ifnet *ifp0;
> @@ -7761,7 +7763,7 @@ pf_test(sa_family_t af, int fwdir, struc
> pf_syncookie_validate(&pd)) {
> struct mbuf *msyn = pf_syncookie_recreate_syn(&pd);
> if (msyn) {
> - action = pf_test(af, fwdir, ifp, &msyn);
> + action = pf_test(af, fwdir, ifp, &msyn, NULL);
> m_freem(msyn);
> if (action == PF_PASS || action == PF_AFRT) {
> PF_STATE_ENTER_READ();
> @@ -8039,12 +8041,47 @@ done:
> action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
> }
> #endif /* INET6 */
> + /* update interface index for pflow(4) */
> if (st && action != PF_DROP) {
> if (!st->if_index_in && dir == PF_IN)
> st->if_index_in = ifp->if_index;
> else if (!st->if_index_out && dir == PF_OUT)
> st->if_index_out = ifp->if_index;
> }
> + /* the pf state keeps cache of route */
> + if (ro && st && pd.m && action == PF_PASS && pf_ouraddr(pd.m) != 1) {
> + /* check cache without lock, worst case is additional lookup */
> + if (memcmp(ro, &st->route, sizeof(*ro)) != 0) {
> + mtx_enter(&pf_st_ro_mtx);
> + if (st->route.ro_rt != NULL) {
> + rtfree(ro->ro_rt);
> + *ro = st->route;
> + rtref(ro->ro_rt);
> + }
> + mtx_leave(&pf_st_ro_mtx);
> + }
> + switch (pd.naf) {
> + case AF_INET:
> + route_mpath(ro, &pd.ndaddr.v4, &pd.nsaddr.v4,
> + pd.m->m_pkthdr.ph_rtableid);
> + break;
> +#ifdef INET6
> + case AF_INET6:
> + route6_mpath(ro, &pd.ndaddr.v6, &pd.nsaddr.v6,
> + pd.m->m_pkthdr.ph_rtableid);
> + break;
> +#endif /* INET6 */
> + }
> + if (ro->ro_rt != NULL) {
> + if (memcmp(&st->route, ro, sizeof(*ro)) != 0) {
> + mtx_enter(&pf_st_ro_mtx);
> + rtfree(st->route.ro_rt);
> + st->route = *ro;
> + rtref(st->route.ro_rt);
> + mtx_leave(&pf_st_ro_mtx);
> + }
> + }
> + }
>
> out:
> *m0 = pd.m;
> @@ -8327,6 +8364,7 @@ pf_state_unref(struct pf_state *st)
>
> pf_state_key_unref(st->key[PF_SK_WIRE]);
> pf_state_key_unref(st->key[PF_SK_STACK]);
> + rtfree(st->route.ro_rt);
>
> pool_put(&pf_state_pl, st);
> }
> Index: net/pfvar.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v
> diff -u -p -r1.538 pfvar.h
> --- net/pfvar.h 13 May 2024 01:15:53 -0000 1.538
> +++ net/pfvar.h 20 Jul 2024 14:52:40 -0000
> @@ -1640,7 +1640,8 @@ int pf_setup_pdesc(struct pf_pdesc *
> int, struct pfi_kif *, struct mbuf *,
> u_short *);
>
> -int pf_test(sa_family_t, int, struct ifnet *, struct mbuf **);
> +int pf_test(sa_family_t, int, struct ifnet *, struct mbuf **,
> + struct route *);
>
> void pf_poolmask(struct pf_addr *, struct pf_addr*,
> struct pf_addr *, struct pf_addr *, sa_family_t);
> Index: net/pfvar_priv.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar_priv.h,v
> diff -u -p -r1.37 pfvar_priv.h
> --- net/pfvar_priv.h 21 Jun 2024 12:51:29 -0000 1.37
> +++ net/pfvar_priv.h 20 Jul 2024 14:52:40 -0000
> @@ -97,6 +97,7 @@ RBT_PROTOTYPE(pf_state_tree, pf_state_ke
> * S pfsync
> * L pf_state_list
> * g pf_purge gc
> + * R pf_st_ro_mtx
> */
>
> struct pf_state {
> @@ -120,6 +121,7 @@ struct pf_state {
> struct pf_sn_head src_nodes; /* [I] */
> struct pf_state_key *key[2]; /* [I] stack and wire */
> struct pfi_kif *kif; /* [I] */
> + struct route route; /* [R] */
> struct mutex mtx;
> pf_refcnt_t refcnt;
> u_int64_t packets[2];
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> diff -u -p -r1.400 ip_input.c
> --- netinet/ip_input.c 19 Jul 2024 16:58:31 -0000 1.400
> +++ netinet/ip_input.c 20 Jul 2024 14:52:40 -0000
> @@ -464,7 +464,7 @@ ip_input_if(struct mbuf **mp, int *offp,
> * Packet filter
> */
> odst = ip->ip_dst;
> - if (pf_test(AF_INET, PF_IN, ifp, mp) != PF_PASS)
> + if (pf_test(AF_INET, PF_IN, ifp, mp, &ro) != PF_PASS)
> goto bad;
> m = *mp;
> if (m == NULL)
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> diff -u -p -r1.401 ip_output.c
> --- netinet/ip_output.c 2 Jul 2024 18:33:47 -0000 1.401
> +++ netinet/ip_output.c 20 Jul 2024 14:52:40 -0000
> @@ -403,7 +403,7 @@ sendit:
> */
> #if NPF > 0
> if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT,
> - ifp, &m) != PF_PASS) {
> + ifp, &m, NULL) != PF_PASS) {
> error = EACCES;
> goto bad;
> }
> @@ -584,7 +584,8 @@ ip_output_ipsec_send(struct tdb *tdb, st
> * Packet filter
> */
> if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
> - pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) {
> + pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m, NULL)
> + != PF_PASS) {
> m_freem(m);
> return EACCES;
> }
> Index: netinet/ipsec_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
> diff -u -p -r1.206 ipsec_input.c
> --- netinet/ipsec_input.c 16 Sep 2023 09:33:27 -0000 1.206
> +++ netinet/ipsec_input.c 20 Jul 2024 14:52:40 -0000
> @@ -580,7 +580,7 @@ ipsec_common_input_cb(struct mbuf **mp,
> if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
> goto baddone;
> }
> - if (pf_test(af, PF_IN, ifp, mp) != PF_PASS) {
> + if (pf_test(af, PF_IN, ifp, mp, NULL) != PF_PASS) {
> if_put(ifp);
> goto baddone;
> }
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
> diff -u -p -r1.124 ip6_forward.c
> --- netinet6/ip6_forward.c 19 Jul 2024 16:58:32 -0000 1.124
> +++ netinet6/ip6_forward.c 20 Jul 2024 14:52:40 -0000
> @@ -322,7 +322,7 @@ reroute:
> ip6->ip6_dst.s6_addr16[1] = 0;
>
> #if NPF > 0
> - if (pf_test(AF_INET6, PF_FWD, ifp, &m) != PF_PASS) {
> + if (pf_test(AF_INET6, PF_FWD, ifp, &m, NULL) != PF_PASS) {
> m_freem(m);
> goto senderr;
> }
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> diff -u -p -r1.266 ip6_input.c
> --- netinet6/ip6_input.c 19 Jul 2024 16:58:32 -0000 1.266
> +++ netinet6/ip6_input.c 20 Jul 2024 14:52:40 -0000
> @@ -405,7 +405,7 @@ ip6_input_if(struct mbuf **mp, int *offp
> * Packet filter
> */
> odst = ip6->ip6_dst;
> - if (pf_test(AF_INET6, PF_IN, ifp, mp) != PF_PASS)
> + if (pf_test(AF_INET6, PF_IN, ifp, mp, &ro) != PF_PASS)
> goto bad;
> m = *mp;
> if (m == NULL)
> Index: netinet6/ip6_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
> diff -u -p -r1.292 ip6_output.c
> --- netinet6/ip6_output.c 4 Jul 2024 12:50:08 -0000 1.292
> +++ netinet6/ip6_output.c 20 Jul 2024 14:52:40 -0000
> @@ -619,7 +619,7 @@ reroute:
> }
>
> #if NPF > 0
> - if (pf_test(AF_INET6, PF_OUT, ifp, &m) != PF_PASS) {
> + if (pf_test(AF_INET6, PF_OUT, ifp, &m, NULL) != PF_PASS) {
> error = EACCES;
> m_freem(m);
> goto done;
> @@ -2840,7 +2840,8 @@ ip6_output_ipsec_send(struct tdb *tdb, s
> * Packet filter
> */
> if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
> - pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) {
> + pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m, NULL)
> + != PF_PASS) {
> m_freem(m);
> return EACCES;
> }
>
--
:wq Claudio
cache route at pf state