From: Claudio Jeker Subject: Re: cache route at pf state To: Alexander Bluhm Cc: tech@openbsd.org Date: Sun, 21 Jul 2024 09:48:15 +0200 On Sat, Jul 20, 2024 at 07:20:32PM +0200, Alexander Bluhm wrote: > Hi, > > When forwarding packets, rtable_match takes quite some time. > > http://bluhm.genua.de/perform/results/2024-07-19T07:25:47Z/2024-07-19T00%3A00%3A00Z/btrace/ssh_perform%40lt13_iperf3_-6_-cfdd7%3Ae83e%3A66bc%3A0346%3A%3A36_-P10_-t10-btrace-kstack.0.svg?s=rtable_match > > This can be avoided by caching the route at the pf state. > > Comments, tests? I think this is a bad idea. Caching this into a pf state will have negative consequences in various scenarios. This will cache route entries for possible a long time for sessions that are idle. A remote actor may use this to attack the local system. In my opinion I would prefer if you looked into the reason why rtable_match() takes so much time. Unless you run with large routing tables the lookups should be cheap. Is there a lock or mutex that causes this? > bluhm > > Index: net/if_bridge.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v > diff -u -p -r1.370 if_bridge.c > --- net/if_bridge.c 14 Apr 2024 20:46:27 -0000 1.370 > +++ net/if_bridge.c 20 Jul 2024 14:52:40 -0000 > @@ -1619,7 +1619,7 @@ bridge_ipsec(struct ifnet *ifp, struct e > #if NPF > 0 > if ((encif = enc_getif(tdb->tdb_rdomain, > tdb->tdb_tap)) == NULL || > - pf_test(af, dir, encif, &m) != PF_PASS) { > + pf_test(af, dir, encif, &m, NULL) != PF_PASS) { > m_freem(m); > tdb_unref(tdb); > return (1); > @@ -1725,7 +1725,7 @@ bridge_ip(struct ifnet *brifp, int dir, > #endif /* IPSEC */ > #if NPF > 0 > /* Finally, we get to filter the packet! */ > - if (pf_test(AF_INET, dir, ifp, &m) != PF_PASS) > + if (pf_test(AF_INET, dir, ifp, &m, NULL) != PF_PASS) > goto dropit; > if (m == NULL) > goto dropit; > @@ -1768,7 +1768,7 @@ bridge_ip(struct ifnet *brifp, int dir, > #endif /* IPSEC */ > > #if NPF > 0 > - if (pf_test(AF_INET6, dir, ifp, &m) != PF_PASS) > + if (pf_test(AF_INET6, dir, ifp, &m, NULL) != PF_PASS) > goto dropit; > if (m == NULL) > return (NULL); > Index: net/if_tpmr.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_tpmr.c,v > diff -u -p -r1.35 if_tpmr.c > --- net/if_tpmr.c 23 Dec 2023 10:52:54 -0000 1.35 > +++ net/if_tpmr.c 20 Jul 2024 14:52:40 -0000 > @@ -295,7 +295,7 @@ tpmr_pf(struct ifnet *ifp0, int dir, str > return (NULL); > } > > - if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) { > + if (pf_test(fam->af, dir, ifp0, &m, NULL) != PF_PASS) { > m_freem(m); > return (NULL); > } > Index: net/if_veb.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_veb.c,v > diff -u -p -r1.35 if_veb.c > --- net/if_veb.c 13 Feb 2024 12:22:09 -0000 1.35 > +++ net/if_veb.c 20 Jul 2024 14:52:40 -0000 > @@ -644,7 +644,7 @@ veb_pf(struct ifnet *ifp0, int dir, stru > if (m == NULL) > return (NULL); > > - if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) { > + if (pf_test(fam->af, dir, ifp0, &m, NULL) != PF_PASS) { > m_freem(m); > return (NULL); > } > @@ -839,7 +839,7 @@ veb_ipsec_proto_out(struct mbuf *m, sa_f > #if NPF > 0 > encifp = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap); > if (encifp != NULL) { > - if (pf_test(af, PF_OUT, encifp, &m) != PF_PASS) { > + if (pf_test(af, PF_OUT, encifp, &m, NULL) != PF_PASS) { > m_freem(m); > return (NULL); > } > Index: net/pf.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v > diff -u -p -r1.1203 pf.c > --- net/pf.c 14 Jul 2024 18:53:39 -0000 1.1203 > +++ net/pf.c 20 Jul 2024 15:13:53 -0000 > @@ -113,6 +113,7 @@ struct pf_queuehead *pf_queues_inactive; > struct pf_status pf_status; > > struct mutex pf_inp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); > +struct mutex pf_st_ro_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); > > int pf_hdr_limit = 20; /* arbitrary limit, tune in ddb */ > > @@ -6638,7 +6639,7 @@ pf_route(struct pf_pdesc *pd, struct pf_ > ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; > > if (st->rt != PF_DUPTO && pd->dir == PF_IN) { > - if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) > + if (pf_test(AF_INET, PF_OUT, ifp, &m0, NULL) != PF_PASS) > goto bad; > else if (m0 == NULL) > goto done; > @@ -6758,7 +6759,7 @@ pf_route6(struct pf_pdesc *pd, struct pf > ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; > > if (st->rt != PF_DUPTO && pd->dir == PF_IN) { > - if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) > + if (pf_test(AF_INET6, PF_OUT, ifp, &m0, NULL) != PF_PASS) > goto bad; > else if (m0 == NULL) > goto done; > @@ -7533,7 +7534,8 @@ pf_counters_inc(int action, struct pf_pd > } > > int > -pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) > +pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0, > + struct route *ro) > { > #if NCARP > 0 > struct ifnet *ifp0; > @@ -7761,7 +7763,7 @@ pf_test(sa_family_t af, int fwdir, struc > pf_syncookie_validate(&pd)) { > struct mbuf *msyn = pf_syncookie_recreate_syn(&pd); > if (msyn) { > - action = pf_test(af, fwdir, ifp, &msyn); > + action = pf_test(af, fwdir, ifp, &msyn, NULL); > m_freem(msyn); > if (action == PF_PASS || action == PF_AFRT) { > PF_STATE_ENTER_READ(); > @@ -8039,12 +8041,47 @@ done: > action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); > } > #endif /* INET6 */ > + /* update interface index for pflow(4) */ > if (st && action != PF_DROP) { > if (!st->if_index_in && dir == PF_IN) > st->if_index_in = ifp->if_index; > else if (!st->if_index_out && dir == PF_OUT) > st->if_index_out = ifp->if_index; > } > + /* the pf state keeps cache of route */ > + if (ro && st && pd.m && action == PF_PASS && pf_ouraddr(pd.m) != 1) { > + /* check cache without lock, worst case is additional lookup */ > + if (memcmp(ro, &st->route, sizeof(*ro)) != 0) { > + mtx_enter(&pf_st_ro_mtx); > + if (st->route.ro_rt != NULL) { > + rtfree(ro->ro_rt); > + *ro = st->route; > + rtref(ro->ro_rt); > + } > + mtx_leave(&pf_st_ro_mtx); > + } > + switch (pd.naf) { > + case AF_INET: > + route_mpath(ro, &pd.ndaddr.v4, &pd.nsaddr.v4, > + pd.m->m_pkthdr.ph_rtableid); > + break; > +#ifdef INET6 > + case AF_INET6: > + route6_mpath(ro, &pd.ndaddr.v6, &pd.nsaddr.v6, > + pd.m->m_pkthdr.ph_rtableid); > + break; > +#endif /* INET6 */ > + } > + if (ro->ro_rt != NULL) { > + if (memcmp(&st->route, ro, sizeof(*ro)) != 0) { > + mtx_enter(&pf_st_ro_mtx); > + rtfree(st->route.ro_rt); > + st->route = *ro; > + rtref(st->route.ro_rt); > + mtx_leave(&pf_st_ro_mtx); > + } > + } > + } > > out: > *m0 = pd.m; > @@ -8327,6 +8364,7 @@ pf_state_unref(struct pf_state *st) > > pf_state_key_unref(st->key[PF_SK_WIRE]); > pf_state_key_unref(st->key[PF_SK_STACK]); > + rtfree(st->route.ro_rt); > > pool_put(&pf_state_pl, st); > } > Index: net/pfvar.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v > diff -u -p -r1.538 pfvar.h > --- net/pfvar.h 13 May 2024 01:15:53 -0000 1.538 > +++ net/pfvar.h 20 Jul 2024 14:52:40 -0000 > @@ -1640,7 +1640,8 @@ int pf_setup_pdesc(struct pf_pdesc * > int, struct pfi_kif *, struct mbuf *, > u_short *); > > -int pf_test(sa_family_t, int, struct ifnet *, struct mbuf **); > +int pf_test(sa_family_t, int, struct ifnet *, struct mbuf **, > + struct route *); > > void pf_poolmask(struct pf_addr *, struct pf_addr*, > struct pf_addr *, struct pf_addr *, sa_family_t); > Index: net/pfvar_priv.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar_priv.h,v > diff -u -p -r1.37 pfvar_priv.h > --- net/pfvar_priv.h 21 Jun 2024 12:51:29 -0000 1.37 > +++ net/pfvar_priv.h 20 Jul 2024 14:52:40 -0000 > @@ -97,6 +97,7 @@ RBT_PROTOTYPE(pf_state_tree, pf_state_ke > * S pfsync > * L pf_state_list > * g pf_purge gc > + * R pf_st_ro_mtx > */ > > struct pf_state { > @@ -120,6 +121,7 @@ struct pf_state { > struct pf_sn_head src_nodes; /* [I] */ > struct pf_state_key *key[2]; /* [I] stack and wire */ > struct pfi_kif *kif; /* [I] */ > + struct route route; /* [R] */ > struct mutex mtx; > pf_refcnt_t refcnt; > u_int64_t packets[2]; > Index: netinet/ip_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v > diff -u -p -r1.400 ip_input.c > --- netinet/ip_input.c 19 Jul 2024 16:58:31 -0000 1.400 > +++ netinet/ip_input.c 20 Jul 2024 14:52:40 -0000 > @@ -464,7 +464,7 @@ ip_input_if(struct mbuf **mp, int *offp, > * Packet filter > */ > odst = ip->ip_dst; > - if (pf_test(AF_INET, PF_IN, ifp, mp) != PF_PASS) > + if (pf_test(AF_INET, PF_IN, ifp, mp, &ro) != PF_PASS) > goto bad; > m = *mp; > if (m == NULL) > Index: netinet/ip_output.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v > diff -u -p -r1.401 ip_output.c > --- netinet/ip_output.c 2 Jul 2024 18:33:47 -0000 1.401 > +++ netinet/ip_output.c 20 Jul 2024 14:52:40 -0000 > @@ -403,7 +403,7 @@ sendit: > */ > #if NPF > 0 > if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT, > - ifp, &m) != PF_PASS) { > + ifp, &m, NULL) != PF_PASS) { > error = EACCES; > goto bad; > } > @@ -584,7 +584,8 @@ ip_output_ipsec_send(struct tdb *tdb, st > * Packet filter > */ > if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || > - pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) { > + pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m, NULL) > + != PF_PASS) { > m_freem(m); > return EACCES; > } > Index: netinet/ipsec_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v > diff -u -p -r1.206 ipsec_input.c > --- netinet/ipsec_input.c 16 Sep 2023 09:33:27 -0000 1.206 > +++ netinet/ipsec_input.c 20 Jul 2024 14:52:40 -0000 > @@ -580,7 +580,7 @@ ipsec_common_input_cb(struct mbuf **mp, > if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) { > goto baddone; > } > - if (pf_test(af, PF_IN, ifp, mp) != PF_PASS) { > + if (pf_test(af, PF_IN, ifp, mp, NULL) != PF_PASS) { > if_put(ifp); > goto baddone; > } > Index: netinet6/ip6_forward.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v > diff -u -p -r1.124 ip6_forward.c > --- netinet6/ip6_forward.c 19 Jul 2024 16:58:32 -0000 1.124 > +++ netinet6/ip6_forward.c 20 Jul 2024 14:52:40 -0000 > @@ -322,7 +322,7 @@ reroute: > ip6->ip6_dst.s6_addr16[1] = 0; > > #if NPF > 0 > - if (pf_test(AF_INET6, PF_FWD, ifp, &m) != PF_PASS) { > + if (pf_test(AF_INET6, PF_FWD, ifp, &m, NULL) != PF_PASS) { > m_freem(m); > goto senderr; > } > Index: netinet6/ip6_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v > diff -u -p -r1.266 ip6_input.c > --- netinet6/ip6_input.c 19 Jul 2024 16:58:32 -0000 1.266 > +++ netinet6/ip6_input.c 20 Jul 2024 14:52:40 -0000 > @@ -405,7 +405,7 @@ ip6_input_if(struct mbuf **mp, int *offp > * Packet filter > */ > odst = ip6->ip6_dst; > - if (pf_test(AF_INET6, PF_IN, ifp, mp) != PF_PASS) > + if (pf_test(AF_INET6, PF_IN, ifp, mp, &ro) != PF_PASS) > goto bad; > m = *mp; > if (m == NULL) > Index: netinet6/ip6_output.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v > diff -u -p -r1.292 ip6_output.c > --- netinet6/ip6_output.c 4 Jul 2024 12:50:08 -0000 1.292 > +++ netinet6/ip6_output.c 20 Jul 2024 14:52:40 -0000 > @@ -619,7 +619,7 @@ reroute: > } > > #if NPF > 0 > - if (pf_test(AF_INET6, PF_OUT, ifp, &m) != PF_PASS) { > + if (pf_test(AF_INET6, PF_OUT, ifp, &m, NULL) != PF_PASS) { > error = EACCES; > m_freem(m); > goto done; > @@ -2840,7 +2840,8 @@ ip6_output_ipsec_send(struct tdb *tdb, s > * Packet filter > */ > if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || > - pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) { > + pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m, NULL) > + != PF_PASS) { > m_freem(m); > return EACCES; > } > -- :wq Claudio