From: Alexander Bluhm Subject: Re: link mbufs/inpcbs to pf_states, not pf_state_keys To: David Gwynne Cc: Alexandr Nedvedicky , tech@openbsd.org Date: Fri, 23 May 2025 10:24:28 +0900 On Wed, May 21, 2025 at 06:36:27PM +0900, Alexander Bluhm wrote: > On Sat, May 10, 2025 at 10:28:56AM +1000, David Gwynne wrote: > > tl;dr: less pointer chasing, and more precise links between things > > representing connections in the kernel. > > regress/sys/net/pf_forward and regress/sys/net/pflog fail. > > I am not 100% sure that it is related to this diff. Due to other > bugs, my last successful regress run was at May 9th. Give me another > day, then I can say if these tests are broken in current already. I have tested i386 snapshots, tests pass. Apply the diff and they fail. http://bluhm.genua.de/regress/results/2025-05-22T13%3A51%3A46Z/logs/sys/net/pf_forward/make.log FAIL sys/net/pf_forward *** Error 1 in . (Makefile:178 'run-ping-inet-AF_IN'), *** Error 2 in . (Makefile:197 'run-ping-mtu-1400-inet-AF_IN'), *** Error 2 in . (Makefile:219 'run-ping-mtu-1300-inet-AF_IN'), *** Error 1 in . (Makefile:236 'run-udp-inet-AF_IN'), *** Error 1 in . (Makefile:251 'run-tcp-inet-AF_IN'), *** Error 1 in . (Makefile:269 'run-traceroute-icmp-inet-AF_IN'), *** Error 1 in . (Makefile:269 'run-traceroute-udp-inet-AF_IN'), *** Error 1 in . (Makefile:178 'run-ping-inet6-AF_IN'), *** Error 2 in . (Makefile:199 'run-ping-mtu-1400-inet6-AF_IN'), *** Error 2 in . (Makefile:221 'run-ping-mtu-1300-inet6-AF_IN'), *** Error 1 in . (Makefile:236 'run-udp-inet6-AF_IN'), *** Error 1 in . (Makefile:251 'run-tcp-inet6-AF_IN'), *** Error 1 in . (Makefile:269 'run-traceroute-icmp-inet6-AF_IN'), *** Error 1 in . (Makefile:269 'run-traceroute-udp-inet6-AF_IN') http://bluhm.genua.de/regress/results/2025-05-22T13%3A51%3A46Z/logs/sys/net/pflog/make.log FAIL sys/net/pflog *** Error 1 in . (Makefile:164 'run-ping-14'), *** Error 1 in . (Makefile:168 'run-ping6-14'), *** Error 1 in . (Makefile:199 'run-bpf-nothing') bluhm > > Index: kern/uipc_mbuf.c > > =================================================================== > > RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v > > diff -u -p -r1.296 uipc_mbuf.c > > --- kern/uipc_mbuf.c 1 Jan 2025 13:44:22 -0000 1.296 > > +++ kern/uipc_mbuf.c 9 May 2025 02:48:26 -0000 > > @@ -301,7 +301,7 @@ m_clearhdr(struct mbuf *m) > > /* delete all mbuf tags to reset the state */ > > m_tag_delete_chain(m); > > #if NPF > 0 > > - pf_mbuf_unlink_state_key(m); > > + pf_mbuf_unlink_state(m); > > pf_mbuf_unlink_inpcb(m); > > #endif /* NPF > 0 */ > > > > @@ -429,7 +429,7 @@ m_free(struct mbuf *m) > > if (m->m_flags & M_PKTHDR) { > > m_tag_delete_chain(m); > > #if NPF > 0 > > - pf_mbuf_unlink_state_key(m); > > + pf_mbuf_unlink_state(m); > > pf_mbuf_unlink_inpcb(m); > > #endif /* NPF > 0 */ > > } > > @@ -1387,8 +1387,8 @@ m_dup_pkthdr(struct mbuf *to, struct mbu > > to->m_pkthdr = from->m_pkthdr; > > > > #if NPF > 0 > > - to->m_pkthdr.pf.statekey = NULL; > > - pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey); > > + to->m_pkthdr.pf.st = NULL; > > + pf_mbuf_link_state(to, from->m_pkthdr.pf.st); > > to->m_pkthdr.pf.inp = NULL; > > pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp); > > #endif /* NPF > 0 */ > > @@ -1517,8 +1517,8 @@ m_print(void *v, > > m->m_pkthdr.csum_flags, MCS_BITS); > > (*pr)("m_pkthdr.ether_vtag: %u\tm_ptkhdr.ph_rtableid: %u\n", > > m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid); > > - (*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n", > > - m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp); > > + (*pr)("m_pkthdr.pf.st: %p\tm_pkthdr.pf.inp %p\n", > > + m->m_pkthdr.pf.st, m->m_pkthdr.pf.inp); > > (*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n", > > m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag); > > (*pr)("m_pkthdr.pf.flags: %b\n", > > Index: net/if_mpw.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if_mpw.c,v > > diff -u -p -r1.67 if_mpw.c > > --- net/if_mpw.c 2 Mar 2025 21:28:32 -0000 1.67 > > +++ net/if_mpw.c 9 May 2025 02:48:26 -0000 > > @@ -612,7 +612,7 @@ mpw_input(struct mpw_softc *sc, struct m > > m->m_pkthdr.ph_rtableid = ifp->if_rdomain; > > > > /* packet has not been processed by PF yet. */ > > - KASSERT(m->m_pkthdr.pf.statekey == NULL); > > + KASSERT(m->m_pkthdr.pf.st == NULL); > > > > if_vinput(ifp, m, NULL); > > return; > > Index: net/if_tpmr.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if_tpmr.c,v > > diff -u -p -r1.36 if_tpmr.c > > --- net/if_tpmr.c 2 Mar 2025 21:28:32 -0000 1.36 > > +++ net/if_tpmr.c 9 May 2025 02:48:26 -0000 > > @@ -304,7 +304,7 @@ tpmr_pf(struct ifnet *ifp0, int dir, str > > return (NULL); > > > > if (dir == PF_IN && ISSET(m->m_pkthdr.pf.flags, PF_TAG_DIVERTED)) { > > - pf_mbuf_unlink_state_key(m); > > + pf_mbuf_unlink_state(m); > > pf_mbuf_unlink_inpcb(m); > > (*fam->ip_input)(ifp0, m, ns); > > return (NULL); > > Index: net/if_veb.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if_veb.c,v > > diff -u -p -r1.37 if_veb.c > > --- net/if_veb.c 2 Mar 2025 21:28:32 -0000 1.37 > > +++ net/if_veb.c 9 May 2025 02:48:26 -0000 > > @@ -654,7 +654,7 @@ veb_pf(struct ifnet *ifp0, int dir, stru > > return (NULL); > > > > if (dir == PF_IN && ISSET(m->m_pkthdr.pf.flags, PF_TAG_DIVERTED)) { > > - pf_mbuf_unlink_state_key(m); > > + pf_mbuf_unlink_state(m); > > pf_mbuf_unlink_inpcb(m); > > (*fam->ip_input)(ifp0, m, ns); > > return (NULL); > > Index: net/pf.c > > =================================================================== > > RCS file: /cvs/src/sys/net/pf.c,v > > diff -u -p -r1.1210 pf.c > > --- net/pf.c 1 May 2025 01:10:08 -0000 1.1210 > > +++ net/pf.c 9 May 2025 02:48:26 -0000 > > @@ -247,15 +247,16 @@ int pf_state_insert(struct pfi_kif *, > > struct pf_state_key **, struct pf_state_key **, > > struct pf_state *); > > > > +int pf_state_isvalid(struct pf_state *); > > int pf_state_key_isvalid(struct pf_state_key *); > > struct pf_state_key *pf_state_key_ref(struct pf_state_key *); > > void pf_state_key_unref(struct pf_state_key *); > > -void pf_state_key_link_reverse(struct pf_state_key *, > > - struct pf_state_key *); > > -void pf_state_key_unlink_reverse(struct pf_state_key *); > > -void pf_state_key_link_inpcb(struct pf_state_key *, > > +void pf_state_link_reverse(struct pf_state *, > > + struct pf_state *); > > +void pf_state_unlink_reverse(struct pf_state *); > > +void pf_state_link_inpcb(struct pf_state *, > > struct inpcb *); > > -void pf_state_key_unlink_inpcb(struct pf_state_key *); > > +void pf_state_unlink_inpcb(struct pf_state *); > > void pf_pktenqueue_delayed(void *); > > int32_t pf_state_expires(const struct pf_state *, uint8_t); > > > > @@ -860,8 +861,6 @@ pf_state_key_detach(struct pf_state *st, > > if (TAILQ_EMPTY(&sk->sk_states)) { > > RBT_REMOVE(pf_state_tree, &pf_statetbl, sk); > > sk->sk_removed = 1; > > - pf_state_key_unlink_reverse(sk); > > - pf_state_key_unlink_inpcb(sk); > > pf_state_key_unref(sk); > > } > > > > @@ -1123,13 +1122,40 @@ pf_compare_state_keys(struct pf_state_ke > > } > > } > > > > +static inline struct pf_state * > > +pf_find_state_lookup(struct pf_pdesc *pd, const struct pf_state_key_cmp *key) > > +{ > > + struct pf_state_key *sk; > > + struct pf_state_item *si; > > + struct pf_state *st; > > + uint8_t dir = pd->dir; > > + > > + sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); > > + if (sk == NULL) > > + return (NULL); > > + > > + /* list is sorted, if-bound states before floating ones */ > > + TAILQ_FOREACH(si, &sk->sk_states, si_entry) { > > + st = si->si_st; > > + if (st->timeout == PFTM_PURGE) > > + continue; > > + if (st->kif != pfi_all && st->kif != pd->kif) > > + continue; > > + > > + if (st->key[dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK] == sk) > > + return (st); > > + } > > + > > + return (NULL); > > +} > > + > > int > > pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key, > > struct pf_state **stp) > > { > > - struct pf_state_key *sk, *pkt_sk; > > - struct pf_state_item *si; > > struct pf_state *st = NULL; > > + struct pf_state *strev = NULL; > > + struct inpcb *inp = NULL; > > > > pf_status.fcounters[FCNT_STATE_SEARCH]++; > > if (pf_status.debug >= LOG_DEBUG) { > > @@ -1139,82 +1165,60 @@ pf_find_state(struct pf_pdesc *pd, struc > > addlog("\n"); > > } > > > > - pkt_sk = NULL; > > - sk = NULL; > > if (pd->dir == PF_OUT) { > > - /* first if block deals with outbound forwarded packet */ > > - pkt_sk = pd->m->m_pkthdr.pf.statekey; > > + strev = pd->m->m_pkthdr.pf.st; > > + inp = pd->m->m_pkthdr.pf.inp; > > > > - if (!pf_state_key_isvalid(pkt_sk)) { > > - pf_mbuf_unlink_state_key(pd->m); > > - pkt_sk = NULL; > > - } > > + /* first if block deals with outbound forwarded packet */ > > + if (strev != NULL) { > > + pd->m->m_pkthdr.pf.st = NULL; > > + KASSERT(inp == NULL); > > > > - if (pkt_sk && pf_state_key_isvalid(pkt_sk->sk_reverse)) > > - sk = pkt_sk->sk_reverse; > > + if (pf_state_isvalid(strev)) { > > + st = strev->reverse; > > + if (st != NULL && pf_state_isvalid(st)) > > + goto match; > > + } > > > > - if (pkt_sk == NULL) { > > - struct inpcb *inp = pd->m->m_pkthdr.pf.inp; > > + /* this handles st not being valid too */ > > + pf_state_unlink_reverse(strev); > > > > + } else if (inp != NULL && READ_ONCE(inp->inp_pf_st) != NULL) { > > /* here we deal with local outbound packet */ > > - if (inp != NULL) { > > - struct pf_state_key *inp_sk; > > - > > - mtx_enter(&pf_inp_mtx); > > - inp_sk = inp->inp_pf_sk; > > - if (pf_state_key_isvalid(inp_sk)) { > > - sk = inp_sk; > > + mtx_enter(&pf_inp_mtx); > > + st = inp->inp_pf_st; > > + if (st != NULL) { > > + if (pf_state_isvalid(st)) { > > mtx_leave(&pf_inp_mtx); > > - } else if (inp_sk != NULL) { > > - KASSERT(inp_sk->sk_inp == inp); > > - inp_sk->sk_inp = NULL; > > - inp->inp_pf_sk = NULL; > > + goto match; > > + } else { > > + KASSERT(st->inp == inp); > > + st->inp = NULL; > > + inp->inp_pf_st = NULL; > > mtx_leave(&pf_inp_mtx); > > > > - pf_state_key_unref(inp_sk); > > + pf_state_unref(st); > > in_pcbunref(inp); > > - } else > > - mtx_leave(&pf_inp_mtx); > > - } > > - } > > - } > > - > > - if (sk == NULL) { > > - if ((sk = RBT_FIND(pf_state_tree, &pf_statetbl, > > - (struct pf_state_key *)key)) == NULL) > > - return (PF_DROP); > > - if (pd->dir == PF_OUT && pkt_sk && > > - pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0) > > - pf_state_key_link_reverse(sk, pkt_sk); > > - else if (pd->dir == PF_OUT) > > - pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp); > > - } > > - > > - /* remove firewall data from outbound packet */ > > - if (pd->dir == PF_OUT) > > - pf_pkt_addr_changed(pd->m); > > - > > - /* list is sorted, if-bound states before floating ones */ > > - TAILQ_FOREACH(si, &sk->sk_states, si_entry) { > > - struct pf_state *sist = si->si_st; > > - if (sist->timeout != PFTM_PURGE && > > - (sist->kif == pfi_all || sist->kif == pd->kif) && > > - ((sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af && > > - sk == (pd->dir == PF_IN ? sist->key[PF_SK_WIRE] : > > - sist->key[PF_SK_STACK])) || > > - (sist->key[PF_SK_WIRE]->af != sist->key[PF_SK_STACK]->af > > - && pd->dir == PF_IN && (sk == sist->key[PF_SK_STACK] || > > - sk == sist->key[PF_SK_WIRE])))) { > > - st = sist; > > - break; > > + } > > + } else > > + mtx_leave(&pf_inp_mtx); > > } > > } > > > > + st = pf_find_state_lookup(pd, key); > > if (st == NULL) > > return (PF_DROP); > > if (ISSET(st->state_flags, PFSTATE_INP_UNLINKED)) > > return (PF_DROP); > > > > + if (pd->dir == PF_OUT) { > > + if (strev != NULL) > > + pf_state_link_reverse(st, strev); > > + else if (inp != NULL) > > + pf_state_link_inpcb(st, inp); > > + } > > + > > +match: > > if (st->rule.ptr->pktrate.limit && pd->dir == st->direction) { > > pf_add_threshold(&st->rule.ptr->pktrate); > > if (pf_check_threshold(&st->rule.ptr->pktrate)) > > @@ -1782,6 +1786,9 @@ pf_remove_state(struct pf_state *st) > > st->timeout = PFTM_UNLINKED; > > mtx_leave(&st->mtx); > > > > + pf_state_unlink_reverse(st); > > + pf_state_unlink_inpcb(st); > > + > > /* handle load balancing related tasks */ > > pf_postprocess_addr(st); > > > > @@ -1813,52 +1820,47 @@ pf_remove_state(struct pf_state *st) > > void > > pf_remove_divert_state(struct inpcb *inp) > > { > > - struct pf_state_key *sk; > > - struct pf_state_item *si; > > + struct pf_state *st; > > > > PF_ASSERT_UNLOCKED(); > > > > - if (READ_ONCE(inp->inp_pf_sk) == NULL) > > + if (READ_ONCE(inp->inp_pf_st) == NULL) > > return; > > > > mtx_enter(&pf_inp_mtx); > > - sk = pf_state_key_ref(inp->inp_pf_sk); > > + st = pf_state_ref(inp->inp_pf_st); > > mtx_leave(&pf_inp_mtx); > > - if (sk == NULL) > > + if (st == NULL) > > return; > > > > PF_LOCK(); > > PF_STATE_ENTER_WRITE(); > > - TAILQ_FOREACH(si, &sk->sk_states, si_entry) { > > - struct pf_state *sist = si->si_st; > > - if (sk == sist->key[PF_SK_STACK] && sist->rule.ptr && > > - (sist->rule.ptr->divert.type == PF_DIVERT_TO || > > - sist->rule.ptr->divert.type == PF_DIVERT_REPLY)) { > > - if (sist->key[PF_SK_STACK]->proto == IPPROTO_TCP && > > - sist->key[PF_SK_WIRE] != sist->key[PF_SK_STACK]) { > > - /* > > - * If the local address is translated, keep > > - * the state for "tcp.closed" seconds to > > - * prevent its source port from being reused. > > - */ > > - if (sist->src.state < TCPS_FIN_WAIT_2 || > > - sist->dst.state < TCPS_FIN_WAIT_2) { > > - pf_set_protostate(sist, PF_PEER_BOTH, > > - TCPS_TIME_WAIT); > > - pf_update_state_timeout(sist, > > - PFTM_TCP_CLOSED); > > - sist->expire = getuptime(); > > - } > > - sist->state_flags |= PFSTATE_INP_UNLINKED; > > - } else > > - pf_remove_state(sist); > > - break; > > - } > > + if (st->rule.ptr && > > + (st->rule.ptr->divert.type == PF_DIVERT_TO || > > + st->rule.ptr->divert.type == PF_DIVERT_REPLY)) { > > + if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP && > > + st->key[PF_SK_WIRE] != st->key[PF_SK_STACK]) { > > + /* > > + * If the local address is translated, keep > > + * the state for "tcp.closed" seconds to > > + * prevent its source port from being reused. > > + */ > > + if (st->src.state < TCPS_FIN_WAIT_2 || > > + st->dst.state < TCPS_FIN_WAIT_2) { > > + pf_set_protostate(st, PF_PEER_BOTH, > > + TCPS_TIME_WAIT); > > + pf_update_state_timeout(st, > > + PFTM_TCP_CLOSED); > > + st->expire = getuptime(); > > + } > > + st->state_flags |= PFSTATE_INP_UNLINKED; > > + } else > > + pf_remove_state(st); > > } > > PF_STATE_EXIT_WRITE(); > > PF_UNLOCK(); > > > > - pf_state_key_unref(sk); > > + pf_state_unref(st); > > } > > > > void > > @@ -7916,9 +7918,9 @@ done: > > > > if (pd.dir == PF_IN) { > > KASSERT(inp == NULL); > > - pf_mbuf_link_state_key(m, st->key[PF_SK_STACK]); > > + pf_mbuf_link_state(m, st); > > } else if (pd.dir == PF_OUT) > > - pf_state_key_link_inpcb(st->key[PF_SK_STACK], inp); > > + pf_state_link_inpcb(st, inp); > > > > if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) { > > m->m_pkthdr.ph_flowid = st->key[PF_SK_WIRE]->hash; > > @@ -8100,14 +8102,14 @@ done: > > int > > pf_ouraddr(struct mbuf *m) > > { > > - struct pf_state_key *sk; > > + struct pf_state *st; > > > > if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) > > return (1); > > > > - sk = m->m_pkthdr.pf.statekey; > > - if (sk != NULL) { > > - if (READ_ONCE(sk->sk_inp) != NULL) > > + st = m->m_pkthdr.pf.st; > > + if (st != NULL) { > > + if (READ_ONCE(st->inp) != NULL) > > return (1); > > } > > > > @@ -8121,7 +8123,7 @@ pf_ouraddr(struct mbuf *m) > > void > > pf_pkt_addr_changed(struct mbuf *m) > > { > > - pf_mbuf_unlink_state_key(m); > > + pf_mbuf_unlink_state(m); > > pf_mbuf_unlink_inpcb(m); > > } > > > > @@ -8129,85 +8131,97 @@ struct inpcb * > > pf_inp_lookup(struct mbuf *m) > > { > > struct inpcb *inp = NULL; > > - struct pf_state_key *sk = m->m_pkthdr.pf.statekey; > > + struct pf_state *st; > > + > > + st = m->m_pkthdr.pf.st; > > + if (st == NULL) > > + return (NULL); > > + if (!pf_state_isvalid(st)) { > > + pf_mbuf_unlink_state(m); > > + return (NULL); > > + } > > > > - if (!pf_state_key_isvalid(sk)) > > - pf_mbuf_unlink_state_key(m); > > - else if (READ_ONCE(sk->sk_inp) != NULL) { > > + if (READ_ONCE(st->inp) != NULL) { > > mtx_enter(&pf_inp_mtx); > > - inp = in_pcbref(sk->sk_inp); > > + inp = in_pcbref(st->inp); > > mtx_leave(&pf_inp_mtx); > > } > > > > return (inp); > > } > > > > +/* > > + * This is called from the IP stack after it's found an inpcb for > > + * an mbuf so it can link the pf_state to that pcb. > > + */ > > void > > pf_inp_link(struct mbuf *m, struct inpcb *inp) > > { > > - struct pf_state_key *sk = m->m_pkthdr.pf.statekey; > > + struct pf_state *st; > > > > - if (!pf_state_key_isvalid(sk)) { > > - pf_mbuf_unlink_state_key(m); > > + st = m->m_pkthdr.pf.st; > > + if (st == NULL) > > return; > > - } > > > > /* > > * we don't need to grab PF-lock here. At worst case we link inp to > > * state, which might be just being marked as deleted by another > > * thread. > > */ > > - pf_state_key_link_inpcb(sk, inp); > > + if (pf_state_isvalid(st)) { > > + if (READ_ONCE(st->inp) == NULL) > > + pf_state_link_inpcb(st, inp); > > + } > > > > /* The statekey has finished finding the inp, it is no longer needed. */ > > - pf_mbuf_unlink_state_key(m); > > + pf_mbuf_unlink_state(m); > > } > > > > void > > pf_inp_unlink(struct inpcb *inp) > > { > > - struct pf_state_key *sk; > > + struct pf_state *st; > > > > - if (READ_ONCE(inp->inp_pf_sk) == NULL) > > + if (READ_ONCE(inp->inp_pf_st) == NULL) > > return; > > > > mtx_enter(&pf_inp_mtx); > > - sk = inp->inp_pf_sk; > > - if (sk == NULL) { > > + st = inp->inp_pf_st; > > + if (st == NULL) { > > mtx_leave(&pf_inp_mtx); > > return; > > } > > - KASSERT(sk->sk_inp == inp); > > - sk->sk_inp = NULL; > > - inp->inp_pf_sk = NULL; > > + KASSERT(st->inp == inp); > > + st->inp = NULL; > > + inp->inp_pf_st = NULL; > > mtx_leave(&pf_inp_mtx); > > > > - pf_state_key_unref(sk); > > + pf_state_unref(st); > > in_pcbunref(inp); > > } > > > > void > > -pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev) > > +pf_state_link_reverse(struct pf_state *st, struct pf_state *strev) > > { > > - struct pf_state_key *old_reverse; > > + struct pf_state *ost; > > > > - old_reverse = atomic_cas_ptr(&sk->sk_reverse, NULL, skrev); > > - if (old_reverse != NULL) > > - KASSERT(old_reverse == skrev); > > + ost = atomic_cas_ptr(&st->reverse, NULL, strev); > > + if (ost != NULL) > > + KASSERT(ost == strev); > > else { > > - pf_state_key_ref(skrev); > > + pf_state_ref(strev); > > > > /* > > - * NOTE: if sk == skrev, then KASSERT() below holds true, we > > + * NOTE: if st == strev, then KASSERT() below holds true, we > > * still want to grab a reference in such case, because > > - * pf_state_key_unlink_reverse() does not check whether keys > > + * pf_state_unlink_reverse() does not check whether states > > * are identical or not. > > */ > > - old_reverse = atomic_cas_ptr(&skrev->sk_reverse, NULL, sk); > > - if (old_reverse != NULL) > > - KASSERT(old_reverse == sk); > > + ost = atomic_cas_ptr(&strev->reverse, NULL, st); > > + if (ost != NULL) > > + KASSERT(ost == st); > > > > - pf_state_key_ref(sk); > > + pf_state_ref(st); > > } > > } > > > > @@ -8243,10 +8257,6 @@ pf_state_key_unref(struct pf_state_key * > > if (PF_REF_RELE(sk->sk_refcnt)) { > > /* state key must be removed from tree */ > > KASSERT(!pf_state_key_isvalid(sk)); > > - /* state key must be unlinked from reverse key */ > > - KASSERT(sk->sk_reverse == NULL); > > - /* state key must be unlinked from socket */ > > - KASSERT(sk->sk_inp == NULL); > > pool_put(&pf_state_key_pl, sk); > > } > > } > > @@ -8257,21 +8267,28 @@ pf_state_key_isvalid(struct pf_state_key > > return ((sk != NULL) && (sk->sk_removed == 0)); > > } > > > > +int > > +pf_state_isvalid(struct pf_state *st) > > +{ > > + return (st->timeout < PFTM_MAX); > > +} > > + > > void > > -pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) > > +pf_mbuf_link_state(struct mbuf *m, struct pf_state *st) > > { > > - KASSERT(m->m_pkthdr.pf.statekey == NULL); > > - m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); > > + KASSERT(m->m_pkthdr.pf.st == NULL); > > + m->m_pkthdr.pf.st = pf_state_ref(st); > > } > > > > void > > -pf_mbuf_unlink_state_key(struct mbuf *m) > > +pf_mbuf_unlink_state(struct mbuf *m) > > { > > - struct pf_state_key *sk = m->m_pkthdr.pf.statekey; > > + struct pf_state *st; > > > > - if (sk != NULL) { > > - m->m_pkthdr.pf.statekey = NULL; > > - pf_state_key_unref(sk); > > + st = m->m_pkthdr.pf.st; > > + if (st != NULL) { > > + m->m_pkthdr.pf.st = NULL; > > + pf_state_unref(st); > > } > > } > > > > @@ -8294,57 +8311,58 @@ pf_mbuf_unlink_inpcb(struct mbuf *m) > > } > > > > void > > -pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) > > +pf_state_link_inpcb(struct pf_state *st, struct inpcb *inp) > > { > > - if (inp == NULL || READ_ONCE(sk->sk_inp) != NULL) > > + if (inp == NULL || READ_ONCE(st->inp) != NULL) > > return; > > > > mtx_enter(&pf_inp_mtx); > > - if (inp->inp_pf_sk != NULL || sk->sk_inp != NULL) { > > + if (inp->inp_pf_st != NULL || st->inp != NULL) { > > mtx_leave(&pf_inp_mtx); > > return; > > } > > - sk->sk_inp = in_pcbref(inp); > > - inp->inp_pf_sk = pf_state_key_ref(sk); > > + st->inp = in_pcbref(inp); > > + inp->inp_pf_st = pf_state_ref(st); > > mtx_leave(&pf_inp_mtx); > > } > > > > void > > -pf_state_key_unlink_inpcb(struct pf_state_key *sk) > > +pf_state_unlink_inpcb(struct pf_state *st) > > { > > struct inpcb *inp; > > > > - if (READ_ONCE(sk->sk_inp) == NULL) > > + if (READ_ONCE(st->inp) == NULL) > > return; > > > > mtx_enter(&pf_inp_mtx); > > - inp = sk->sk_inp; > > + inp = st->inp; > > if (inp == NULL) { > > mtx_leave(&pf_inp_mtx); > > return; > > } > > - KASSERT(inp->inp_pf_sk == sk); > > - sk->sk_inp = NULL; > > - inp->inp_pf_sk = NULL; > > + KASSERT(inp->inp_pf_st == st); > > + st->inp = NULL; > > + inp->inp_pf_st = NULL; > > mtx_leave(&pf_inp_mtx); > > > > - pf_state_key_unref(sk); > > + pf_state_unref(st); > > in_pcbunref(inp); > > } > > > > void > > -pf_state_key_unlink_reverse(struct pf_state_key *sk) > > +pf_state_unlink_reverse(struct pf_state *st) > > { > > - struct pf_state_key *skrev = sk->sk_reverse; > > - > > - /* Note that sk and skrev may be equal, then we unref twice. */ > > - if (skrev != NULL) { > > - KASSERT(skrev->sk_reverse == sk); > > - sk->sk_reverse = NULL; > > - skrev->sk_reverse = NULL; > > - pf_state_key_unref(skrev); > > - pf_state_key_unref(sk); > > - } > > + struct pf_state *strev; > > + > > + /* Note that st and strev may be equal, then we unref twice. */ > > + strev = st->reverse; > > + if (strev != NULL) { > > + KASSERT(strev->reverse == st); > > + st->reverse = NULL; > > + strev->reverse = NULL; > > + pf_state_unref(strev); > > + pf_state_unref(st); > > + } > > } > > > > struct pf_state * > > @@ -8370,6 +8388,11 @@ pf_state_unref(struct pf_state *st) > > > > pf_state_key_unref(st->key[PF_SK_WIRE]); > > pf_state_key_unref(st->key[PF_SK_STACK]); > > + > > + /* state must be unlinked from reverse */ > > + KASSERT(st->reverse == NULL); > > + /* state must be unlinked from socket */ > > + KASSERT(st->inp == NULL); > > > > pool_put(&pf_state_pl, st); > > } > > Index: net/pfvar.h > > =================================================================== > > RCS file: /cvs/src/sys/net/pfvar.h,v > > diff -u -p -r1.543 pfvar.h > > --- net/pfvar.h 14 Apr 2025 20:02:34 -0000 1.543 > > +++ net/pfvar.h 9 May 2025 02:48:26 -0000 > > @@ -1849,9 +1849,8 @@ int pf_map_addr(sa_family_t, struct p > > struct pf_pool *, enum pf_sn_types); > > int pf_postprocess_addr(struct pf_state *); > > > > -void pf_mbuf_link_state_key(struct mbuf *, > > - struct pf_state_key *); > > -void pf_mbuf_unlink_state_key(struct mbuf *); > > +void pf_mbuf_link_state(struct mbuf *, struct pf_state *); > > +void pf_mbuf_unlink_state(struct mbuf *); > > void pf_mbuf_link_inpcb(struct mbuf *, struct inpcb *); > > void pf_mbuf_unlink_inpcb(struct mbuf *); > > > > Index: net/pfvar_priv.h > > =================================================================== > > RCS file: /cvs/src/sys/net/pfvar_priv.h,v > > diff -u -p -r1.38 pfvar_priv.h > > --- net/pfvar_priv.h 7 Sep 2024 22:41:55 -0000 1.38 > > +++ net/pfvar_priv.h 9 May 2025 02:48:26 -0000 > > @@ -41,11 +41,6 @@ > > #include > > #include > > > > -/* > > - * Locks used to protect struct members in this file: > > - * L pf_inp_mtx link pf to inp mutex > > - */ > > - > > struct pfsync_deferral; > > > > /* > > @@ -74,8 +69,6 @@ struct pf_state_key { > > > > RBT_ENTRY(pf_state_key) sk_entry; > > struct pf_statelisthead sk_states; > > - struct pf_state_key *sk_reverse; > > - struct inpcb *sk_inp; /* [L] */ > > pf_refcnt_t sk_refcnt; > > u_int8_t sk_removed; > > }; > > @@ -95,7 +88,8 @@ RBT_PROTOTYPE(pf_state_tree, pf_state_ke > > * M pf_state mtx > > * P PF_STATE_LOCK > > * S pfsync > > - * L pf_state_list > > + * L pf_inp_mtx link pf to inp mutex > > + * G pf_state_list > > * g pf_purge gc > > */ > > > > @@ -107,7 +101,7 @@ struct pf_state { > > > > TAILQ_ENTRY(pf_state) sync_list; /* [S] */ > > struct pfsync_deferral *sync_defer; /* [S] */ > > - TAILQ_ENTRY(pf_state) entry_list; /* [L] */ > > + TAILQ_ENTRY(pf_state) entry_list; /* [G] */ > > SLIST_ENTRY(pf_state) gc_list; /* [g] */ > > RBT_ENTRY(pf_state) entry_id; /* [P] */ > > struct pf_state_peer src; > > @@ -120,6 +114,8 @@ struct pf_state { > > struct pf_sn_head src_nodes; /* [I] */ > > struct pf_state_key *key[2]; /* [I] stack and wire */ > > struct pfi_kif *kif; /* [I] */ > > + struct pf_state *reverse; > > + struct inpcb *inp; /* [L] */ > > struct mutex mtx; > > pf_refcnt_t refcnt; > > u_int64_t packets[2]; > > Index: netinet/in_pcb.h > > =================================================================== > > RCS file: /cvs/src/sys/netinet/in_pcb.h,v > > diff -u -p -r1.167 in_pcb.h > > --- netinet/in_pcb.h 4 May 2025 23:05:17 -0000 1.167 > > +++ netinet/in_pcb.h 9 May 2025 02:48:26 -0000 > > @@ -110,7 +110,7 @@ > > * Protocol input only reads inp_[lf]addr/port during lookup and is safe. > > */ > > > > -struct pf_state_key; > > +struct pf_state; > > > > union inpaddru { > > struct in_addr iau_addr; > > @@ -168,7 +168,7 @@ struct inpcb { > > > > int inp_cksum6; > > struct icmp6_filter *inp_icmp6filt; > > - struct pf_state_key *inp_pf_sk; /* [L] */ > > + struct pf_state *inp_pf_st; /* [L] */ > > struct mbuf *(*inp_upcall)(void *, struct mbuf *, > > struct ip *, struct ip6_hdr *, void *, int, struct netstack *); > > void *inp_upcall_arg; > > Index: sys/mbuf.h > > =================================================================== > > RCS file: /cvs/src/sys/sys/mbuf.h,v > > diff -u -p -r1.265 mbuf.h > > --- sys/mbuf.h 5 Nov 2024 13:15:13 -0000 1.265 > > +++ sys/mbuf.h 9 May 2025 02:48:26 -0000 > > @@ -90,11 +90,11 @@ struct m_hdr { > > }; > > > > /* pf stuff */ > > -struct pf_state_key; > > +struct pf_state; > > struct inpcb; > > > > struct pkthdr_pf { > > - struct pf_state_key *statekey; /* pf stackside statekey */ > > + struct pf_state *st; /* pf state */ > > struct inpcb *inp; /* connected pcb for outgoing packet */ > > u_int32_t qid; /* queue id */ > > u_int16_t tag; /* tag id */ > > @@ -325,7 +325,7 @@ u_int mextfree_register(void (*)(caddr_t > > (to)->m_pkthdr = (from)->m_pkthdr; \ > > (from)->m_flags &= ~M_PKTHDR; \ > > SLIST_INIT(&(from)->m_pkthdr.ph_tags); \ > > - (from)->m_pkthdr.pf.statekey = NULL; \ > > + (from)->m_pkthdr.pf.st = NULL; \ > > } while (/* CONSTCOND */ 0) > > > > /*