Download raw body.
link mbufs/inpcbs to pf_states, not pf_state_keys
On Fri, May 23, 2025 at 10:24:28AM +0900, Alexander Bluhm wrote:
> On Wed, May 21, 2025 at 06:36:27PM +0900, Alexander Bluhm wrote:
> > On Sat, May 10, 2025 at 10:28:56AM +1000, David Gwynne wrote:
> > > tl;dr: less pointer chasing, and more precise links between things
> > > representing connections in the kernel.
> >
> > regress/sys/net/pf_forward and regress/sys/net/pflog fail.
> >
> > I am not 100% sure that it is related to this diff. Due to other
> > bugs, my last successful regress run was at May 9th. Give me another
> > day, then I can say if these tests are broken in current already.
>
> I have tested i386 snapshots, tests pass. Apply the diff and they
> fail.
>
> http://bluhm.genua.de/regress/results/2025-05-22T13%3A51%3A46Z/logs/sys/net/pf_forward/make.log
>
> FAIL sys/net/pf_forward *** Error 1 in . (Makefile:178 'run-ping-inet-AF_IN'), *** Error 2 in . (Makefile:197 'run-ping-mtu-1400-inet-AF_IN'), *** Error 2 in . (Makefile:219 'run-ping-mtu-1300-inet-AF_IN'), *** Error 1 in . (Makefile:236 'run-udp-inet-AF_IN'), *** Error 1 in . (Makefile:251 'run-tcp-inet-AF_IN'), *** Error 1 in . (Makefile:269 'run-traceroute-icmp-inet-AF_IN'), *** Error 1 in . (Makefile:269 'run-traceroute-udp-inet-AF_IN'), *** Error 1 in . (Makefile:178 'run-ping-inet6-AF_IN'), *** Error 2 in . (Makefile:199 'run-ping-mtu-1400-inet6-AF_IN'), *** Error 2 in . (Makefile:221 'run-ping-mtu-1300-inet6-AF_IN'), *** Error 1 in . (Makefile:236 'run-udp-inet6-AF_IN'), *** Error 1 in . (Makefile:251 'run-tcp-inet6-AF_IN'), *** Error 1 in . (Makefile:269 'run-traceroute-icmp-inet6-AF_IN'), *** Error 1 in . (Makefile:269 'run-traceroute-udp-inet6-AF_IN')
>
> http://bluhm.genua.de/regress/results/2025-05-22T13%3A51%3A46Z/logs/sys/net/pflog/make.log
>
> FAIL sys/net/pflog *** Error 1 in . (Makefile:164 'run-ping-14'), *** Error 1 in . (Makefile:168 'run-ping6-14'), *** Error 1 in . (Makefile:199 'run-bpf-nothing')
thanks for running those tests, and sorry about the delay getting back
to this. i only got back to looking at this again today.
i believe i've straightened these issues out in the diff below.
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
diff -u -p -r1.296 uipc_mbuf.c
--- kern/uipc_mbuf.c 1 Jan 2025 13:44:22 -0000 1.296
+++ kern/uipc_mbuf.c 2 Jun 2025 09:46:53 -0000
@@ -301,7 +301,7 @@ m_clearhdr(struct mbuf *m)
/* delete all mbuf tags to reset the state */
m_tag_delete_chain(m);
#if NPF > 0
- pf_mbuf_unlink_state_key(m);
+ pf_mbuf_unlink_state(m);
pf_mbuf_unlink_inpcb(m);
#endif /* NPF > 0 */
@@ -429,7 +429,7 @@ m_free(struct mbuf *m)
if (m->m_flags & M_PKTHDR) {
m_tag_delete_chain(m);
#if NPF > 0
- pf_mbuf_unlink_state_key(m);
+ pf_mbuf_unlink_state(m);
pf_mbuf_unlink_inpcb(m);
#endif /* NPF > 0 */
}
@@ -1387,8 +1387,8 @@ m_dup_pkthdr(struct mbuf *to, struct mbu
to->m_pkthdr = from->m_pkthdr;
#if NPF > 0
- to->m_pkthdr.pf.statekey = NULL;
- pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
+ to->m_pkthdr.pf.st = NULL;
+ pf_mbuf_link_state(to, from->m_pkthdr.pf.st);
to->m_pkthdr.pf.inp = NULL;
pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
#endif /* NPF > 0 */
@@ -1517,8 +1517,8 @@ m_print(void *v,
m->m_pkthdr.csum_flags, MCS_BITS);
(*pr)("m_pkthdr.ether_vtag: %u\tm_ptkhdr.ph_rtableid: %u\n",
m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
- (*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
- m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
+ (*pr)("m_pkthdr.pf.st: %p\tm_pkthdr.pf.inp %p\n",
+ m->m_pkthdr.pf.st, m->m_pkthdr.pf.inp);
(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
(*pr)("m_pkthdr.pf.flags: %b\n",
Index: net/if_mpw.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpw.c,v
diff -u -p -r1.67 if_mpw.c
--- net/if_mpw.c 2 Mar 2025 21:28:32 -0000 1.67
+++ net/if_mpw.c 2 Jun 2025 09:46:54 -0000
@@ -612,7 +612,7 @@ mpw_input(struct mpw_softc *sc, struct m
m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
/* packet has not been processed by PF yet. */
- KASSERT(m->m_pkthdr.pf.statekey == NULL);
+ KASSERT(m->m_pkthdr.pf.st == NULL);
if_vinput(ifp, m, NULL);
return;
Index: net/if_tpmr.c
===================================================================
RCS file: /cvs/src/sys/net/if_tpmr.c,v
diff -u -p -r1.36 if_tpmr.c
--- net/if_tpmr.c 2 Mar 2025 21:28:32 -0000 1.36
+++ net/if_tpmr.c 2 Jun 2025 09:46:54 -0000
@@ -304,7 +304,7 @@ tpmr_pf(struct ifnet *ifp0, int dir, str
return (NULL);
if (dir == PF_IN && ISSET(m->m_pkthdr.pf.flags, PF_TAG_DIVERTED)) {
- pf_mbuf_unlink_state_key(m);
+ pf_mbuf_unlink_state(m);
pf_mbuf_unlink_inpcb(m);
(*fam->ip_input)(ifp0, m, ns);
return (NULL);
Index: net/if_veb.c
===================================================================
RCS file: /cvs/src/sys/net/if_veb.c,v
diff -u -p -r1.37 if_veb.c
--- net/if_veb.c 2 Mar 2025 21:28:32 -0000 1.37
+++ net/if_veb.c 2 Jun 2025 09:46:54 -0000
@@ -654,7 +654,7 @@ veb_pf(struct ifnet *ifp0, int dir, stru
return (NULL);
if (dir == PF_IN && ISSET(m->m_pkthdr.pf.flags, PF_TAG_DIVERTED)) {
- pf_mbuf_unlink_state_key(m);
+ pf_mbuf_unlink_state(m);
pf_mbuf_unlink_inpcb(m);
(*fam->ip_input)(ifp0, m, ns);
return (NULL);
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
diff -u -p -r1.1212 pf.c
--- net/pf.c 30 May 2025 13:08:07 -0000 1.1212
+++ net/pf.c 2 Jun 2025 09:46:54 -0000
@@ -247,15 +247,16 @@ int pf_state_insert(struct pfi_kif *,
struct pf_state_key **, struct pf_state_key **,
struct pf_state *);
+int pf_state_isvalid(struct pf_state *);
int pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key *pf_state_key_ref(struct pf_state_key *);
void pf_state_key_unref(struct pf_state_key *);
-void pf_state_key_link_reverse(struct pf_state_key *,
- struct pf_state_key *);
-void pf_state_key_unlink_reverse(struct pf_state_key *);
-void pf_state_key_link_inpcb(struct pf_state_key *,
+void pf_state_link_reverse(struct pf_state *,
+ struct pf_state *);
+void pf_state_unlink_reverse(struct pf_state *);
+void pf_state_link_inpcb(struct pf_state *,
struct inpcb *);
-void pf_state_key_unlink_inpcb(struct pf_state_key *);
+void pf_state_unlink_inpcb(struct pf_state *);
void pf_pktenqueue_delayed(void *);
int32_t pf_state_expires(const struct pf_state *, uint8_t);
@@ -860,8 +861,6 @@ pf_state_key_detach(struct pf_state *st,
if (TAILQ_EMPTY(&sk->sk_states)) {
RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
sk->sk_removed = 1;
- pf_state_key_unlink_reverse(sk);
- pf_state_key_unlink_inpcb(sk);
pf_state_key_unref(sk);
}
@@ -1123,13 +1122,44 @@ pf_compare_state_keys(struct pf_state_ke
}
}
+static inline struct pf_state *
+pf_find_state_lookup(struct pf_pdesc *pd, const struct pf_state_key_cmp *key)
+{
+ struct pf_state_key *sk;
+ struct pf_state_item *si;
+ struct pf_state *st;
+
+ sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
+ if (sk == NULL)
+ return (NULL);
+
+ /* list is sorted, if-bound states before floating ones */
+ TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
+ st = si->si_st;
+ if (st->timeout == PFTM_PURGE)
+ continue;
+ if (st->kif != pfi_all && st->kif != pd->kif)
+ continue;
+
+ if (((st->key[PF_SK_WIRE]->af == st->key[PF_SK_STACK]->af &&
+ sk == (pd->dir == PF_IN ? st->key[PF_SK_WIRE] :
+ st->key[PF_SK_STACK])) ||
+ (st->key[PF_SK_WIRE]->af != st->key[PF_SK_STACK]->af
+ && pd->dir == PF_IN && (sk == st->key[PF_SK_STACK] ||
+ sk == st->key[PF_SK_WIRE]))))
+ return (st);
+ }
+
+ return (NULL);
+}
+
int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
struct pf_state **stp)
{
- struct pf_state_key *sk, *pkt_sk;
- struct pf_state_item *si;
struct pf_state *st = NULL;
+ struct pf_state *strev = NULL;
+ struct inpcb *inp = NULL;
pf_status.fcounters[FCNT_STATE_SEARCH]++;
if (pf_status.debug >= LOG_DEBUG) {
@@ -1139,82 +1169,63 @@ pf_find_state(struct pf_pdesc *pd, struc
addlog("\n");
}
- pkt_sk = NULL;
- sk = NULL;
if (pd->dir == PF_OUT) {
- /* first if block deals with outbound forwarded packet */
- pkt_sk = pd->m->m_pkthdr.pf.statekey;
+ strev = pd->m->m_pkthdr.pf.st;
+ inp = pd->m->m_pkthdr.pf.inp;
- if (!pf_state_key_isvalid(pkt_sk)) {
- pf_mbuf_unlink_state_key(pd->m);
- pkt_sk = NULL;
- }
+ /* first if block deals with outbound forwarded packet */
+ if (strev != NULL) {
+ pd->m->m_pkthdr.pf.st = NULL;
+ KASSERT(inp == NULL);
- if (pkt_sk && pf_state_key_isvalid(pkt_sk->sk_reverse))
- sk = pkt_sk->sk_reverse;
+ if (pf_state_isvalid(strev)) {
+ st = strev->reverse;
+ if (st != NULL && pf_state_isvalid(st))
+ goto match;
+ }
- if (pkt_sk == NULL) {
- struct inpcb *inp = pd->m->m_pkthdr.pf.inp;
+ /* this handles st not being valid too */
+ pf_state_unlink_reverse(strev);
+ } else if (inp != NULL && READ_ONCE(inp->inp_pf_st) != NULL) {
/* here we deal with local outbound packet */
- if (inp != NULL) {
- struct pf_state_key *inp_sk;
-
- mtx_enter(&pf_inp_mtx);
- inp_sk = inp->inp_pf_sk;
- if (pf_state_key_isvalid(inp_sk)) {
- sk = inp_sk;
+ mtx_enter(&pf_inp_mtx);
+ st = inp->inp_pf_st;
+ if (st != NULL) {
+ if (pf_state_isvalid(st)) {
mtx_leave(&pf_inp_mtx);
- } else if (inp_sk != NULL) {
- KASSERT(inp_sk->sk_inp == inp);
- inp_sk->sk_inp = NULL;
- inp->inp_pf_sk = NULL;
+ goto match;
+ } else {
+ KASSERT(st->inp == inp);
+ st->inp = NULL;
+ inp->inp_pf_st = NULL;
mtx_leave(&pf_inp_mtx);
- pf_state_key_unref(inp_sk);
+ pf_state_unref(st);
in_pcbunref(inp);
- } else
- mtx_leave(&pf_inp_mtx);
- }
+ }
+ } else
+ mtx_leave(&pf_inp_mtx);
}
- }
-
- if (sk == NULL) {
- if ((sk = RBT_FIND(pf_state_tree, &pf_statetbl,
- (struct pf_state_key *)key)) == NULL)
- return (PF_DROP);
- if (pd->dir == PF_OUT && pkt_sk &&
- pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
- pf_state_key_link_reverse(sk, pkt_sk);
- else if (pd->dir == PF_OUT)
- pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
- }
- /* remove firewall data from outbound packet */
- if (pd->dir == PF_OUT)
+ /* remove firewall data from outbound packet */
pf_pkt_addr_changed(pd->m);
-
- /* list is sorted, if-bound states before floating ones */
- TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
- struct pf_state *sist = si->si_st;
- if (sist->timeout != PFTM_PURGE &&
- (sist->kif == pfi_all || sist->kif == pd->kif) &&
- ((sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af &&
- sk == (pd->dir == PF_IN ? sist->key[PF_SK_WIRE] :
- sist->key[PF_SK_STACK])) ||
- (sist->key[PF_SK_WIRE]->af != sist->key[PF_SK_STACK]->af
- && pd->dir == PF_IN && (sk == sist->key[PF_SK_STACK] ||
- sk == sist->key[PF_SK_WIRE])))) {
- st = sist;
- break;
- }
}
+ st = pf_find_state_lookup(pd, key);
if (st == NULL)
return (PF_DROP);
if (ISSET(st->state_flags, PFSTATE_INP_UNLINKED))
return (PF_DROP);
+ if (pd->dir == PF_OUT) {
+ if (strev != NULL)
+ pf_state_link_reverse(st, strev);
+ else if (inp != NULL)
+ pf_state_link_inpcb(st, inp);
+ }
+
+match:
if (st->rule.ptr->pktrate.limit && pd->dir == st->direction) {
pf_add_threshold(&st->rule.ptr->pktrate);
if (pf_check_threshold(&st->rule.ptr->pktrate))
@@ -1782,6 +1793,9 @@ pf_remove_state(struct pf_state *st)
st->timeout = PFTM_UNLINKED;
mtx_leave(&st->mtx);
+ pf_state_unlink_reverse(st);
+ pf_state_unlink_inpcb(st);
+
/* handle load balancing related tasks */
pf_postprocess_addr(st);
@@ -1813,52 +1827,47 @@ pf_remove_state(struct pf_state *st)
void
pf_remove_divert_state(struct inpcb *inp)
{
- struct pf_state_key *sk;
- struct pf_state_item *si;
+ struct pf_state *st;
PF_ASSERT_UNLOCKED();
- if (READ_ONCE(inp->inp_pf_sk) == NULL)
+ if (READ_ONCE(inp->inp_pf_st) == NULL)
return;
mtx_enter(&pf_inp_mtx);
- sk = pf_state_key_ref(inp->inp_pf_sk);
+ st = pf_state_ref(inp->inp_pf_st);
mtx_leave(&pf_inp_mtx);
- if (sk == NULL)
+ if (st == NULL)
return;
PF_LOCK();
PF_STATE_ENTER_WRITE();
- TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
- struct pf_state *sist = si->si_st;
- if (sk == sist->key[PF_SK_STACK] && sist->rule.ptr &&
- (sist->rule.ptr->divert.type == PF_DIVERT_TO ||
- sist->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
- if (sist->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
- sist->key[PF_SK_WIRE] != sist->key[PF_SK_STACK]) {
- /*
- * If the local address is translated, keep
- * the state for "tcp.closed" seconds to
- * prevent its source port from being reused.
- */
- if (sist->src.state < TCPS_FIN_WAIT_2 ||
- sist->dst.state < TCPS_FIN_WAIT_2) {
- pf_set_protostate(sist, PF_PEER_BOTH,
- TCPS_TIME_WAIT);
- pf_update_state_timeout(sist,
- PFTM_TCP_CLOSED);
- sist->expire = getuptime();
- }
- sist->state_flags |= PFSTATE_INP_UNLINKED;
- } else
- pf_remove_state(sist);
- break;
- }
+ if (st->rule.ptr &&
+ (st->rule.ptr->divert.type == PF_DIVERT_TO ||
+ st->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
+ if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
+ st->key[PF_SK_WIRE] != st->key[PF_SK_STACK]) {
+ /*
+ * If the local address is translated, keep
+ * the state for "tcp.closed" seconds to
+ * prevent its source port from being reused.
+ */
+ if (st->src.state < TCPS_FIN_WAIT_2 ||
+ st->dst.state < TCPS_FIN_WAIT_2) {
+ pf_set_protostate(st, PF_PEER_BOTH,
+ TCPS_TIME_WAIT);
+ pf_update_state_timeout(st,
+ PFTM_TCP_CLOSED);
+ st->expire = getuptime();
+ }
+ st->state_flags |= PFSTATE_INP_UNLINKED;
+ } else
+ pf_remove_state(st);
}
PF_STATE_EXIT_WRITE();
PF_UNLOCK();
- pf_state_key_unref(sk);
+ pf_state_unref(st);
}
void
@@ -7920,9 +7929,9 @@ done:
if (pd.dir == PF_IN) {
KASSERT(inp == NULL);
- pf_mbuf_link_state_key(m, st->key[PF_SK_STACK]);
+ pf_mbuf_link_state(m, st);
} else if (pd.dir == PF_OUT)
- pf_state_key_link_inpcb(st->key[PF_SK_STACK], inp);
+ pf_state_link_inpcb(st, inp);
if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
m->m_pkthdr.ph_flowid = st->key[PF_SK_WIRE]->hash;
@@ -8104,14 +8113,14 @@ done:
int
pf_ouraddr(struct mbuf *m)
{
- struct pf_state_key *sk;
+ struct pf_state *st;
if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
return (1);
- sk = m->m_pkthdr.pf.statekey;
- if (sk != NULL) {
- if (READ_ONCE(sk->sk_inp) != NULL)
+ st = m->m_pkthdr.pf.st;
+ if (st != NULL) {
+ if (READ_ONCE(st->inp) != NULL)
return (1);
}
@@ -8125,7 +8134,7 @@ pf_ouraddr(struct mbuf *m)
void
pf_pkt_addr_changed(struct mbuf *m)
{
- pf_mbuf_unlink_state_key(m);
+ pf_mbuf_unlink_state(m);
pf_mbuf_unlink_inpcb(m);
}
@@ -8133,85 +8142,97 @@ struct inpcb *
pf_inp_lookup(struct mbuf *m)
{
struct inpcb *inp = NULL;
- struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
+ struct pf_state *st;
- if (!pf_state_key_isvalid(sk))
- pf_mbuf_unlink_state_key(m);
- else if (READ_ONCE(sk->sk_inp) != NULL) {
+ st = m->m_pkthdr.pf.st;
+ if (st == NULL)
+ return (NULL);
+ if (!pf_state_isvalid(st)) {
+ pf_mbuf_unlink_state(m);
+ return (NULL);
+ }
+
+ if (READ_ONCE(st->inp) != NULL) {
mtx_enter(&pf_inp_mtx);
- inp = in_pcbref(sk->sk_inp);
+ inp = in_pcbref(st->inp);
mtx_leave(&pf_inp_mtx);
}
return (inp);
}
+/*
+ * This is called from the IP stack after it's found an inpcb for
+ * an mbuf so it can link the pf_state to that pcb.
+ */
void
pf_inp_link(struct mbuf *m, struct inpcb *inp)
{
- struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
+ struct pf_state *st;
- if (!pf_state_key_isvalid(sk)) {
- pf_mbuf_unlink_state_key(m);
+ st = m->m_pkthdr.pf.st;
+ if (st == NULL)
return;
- }
/*
* we don't need to grab PF-lock here. At worst case we link inp to
* state, which might be just being marked as deleted by another
* thread.
*/
- pf_state_key_link_inpcb(sk, inp);
+ if (pf_state_isvalid(st)) {
+ if (READ_ONCE(st->inp) == NULL)
+ pf_state_link_inpcb(st, inp);
+ }
/* The statekey has finished finding the inp, it is no longer needed. */
- pf_mbuf_unlink_state_key(m);
+ pf_mbuf_unlink_state(m);
}
void
pf_inp_unlink(struct inpcb *inp)
{
- struct pf_state_key *sk;
+ struct pf_state *st;
- if (READ_ONCE(inp->inp_pf_sk) == NULL)
+ if (READ_ONCE(inp->inp_pf_st) == NULL)
return;
mtx_enter(&pf_inp_mtx);
- sk = inp->inp_pf_sk;
- if (sk == NULL) {
+ st = inp->inp_pf_st;
+ if (st == NULL) {
mtx_leave(&pf_inp_mtx);
return;
}
- KASSERT(sk->sk_inp == inp);
- sk->sk_inp = NULL;
- inp->inp_pf_sk = NULL;
+ KASSERT(st->inp == inp);
+ st->inp = NULL;
+ inp->inp_pf_st = NULL;
mtx_leave(&pf_inp_mtx);
- pf_state_key_unref(sk);
+ pf_state_unref(st);
in_pcbunref(inp);
}
void
-pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
+pf_state_link_reverse(struct pf_state *st, struct pf_state *strev)
{
- struct pf_state_key *old_reverse;
+ struct pf_state *ost;
- old_reverse = atomic_cas_ptr(&sk->sk_reverse, NULL, skrev);
- if (old_reverse != NULL)
- KASSERT(old_reverse == skrev);
+ ost = atomic_cas_ptr(&st->reverse, NULL, strev);
+ if (ost != NULL)
+ KASSERT(ost == strev);
else {
- pf_state_key_ref(skrev);
+ pf_state_ref(strev);
/*
- * NOTE: if sk == skrev, then KASSERT() below holds true, we
+ * NOTE: if st == strev, then KASSERT() below holds true, we
* still want to grab a reference in such case, because
- * pf_state_key_unlink_reverse() does not check whether keys
+ * pf_state_unlink_reverse() does not check whether states
* are identical or not.
*/
- old_reverse = atomic_cas_ptr(&skrev->sk_reverse, NULL, sk);
- if (old_reverse != NULL)
- KASSERT(old_reverse == sk);
+ ost = atomic_cas_ptr(&strev->reverse, NULL, st);
+ if (ost != NULL)
+ KASSERT(ost == st);
- pf_state_key_ref(sk);
+ pf_state_ref(st);
}
}
@@ -8247,10 +8268,6 @@ pf_state_key_unref(struct pf_state_key *
if (PF_REF_RELE(sk->sk_refcnt)) {
/* state key must be removed from tree */
KASSERT(!pf_state_key_isvalid(sk));
- /* state key must be unlinked from reverse key */
- KASSERT(sk->sk_reverse == NULL);
- /* state key must be unlinked from socket */
- KASSERT(sk->sk_inp == NULL);
pool_put(&pf_state_key_pl, sk);
}
}
@@ -8261,21 +8278,28 @@ pf_state_key_isvalid(struct pf_state_key
return ((sk != NULL) && (sk->sk_removed == 0));
}
+int
+pf_state_isvalid(struct pf_state *st)
+{
+ return (st->timeout < PFTM_MAX);
+}
+
void
-pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk)
+pf_mbuf_link_state(struct mbuf *m, struct pf_state *st)
{
- KASSERT(m->m_pkthdr.pf.statekey == NULL);
- m->m_pkthdr.pf.statekey = pf_state_key_ref(sk);
+ KASSERT(m->m_pkthdr.pf.st == NULL);
+ m->m_pkthdr.pf.st = pf_state_ref(st);
}
void
-pf_mbuf_unlink_state_key(struct mbuf *m)
+pf_mbuf_unlink_state(struct mbuf *m)
{
- struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
+ struct pf_state *st;
- if (sk != NULL) {
- m->m_pkthdr.pf.statekey = NULL;
- pf_state_key_unref(sk);
+ st = m->m_pkthdr.pf.st;
+ if (st != NULL) {
+ m->m_pkthdr.pf.st = NULL;
+ pf_state_unref(st);
}
}
@@ -8298,57 +8322,58 @@ pf_mbuf_unlink_inpcb(struct mbuf *m)
}
void
-pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp)
+pf_state_link_inpcb(struct pf_state *st, struct inpcb *inp)
{
- if (inp == NULL || READ_ONCE(sk->sk_inp) != NULL)
+ if (inp == NULL || READ_ONCE(st->inp) != NULL)
return;
mtx_enter(&pf_inp_mtx);
- if (inp->inp_pf_sk != NULL || sk->sk_inp != NULL) {
+ if (inp->inp_pf_st != NULL || st->inp != NULL) {
mtx_leave(&pf_inp_mtx);
return;
}
- sk->sk_inp = in_pcbref(inp);
- inp->inp_pf_sk = pf_state_key_ref(sk);
+ st->inp = in_pcbref(inp);
+ inp->inp_pf_st = pf_state_ref(st);
mtx_leave(&pf_inp_mtx);
}
void
-pf_state_key_unlink_inpcb(struct pf_state_key *sk)
+pf_state_unlink_inpcb(struct pf_state *st)
{
struct inpcb *inp;
- if (READ_ONCE(sk->sk_inp) == NULL)
+ if (READ_ONCE(st->inp) == NULL)
return;
mtx_enter(&pf_inp_mtx);
- inp = sk->sk_inp;
+ inp = st->inp;
if (inp == NULL) {
mtx_leave(&pf_inp_mtx);
return;
}
- KASSERT(inp->inp_pf_sk == sk);
- sk->sk_inp = NULL;
- inp->inp_pf_sk = NULL;
+ KASSERT(inp->inp_pf_st == st);
+ st->inp = NULL;
+ inp->inp_pf_st = NULL;
mtx_leave(&pf_inp_mtx);
- pf_state_key_unref(sk);
+ pf_state_unref(st);
in_pcbunref(inp);
}
void
-pf_state_key_unlink_reverse(struct pf_state_key *sk)
+pf_state_unlink_reverse(struct pf_state *st)
{
- struct pf_state_key *skrev = sk->sk_reverse;
-
- /* Note that sk and skrev may be equal, then we unref twice. */
- if (skrev != NULL) {
- KASSERT(skrev->sk_reverse == sk);
- sk->sk_reverse = NULL;
- skrev->sk_reverse = NULL;
- pf_state_key_unref(skrev);
- pf_state_key_unref(sk);
- }
+ struct pf_state *strev;
+
+ /* Note that st and strev may be equal, then we unref twice. */
+ strev = st->reverse;
+ if (strev != NULL) {
+ KASSERT(strev->reverse == st);
+ st->reverse = NULL;
+ strev->reverse = NULL;
+ pf_state_unref(strev);
+ pf_state_unref(st);
+ }
}
struct pf_state *
@@ -8374,6 +8399,11 @@ pf_state_unref(struct pf_state *st)
pf_state_key_unref(st->key[PF_SK_WIRE]);
pf_state_key_unref(st->key[PF_SK_STACK]);
+
+ /* state must be unlinked from reverse */
+ KASSERT(st->reverse == NULL);
+ /* state must be unlinked from socket */
+ KASSERT(st->inp == NULL);
pool_put(&pf_state_pl, st);
}
Index: net/pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
diff -u -p -r1.543 pfvar.h
--- net/pfvar.h 14 Apr 2025 20:02:34 -0000 1.543
+++ net/pfvar.h 2 Jun 2025 09:46:54 -0000
@@ -1849,9 +1849,8 @@ int pf_map_addr(sa_family_t, struct p
struct pf_pool *, enum pf_sn_types);
int pf_postprocess_addr(struct pf_state *);
-void pf_mbuf_link_state_key(struct mbuf *,
- struct pf_state_key *);
-void pf_mbuf_unlink_state_key(struct mbuf *);
+void pf_mbuf_link_state(struct mbuf *, struct pf_state *);
+void pf_mbuf_unlink_state(struct mbuf *);
void pf_mbuf_link_inpcb(struct mbuf *, struct inpcb *);
void pf_mbuf_unlink_inpcb(struct mbuf *);
Index: net/pfvar_priv.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar_priv.h,v
diff -u -p -r1.38 pfvar_priv.h
--- net/pfvar_priv.h 7 Sep 2024 22:41:55 -0000 1.38
+++ net/pfvar_priv.h 2 Jun 2025 09:46:54 -0000
@@ -41,11 +41,6 @@
#include <sys/mutex.h>
#include <sys/percpu.h>
-/*
- * Locks used to protect struct members in this file:
- * L pf_inp_mtx link pf to inp mutex
- */
-
struct pfsync_deferral;
/*
@@ -74,8 +69,6 @@ struct pf_state_key {
RBT_ENTRY(pf_state_key) sk_entry;
struct pf_statelisthead sk_states;
- struct pf_state_key *sk_reverse;
- struct inpcb *sk_inp; /* [L] */
pf_refcnt_t sk_refcnt;
u_int8_t sk_removed;
};
@@ -95,7 +88,8 @@ RBT_PROTOTYPE(pf_state_tree, pf_state_ke
* M pf_state mtx
* P PF_STATE_LOCK
* S pfsync
- * L pf_state_list
+ * L pf_inp_mtx link pf to inp mutex
+ * G pf_state_list
* g pf_purge gc
*/
@@ -107,7 +101,7 @@ struct pf_state {
TAILQ_ENTRY(pf_state) sync_list; /* [S] */
struct pfsync_deferral *sync_defer; /* [S] */
- TAILQ_ENTRY(pf_state) entry_list; /* [L] */
+ TAILQ_ENTRY(pf_state) entry_list; /* [G] */
SLIST_ENTRY(pf_state) gc_list; /* [g] */
RBT_ENTRY(pf_state) entry_id; /* [P] */
struct pf_state_peer src;
@@ -120,6 +114,8 @@ struct pf_state {
struct pf_sn_head src_nodes; /* [I] */
struct pf_state_key *key[2]; /* [I] stack and wire */
struct pfi_kif *kif; /* [I] */
+ struct pf_state *reverse; /* [M] */
+ struct inpcb *inp; /* [L] */
struct mutex mtx;
pf_refcnt_t refcnt;
u_int64_t packets[2];
Index: netinet/in_pcb.h
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.168 in_pcb.h
--- netinet/in_pcb.h 20 May 2025 05:51:43 -0000 1.168
+++ netinet/in_pcb.h 2 Jun 2025 09:46:54 -0000
@@ -110,7 +110,7 @@
* Protocol input only reads inp_[lf]addr/port during lookup and is safe.
*/
-struct pf_state_key;
+struct pf_state;
union inpaddru {
struct in_addr iau_addr;
@@ -168,7 +168,7 @@ struct inpcb {
int inp_cksum6;
struct icmp6_filter *inp_icmp6filt;
- struct pf_state_key *inp_pf_sk; /* [L] */
+ struct pf_state *inp_pf_st; /* [L] */
struct mbuf *(*inp_upcall)(void *, struct mbuf *,
struct ip *, struct ip6_hdr *, void *, int, struct netstack *);
void *inp_upcall_arg;
Index: sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
diff -u -p -r1.265 mbuf.h
--- sys/mbuf.h 5 Nov 2024 13:15:13 -0000 1.265
+++ sys/mbuf.h 2 Jun 2025 09:46:54 -0000
@@ -90,11 +90,11 @@ struct m_hdr {
};
/* pf stuff */
-struct pf_state_key;
+struct pf_state;
struct inpcb;
struct pkthdr_pf {
- struct pf_state_key *statekey; /* pf stackside statekey */
+ struct pf_state *st; /* pf state */
struct inpcb *inp; /* connected pcb for outgoing packet */
u_int32_t qid; /* queue id */
u_int16_t tag; /* tag id */
@@ -325,7 +325,7 @@ u_int mextfree_register(void (*)(caddr_t
(to)->m_pkthdr = (from)->m_pkthdr; \
(from)->m_flags &= ~M_PKTHDR; \
SLIST_INIT(&(from)->m_pkthdr.ph_tags); \
- (from)->m_pkthdr.pf.statekey = NULL; \
+ (from)->m_pkthdr.pf.st = NULL; \
} while (/* CONSTCOND */ 0)
/*
link mbufs/inpcbs to pf_states, not pf_state_keys