From: David Gwynne Subject: Re: link mbufs/inpcbs to pf_states, not pf_state_keys To: Alexandr Nedvedicky Cc: tech@openbsd.org Date: Thu, 29 Jan 2026 06:40:01 +1000 On Wed, Jan 28, 2026 at 04:13:05PM +0100, Alexandr Nedvedicky wrote: > Hello, > > sorry to keep you waiting. I guess this is the fix for leak you've > talked about off-list, correct? yes, this was the most recent one. > > the change looks good. the code reads much butter now. I just could > spot one single nit, see comment in-line. sure. > > thanks and > regards > sashan > > On Fri, Jan 16, 2026 at 07:26:52AM +1000, David Gwynne wrote: > > > Index: net/pf.c > > =================================================================== > > RCS file: /cvs/src/sys/net/pf.c,v > > diff -u -p -r1.1230 pf.c > > --- net/pf.c 7 Jan 2026 13:50:05 -0000 1.1230 > > +++ net/pf.c 15 Jan 2026 05:34:55 -0000 > > @@ -247,15 +247,16 @@ int pf_state_insert(struct pfi_kif *, > > struct pf_state_key **, struct pf_state_key **, > > struct pf_state *); > > > > +int pf_state_isvalid(struct pf_state *); > > int pf_state_key_isvalid(struct pf_state_key *); > > struct pf_state_key *pf_state_key_ref(struct pf_state_key *); > > void pf_state_key_unref(struct pf_state_key *); > > -void pf_state_key_link_reverse(struct pf_state_key *, > > - struct pf_state_key *); > > -void pf_state_key_unlink_reverse(struct pf_state_key *); > > -void pf_state_key_link_inpcb(struct pf_state_key *, > > +void pf_state_link_reverse(struct pf_state *, > > + struct pf_state *); > > +void pf_state_unlink_reverse(struct pf_state *); > > +void pf_state_link_inpcb(struct pf_state *, > > struct inpcb *); > > -void pf_state_key_unlink_inpcb(struct pf_state_key *); > > +void pf_state_unlink_inpcb(struct pf_state *); > > void pf_pktenqueue_delayed(void *); > > int32_t pf_state_expires(const struct pf_state *, uint8_t); > > > > @@ -1134,8 +1135,6 @@ pf_state_key_detach(struct pf_state *st, > > if (TAILQ_EMPTY(&sk->sk_states)) { > > RBT_REMOVE(pf_state_tree, &pf_statetbl, sk); > > sk->sk_removed = 1; > > - pf_state_key_unlink_reverse(sk); > > - pf_state_key_unlink_inpcb(sk); > > pf_state_key_unref(sk); > > } > > > > @@ -1316,14 +1315,24 @@ pf_state_insert(struct pfi_kif *kif, str > > > > if (same) { > > /* pf_state_key_attach might have swapped skw */ > > - pf_state_key_unref(sks); > > - st->key[PF_SK_STACK] = sks = pf_state_key_ref(skw); > > + if (skw != sks) { > ^^^^^ > the condition here looks odd. I think it can not be met, when we are > running here. see few lines above where you will find: > 'same = (skw == sks)' > I would either drop the condition or add KASSERT(sks == skw) here, > just in case we refactor code in future. this chunk makes more sense if you can see more context. 1296 int 1297 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp, 1298 struct pf_state_key **sksp, struct pf_state *st) 1299 { 1300 struct pf_state_key *skw = *skwp; 1301 struct pf_state_key *sks = *sksp; 1302 int same = (skw == sks); 1303 1304 PF_ASSERT_LOCKED(); 1305 1306 st->kif = kif; 1307 PF_STATE_ENTER_WRITE(); 1308 1309 skw = pf_state_key_attach(skw, st, PF_SK_WIRE); 1310 if (skw == NULL) { 1311 pf_state_key_unref(sks); 1312 PF_STATE_EXIT_WRITE(); 1313 return (-1); 1314 } 1315 1316 if (same) { 1317 /* pf_state_key_attach might have swapped skw */ 1318 if (skw != sks) { 1319 pf_state_key_unref(sks); 1320 sks = pf_state_key_ref(skw); 1321 } 1322 st->key[PF_SK_STACK] = sks; 1323 } else if (pf_state_key_attach(sks, st, PF_SK_STACK) == NULL) { 1324 pf_state_key_detach(st, PF_SK_WIRE); 1325 pf_state_key_unref(skw); 1326 PF_STATE_EXIT_WRITE(); 1327 return (-1); 1328 } when creating and inserting a state, the code assumes that it has to allocate the pf_state_key structs for that new state. if there's no nat, the stack and wire addresses are identical, which is represented by using the same pf_state_key struct for both. this is detected on line 1302. however, the same addresses can be used by multiple states. pf_state_key_attach() on line 1309 attempts to insert the newly created skw pf_state_key into the tree, but if there's a collision with an equivalent pf_state_key it will replace the new one with the existing one. the chunk starting at 1316 basically assumes that pf_state_key_attach() did replace skw, so it replaces sks to restore the semantic that the same state key pointers for sks and skw represents the no NAT situation. my change here actually checks if skw was replaced and avoids unecessary unref and ref operations if it can. i'm happy to omit this chunk from the bigger diff and commit it separately though. it works (with or) without the rest of the changes in this diff, so it can and probably should be done independently. > > > + pf_state_key_unref(sks); > > + sks = pf_state_key_ref(skw); > > + } > > + st->key[PF_SK_STACK] = sks; > > } else if (pf_state_key_attach(sks, st, PF_SK_STACK) == NULL) { > > pf_state_key_detach(st, PF_SK_WIRE); > > + pf_state_key_unref(skw); > > PF_STATE_EXIT_WRITE(); > > return (-1); > > } > > > > + /* > > + * st owns the sks and skw refs now. pf_state_unref (maybe > > + * via pf_detach_state) is responsible for releasing them in > > + * the future. > > + */ > > +