From: David Gwynne Subject: get rid of the SRP code in src/sys/net/pfkeyv2.c To: tech@openbsd.org Date: Wed, 20 Aug 2025 14:17:30 +1000 this is like the rtsock change i just put in the tree in that it gets rid of an SRPL, but it works a bit differently. this code uses an SPRL to manage the list of pfkey sockets that route messages can be "recved" on. the obvious replacement would be an SMR list, but while you shouldn't sleep with an SRP ref, we do here and you can't do that inside an SMR read critical section. to avoid the sleep while in the SMR crit section, i'd have to add refcnt operations, which means a bunch of atomic ops. alternatively, we could do what i did with the ethernet sockets and use an rwlock to protect the list of sockets, and take a read lock while traversing the list. this reduces the overall number of atomic ops and keeps the code in the recv loop much the same. unlike the rtsock code, the pfkey sockets don't use solock to serialise the recv code, they use the rcvbuf mutexes. this means we don't have the same lock ordering problems that we had in rtsock, which makes this a much more straightforward converson. ok? tests? Index: pfkeyv2.c =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2.c,v diff -u -p -r1.270 pfkeyv2.c --- pfkeyv2.c 7 Jul 2025 02:28:50 -0000 1.270 +++ pfkeyv2.c 8 Aug 2025 05:23:29 -0000 @@ -135,8 +135,7 @@ const struct domain pfkeydomain; struct pkpcb { struct socket *kcb_socket; /* [I] associated socket */ - SRPL_ENTRY(pkpcb) kcb_list; /* [l] */ - struct refcnt kcb_refcnt; /* [a] */ + TAILQ_ENTRY(pkpcb) kcb_list; /* [l] */ int kcb_flags; /* [s] */ uint32_t kcb_reg; /* [s] Inc if SATYPE_MAX > 31 */ uint32_t kcb_pid; /* [I] */ @@ -153,8 +152,7 @@ struct dump_state { }; struct pkptable { - SRPL_HEAD(, pkpcb) pkp_list; - struct srpl_rc pkp_rc; + TAILQ_HEAD(, pkpcb) pkp_list; struct rwlock pkp_lk; }; @@ -229,28 +227,11 @@ const struct domain pfkeydomain = { }; void -keycb_ref(void *null, void *v) -{ - struct pkpcb *kp = v; - - refcnt_take(&kp->kcb_refcnt); -} - -void -keycb_unref(void *null, void *v) -{ - struct pkpcb *kp = v; - - refcnt_rele_wake(&kp->kcb_refcnt); -} - -void pfkey_init(void) { rn_init(sizeof(struct sockaddr_encap)); - srpl_rc_init(&pkptable.pkp_rc, keycb_ref, keycb_unref, NULL); rw_init(&pkptable.pkp_lk, "pfkey"); - SRPL_INIT(&pkptable.pkp_list); + TAILQ_INIT(&pkptable.pkp_list); pool_init(&pkpcb_pool, sizeof(struct pkpcb), 0, IPL_SOFTNET, PR_WAITOK, "pkpcb", NULL); pool_init(&ipsec_policy_pool, sizeof(struct ipsec_policy), 0, @@ -281,7 +262,6 @@ pfkeyv2_attach(struct socket *so, int pr if (kp == NULL) return (ENOBUFS); so->so_pcb = kp; - refcnt_init(&kp->kcb_refcnt); kp->kcb_socket = so; kp->kcb_pid = curproc->p_p->ps_pid; kp->kcb_rdomain = rtable_l2(curproc->p_p->ps_rtableid); @@ -290,7 +270,7 @@ pfkeyv2_attach(struct socket *so, int pr soisconnected(so); rw_enter(&pkptable.pkp_lk, RW_WRITE); - SRPL_INSERT_HEAD_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, kcb_list); + TAILQ_INSERT_TAIL(&pkptable.pkp_list, kp, kcb_list); rw_exit(&pkptable.pkp_lk); return (0); @@ -322,15 +302,9 @@ pfkeyv2_detach(struct socket *so) } rw_enter(&pkptable.pkp_lk, RW_WRITE); - SRPL_REMOVE_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, pkpcb, - kcb_list); + TAILQ_REMOVE(&pkptable.pkp_list, kp, kcb_list); rw_exit(&pkptable.pkp_lk); - sounlock(so); - /* wait for all references to drop */ - refcnt_finalize(&kp->kcb_refcnt, "pfkeyrefs"); - solock(so); - so->so_pcb = NULL; KASSERT((so->so_state & SS_NOFDREF) == 0); pool_put(&pkpcb_pool, kp); @@ -475,7 +449,6 @@ pfkeyv2_sendmessage(void **headers, int struct mbuf *packet; struct pkpcb *kp; struct sadb_msg *smsg; - struct srp_ref sr; /* Find out how much space we'll need... */ j = sizeof(struct sadb_msg); @@ -538,14 +511,15 @@ pfkeyv2_sendmessage(void **headers, int * Search for promiscuous listeners, skipping the * original destination. */ - SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) { + rw_enter_read(&pkptable.pkp_lk); + TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) { if (kp->kcb_socket == so || kp->kcb_rdomain != rdomain) continue; if (kp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC) pfkey_sendup(kp, packet, 1); } - SRPL_LEAVE(&sr); + rw_exit_read(&pkptable.pkp_lk); m_freem(packet); break; @@ -554,7 +528,8 @@ pfkeyv2_sendmessage(void **headers, int * Send the message to all registered sockets that match * the specified satype (e.g., all IPSEC-ESP negotiators) */ - SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) { + rw_enter_read(&pkptable.pkp_lk); + TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) { if (kp->kcb_rdomain != rdomain) continue; @@ -571,7 +546,7 @@ pfkeyv2_sendmessage(void **headers, int } } } - SRPL_LEAVE(&sr); + rw_exit_read(&pkptable.pkp_lk); /* Free last/original copy of the packet */ m_freem(packet); @@ -590,7 +565,8 @@ pfkeyv2_sendmessage(void **headers, int goto ret; /* Send to all registered promiscuous listeners */ - SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) { + rw_enter_read(&pkptable.pkp_lk); + TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) { int flags = READ_ONCE(kp->kcb_flags); if (kp->kcb_rdomain != rdomain) @@ -600,19 +576,20 @@ pfkeyv2_sendmessage(void **headers, int !(flags & PFKEYV2_SOCKETFLAGS_REGISTERED)) pfkey_sendup(kp, packet, 1); } - SRPL_LEAVE(&sr); + rw_exit_read(&pkptable.pkp_lk); m_freem(packet); break; case PFKEYV2_SENDMESSAGE_BROADCAST: /* Send message to all sockets */ - SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) { + rw_enter_read(&pkptable.pkp_lk); + TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) { if (kp->kcb_rdomain != rdomain) continue; pfkey_sendup(kp, packet, 1); } - SRPL_LEAVE(&sr); + rw_exit_read(&pkptable.pkp_lk); m_freem(packet); break; } @@ -1136,7 +1113,6 @@ pfkeyv2_dosend(struct socket *so, void * struct sadb_sa *ssa; struct sadb_supported *ssup; struct sadb_ident *sid, *did; - struct srp_ref sr; struct sadb_x_rdomain *srdomain; u_int rdomain = 0; int promisc; @@ -1186,14 +1162,15 @@ pfkeyv2_dosend(struct socket *so, void * goto ret; /* Send to all promiscuous listeners */ - SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) { + rw_enter_read(&pkptable.pkp_lk); + TAILQ_FOREACH(bkp, &pkptable.pkp_list, kcb_list) { if (bkp->kcb_rdomain != kp->kcb_rdomain) continue; if (bkp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC) pfkey_sendup(bkp, packet, 1); } - SRPL_LEAVE(&sr); + rw_exit_read(&pkptable.pkp_lk); m_freem(packet); @@ -2040,7 +2017,8 @@ pfkeyv2_dosend(struct socket *so, void * if ((rval = pfdatatopacket(message, len, &packet)) != 0) goto ret; - SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) { + rw_enter_read(&pkptable.pkp_lk); + TAILQ_FOREACH(bkp, &pkptable.pkp_list, kcb_list) { if (bkp == kp || bkp->kcb_rdomain != kp->kcb_rdomain) continue; @@ -2050,7 +2028,7 @@ pfkeyv2_dosend(struct socket *so, void * pfkey_sendup(bkp, packet, 1); } } - SRPL_LEAVE(&sr); + rw_exit_read(&pkptable.pkp_lk); m_freem(packet); } else {