Download raw body.
get rid of the SRP code in src/sys/net/pfkeyv2.c
this is like the rtsock change i just put in the tree in that it gets
rid of an SRPL, but it works a bit differently.
this code uses an SPRL to manage the list of pfkey sockets that
route messages can be "recved" on. the obvious replacement would
be an SMR list, but while you shouldn't sleep with an SRP ref, we
do here and you can't do that inside an SMR read critical section.
to avoid the sleep while in the SMR crit section, i'd have to add refcnt
operations, which means a bunch of atomic ops.
alternatively, we could do what i did with the ethernet sockets and use
an rwlock to protect the list of sockets, and take a read lock while
traversing the list. this reduces the overall number of atomic ops and
keeps the code in the recv loop much the same.
unlike the rtsock code, the pfkey sockets don't use solock to serialise
the recv code, they use the rcvbuf mutexes. this means we don't have the
same lock ordering problems that we had in rtsock, which makes this a
much more straightforward converson.
ok? tests?
Index: pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
diff -u -p -r1.270 pfkeyv2.c
--- pfkeyv2.c 7 Jul 2025 02:28:50 -0000 1.270
+++ pfkeyv2.c 8 Aug 2025 05:23:29 -0000
@@ -135,8 +135,7 @@ const struct domain pfkeydomain;
struct pkpcb {
struct socket *kcb_socket; /* [I] associated socket */
- SRPL_ENTRY(pkpcb) kcb_list; /* [l] */
- struct refcnt kcb_refcnt; /* [a] */
+ TAILQ_ENTRY(pkpcb) kcb_list; /* [l] */
int kcb_flags; /* [s] */
uint32_t kcb_reg; /* [s] Inc if SATYPE_MAX > 31 */
uint32_t kcb_pid; /* [I] */
@@ -153,8 +152,7 @@ struct dump_state {
};
struct pkptable {
- SRPL_HEAD(, pkpcb) pkp_list;
- struct srpl_rc pkp_rc;
+ TAILQ_HEAD(, pkpcb) pkp_list;
struct rwlock pkp_lk;
};
@@ -229,28 +227,11 @@ const struct domain pfkeydomain = {
};
void
-keycb_ref(void *null, void *v)
-{
- struct pkpcb *kp = v;
-
- refcnt_take(&kp->kcb_refcnt);
-}
-
-void
-keycb_unref(void *null, void *v)
-{
- struct pkpcb *kp = v;
-
- refcnt_rele_wake(&kp->kcb_refcnt);
-}
-
-void
pfkey_init(void)
{
rn_init(sizeof(struct sockaddr_encap));
- srpl_rc_init(&pkptable.pkp_rc, keycb_ref, keycb_unref, NULL);
rw_init(&pkptable.pkp_lk, "pfkey");
- SRPL_INIT(&pkptable.pkp_list);
+ TAILQ_INIT(&pkptable.pkp_list);
pool_init(&pkpcb_pool, sizeof(struct pkpcb), 0,
IPL_SOFTNET, PR_WAITOK, "pkpcb", NULL);
pool_init(&ipsec_policy_pool, sizeof(struct ipsec_policy), 0,
@@ -281,7 +262,6 @@ pfkeyv2_attach(struct socket *so, int pr
if (kp == NULL)
return (ENOBUFS);
so->so_pcb = kp;
- refcnt_init(&kp->kcb_refcnt);
kp->kcb_socket = so;
kp->kcb_pid = curproc->p_p->ps_pid;
kp->kcb_rdomain = rtable_l2(curproc->p_p->ps_rtableid);
@@ -290,7 +270,7 @@ pfkeyv2_attach(struct socket *so, int pr
soisconnected(so);
rw_enter(&pkptable.pkp_lk, RW_WRITE);
- SRPL_INSERT_HEAD_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, kcb_list);
+ TAILQ_INSERT_TAIL(&pkptable.pkp_list, kp, kcb_list);
rw_exit(&pkptable.pkp_lk);
return (0);
@@ -322,15 +302,9 @@ pfkeyv2_detach(struct socket *so)
}
rw_enter(&pkptable.pkp_lk, RW_WRITE);
- SRPL_REMOVE_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, pkpcb,
- kcb_list);
+ TAILQ_REMOVE(&pkptable.pkp_list, kp, kcb_list);
rw_exit(&pkptable.pkp_lk);
- sounlock(so);
- /* wait for all references to drop */
- refcnt_finalize(&kp->kcb_refcnt, "pfkeyrefs");
- solock(so);
-
so->so_pcb = NULL;
KASSERT((so->so_state & SS_NOFDREF) == 0);
pool_put(&pkpcb_pool, kp);
@@ -475,7 +449,6 @@ pfkeyv2_sendmessage(void **headers, int
struct mbuf *packet;
struct pkpcb *kp;
struct sadb_msg *smsg;
- struct srp_ref sr;
/* Find out how much space we'll need... */
j = sizeof(struct sadb_msg);
@@ -538,14 +511,15 @@ pfkeyv2_sendmessage(void **headers, int
* Search for promiscuous listeners, skipping the
* original destination.
*/
- SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+ rw_enter_read(&pkptable.pkp_lk);
+ TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) {
if (kp->kcb_socket == so || kp->kcb_rdomain != rdomain)
continue;
if (kp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC)
pfkey_sendup(kp, packet, 1);
}
- SRPL_LEAVE(&sr);
+ rw_exit_read(&pkptable.pkp_lk);
m_freem(packet);
break;
@@ -554,7 +528,8 @@ pfkeyv2_sendmessage(void **headers, int
* Send the message to all registered sockets that match
* the specified satype (e.g., all IPSEC-ESP negotiators)
*/
- SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+ rw_enter_read(&pkptable.pkp_lk);
+ TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) {
if (kp->kcb_rdomain != rdomain)
continue;
@@ -571,7 +546,7 @@ pfkeyv2_sendmessage(void **headers, int
}
}
}
- SRPL_LEAVE(&sr);
+ rw_exit_read(&pkptable.pkp_lk);
/* Free last/original copy of the packet */
m_freem(packet);
@@ -590,7 +565,8 @@ pfkeyv2_sendmessage(void **headers, int
goto ret;
/* Send to all registered promiscuous listeners */
- SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+ rw_enter_read(&pkptable.pkp_lk);
+ TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) {
int flags = READ_ONCE(kp->kcb_flags);
if (kp->kcb_rdomain != rdomain)
@@ -600,19 +576,20 @@ pfkeyv2_sendmessage(void **headers, int
!(flags & PFKEYV2_SOCKETFLAGS_REGISTERED))
pfkey_sendup(kp, packet, 1);
}
- SRPL_LEAVE(&sr);
+ rw_exit_read(&pkptable.pkp_lk);
m_freem(packet);
break;
case PFKEYV2_SENDMESSAGE_BROADCAST:
/* Send message to all sockets */
- SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+ rw_enter_read(&pkptable.pkp_lk);
+ TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) {
if (kp->kcb_rdomain != rdomain)
continue;
pfkey_sendup(kp, packet, 1);
}
- SRPL_LEAVE(&sr);
+ rw_exit_read(&pkptable.pkp_lk);
m_freem(packet);
break;
}
@@ -1136,7 +1113,6 @@ pfkeyv2_dosend(struct socket *so, void *
struct sadb_sa *ssa;
struct sadb_supported *ssup;
struct sadb_ident *sid, *did;
- struct srp_ref sr;
struct sadb_x_rdomain *srdomain;
u_int rdomain = 0;
int promisc;
@@ -1186,14 +1162,15 @@ pfkeyv2_dosend(struct socket *so, void *
goto ret;
/* Send to all promiscuous listeners */
- SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) {
+ rw_enter_read(&pkptable.pkp_lk);
+ TAILQ_FOREACH(bkp, &pkptable.pkp_list, kcb_list) {
if (bkp->kcb_rdomain != kp->kcb_rdomain)
continue;
if (bkp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC)
pfkey_sendup(bkp, packet, 1);
}
- SRPL_LEAVE(&sr);
+ rw_exit_read(&pkptable.pkp_lk);
m_freem(packet);
@@ -2040,7 +2017,8 @@ pfkeyv2_dosend(struct socket *so, void *
if ((rval = pfdatatopacket(message, len, &packet)) != 0)
goto ret;
- SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) {
+ rw_enter_read(&pkptable.pkp_lk);
+ TAILQ_FOREACH(bkp, &pkptable.pkp_list, kcb_list) {
if (bkp == kp ||
bkp->kcb_rdomain != kp->kcb_rdomain)
continue;
@@ -2050,7 +2028,7 @@ pfkeyv2_dosend(struct socket *so, void *
pfkey_sendup(bkp, packet, 1);
}
}
- SRPL_LEAVE(&sr);
+ rw_exit_read(&pkptable.pkp_lk);
m_freem(packet);
} else {
get rid of the SRP code in src/sys/net/pfkeyv2.c