Index | Thread | Search

From:
David Gwynne <david@gwynne.id.au>
Subject:
get rid of the SRP code in src/sys/net/pfkeyv2.c
To:
tech@openbsd.org
Date:
Wed, 20 Aug 2025 14:17:30 +1000

Download raw body.

Thread
this is like the rtsock change i just put in the tree in that it gets
rid of an SRPL, but it works a bit differently.

this code uses an SPRL to manage the list of pfkey sockets that
route messages can be "recved" on. the obvious replacement would
be an SMR list, but while you shouldn't sleep with an SRP ref, we
do here and you can't do that inside an SMR read critical section.

to avoid the sleep while in the SMR crit section, i'd have to add refcnt
operations, which means a bunch of atomic ops.

alternatively, we could do what i did with the ethernet sockets and use
an rwlock to protect the list of sockets, and take a read lock while
traversing the list. this reduces the overall number of atomic ops and
keeps the code in the recv loop much the same.

unlike the rtsock code, the pfkey sockets don't use solock to serialise
the recv code, they use the rcvbuf mutexes. this means we don't have the
same lock ordering problems that we had in rtsock, which makes this a
much more straightforward converson.

ok? tests?

Index: pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
diff -u -p -r1.270 pfkeyv2.c
--- pfkeyv2.c	7 Jul 2025 02:28:50 -0000	1.270
+++ pfkeyv2.c	8 Aug 2025 05:23:29 -0000
@@ -135,8 +135,7 @@ const struct domain pfkeydomain;
 struct pkpcb {
 	struct socket		*kcb_socket;	/* [I] associated socket */
 
-	SRPL_ENTRY(pkpcb)	kcb_list;	/* [l] */
-	struct refcnt		kcb_refcnt;	/* [a] */
+	TAILQ_ENTRY(pkpcb)	kcb_list;	/* [l] */
 	int			kcb_flags;	/* [s] */
 	uint32_t		kcb_reg;	/* [s] Inc if SATYPE_MAX > 31 */
 	uint32_t		kcb_pid;	/* [I] */
@@ -153,8 +152,7 @@ struct dump_state {
 };
 
 struct pkptable {
-	SRPL_HEAD(, pkpcb)	pkp_list;
-	struct srpl_rc		pkp_rc;
+	TAILQ_HEAD(, pkpcb)	pkp_list;
 	struct rwlock		pkp_lk;
 };
 
@@ -229,28 +227,11 @@ const struct domain pfkeydomain = {
 };
 
 void
-keycb_ref(void *null, void *v)
-{
-	struct pkpcb *kp = v;
-
-	refcnt_take(&kp->kcb_refcnt);
-}
-
-void
-keycb_unref(void *null, void *v)
-{
-	struct pkpcb *kp = v;
-
-	refcnt_rele_wake(&kp->kcb_refcnt);
-}
-
-void
 pfkey_init(void)
 {
 	rn_init(sizeof(struct sockaddr_encap));
-	srpl_rc_init(&pkptable.pkp_rc, keycb_ref, keycb_unref, NULL);
 	rw_init(&pkptable.pkp_lk, "pfkey");
-	SRPL_INIT(&pkptable.pkp_list);
+	TAILQ_INIT(&pkptable.pkp_list);
 	pool_init(&pkpcb_pool, sizeof(struct pkpcb), 0,
 	    IPL_SOFTNET, PR_WAITOK, "pkpcb", NULL);
 	pool_init(&ipsec_policy_pool, sizeof(struct ipsec_policy), 0,
@@ -281,7 +262,6 @@ pfkeyv2_attach(struct socket *so, int pr
 	if (kp == NULL)
 		return (ENOBUFS);
 	so->so_pcb = kp;
-	refcnt_init(&kp->kcb_refcnt);
 	kp->kcb_socket = so;
 	kp->kcb_pid = curproc->p_p->ps_pid;
 	kp->kcb_rdomain = rtable_l2(curproc->p_p->ps_rtableid);
@@ -290,7 +270,7 @@ pfkeyv2_attach(struct socket *so, int pr
 	soisconnected(so);
 
 	rw_enter(&pkptable.pkp_lk, RW_WRITE);
-	SRPL_INSERT_HEAD_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, kcb_list);
+	TAILQ_INSERT_TAIL(&pkptable.pkp_list, kp, kcb_list);
 	rw_exit(&pkptable.pkp_lk);
 
 	return (0);
@@ -322,15 +302,9 @@ pfkeyv2_detach(struct socket *so)
 	}
 
 	rw_enter(&pkptable.pkp_lk, RW_WRITE);
-	SRPL_REMOVE_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, pkpcb,
-	    kcb_list);
+	TAILQ_REMOVE(&pkptable.pkp_list, kp, kcb_list);
 	rw_exit(&pkptable.pkp_lk);
 
-	sounlock(so);
-	/* wait for all references to drop */
-	refcnt_finalize(&kp->kcb_refcnt, "pfkeyrefs");
-	solock(so);
-
 	so->so_pcb = NULL;
 	KASSERT((so->so_state & SS_NOFDREF) == 0);
 	pool_put(&pkpcb_pool, kp);
@@ -475,7 +449,6 @@ pfkeyv2_sendmessage(void **headers, int 
 	struct mbuf *packet;
 	struct pkpcb *kp;
 	struct sadb_msg *smsg;
-	struct srp_ref sr;
 
 	/* Find out how much space we'll need... */
 	j = sizeof(struct sadb_msg);
@@ -538,14 +511,15 @@ pfkeyv2_sendmessage(void **headers, int 
 		 * Search for promiscuous listeners, skipping the
 		 * original destination.
 		 */
-		SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+		rw_enter_read(&pkptable.pkp_lk);
+		TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) {
 			if (kp->kcb_socket == so || kp->kcb_rdomain != rdomain)
 				continue;
 
 			if (kp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC)
 				pfkey_sendup(kp, packet, 1);
 		}
-		SRPL_LEAVE(&sr);
+		rw_exit_read(&pkptable.pkp_lk);
 		m_freem(packet);
 		break;
 
@@ -554,7 +528,8 @@ pfkeyv2_sendmessage(void **headers, int 
 		 * Send the message to all registered sockets that match
 		 * the specified satype (e.g., all IPSEC-ESP negotiators)
 		 */
-		SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+		rw_enter_read(&pkptable.pkp_lk);
+		TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) {
 			if (kp->kcb_rdomain != rdomain)
 				continue;
 
@@ -571,7 +546,7 @@ pfkeyv2_sendmessage(void **headers, int 
 				}
 			}
 		}
-		SRPL_LEAVE(&sr);
+		rw_exit_read(&pkptable.pkp_lk);
 		/* Free last/original copy of the packet */
 		m_freem(packet);
 
@@ -590,7 +565,8 @@ pfkeyv2_sendmessage(void **headers, int 
 			goto ret;
 
 		/* Send to all registered promiscuous listeners */
-		SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+		rw_enter_read(&pkptable.pkp_lk);
+		TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) {
 			int flags = READ_ONCE(kp->kcb_flags);
 
 			if (kp->kcb_rdomain != rdomain)
@@ -600,19 +576,20 @@ pfkeyv2_sendmessage(void **headers, int 
 			    !(flags & PFKEYV2_SOCKETFLAGS_REGISTERED))
 				pfkey_sendup(kp, packet, 1);
 		}
-		SRPL_LEAVE(&sr);
+		rw_exit_read(&pkptable.pkp_lk);
 		m_freem(packet);
 		break;
 
 	case PFKEYV2_SENDMESSAGE_BROADCAST:
 		/* Send message to all sockets */
-		SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+		rw_enter_read(&pkptable.pkp_lk);
+		TAILQ_FOREACH(kp, &pkptable.pkp_list, kcb_list) {
 			if (kp->kcb_rdomain != rdomain)
 				continue;
 
 			pfkey_sendup(kp, packet, 1);
 		}
-		SRPL_LEAVE(&sr);
+		rw_exit_read(&pkptable.pkp_lk);
 		m_freem(packet);
 		break;
 	}
@@ -1136,7 +1113,6 @@ pfkeyv2_dosend(struct socket *so, void *
 	struct sadb_sa *ssa;
 	struct sadb_supported *ssup;
 	struct sadb_ident *sid, *did;
-	struct srp_ref sr;
 	struct sadb_x_rdomain *srdomain;
 	u_int rdomain = 0;
 	int promisc;
@@ -1186,14 +1162,15 @@ pfkeyv2_dosend(struct socket *so, void *
 			goto ret;
 
 		/* Send to all promiscuous listeners */
-		SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) {
+		rw_enter_read(&pkptable.pkp_lk);
+		TAILQ_FOREACH(bkp, &pkptable.pkp_list, kcb_list) {
 			if (bkp->kcb_rdomain != kp->kcb_rdomain)
 				continue;
 
 			if (bkp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC)
 				pfkey_sendup(bkp, packet, 1);
 		}
-		SRPL_LEAVE(&sr);
+		rw_exit_read(&pkptable.pkp_lk);
 
 		m_freem(packet);
 
@@ -2040,7 +2017,8 @@ pfkeyv2_dosend(struct socket *so, void *
 			if ((rval = pfdatatopacket(message, len, &packet)) != 0)
 				goto ret;
 
-			SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) {
+			rw_enter_read(&pkptable.pkp_lk);
+			TAILQ_FOREACH(bkp, &pkptable.pkp_list, kcb_list) {
 				if (bkp == kp ||
 				    bkp->kcb_rdomain != kp->kcb_rdomain)
 					continue;
@@ -2050,7 +2028,7 @@ pfkeyv2_dosend(struct socket *so, void *
 					pfkey_sendup(bkp, packet, 1);
 				}
 			}
-			SRPL_LEAVE(&sr);
+			rw_exit_read(&pkptable.pkp_lk);
 
 			m_freem(packet);
 		} else {