Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
AF_KET sockets: turn `pkp_list' from SRP to SMR list
To:
tech@openbsd.org
Date:
Sat, 25 May 2024 20:03:08 +0300

Download raw body.

Thread
  • Vitaliy Makkoveev:

    AF_KET sockets: turn `pkp_list' from SRP to SMR list

`pkp_list' foreach walkthroughs are simply sbappendarrd() and
sorwakeup() which rely on `sb_mtx' mutex(9). The only sleep point is
solock() which is used to protect `kcb_reg' and `kcb_flags' of pckcb
structure. There are no code paths to serialize, so solock() could be
replaced by per-pkpcb `kcb_mtx' mutex(9). This removes sleep points from
`pkp_list' walkthroughs and allow to convert `pkp_list' to SMR list.

`pkp_lk' rwlock(9) was replaced by `pkp_mtx' mutex(9). It used only to
protect `pkp_list' modifications and rwlock looks overkilling here.

AF_ROUTE sockets are more complicated, but seems it could be possible
convert them too.

Index: sys/net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
diff -u -p -r1.262 pfkeyv2.c
--- sys/net/pfkeyv2.c	17 May 2024 19:02:04 -0000	1.262
+++ sys/net/pfkeyv2.c	20 May 2024 10:01:06 -0000
@@ -81,6 +81,7 @@
 #include <sys/proc.h>
 #include <sys/pool.h>
 #include <sys/mutex.h>
+#include <sys/smr.h>
 
 #include <net/route.h>
 #include <netinet/ip_ipsp.h>
@@ -131,38 +132,31 @@ const struct domain pfkeydomain;
  *
  *  Locks used to protect struct members in this file:
  *	I	immutable after creation
- *	a	atomic operations
- *	l	pkptable's lock
- *	s	socket lock
+ *	L	pkp_mtx
+ *	m	kcb_mtx
  */
 struct pkpcb {
 	struct socket		*kcb_socket;	/* [I] associated socket */
 
-	SRPL_ENTRY(pkpcb)	kcb_list;	/* [l] */
-	struct refcnt		kcb_refcnt;	/* [a] */
-	int			kcb_flags;	/* [s] */
-	uint32_t		kcb_reg;	/* [s] Inc if SATYPE_MAX > 31 */
+	struct mutex		kcb_mtx;
+	SMR_SLIST_ENTRY(pkpcb)	kcb_list;	/* [L] */
+	int			kcb_flags;	/* [m] */
+	uint32_t		kcb_reg;	/* [m] Inc if SATYPE_MAX > 31 */
 	uint32_t		kcb_pid;	/* [I] */
 	unsigned int		kcb_rdomain;	/* [I] routing domain */
 };
 #define sotokeycb(so)		((struct pkpcb *)(so)->so_pcb)
-#define keylock(kp)		solock((kp)->kcb_socket)
-#define keyunlock(kp)		sounlock((kp)->kcb_socket)
-
 
 struct dump_state {
 	struct sadb_msg *sadb_msg;
 	struct socket *socket;
 };
 
-struct pkptable {
-	SRPL_HEAD(, pkpcb)	pkp_list;
-	struct srpl_rc		pkp_rc;
-	struct rwlock		pkp_lk;
-};
+struct mutex pkp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
+SMR_SLIST_HEAD(, pkpcb) pkp_list =
+    SMR_SLIST_HEAD_INITIALIZER(pkp_list); /* L */
 
-struct pkptable pkptable;
-struct mutex pfkeyv2_mtx = MUTEX_INITIALIZER(IPL_MPFLOOR);
+struct mutex pfkeyv2_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
 static uint32_t pfkeyv2_seq = 1;
 static int nregistered = 0;
 static int npromisc = 0;
@@ -183,9 +177,6 @@ int pfkeyv2_sa_flush(struct tdb *, void 
 int pfkeyv2_policy_flush(struct ipsec_policy *, void *, unsigned int);
 int pfkeyv2_sysctl_policydumper(struct ipsec_policy *, void *, unsigned int);
 
-void	keycb_ref(void *, void *);
-void	keycb_unref(void *, void *);
-
 /*
  * Wrapper around m_devget(); copy data from contiguous buffer to mbuf
  * chain.
@@ -232,28 +223,9 @@ const struct domain pfkeydomain = {
 };
 
 void
-keycb_ref(void *null, void *v)
-{
-	struct pkpcb *kp = v;
-
-	refcnt_take(&kp->kcb_refcnt);
-}
-
-void
-keycb_unref(void *null, void *v)
-{
-	struct pkpcb *kp = v;
-
-	refcnt_rele_wake(&kp->kcb_refcnt);
-}
-
-void
 pfkey_init(void)
 {
 	rn_init(sizeof(struct sockaddr_encap));
-	srpl_rc_init(&pkptable.pkp_rc, keycb_ref, keycb_unref, NULL);
-	rw_init(&pkptable.pkp_lk, "pfkey");
-	SRPL_INIT(&pkptable.pkp_list);
 	pool_init(&pkpcb_pool, sizeof(struct pkpcb), 0,
 	    IPL_SOFTNET, PR_WAITOK, "pkpcb", NULL);
 	pool_init(&ipsec_policy_pool, sizeof(struct ipsec_policy), 0,
@@ -284,7 +256,7 @@ pfkeyv2_attach(struct socket *so, int pr
 	if (kp == NULL)
 		return (ENOBUFS);
 	so->so_pcb = kp;
-	refcnt_init(&kp->kcb_refcnt);
+	mtx_init(&kp->kcb_mtx, IPL_SOFTNET);
 	kp->kcb_socket = so;
 	kp->kcb_pid = curproc->p_p->ps_pid;
 	kp->kcb_rdomain = rtable_l2(curproc->p_p->ps_rtableid);
@@ -292,9 +264,9 @@ pfkeyv2_attach(struct socket *so, int pr
 	so->so_options |= SO_USELOOPBACK;
 	soisconnected(so);
 
-	rw_enter(&pkptable.pkp_lk, RW_WRITE);
-	SRPL_INSERT_HEAD_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, kcb_list);
-	rw_exit(&pkptable.pkp_lk);
+	mtx_enter(&pkp_mtx);
+	SMR_SLIST_INSERT_HEAD_LOCKED(&pkp_list, kp, kcb_list);
+	mtx_leave(&pkp_mtx);
 
 	return (0);
 }
@@ -324,15 +296,10 @@ pfkeyv2_detach(struct socket *so)
 		mtx_leave(&pfkeyv2_mtx);
 	}
 
-	rw_enter(&pkptable.pkp_lk, RW_WRITE);
-	SRPL_REMOVE_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, pkpcb,
-	    kcb_list);
-	rw_exit(&pkptable.pkp_lk);
-
-	sounlock(so);
-	/* wait for all references to drop */
-	refcnt_finalize(&kp->kcb_refcnt, "pfkeyrefs");
-	solock(so);
+	mtx_enter(&pkp_mtx);
+	SMR_SLIST_REMOVE_LOCKED(&pkp_list, kp, pkpcb, kcb_list);
+	mtx_leave(&pkp_mtx);
+	smr_barrier();
 
 	so->so_pcb = NULL;
 	KASSERT((so->so_state & SS_NOFDREF) == 0);
@@ -478,7 +445,6 @@ pfkeyv2_sendmessage(void **headers, int 
 	struct mbuf *packet;
 	struct pkpcb *kp;
 	struct sadb_msg *smsg;
-	struct srp_ref sr;
 
 	/* Find out how much space we'll need... */
 	j = sizeof(struct sadb_msg);
@@ -541,14 +507,13 @@ pfkeyv2_sendmessage(void **headers, int 
 		 * Search for promiscuous listeners, skipping the
 		 * original destination.
 		 */
-		SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+		SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) {
 			if (kp->kcb_socket == so || kp->kcb_rdomain != rdomain)
 				continue;
 
 			if (kp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC)
 				pfkey_sendup(kp, packet, 1);
 		}
-		SRPL_LEAVE(&sr);
 		m_freem(packet);
 		break;
 
@@ -557,7 +522,7 @@ pfkeyv2_sendmessage(void **headers, int 
 		 * Send the message to all registered sockets that match
 		 * the specified satype (e.g., all IPSEC-ESP negotiators)
 		 */
-		SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+		SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) {
 			if (kp->kcb_rdomain != rdomain)
 				continue;
 
@@ -566,15 +531,18 @@ pfkeyv2_sendmessage(void **headers, int 
 					/* Just send to everyone registered */
 					pfkey_sendup(kp, packet, 1);
 				} else {
-					keylock(kp);
+					int sendup = 0;
+
+					mtx_enter(&kp->kcb_mtx);
 					/* Check for specified satype */
 					if ((1 << satype) & kp->kcb_reg)
+						sendup = 1;
+					mtx_leave(&kp->kcb_mtx);
+					if (sendup)
 						pfkey_sendup(kp, packet, 1);
-					keyunlock(kp);
 				}
 			}
 		}
-		SRPL_LEAVE(&sr);
 		/* Free last/original copy of the packet */
 		m_freem(packet);
 
@@ -593,7 +561,7 @@ pfkeyv2_sendmessage(void **headers, int 
 			goto ret;
 
 		/* Send to all registered promiscuous listeners */
-		SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+		SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) {
 			int flags = READ_ONCE(kp->kcb_flags);
 
 			if (kp->kcb_rdomain != rdomain)
@@ -603,19 +571,17 @@ pfkeyv2_sendmessage(void **headers, int 
 			    !(flags & PFKEYV2_SOCKETFLAGS_REGISTERED))
 				pfkey_sendup(kp, packet, 1);
 		}
-		SRPL_LEAVE(&sr);
 		m_freem(packet);
 		break;
 
 	case PFKEYV2_SENDMESSAGE_BROADCAST:
 		/* Send message to all sockets */
-		SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+		SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) {
 			if (kp->kcb_rdomain != rdomain)
 				continue;
 
 			pfkey_sendup(kp, packet, 1);
 		}
-		SRPL_LEAVE(&sr);
 		m_freem(packet);
 		break;
 	}
@@ -1139,7 +1105,6 @@ pfkeyv2_dosend(struct socket *so, void *
 	struct sadb_sa *ssa;
 	struct sadb_supported *ssup;
 	struct sadb_ident *sid, *did;
-	struct srp_ref sr;
 	struct sadb_x_rdomain *srdomain;
 	u_int rdomain = 0;
 	int promisc;
@@ -1189,14 +1154,13 @@ pfkeyv2_dosend(struct socket *so, void *
 			goto ret;
 
 		/* Send to all promiscuous listeners */
-		SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) {
+		SMR_SLIST_FOREACH(bkp, &pkp_list, kcb_list) {
 			if (bkp->kcb_rdomain != kp->kcb_rdomain)
 				continue;
 
 			if (bkp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC)
 				pfkey_sendup(bkp, packet, 1);
 		}
-		SRPL_LEAVE(&sr);
 
 		m_freem(packet);
 
@@ -1636,14 +1600,14 @@ pfkeyv2_dosend(struct socket *so, void *
 		break;
 
 	case SADB_REGISTER:
-		keylock(kp);
+		mtx_enter(&kp->kcb_mtx);
 		if (!(kp->kcb_flags & PFKEYV2_SOCKETFLAGS_REGISTERED)) {
 			kp->kcb_flags |= PFKEYV2_SOCKETFLAGS_REGISTERED;
 			mtx_enter(&pfkeyv2_mtx);
 			nregistered++;
 			mtx_leave(&pfkeyv2_mtx);
 		}
-		keyunlock(kp);
+		mtx_leave(&kp->kcb_mtx);
 
 		freeme_sz = sizeof(struct sadb_supported) + sizeof(ealgs);
 		if (!(freeme = malloc(freeme_sz, M_PFKEY, M_NOWAIT | M_ZERO))) {
@@ -1670,10 +1634,10 @@ pfkeyv2_dosend(struct socket *so, void *
 		}
 
 		/* Keep track what this socket has registered for */
-		keylock(kp);
+		mtx_enter(&kp->kcb_mtx);
 		kp->kcb_reg |=
 		    (1 << ((struct sadb_msg *)message)->sadb_msg_satype);
-		keyunlock(kp);
+		mtx_leave(&kp->kcb_mtx);
 
 		ssup = (struct sadb_supported *) freeme2;
 		ssup->sadb_supported_len = freeme2_sz / sizeof(uint64_t);
@@ -2043,7 +2007,7 @@ pfkeyv2_dosend(struct socket *so, void *
 			if ((rval = pfdatatopacket(message, len, &packet)) != 0)
 				goto ret;
 
-			SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) {
+			SMR_SLIST_FOREACH(bkp, &pkp_list, kcb_list) {
 				if (bkp == kp ||
 				    bkp->kcb_rdomain != kp->kcb_rdomain)
 					continue;
@@ -2053,7 +2017,6 @@ pfkeyv2_dosend(struct socket *so, void *
 					pfkey_sendup(bkp, packet, 1);
 				}
 			}
-			SRPL_LEAVE(&sr);
 
 			m_freem(packet);
 		} else {
@@ -2062,7 +2025,7 @@ pfkeyv2_dosend(struct socket *so, void *
 				goto ret;
 			}
 
-			keylock(kp);
+			mtx_enter(&kp->kcb_mtx);
 			i = (kp->kcb_flags &
 			    PFKEYV2_SOCKETFLAGS_PROMISC) ? 1 : 0;
 			j = smsg->sadb_msg_satype ? 1 : 0;
@@ -2082,7 +2045,7 @@ pfkeyv2_dosend(struct socket *so, void *
 					mtx_leave(&pfkeyv2_mtx);
 				}
 			}
-			keyunlock(kp);
+			mtx_leave(&kp->kcb_mtx);
 		}
 
 		break;