Download raw body.
AF_KET sockets: turn `pkp_list' from SRP to SMR list
`pkp_list' foreach walkthroughs are simply sbappendarrd() and
sorwakeup() which rely on `sb_mtx' mutex(9). The only sleep point is
solock() which is used to protect `kcb_reg' and `kcb_flags' of pckcb
structure. There are no code paths to serialize, so solock() could be
replaced by per-pkpcb `kcb_mtx' mutex(9). This removes sleep points from
`pkp_list' walkthroughs and allow to convert `pkp_list' to SMR list.
`pkp_lk' rwlock(9) was replaced by `pkp_mtx' mutex(9). It used only to
protect `pkp_list' modifications and rwlock looks overkilling here.
AF_ROUTE sockets are more complicated, but seems it could be possible
convert them too.
Index: sys/net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
diff -u -p -r1.262 pfkeyv2.c
--- sys/net/pfkeyv2.c 17 May 2024 19:02:04 -0000 1.262
+++ sys/net/pfkeyv2.c 20 May 2024 10:01:06 -0000
@@ -81,6 +81,7 @@
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/mutex.h>
+#include <sys/smr.h>
#include <net/route.h>
#include <netinet/ip_ipsp.h>
@@ -131,38 +132,31 @@ const struct domain pfkeydomain;
*
* Locks used to protect struct members in this file:
* I immutable after creation
- * a atomic operations
- * l pkptable's lock
- * s socket lock
+ * L pkp_mtx
+ * m kcb_mtx
*/
struct pkpcb {
struct socket *kcb_socket; /* [I] associated socket */
- SRPL_ENTRY(pkpcb) kcb_list; /* [l] */
- struct refcnt kcb_refcnt; /* [a] */
- int kcb_flags; /* [s] */
- uint32_t kcb_reg; /* [s] Inc if SATYPE_MAX > 31 */
+ struct mutex kcb_mtx;
+ SMR_SLIST_ENTRY(pkpcb) kcb_list; /* [L] */
+ int kcb_flags; /* [m] */
+ uint32_t kcb_reg; /* [m] Inc if SATYPE_MAX > 31 */
uint32_t kcb_pid; /* [I] */
unsigned int kcb_rdomain; /* [I] routing domain */
};
#define sotokeycb(so) ((struct pkpcb *)(so)->so_pcb)
-#define keylock(kp) solock((kp)->kcb_socket)
-#define keyunlock(kp) sounlock((kp)->kcb_socket)
-
struct dump_state {
struct sadb_msg *sadb_msg;
struct socket *socket;
};
-struct pkptable {
- SRPL_HEAD(, pkpcb) pkp_list;
- struct srpl_rc pkp_rc;
- struct rwlock pkp_lk;
-};
+struct mutex pkp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
+SMR_SLIST_HEAD(, pkpcb) pkp_list =
+ SMR_SLIST_HEAD_INITIALIZER(pkp_list); /* L */
-struct pkptable pkptable;
-struct mutex pfkeyv2_mtx = MUTEX_INITIALIZER(IPL_MPFLOOR);
+struct mutex pfkeyv2_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
static uint32_t pfkeyv2_seq = 1;
static int nregistered = 0;
static int npromisc = 0;
@@ -183,9 +177,6 @@ int pfkeyv2_sa_flush(struct tdb *, void
int pfkeyv2_policy_flush(struct ipsec_policy *, void *, unsigned int);
int pfkeyv2_sysctl_policydumper(struct ipsec_policy *, void *, unsigned int);
-void keycb_ref(void *, void *);
-void keycb_unref(void *, void *);
-
/*
* Wrapper around m_devget(); copy data from contiguous buffer to mbuf
* chain.
@@ -232,28 +223,9 @@ const struct domain pfkeydomain = {
};
void
-keycb_ref(void *null, void *v)
-{
- struct pkpcb *kp = v;
-
- refcnt_take(&kp->kcb_refcnt);
-}
-
-void
-keycb_unref(void *null, void *v)
-{
- struct pkpcb *kp = v;
-
- refcnt_rele_wake(&kp->kcb_refcnt);
-}
-
-void
pfkey_init(void)
{
rn_init(sizeof(struct sockaddr_encap));
- srpl_rc_init(&pkptable.pkp_rc, keycb_ref, keycb_unref, NULL);
- rw_init(&pkptable.pkp_lk, "pfkey");
- SRPL_INIT(&pkptable.pkp_list);
pool_init(&pkpcb_pool, sizeof(struct pkpcb), 0,
IPL_SOFTNET, PR_WAITOK, "pkpcb", NULL);
pool_init(&ipsec_policy_pool, sizeof(struct ipsec_policy), 0,
@@ -284,7 +256,7 @@ pfkeyv2_attach(struct socket *so, int pr
if (kp == NULL)
return (ENOBUFS);
so->so_pcb = kp;
- refcnt_init(&kp->kcb_refcnt);
+ mtx_init(&kp->kcb_mtx, IPL_SOFTNET);
kp->kcb_socket = so;
kp->kcb_pid = curproc->p_p->ps_pid;
kp->kcb_rdomain = rtable_l2(curproc->p_p->ps_rtableid);
@@ -292,9 +264,9 @@ pfkeyv2_attach(struct socket *so, int pr
so->so_options |= SO_USELOOPBACK;
soisconnected(so);
- rw_enter(&pkptable.pkp_lk, RW_WRITE);
- SRPL_INSERT_HEAD_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, kcb_list);
- rw_exit(&pkptable.pkp_lk);
+ mtx_enter(&pkp_mtx);
+ SMR_SLIST_INSERT_HEAD_LOCKED(&pkp_list, kp, kcb_list);
+ mtx_leave(&pkp_mtx);
return (0);
}
@@ -324,15 +296,10 @@ pfkeyv2_detach(struct socket *so)
mtx_leave(&pfkeyv2_mtx);
}
- rw_enter(&pkptable.pkp_lk, RW_WRITE);
- SRPL_REMOVE_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, pkpcb,
- kcb_list);
- rw_exit(&pkptable.pkp_lk);
-
- sounlock(so);
- /* wait for all references to drop */
- refcnt_finalize(&kp->kcb_refcnt, "pfkeyrefs");
- solock(so);
+ mtx_enter(&pkp_mtx);
+ SMR_SLIST_REMOVE_LOCKED(&pkp_list, kp, pkpcb, kcb_list);
+ mtx_leave(&pkp_mtx);
+ smr_barrier();
so->so_pcb = NULL;
KASSERT((so->so_state & SS_NOFDREF) == 0);
@@ -478,7 +445,6 @@ pfkeyv2_sendmessage(void **headers, int
struct mbuf *packet;
struct pkpcb *kp;
struct sadb_msg *smsg;
- struct srp_ref sr;
/* Find out how much space we'll need... */
j = sizeof(struct sadb_msg);
@@ -541,14 +507,13 @@ pfkeyv2_sendmessage(void **headers, int
* Search for promiscuous listeners, skipping the
* original destination.
*/
- SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+ SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) {
if (kp->kcb_socket == so || kp->kcb_rdomain != rdomain)
continue;
if (kp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC)
pfkey_sendup(kp, packet, 1);
}
- SRPL_LEAVE(&sr);
m_freem(packet);
break;
@@ -557,7 +522,7 @@ pfkeyv2_sendmessage(void **headers, int
* Send the message to all registered sockets that match
* the specified satype (e.g., all IPSEC-ESP negotiators)
*/
- SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+ SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) {
if (kp->kcb_rdomain != rdomain)
continue;
@@ -566,15 +531,18 @@ pfkeyv2_sendmessage(void **headers, int
/* Just send to everyone registered */
pfkey_sendup(kp, packet, 1);
} else {
- keylock(kp);
+ int sendup = 0;
+
+ mtx_enter(&kp->kcb_mtx);
/* Check for specified satype */
if ((1 << satype) & kp->kcb_reg)
+ sendup = 1;
+ mtx_leave(&kp->kcb_mtx);
+ if (sendup)
pfkey_sendup(kp, packet, 1);
- keyunlock(kp);
}
}
}
- SRPL_LEAVE(&sr);
/* Free last/original copy of the packet */
m_freem(packet);
@@ -593,7 +561,7 @@ pfkeyv2_sendmessage(void **headers, int
goto ret;
/* Send to all registered promiscuous listeners */
- SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+ SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) {
int flags = READ_ONCE(kp->kcb_flags);
if (kp->kcb_rdomain != rdomain)
@@ -603,19 +571,17 @@ pfkeyv2_sendmessage(void **headers, int
!(flags & PFKEYV2_SOCKETFLAGS_REGISTERED))
pfkey_sendup(kp, packet, 1);
}
- SRPL_LEAVE(&sr);
m_freem(packet);
break;
case PFKEYV2_SENDMESSAGE_BROADCAST:
/* Send message to all sockets */
- SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) {
+ SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) {
if (kp->kcb_rdomain != rdomain)
continue;
pfkey_sendup(kp, packet, 1);
}
- SRPL_LEAVE(&sr);
m_freem(packet);
break;
}
@@ -1139,7 +1105,6 @@ pfkeyv2_dosend(struct socket *so, void *
struct sadb_sa *ssa;
struct sadb_supported *ssup;
struct sadb_ident *sid, *did;
- struct srp_ref sr;
struct sadb_x_rdomain *srdomain;
u_int rdomain = 0;
int promisc;
@@ -1189,14 +1154,13 @@ pfkeyv2_dosend(struct socket *so, void *
goto ret;
/* Send to all promiscuous listeners */
- SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) {
+ SMR_SLIST_FOREACH(bkp, &pkp_list, kcb_list) {
if (bkp->kcb_rdomain != kp->kcb_rdomain)
continue;
if (bkp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC)
pfkey_sendup(bkp, packet, 1);
}
- SRPL_LEAVE(&sr);
m_freem(packet);
@@ -1636,14 +1600,14 @@ pfkeyv2_dosend(struct socket *so, void *
break;
case SADB_REGISTER:
- keylock(kp);
+ mtx_enter(&kp->kcb_mtx);
if (!(kp->kcb_flags & PFKEYV2_SOCKETFLAGS_REGISTERED)) {
kp->kcb_flags |= PFKEYV2_SOCKETFLAGS_REGISTERED;
mtx_enter(&pfkeyv2_mtx);
nregistered++;
mtx_leave(&pfkeyv2_mtx);
}
- keyunlock(kp);
+ mtx_leave(&kp->kcb_mtx);
freeme_sz = sizeof(struct sadb_supported) + sizeof(ealgs);
if (!(freeme = malloc(freeme_sz, M_PFKEY, M_NOWAIT | M_ZERO))) {
@@ -1670,10 +1634,10 @@ pfkeyv2_dosend(struct socket *so, void *
}
/* Keep track what this socket has registered for */
- keylock(kp);
+ mtx_enter(&kp->kcb_mtx);
kp->kcb_reg |=
(1 << ((struct sadb_msg *)message)->sadb_msg_satype);
- keyunlock(kp);
+ mtx_leave(&kp->kcb_mtx);
ssup = (struct sadb_supported *) freeme2;
ssup->sadb_supported_len = freeme2_sz / sizeof(uint64_t);
@@ -2043,7 +2007,7 @@ pfkeyv2_dosend(struct socket *so, void *
if ((rval = pfdatatopacket(message, len, &packet)) != 0)
goto ret;
- SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) {
+ SMR_SLIST_FOREACH(bkp, &pkp_list, kcb_list) {
if (bkp == kp ||
bkp->kcb_rdomain != kp->kcb_rdomain)
continue;
@@ -2053,7 +2017,6 @@ pfkeyv2_dosend(struct socket *so, void *
pfkey_sendup(bkp, packet, 1);
}
}
- SRPL_LEAVE(&sr);
m_freem(packet);
} else {
@@ -2062,7 +2025,7 @@ pfkeyv2_dosend(struct socket *so, void *
goto ret;
}
- keylock(kp);
+ mtx_enter(&kp->kcb_mtx);
i = (kp->kcb_flags &
PFKEYV2_SOCKETFLAGS_PROMISC) ? 1 : 0;
j = smsg->sadb_msg_satype ? 1 : 0;
@@ -2082,7 +2045,7 @@ pfkeyv2_dosend(struct socket *so, void *
mtx_leave(&pfkeyv2_mtx);
}
}
- keyunlock(kp);
+ mtx_leave(&kp->kcb_mtx);
}
break;
AF_KET sockets: turn `pkp_list' from SRP to SMR list