From: Vitaliy Makkoveev Subject: AF_KET sockets: turn `pkp_list' from SRP to SMR list To: tech@openbsd.org Date: Sat, 25 May 2024 20:03:08 +0300 `pkp_list' foreach walkthroughs are simply sbappendarrd() and sorwakeup() which rely on `sb_mtx' mutex(9). The only sleep point is solock() which is used to protect `kcb_reg' and `kcb_flags' of pckcb structure. There are no code paths to serialize, so solock() could be replaced by per-pkpcb `kcb_mtx' mutex(9). This removes sleep points from `pkp_list' walkthroughs and allow to convert `pkp_list' to SMR list. `pkp_lk' rwlock(9) was replaced by `pkp_mtx' mutex(9). It used only to protect `pkp_list' modifications and rwlock looks overkilling here. AF_ROUTE sockets are more complicated, but seems it could be possible convert them too. Index: sys/net/pfkeyv2.c =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2.c,v diff -u -p -r1.262 pfkeyv2.c --- sys/net/pfkeyv2.c 17 May 2024 19:02:04 -0000 1.262 +++ sys/net/pfkeyv2.c 20 May 2024 10:01:06 -0000 @@ -81,6 +81,7 @@ #include #include #include +#include #include #include @@ -131,38 +132,31 @@ const struct domain pfkeydomain; * * Locks used to protect struct members in this file: * I immutable after creation - * a atomic operations - * l pkptable's lock - * s socket lock + * L pkp_mtx + * m kcb_mtx */ struct pkpcb { struct socket *kcb_socket; /* [I] associated socket */ - SRPL_ENTRY(pkpcb) kcb_list; /* [l] */ - struct refcnt kcb_refcnt; /* [a] */ - int kcb_flags; /* [s] */ - uint32_t kcb_reg; /* [s] Inc if SATYPE_MAX > 31 */ + struct mutex kcb_mtx; + SMR_SLIST_ENTRY(pkpcb) kcb_list; /* [L] */ + int kcb_flags; /* [m] */ + uint32_t kcb_reg; /* [m] Inc if SATYPE_MAX > 31 */ uint32_t kcb_pid; /* [I] */ unsigned int kcb_rdomain; /* [I] routing domain */ }; #define sotokeycb(so) ((struct pkpcb *)(so)->so_pcb) -#define keylock(kp) solock((kp)->kcb_socket) -#define keyunlock(kp) sounlock((kp)->kcb_socket) - struct dump_state { struct sadb_msg *sadb_msg; struct socket *socket; }; -struct pkptable { - SRPL_HEAD(, pkpcb) pkp_list; - struct srpl_rc pkp_rc; - struct rwlock pkp_lk; -}; +struct mutex pkp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +SMR_SLIST_HEAD(, pkpcb) pkp_list = + SMR_SLIST_HEAD_INITIALIZER(pkp_list); /* L */ -struct pkptable pkptable; -struct mutex pfkeyv2_mtx = MUTEX_INITIALIZER(IPL_MPFLOOR); +struct mutex pfkeyv2_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); static uint32_t pfkeyv2_seq = 1; static int nregistered = 0; static int npromisc = 0; @@ -183,9 +177,6 @@ int pfkeyv2_sa_flush(struct tdb *, void int pfkeyv2_policy_flush(struct ipsec_policy *, void *, unsigned int); int pfkeyv2_sysctl_policydumper(struct ipsec_policy *, void *, unsigned int); -void keycb_ref(void *, void *); -void keycb_unref(void *, void *); - /* * Wrapper around m_devget(); copy data from contiguous buffer to mbuf * chain. @@ -232,28 +223,9 @@ const struct domain pfkeydomain = { }; void -keycb_ref(void *null, void *v) -{ - struct pkpcb *kp = v; - - refcnt_take(&kp->kcb_refcnt); -} - -void -keycb_unref(void *null, void *v) -{ - struct pkpcb *kp = v; - - refcnt_rele_wake(&kp->kcb_refcnt); -} - -void pfkey_init(void) { rn_init(sizeof(struct sockaddr_encap)); - srpl_rc_init(&pkptable.pkp_rc, keycb_ref, keycb_unref, NULL); - rw_init(&pkptable.pkp_lk, "pfkey"); - SRPL_INIT(&pkptable.pkp_list); pool_init(&pkpcb_pool, sizeof(struct pkpcb), 0, IPL_SOFTNET, PR_WAITOK, "pkpcb", NULL); pool_init(&ipsec_policy_pool, sizeof(struct ipsec_policy), 0, @@ -284,7 +256,7 @@ pfkeyv2_attach(struct socket *so, int pr if (kp == NULL) return (ENOBUFS); so->so_pcb = kp; - refcnt_init(&kp->kcb_refcnt); + mtx_init(&kp->kcb_mtx, IPL_SOFTNET); kp->kcb_socket = so; kp->kcb_pid = curproc->p_p->ps_pid; kp->kcb_rdomain = rtable_l2(curproc->p_p->ps_rtableid); @@ -292,9 +264,9 @@ pfkeyv2_attach(struct socket *so, int pr so->so_options |= SO_USELOOPBACK; soisconnected(so); - rw_enter(&pkptable.pkp_lk, RW_WRITE); - SRPL_INSERT_HEAD_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, kcb_list); - rw_exit(&pkptable.pkp_lk); + mtx_enter(&pkp_mtx); + SMR_SLIST_INSERT_HEAD_LOCKED(&pkp_list, kp, kcb_list); + mtx_leave(&pkp_mtx); return (0); } @@ -324,15 +296,10 @@ pfkeyv2_detach(struct socket *so) mtx_leave(&pfkeyv2_mtx); } - rw_enter(&pkptable.pkp_lk, RW_WRITE); - SRPL_REMOVE_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, pkpcb, - kcb_list); - rw_exit(&pkptable.pkp_lk); - - sounlock(so); - /* wait for all references to drop */ - refcnt_finalize(&kp->kcb_refcnt, "pfkeyrefs"); - solock(so); + mtx_enter(&pkp_mtx); + SMR_SLIST_REMOVE_LOCKED(&pkp_list, kp, pkpcb, kcb_list); + mtx_leave(&pkp_mtx); + smr_barrier(); so->so_pcb = NULL; KASSERT((so->so_state & SS_NOFDREF) == 0); @@ -478,7 +445,6 @@ pfkeyv2_sendmessage(void **headers, int struct mbuf *packet; struct pkpcb *kp; struct sadb_msg *smsg; - struct srp_ref sr; /* Find out how much space we'll need... */ j = sizeof(struct sadb_msg); @@ -541,14 +507,13 @@ pfkeyv2_sendmessage(void **headers, int * Search for promiscuous listeners, skipping the * original destination. */ - SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) { + SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) { if (kp->kcb_socket == so || kp->kcb_rdomain != rdomain) continue; if (kp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC) pfkey_sendup(kp, packet, 1); } - SRPL_LEAVE(&sr); m_freem(packet); break; @@ -557,7 +522,7 @@ pfkeyv2_sendmessage(void **headers, int * Send the message to all registered sockets that match * the specified satype (e.g., all IPSEC-ESP negotiators) */ - SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) { + SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) { if (kp->kcb_rdomain != rdomain) continue; @@ -566,15 +531,18 @@ pfkeyv2_sendmessage(void **headers, int /* Just send to everyone registered */ pfkey_sendup(kp, packet, 1); } else { - keylock(kp); + int sendup = 0; + + mtx_enter(&kp->kcb_mtx); /* Check for specified satype */ if ((1 << satype) & kp->kcb_reg) + sendup = 1; + mtx_leave(&kp->kcb_mtx); + if (sendup) pfkey_sendup(kp, packet, 1); - keyunlock(kp); } } } - SRPL_LEAVE(&sr); /* Free last/original copy of the packet */ m_freem(packet); @@ -593,7 +561,7 @@ pfkeyv2_sendmessage(void **headers, int goto ret; /* Send to all registered promiscuous listeners */ - SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) { + SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) { int flags = READ_ONCE(kp->kcb_flags); if (kp->kcb_rdomain != rdomain) @@ -603,19 +571,17 @@ pfkeyv2_sendmessage(void **headers, int !(flags & PFKEYV2_SOCKETFLAGS_REGISTERED)) pfkey_sendup(kp, packet, 1); } - SRPL_LEAVE(&sr); m_freem(packet); break; case PFKEYV2_SENDMESSAGE_BROADCAST: /* Send message to all sockets */ - SRPL_FOREACH(kp, &sr, &pkptable.pkp_list, kcb_list) { + SMR_SLIST_FOREACH(kp, &pkp_list, kcb_list) { if (kp->kcb_rdomain != rdomain) continue; pfkey_sendup(kp, packet, 1); } - SRPL_LEAVE(&sr); m_freem(packet); break; } @@ -1139,7 +1105,6 @@ pfkeyv2_dosend(struct socket *so, void * struct sadb_sa *ssa; struct sadb_supported *ssup; struct sadb_ident *sid, *did; - struct srp_ref sr; struct sadb_x_rdomain *srdomain; u_int rdomain = 0; int promisc; @@ -1189,14 +1154,13 @@ pfkeyv2_dosend(struct socket *so, void * goto ret; /* Send to all promiscuous listeners */ - SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) { + SMR_SLIST_FOREACH(bkp, &pkp_list, kcb_list) { if (bkp->kcb_rdomain != kp->kcb_rdomain) continue; if (bkp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC) pfkey_sendup(bkp, packet, 1); } - SRPL_LEAVE(&sr); m_freem(packet); @@ -1636,14 +1600,14 @@ pfkeyv2_dosend(struct socket *so, void * break; case SADB_REGISTER: - keylock(kp); + mtx_enter(&kp->kcb_mtx); if (!(kp->kcb_flags & PFKEYV2_SOCKETFLAGS_REGISTERED)) { kp->kcb_flags |= PFKEYV2_SOCKETFLAGS_REGISTERED; mtx_enter(&pfkeyv2_mtx); nregistered++; mtx_leave(&pfkeyv2_mtx); } - keyunlock(kp); + mtx_leave(&kp->kcb_mtx); freeme_sz = sizeof(struct sadb_supported) + sizeof(ealgs); if (!(freeme = malloc(freeme_sz, M_PFKEY, M_NOWAIT | M_ZERO))) { @@ -1670,10 +1634,10 @@ pfkeyv2_dosend(struct socket *so, void * } /* Keep track what this socket has registered for */ - keylock(kp); + mtx_enter(&kp->kcb_mtx); kp->kcb_reg |= (1 << ((struct sadb_msg *)message)->sadb_msg_satype); - keyunlock(kp); + mtx_leave(&kp->kcb_mtx); ssup = (struct sadb_supported *) freeme2; ssup->sadb_supported_len = freeme2_sz / sizeof(uint64_t); @@ -2043,7 +2007,7 @@ pfkeyv2_dosend(struct socket *so, void * if ((rval = pfdatatopacket(message, len, &packet)) != 0) goto ret; - SRPL_FOREACH(bkp, &sr, &pkptable.pkp_list, kcb_list) { + SMR_SLIST_FOREACH(bkp, &pkp_list, kcb_list) { if (bkp == kp || bkp->kcb_rdomain != kp->kcb_rdomain) continue; @@ -2053,7 +2017,6 @@ pfkeyv2_dosend(struct socket *so, void * pfkey_sendup(bkp, packet, 1); } } - SRPL_LEAVE(&sr); m_freem(packet); } else { @@ -2062,7 +2025,7 @@ pfkeyv2_dosend(struct socket *so, void * goto ret; } - keylock(kp); + mtx_enter(&kp->kcb_mtx); i = (kp->kcb_flags & PFKEYV2_SOCKETFLAGS_PROMISC) ? 1 : 0; j = smsg->sadb_msg_satype ? 1 : 0; @@ -2082,7 +2045,7 @@ pfkeyv2_dosend(struct socket *so, void * mtx_leave(&pfkeyv2_mtx); } } - keyunlock(kp); + mtx_leave(&kp->kcb_mtx); } break;