Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: Please test: shared solock for all inet sockets within knote(9) routines
To:
Alexander Bluhm <alexander.bluhm@gmx.net>
Cc:
tech@openbsd.org
Date:
Tue, 30 Jan 2024 17:39:43 +0300

Download raw body.

Thread
On Tue, Jan 30, 2024 at 12:52:03PM +0300, Vitaliy Makkoveev wrote:
> On Mon, Jan 29, 2024 at 10:25:23PM +0300, Vitaliy Makkoveev wrote:
> > On Mon, Jan 29, 2024 at 03:57:35PM +0100, Alexander Bluhm wrote:
> > > On Mon, Jan 29, 2024 at 04:20:02PM +0300, Vitaliy Makkoveev wrote:
> > > > So, I want to commit soassertlocked() right now. sofilt_lock() diff
> > > > could be pushed to the snaps, so we could test them together.
> > > 
> > > Regress not finished, but here are the first findings.  I think it
> > > is regress/sys/net/pf_divert which requires complicated setup with
> > > two machines.
> > > 
> > > divert_packet runs with shared net lock, without rwlock socket lock,
> > > but with inpcb mutex for receive socket buffer.  I think soassertlocked
> > > has to take pru_lock() into account.
> > > 
> > > And for sorwakeup this is not sufficient, there I hope for your
> > > sofilt_lock().
> > > 
> > > This is basically the same what my diff found a month ago.
> > > 
> > > bluhm
> > 

Shared netlock is sufficient to call so{r,w}wakeup(). The following
sowakeup() modifies `sb_flags' and knote(9) stuff. Unfortunately, we
can't call so{r,w}wakeup() with mutex(9) because sowakeup() also calls
pgsigio() which grabs kernel lock.

However, `so*_filtops' callbacks only perform read-only access to the
socket stuff, so it is enough to hold shared netlock only, but the klist
stuff needs to be protected.

This diff uses some chunks of my socket buffer standalone locking work.
It introduces the `sb_mtx' mutex(9) to protect sockbuf stuff. This time
`sb_mtx' is used to protect `sb_flags' and `sb_klist'.

Now we have soassertlocked_readonly() and soassertlocked(). The first
one is happy if only the shared netlock is held, meanwhile the second
wants `so_lock' or pru_lock() to be held together with the shared netlock.

To keep soassertlocked*() assertions soft, we need to know the mutex(9)
state, so a new mtx_owned() macro was introduced. Also, the new optional
(*pru_locked)() handler reports the state of pru_lock().

Please test it, but keep in mind, soassertlocked_readonly() could be
required in some more places.

Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.315
diff -u -p -r1.315 uipc_socket.c
--- sys/kern/uipc_socket.c	26 Jan 2024 18:24:23 -0000	1.315
+++ sys/kern/uipc_socket.c	30 Jan 2024 14:24:45 -0000
@@ -72,26 +72,20 @@ int	filt_soread(struct knote *kn, long h
 void	filt_sowdetach(struct knote *kn);
 int	filt_sowrite(struct knote *kn, long hint);
 int	filt_soexcept(struct knote *kn, long hint);
-int	filt_solisten(struct knote *kn, long hint);
-int	filt_somodify(struct kevent *kev, struct knote *kn);
-int	filt_soprocess(struct knote *kn, struct kevent *kev);
 
-const struct filterops solisten_filtops = {
-	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
-	.f_attach	= NULL,
-	.f_detach	= filt_sordetach,
-	.f_event	= filt_solisten,
-	.f_modify	= filt_somodify,
-	.f_process	= filt_soprocess,
-};
+int	filt_snd_somodify(struct kevent *kev, struct knote *kn);
+int	filt_snd_soprocess(struct knote *kn, struct kevent *kev);
+
+int	filt_rcv_somodify(struct kevent *kev, struct knote *kn);
+int	filt_rcv_soprocess(struct knote *kn, struct kevent *kev);
 
 const struct filterops soread_filtops = {
 	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
 	.f_attach	= NULL,
 	.f_detach	= filt_sordetach,
 	.f_event	= filt_soread,
-	.f_modify	= filt_somodify,
-	.f_process	= filt_soprocess,
+	.f_modify	= filt_rcv_somodify,
+	.f_process	= filt_rcv_soprocess,
 };
 
 const struct filterops sowrite_filtops = {
@@ -99,8 +93,8 @@ const struct filterops sowrite_filtops =
 	.f_attach	= NULL,
 	.f_detach	= filt_sowdetach,
 	.f_event	= filt_sowrite,
-	.f_modify	= filt_somodify,
-	.f_process	= filt_soprocess,
+	.f_modify	= filt_snd_somodify,
+	.f_process	= filt_snd_soprocess,
 };
 
 const struct filterops soexcept_filtops = {
@@ -108,18 +102,28 @@ const struct filterops soexcept_filtops 
 	.f_attach	= NULL,
 	.f_detach	= filt_sordetach,
 	.f_event	= filt_soexcept,
-	.f_modify	= filt_somodify,
-	.f_process	= filt_soprocess,
+	.f_modify	= filt_rcv_somodify,
+	.f_process	= filt_rcv_soprocess,
+};
+
+void	klist_snd_soassertlk(void *);
+int	klist_snd_solock(void *);
+void	klist_snd_sounlock(void *, int);
+
+const struct klistops socket_snd_klistops = {
+	.klo_assertlk	= klist_snd_soassertlk,
+	.klo_lock	= klist_snd_solock,
+	.klo_unlock	= klist_snd_sounlock,
 };
 
-void	klist_soassertlk(void *);
-int	klist_solock(void *);
-void	klist_sounlock(void *, int);
-
-const struct klistops socket_klistops = {
-	.klo_assertlk	= klist_soassertlk,
-	.klo_lock	= klist_solock,
-	.klo_unlock	= klist_sounlock,
+void	klist_rcv_soassertlk(void *);
+int	klist_rcv_solock(void *);
+void	klist_rcv_sounlock(void *, int);
+
+const struct klistops socket_rcv_klistops = {
+	.klo_assertlk	= klist_rcv_soassertlk,
+	.klo_lock	= klist_rcv_solock,
+	.klo_unlock	= klist_rcv_sounlock,
 };
 
 #ifndef SOMINCONN
@@ -158,8 +162,10 @@ soalloc(const struct domain *dp, int wai
 		return (NULL);
 	rw_init_flags(&so->so_lock, dp->dom_name, RWL_DUPOK);
 	refcnt_init(&so->so_refcnt);
-	klist_init(&so->so_rcv.sb_klist, &socket_klistops, so);
-	klist_init(&so->so_snd.sb_klist, &socket_klistops, so);
+	mtx_init(&so->so_rcv.sb_mtx, IPL_MPFLOOR);
+	mtx_init(&so->so_snd.sb_mtx, IPL_MPFLOOR);
+	klist_init(&so->so_rcv.sb_klist, &socket_rcv_klistops, so);
+	klist_init(&so->so_snd.sb_klist, &socket_snd_klistops, so);
 	sigio_init(&so->so_sigio);
 	TAILQ_INIT(&so->so_q0);
 	TAILQ_INIT(&so->so_q);
@@ -1757,7 +1763,7 @@ somove(struct socket *so, int wait)
 void
 sorwakeup(struct socket *so)
 {
-	soassertlocked(so);
+	soassertlocked_readonly(so);
 
 #ifdef SOCKET_SPLICE
 	if (so->so_rcv.sb_flags & SB_SPLICE) {
@@ -1785,7 +1791,7 @@ sorwakeup(struct socket *so)
 void
 sowwakeup(struct socket *so)
 {
-	soassertlocked(so);
+	soassertlocked_readonly(so);
 
 #ifdef SOCKET_SPLICE
 	if (so->so_snd.sb_flags & SB_SPLICE)
@@ -2137,19 +2143,54 @@ sohasoutofband(struct socket *so)
 	knote_locked(&so->so_rcv.sb_klist, 0);
 }
 
+void
+sofilt_lock(struct socket *so, struct sockbuf *sb)
+{
+	switch (so->so_proto->pr_domain->dom_family) {
+	case PF_INET:
+	case PF_INET6:
+		NET_LOCK_SHARED();
+		break;
+	default:
+		rw_enter_write(&so->so_lock);
+		break;
+	}
+
+	mtx_enter(&sb->sb_mtx);
+}
+
+void
+sofilt_unlock(struct socket *so, struct sockbuf *sb)
+{
+	mtx_leave(&sb->sb_mtx);
+
+	switch (so->so_proto->pr_domain->dom_family) {
+	case PF_INET:
+	case PF_INET6:
+		NET_UNLOCK_SHARED();
+		break;
+	default:
+		rw_exit_write(&so->so_lock);
+		break;
+	}
+}
+
+static inline void
+sofilt_assert_locked(struct socket *so, struct sockbuf *sb)
+{
+	MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
+	soassertlocked_readonly(so);
+}
+
 int
 soo_kqfilter(struct file *fp, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 	struct sockbuf *sb;
 
-	solock(so);
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
-		if (so->so_options & SO_ACCEPTCONN)
-			kn->kn_fop = &solisten_filtops;
-		else
-			kn->kn_fop = &soread_filtops;
+		kn->kn_fop = &soread_filtops;
 		sb = &so->so_rcv;
 		break;
 	case EVFILT_WRITE:
@@ -2161,12 +2202,12 @@ soo_kqfilter(struct file *fp, struct kno
 		sb = &so->so_rcv;
 		break;
 	default:
-		sounlock(so);
 		return (EINVAL);
 	}
 
+	mtx_enter(&sb->sb_mtx);
 	klist_insert_locked(&sb->sb_klist, kn);
-	sounlock(so);
+	mtx_leave(&sb->sb_mtx);
 
 	return (0);
 }
@@ -2185,7 +2226,23 @@ filt_soread(struct knote *kn, long hint)
 	struct socket *so = kn->kn_fp->f_data;
 	int rv = 0;
 
-	soassertlocked(so);
+	sofilt_assert_locked(so, &so->so_rcv);
+
+	if (so->so_options & SO_ACCEPTCONN) {
+		kn->kn_data = so->so_qlen;
+		rv = (kn->kn_data != 0);
+
+		if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
+			if (so->so_state & SS_ISDISCONNECTED) {
+				kn->kn_flags |= __EV_HUP;
+				rv = 1;
+			} else {
+				rv = soreadable(so);
+			}
+		}
+
+		return rv;
+	}
 
 	kn->kn_data = so->so_rcv.sb_cc;
 #ifdef SOCKET_SPLICE
@@ -2226,7 +2283,7 @@ filt_sowrite(struct knote *kn, long hint
 	struct socket *so = kn->kn_fp->f_data;
 	int rv;
 
-	soassertlocked(so);
+	sofilt_assert_locked(so, &so->so_snd);
 
 	kn->kn_data = sbspace(so, &so->so_snd);
 	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
@@ -2257,7 +2314,7 @@ filt_soexcept(struct knote *kn, long hin
 	struct socket *so = kn->kn_fp->f_data;
 	int rv = 0;
 
-	soassertlocked(so);
+	sofilt_assert_locked(so, &so->so_rcv);
 
 #ifdef SOCKET_SPLICE
 	if (isspliced(so)) {
@@ -2283,77 +2340,105 @@ filt_soexcept(struct knote *kn, long hin
 }
 
 int
-filt_solisten(struct knote *kn, long hint)
+filt_snd_somodify(struct kevent *kev, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int active;
+	int rv;
 
-	soassertlocked(so);
+	sofilt_lock(so, &so->so_snd);
+	rv = knote_modify(kev, kn);
+	sofilt_unlock(so, &so->so_snd);
 
-	kn->kn_data = so->so_qlen;
-	active = (kn->kn_data != 0);
+	return (rv);
+}
 
-	if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
-		if (so->so_state & SS_ISDISCONNECTED) {
-			kn->kn_flags |= __EV_HUP;
-			active = 1;
-		} else {
-			active = soreadable(so);
-		}
-	}
+int
+filt_snd_soprocess(struct knote *kn, struct kevent *kev)
+{
+	struct socket *so = kn->kn_fp->f_data;
+	int rv;
+
+	sofilt_lock(so, &so->so_snd);
+	rv = knote_process(kn, kev);
+	sofilt_unlock(so, &so->so_snd);
 
-	return (active);
+	return (rv);
 }
 
 int
-filt_somodify(struct kevent *kev, struct knote *kn)
+filt_rcv_somodify(struct kevent *kev, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 	int rv;
 
-	solock(so);
+	sofilt_lock(so, &so->so_rcv);
 	rv = knote_modify(kev, kn);
-	sounlock(so);
+	sofilt_unlock(so, &so->so_rcv);
 
 	return (rv);
 }
 
 int
-filt_soprocess(struct knote *kn, struct kevent *kev)
+filt_rcv_soprocess(struct knote *kn, struct kevent *kev)
 {
 	struct socket *so = kn->kn_fp->f_data;
 	int rv;
 
-	solock(so);
+	sofilt_lock(so, &so->so_rcv);
 	rv = knote_process(kn, kev);
-	sounlock(so);
+	sofilt_unlock(so, &so->so_rcv);
 
 	return (rv);
 }
 
 void
-klist_soassertlk(void *arg)
+klist_snd_soassertlk(void *arg)
 {
 	struct socket *so = arg;
 
-	soassertlocked(so);
+	MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
 }
 
 int
-klist_solock(void *arg)
+klist_snd_solock(void *arg)
 {
 	struct socket *so = arg;
 
-	solock(so);
+	mtx_enter(&so->so_snd.sb_mtx);
 	return (1);
 }
 
 void
-klist_sounlock(void *arg, int ls)
+klist_snd_sounlock(void *arg, int ls)
 {
 	struct socket *so = arg;
 
-	sounlock(so);
+	mtx_leave(&so->so_snd.sb_mtx);
+}
+
+void
+klist_rcv_soassertlk(void *arg)
+{
+	struct socket *so = arg;
+
+	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
+}
+
+int
+klist_rcv_solock(void *arg)
+{
+	struct socket *so = arg;
+
+	mtx_enter(&so->so_rcv.sb_mtx);
+	return (1);
+}
+
+void
+klist_rcv_sounlock(void *arg, int ls)
+{
+	struct socket *so = arg;
+
+	mtx_leave(&so->so_rcv.sb_mtx);
 }
 
 #ifdef DDB
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.140
diff -u -p -r1.140 uipc_socket2.c
--- sys/kern/uipc_socket2.c	11 Jan 2024 14:15:11 -0000	1.140
+++ sys/kern/uipc_socket2.c	30 Jan 2024 14:24:45 -0000
@@ -439,7 +439,7 @@ sounlock_shared(struct socket *so)
 }
 
 void
-soassertlocked(struct socket *so)
+soassertlocked_readonly(struct socket *so)
 {
 	switch (so->so_proto->pr_domain->dom_family) {
 	case PF_INET:
@@ -452,6 +452,27 @@ soassertlocked(struct socket *so)
 	}
 }
 
+void
+soassertlocked(struct socket *so)
+{
+	switch (so->so_proto->pr_domain->dom_family) {
+	case PF_INET:
+	case PF_INET6:
+		if (rw_status(&netlock) == RW_READ) {
+			NET_ASSERT_LOCKED();
+
+			if (splassert_ctl > 0 && pru_locked(so) == 0 &&
+			    rw_status(&so->so_lock) != RW_WRITE)
+				splassert_fail(0, RW_WRITE, __func__);
+		} else
+			NET_ASSERT_LOCKED_EXCLUSIVE();
+		break;
+	default:
+		rw_assert_wrlock(&so->so_lock);
+		break;
+	}
+}
+
 int
 sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg,
     uint64_t nsecs)
@@ -489,46 +510,62 @@ sbwait(struct socket *so, struct sockbuf
 
 	soassertlocked(so);
 
+	mtx_enter(&sb->sb_mtx);
 	sb->sb_flags |= SB_WAIT;
+	mtx_leave(&sb->sb_mtx);
+
 	return sosleep_nsec(so, &sb->sb_cc, prio, "netio", sb->sb_timeo_nsecs);
 }
 
 int
 sblock(struct socket *so, struct sockbuf *sb, int flags)
 {
-	int error, prio = PSOCK;
+	int error = 0, prio = PSOCK;
 
 	soassertlocked(so);
 
+	mtx_enter(&sb->sb_mtx);
 	if ((sb->sb_flags & SB_LOCK) == 0) {
 		sb->sb_flags |= SB_LOCK;
-		return (0);
+		goto out;
+	}
+	if ((flags & SBL_WAIT) == 0) {
+		error = EWOULDBLOCK;
+		goto out;
 	}
-	if ((flags & SBL_WAIT) == 0)
-		return (EWOULDBLOCK);
 	if (!(flags & SBL_NOINTR || sb->sb_flags & SB_NOINTR))
 		prio |= PCATCH;
 
 	while (sb->sb_flags & SB_LOCK) {
 		sb->sb_flags |= SB_WANT;
+		mtx_leave(&sb->sb_mtx);
 		error = sosleep_nsec(so, &sb->sb_flags, prio, "netlck", INFSLP);
 		if (error)
 			return (error);
+		mtx_enter(&sb->sb_mtx);
 	}
 	sb->sb_flags |= SB_LOCK;
+out:
+	mtx_leave(&sb->sb_mtx);
+
 	return (0);
 }
 
 void
 sbunlock(struct socket *so, struct sockbuf *sb)
 {
-	soassertlocked(so);
+	int dowakeup = 0;
 
+	mtx_enter(&sb->sb_mtx);
 	sb->sb_flags &= ~SB_LOCK;
 	if (sb->sb_flags & SB_WANT) {
 		sb->sb_flags &= ~SB_WANT;
-		wakeup(&sb->sb_flags);
+		dowakeup = 1;
 	}
+	mtx_leave(&sb->sb_mtx);
+
+	if (dowakeup)
+		wakeup(&sb->sb_flags);
 }
 
 /*
@@ -539,15 +576,24 @@ sbunlock(struct socket *so, struct sockb
 void
 sowakeup(struct socket *so, struct sockbuf *sb)
 {
-	soassertlocked(so);
+	int dowakeup = 0, dopgsigio = 0;
 
+	mtx_enter(&sb->sb_mtx);
 	if (sb->sb_flags & SB_WAIT) {
 		sb->sb_flags &= ~SB_WAIT;
-		wakeup(&sb->sb_cc);
+		dowakeup = 1;
 	}
 	if (sb->sb_flags & SB_ASYNC)
-		pgsigio(&so->so_sigio, SIGIO, 0);
+		dopgsigio = 1;
+
 	knote_locked(&sb->sb_klist, 0);
+	mtx_leave(&sb->sb_mtx);
+
+	if (dowakeup)
+		wakeup(&sb->sb_cc);
+
+	if (dopgsigio)
+		pgsigio(&so->so_sigio, SIGIO, 0);
 }
 
 /*
Index: sys/kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.216
diff -u -p -r1.216 uipc_syscalls.c
--- sys/kern/uipc_syscalls.c	3 Jan 2024 11:07:04 -0000	1.216
+++ sys/kern/uipc_syscalls.c	30 Jan 2024 14:24:45 -0000
@@ -326,7 +326,9 @@ doaccept(struct proc *p, int sock, struc
 	    : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
 
 	/* connection has been removed from the listen queue */
+	mtx_enter(&head->so_rcv.sb_mtx);
 	knote_locked(&head->so_rcv.sb_klist, 0);
+	mtx_leave(&head->so_rcv.sb_mtx);
 
 	if (persocket)
 		sounlock(head);
Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.92
diff -u -p -r1.92 ip_divert.c
--- sys/netinet/ip_divert.c	16 Sep 2023 09:33:27 -0000	1.92
+++ sys/netinet/ip_divert.c	30 Jan 2024 14:24:45 -0000
@@ -67,6 +67,7 @@ const struct pr_usrreqs divert_usrreqs =
 	.pru_detach	= divert_detach,
 	.pru_lock	= divert_lock,
 	.pru_unlock	= divert_unlock,
+	.pru_locked	= divert_locked,
 	.pru_bind	= divert_bind,
 	.pru_shutdown	= divert_shutdown,
 	.pru_send	= divert_send,
@@ -311,6 +312,14 @@ divert_unlock(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 	mtx_leave(&inp->inp_mtx);
+}
+
+int
+divert_locked(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	return mtx_owned(&inp->inp_mtx);
 }
 
 int
Index: sys/netinet/ip_divert.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.h,v
retrieving revision 1.24
diff -u -p -r1.24 ip_divert.h
--- sys/netinet/ip_divert.h	17 Oct 2022 14:49:02 -0000	1.24
+++ sys/netinet/ip_divert.h	30 Jan 2024 14:24:45 -0000
@@ -74,6 +74,7 @@ int	 divert_attach(struct socket *, int,
 int	 divert_detach(struct socket *);
 void	 divert_lock(struct socket *);
 void	 divert_unlock(struct socket *);
+int	 divert_locked(struct socket *);
 int	 divert_bind(struct socket *, struct mbuf *, struct proc *);
 int	 divert_shutdown(struct socket *);
 int	 divert_send(struct socket *, struct mbuf *, struct mbuf *,
Index: sys/netinet/ip_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.110
diff -u -p -r1.110 ip_var.h
--- sys/netinet/ip_var.h	26 Nov 2023 22:08:10 -0000	1.110
+++ sys/netinet/ip_var.h	30 Jan 2024 14:24:45 -0000
@@ -260,6 +260,7 @@ int	 rip_attach(struct socket *, int, in
 int	 rip_detach(struct socket *);
 void	 rip_lock(struct socket *);
 void	 rip_unlock(struct socket *);
+int	 rip_locked(struct socket *);
 int	 rip_bind(struct socket *, struct mbuf *, struct proc *);
 int	 rip_connect(struct socket *, struct mbuf *);
 int	 rip_disconnect(struct socket *);
Index: sys/netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.154
diff -u -p -r1.154 raw_ip.c
--- sys/netinet/raw_ip.c	21 Jan 2024 01:17:20 -0000	1.154
+++ sys/netinet/raw_ip.c	30 Jan 2024 14:24:45 -0000
@@ -108,6 +108,7 @@ const struct pr_usrreqs rip_usrreqs = {
 	.pru_detach	= rip_detach,
 	.pru_lock	= rip_lock,
 	.pru_unlock	= rip_unlock,
+	.pru_locked	= rip_locked,
 	.pru_bind	= rip_bind,
 	.pru_connect	= rip_connect,
 	.pru_disconnect	= rip_disconnect,
@@ -522,6 +523,14 @@ rip_unlock(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 	mtx_leave(&inp->inp_mtx);
+}
+
+int
+rip_locked(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	return mtx_owned(&inp->inp_mtx);
 }
 
 int
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.316
diff -u -p -r1.316 udp_usrreq.c
--- sys/netinet/udp_usrreq.c	28 Jan 2024 20:34:25 -0000	1.316
+++ sys/netinet/udp_usrreq.c	30 Jan 2024 14:24:45 -0000
@@ -127,6 +127,7 @@ const struct pr_usrreqs udp_usrreqs = {
 	.pru_detach	= udp_detach,
 	.pru_lock	= udp_lock,
 	.pru_unlock	= udp_unlock,
+	.pru_locked	= udp_locked,
 	.pru_bind	= udp_bind,
 	.pru_connect	= udp_connect,
 	.pru_disconnect	= udp_disconnect,
@@ -143,6 +144,7 @@ const struct pr_usrreqs udp6_usrreqs = {
 	.pru_detach	= udp_detach,
 	.pru_lock	= udp_lock,
 	.pru_unlock	= udp_unlock,
+	.pru_locked	= udp_locked,
 	.pru_bind	= udp_bind,
 	.pru_connect	= udp_connect,
 	.pru_disconnect	= udp_disconnect,
@@ -1154,6 +1156,14 @@ udp_unlock(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 	mtx_leave(&inp->inp_mtx);
+}
+
+int
+udp_locked(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	return mtx_owned(&inp->inp_mtx);
 }
 
 int
Index: sys/netinet/udp_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/udp_var.h,v
retrieving revision 1.50
diff -u -p -r1.50 udp_var.h
--- sys/netinet/udp_var.h	10 Jan 2024 16:44:30 -0000	1.50
+++ sys/netinet/udp_var.h	30 Jan 2024 14:24:45 -0000
@@ -147,6 +147,7 @@ int	 udp_attach(struct socket *, int, in
 int	 udp_detach(struct socket *);
 void	 udp_lock(struct socket *);
 void	 udp_unlock(struct socket *);
+int	 udp_locked(struct socket *);
 int	 udp_bind(struct socket *, struct mbuf *, struct proc *);
 int	 udp_connect(struct socket *, struct mbuf *);
 int	 udp_disconnect(struct socket *);
Index: sys/netinet6/ip6_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_mroute.c,v
retrieving revision 1.138
diff -u -p -r1.138 ip6_mroute.c
--- sys/netinet6/ip6_mroute.c	6 Dec 2023 09:27:17 -0000	1.138
+++ sys/netinet6/ip6_mroute.c	30 Jan 2024 14:24:45 -0000
@@ -861,12 +861,12 @@ socket6_send(struct socket *so, struct m
 
 		mtx_enter(&inp->inp_mtx);
 		ret = sbappendaddr(so, &so->so_rcv, sin6tosa(src), mm, NULL);
+		if (ret != 0)
+			sorwakeup(so);
 		mtx_leave(&inp->inp_mtx);
 
-		if (ret != 0) {
-			sorwakeup(so);
+		if (ret != 0)
 			return 0;
-		}
 	}
 	m_freem(mm);
 	return -1;
Index: sys/netinet6/ip6_var.h
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_var.h,v
retrieving revision 1.109
diff -u -p -r1.109 ip6_var.h
--- sys/netinet6/ip6_var.h	3 Dec 2023 20:36:24 -0000	1.109
+++ sys/netinet6/ip6_var.h	30 Jan 2024 14:24:45 -0000
@@ -353,6 +353,7 @@ int	rip6_attach(struct socket *, int, in
 int	rip6_detach(struct socket *);
 void	rip6_lock(struct socket *);
 void	rip6_unlock(struct socket *);
+int	rip6_locked(struct socket *);
 int	rip6_bind(struct socket *, struct mbuf *, struct proc *);
 int	rip6_connect(struct socket *, struct mbuf *);
 int	rip6_disconnect(struct socket *);
Index: sys/netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.179
diff -u -p -r1.179 raw_ip6.c
--- sys/netinet6/raw_ip6.c	21 Jan 2024 01:17:20 -0000	1.179
+++ sys/netinet6/raw_ip6.c	30 Jan 2024 14:24:45 -0000
@@ -110,6 +110,7 @@ const struct pr_usrreqs rip6_usrreqs = {
 	.pru_detach	= rip6_detach,
 	.pru_lock	= rip6_lock,
 	.pru_unlock	= rip6_unlock,
+	.pru_locked	= rip6_locked,
 	.pru_bind	= rip6_bind,
 	.pru_connect	= rip6_connect,
 	.pru_disconnect	= rip6_disconnect,
@@ -651,6 +652,14 @@ rip6_unlock(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 	mtx_leave(&inp->inp_mtx);
+}
+
+int
+rip6_locked(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	return mtx_owned(&inp->inp_mtx);
 }
 
 int
Index: sys/sys/mutex.h
===================================================================
RCS file: /cvs/src/sys/sys/mutex.h,v
retrieving revision 1.19
diff -u -p -r1.19 mutex.h
--- sys/sys/mutex.h	1 Dec 2023 14:37:22 -0000	1.19
+++ sys/sys/mutex.h	30 Jan 2024 14:24:46 -0000
@@ -127,6 +127,9 @@ void	mtx_leave(struct mutex *);
 
 #define mtx_init(m, ipl)	mtx_init_flags(m, ipl, NULL, 0)
 
+#define mtx_owned(mtx) \
+	(((mtx)->mtx_owner == curcpu()) || panicstr || db_active)
+
 #ifdef WITNESS
 
 void	_mtx_init_flags(struct mutex *, int, const char *, int,
Index: sys/sys/protosw.h
===================================================================
RCS file: /cvs/src/sys/sys/protosw.h,v
retrieving revision 1.64
diff -u -p -r1.64 protosw.h
--- sys/sys/protosw.h	11 Jan 2024 14:15:12 -0000	1.64
+++ sys/sys/protosw.h	30 Jan 2024 14:24:46 -0000
@@ -69,6 +69,7 @@ struct pr_usrreqs {
 	int	(*pru_detach)(struct socket *);
 	void	(*pru_lock)(struct socket *);
 	void	(*pru_unlock)(struct socket *);
+	int	(*pru_locked)(struct socket *so);
 	int	(*pru_bind)(struct socket *, struct mbuf *, struct proc *);
 	int	(*pru_listen)(struct socket *);
 	int	(*pru_connect)(struct socket *, struct mbuf *);
@@ -294,6 +295,14 @@ pru_unlock(struct socket *so)
 {
 	if (so->so_proto->pr_usrreqs->pru_unlock)
 		(*so->so_proto->pr_usrreqs->pru_unlock)(so);
+}
+
+static inline int
+pru_locked(struct socket *so)
+{
+	if (so->so_proto->pr_usrreqs->pru_locked)
+		return (*so->so_proto->pr_usrreqs->pru_locked)(so);
+	return (0);
 }
 
 static inline int
Index: sys/sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.121
diff -u -p -r1.121 socketvar.h
--- sys/sys/socketvar.h	11 Jan 2024 14:15:12 -0000	1.121
+++ sys/sys/socketvar.h	30 Jan 2024 14:24:46 -0000
@@ -40,6 +40,7 @@
 #include <sys/sigio.h>				/* for struct sigio_ref */
 #include <sys/task.h>
 #include <sys/timeout.h>
+#include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/refcnt.h>
 
@@ -105,6 +106,7 @@ struct socket {
  * Variables for socket buffering.
  */
 	struct	sockbuf {
+		struct mutex sb_mtx;
 /* The following fields are all zeroed on flush. */
 #define	sb_startzero	sb_cc
 		u_long	sb_cc;		/* actual chars in buffer */
@@ -174,6 +176,7 @@ struct socket {
 #include <lib/libkern/libkern.h>
 
 void	soassertlocked(struct socket *);
+void	soassertlocked_readonly(struct socket *);
 
 static inline void
 soref(struct socket *so)
@@ -211,10 +214,12 @@ sb_notify(struct socket *so, struct sock
  * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
  * overflow and return 0.
  */
+
 static inline long
 sbspace(struct socket *so, struct sockbuf *sb)
 {
-	soassertlocked(so);
+	soassertlocked_readonly(so);
+
 	return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
 }
 
@@ -230,7 +235,7 @@ sbspace(struct socket *so, struct sockbu
 static inline int
 soreadable(struct socket *so)
 {
-	soassertlocked(so);
+	soassertlocked_readonly(so);
 	if (isspliced(so))
 		return 0;
 	return (so->so_rcv.sb_state & SS_CANTRCVMORE) || so->so_qlen ||
@@ -241,7 +246,7 @@ soreadable(struct socket *so)
 static inline int
 sowriteable(struct socket *so)
 {
-	soassertlocked(so);
+	soassertlocked_readonly(so);
 	return ((sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat &&
 	    ((so->so_state & SS_ISCONNECTED) ||
 	    (so->so_proto->pr_flags & PR_CONNREQUIRED)==0)) ||