Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: Please test: shared solock for all intet sockets within knote(9) routines
To:
Alexander Bluhm <alexander.bluhm@gmx.net>
Cc:
tech@openbsd.org
Date:
Mon, 29 Jan 2024 22:25:23 +0300

Download raw body.

Thread
On Mon, Jan 29, 2024 at 03:57:35PM +0100, Alexander Bluhm wrote:
> On Mon, Jan 29, 2024 at 04:20:02PM +0300, Vitaliy Makkoveev wrote:
> > So, I want to commit soassertlocked() right now. sofilt_lock() diff
> > could be pushed to the snaps, so we could test them together.
> 
> Regress not finished, but here are the first findings.  I think it
> is regress/sys/net/pf_divert which requires complicated setup with
> two machines.
> 
> divert_packet runs with shared net lock, without rwlock socket lock,
> but with inpcb mutex for receive socket buffer.  I think soassertlocked
> has to take pru_lock() into account.
> 
> And for sorwakeup this is not sufficent, there I hope for your
> sofilt_lock().
> 
> This is basically the same what my diff found a month ago.
> 
> bluhm

We should have no so{r,w}wakeup() calls outside pru_lock() or `so_lock'
protection for shared netlock case. May be we have the only caller with
shared netlock for socket instance, but this could be changed in the
future. Any case this is very fragile.

Updated diff. We have no rw_status() analog for mutex, so I introduced
mtx_owned() macro. The 'pr_usrreqs' structure received new optional
`pru_locked' member to get the status of pru_lock(). soassertlocked()
was adjusted to use it.

sorwakeup() call was moved under `inp_mtx' for both divert sockets and
multicast routing.

This diff also includes the sofilt_lock() hunks to make the test
coverage wider. I want to commit hem separately, so I remove them before
commit.

The mutex man page is missing. I'll add it later if required.

Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.315
diff -u -p -r1.315 uipc_socket.c
--- sys/kern/uipc_socket.c	26 Jan 2024 18:24:23 -0000	1.315
+++ sys/kern/uipc_socket.c	29 Jan 2024 19:08:04 -0000
@@ -2137,13 +2137,45 @@ sohasoutofband(struct socket *so)
 	knote_locked(&so->so_rcv.sb_klist, 0);
 }
 
+void
+sofilt_lock(struct socket *so)
+{
+	switch (so->so_proto->pr_domain->dom_family) {
+	case PF_INET:
+	case PF_INET6:
+		NET_LOCK_SHARED();
+		rw_enter_write(&so->so_lock);
+		pru_lock(so);
+		break;
+	default:
+		rw_enter_write(&so->so_lock);
+		break;
+	}
+}
+
+void
+sofilt_unlock(struct socket *so)
+{
+	switch (so->so_proto->pr_domain->dom_family) {
+	case PF_INET:
+	case PF_INET6:
+		pru_unlock(so);
+		rw_exit_write(&so->so_lock);
+		NET_UNLOCK_SHARED();
+		break;
+	default:
+		rw_exit_write(&so->so_lock);
+		break;
+	}
+}
+
 int
 soo_kqfilter(struct file *fp, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 	struct sockbuf *sb;
 
-	solock(so);
+	sofilt_lock(so);
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
 		if (so->so_options & SO_ACCEPTCONN)
@@ -2161,12 +2193,12 @@ soo_kqfilter(struct file *fp, struct kno
 		sb = &so->so_rcv;
 		break;
 	default:
-		sounlock(so);
+		sofilt_unlock(so);
 		return (EINVAL);
 	}
 
 	klist_insert_locked(&sb->sb_klist, kn);
-	sounlock(so);
+	sofilt_unlock(so);
 
 	return (0);
 }
@@ -2311,9 +2343,9 @@ filt_somodify(struct kevent *kev, struct
 	struct socket *so = kn->kn_fp->f_data;
 	int rv;
 
-	solock(so);
+	sofilt_lock(so);
 	rv = knote_modify(kev, kn);
-	sounlock(so);
+	sofilt_unlock(so);
 
 	return (rv);
 }
@@ -2324,9 +2356,9 @@ filt_soprocess(struct knote *kn, struct 
 	struct socket *so = kn->kn_fp->f_data;
 	int rv;
 
-	solock(so);
+	sofilt_lock(so);
 	rv = knote_process(kn, kev);
-	sounlock(so);
+	sofilt_unlock(so);
 
 	return (rv);
 }
@@ -2344,7 +2376,7 @@ klist_solock(void *arg)
 {
 	struct socket *so = arg;
 
-	solock(so);
+	sofilt_lock(so);
 	return (1);
 }
 
@@ -2353,7 +2385,7 @@ klist_sounlock(void *arg, int ls)
 {
 	struct socket *so = arg;
 
-	sounlock(so);
+	sofilt_unlock(so);
 }
 
 #ifdef DDB
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.140
diff -u -p -r1.140 uipc_socket2.c
--- sys/kern/uipc_socket2.c	11 Jan 2024 14:15:11 -0000	1.140
+++ sys/kern/uipc_socket2.c	29 Jan 2024 19:08:04 -0000
@@ -444,7 +444,14 @@ soassertlocked(struct socket *so)
 	switch (so->so_proto->pr_domain->dom_family) {
 	case PF_INET:
 	case PF_INET6:
-		NET_ASSERT_LOCKED();
+		if (rw_status(&netlock) == RW_READ) {
+			NET_ASSERT_LOCKED();
+
+			if (splassert_ctl > 0 && pru_locked(so) == 0 &&
+			    rw_status(&so->so_lock) != RW_WRITE)
+				splassert_fail(0, RW_WRITE, __func__);
+		} else
+			NET_ASSERT_LOCKED_EXCLUSIVE();
 		break;
 	default:
 		rw_assert_wrlock(&so->so_lock);
Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.92
diff -u -p -r1.92 ip_divert.c
--- sys/netinet/ip_divert.c	16 Sep 2023 09:33:27 -0000	1.92
+++ sys/netinet/ip_divert.c	29 Jan 2024 19:08:04 -0000
@@ -67,6 +67,7 @@ const struct pr_usrreqs divert_usrreqs =
 	.pru_detach	= divert_detach,
 	.pru_lock	= divert_lock,
 	.pru_unlock	= divert_unlock,
+	.pru_locked	= divert_locked,
 	.pru_bind	= divert_bind,
 	.pru_shutdown	= divert_shutdown,
 	.pru_send	= divert_send,
@@ -247,8 +248,8 @@ divert_packet(struct mbuf *m, int dir, u
 		divstat_inc(divs_fullsock);
 		goto bad;
 	}
-	mtx_leave(&inp->inp_mtx);
 	sorwakeup(so);
+	mtx_leave(&inp->inp_mtx);
 
 	in_pcbunref(inp);
 	return;
@@ -311,6 +312,14 @@ divert_unlock(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 	mtx_leave(&inp->inp_mtx);
+}
+
+int
+divert_locked(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	return mtx_owned(&inp->inp_mtx);
 }
 
 int
Index: sys/netinet/ip_divert.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.h,v
retrieving revision 1.24
diff -u -p -r1.24 ip_divert.h
--- sys/netinet/ip_divert.h	17 Oct 2022 14:49:02 -0000	1.24
+++ sys/netinet/ip_divert.h	29 Jan 2024 19:08:04 -0000
@@ -74,6 +74,7 @@ int	 divert_attach(struct socket *, int,
 int	 divert_detach(struct socket *);
 void	 divert_lock(struct socket *);
 void	 divert_unlock(struct socket *);
+int	 divert_locked(struct socket *);
 int	 divert_bind(struct socket *, struct mbuf *, struct proc *);
 int	 divert_shutdown(struct socket *);
 int	 divert_send(struct socket *, struct mbuf *, struct mbuf *,
Index: sys/netinet/ip_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.140
diff -u -p -r1.140 ip_mroute.c
--- sys/netinet/ip_mroute.c	6 Dec 2023 09:27:17 -0000	1.140
+++ sys/netinet/ip_mroute.c	29 Jan 2024 19:08:04 -0000
@@ -1056,12 +1056,12 @@ socket_send(struct socket *so, struct mb
 
 		mtx_enter(&inp->inp_mtx);
 		ret = sbappendaddr(so, &so->so_rcv, sintosa(src), mm, NULL);
+		if (ret != 0)
+			sorwakeup(so);
 		mtx_leave(&inp->inp_mtx);
 
-		if (ret != 0) {
-			sorwakeup(so);
+		if (ret != 0)
 			return (0);
-		}
 	}
 	m_freem(mm);
 	return (-1);
Index: sys/netinet/ip_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.110
diff -u -p -r1.110 ip_var.h
--- sys/netinet/ip_var.h	26 Nov 2023 22:08:10 -0000	1.110
+++ sys/netinet/ip_var.h	29 Jan 2024 19:08:04 -0000
@@ -260,6 +260,7 @@ int	 rip_attach(struct socket *, int, in
 int	 rip_detach(struct socket *);
 void	 rip_lock(struct socket *);
 void	 rip_unlock(struct socket *);
+int	 rip_locked(struct socket *);
 int	 rip_bind(struct socket *, struct mbuf *, struct proc *);
 int	 rip_connect(struct socket *, struct mbuf *);
 int	 rip_disconnect(struct socket *);
Index: sys/netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.154
diff -u -p -r1.154 raw_ip.c
--- sys/netinet/raw_ip.c	21 Jan 2024 01:17:20 -0000	1.154
+++ sys/netinet/raw_ip.c	29 Jan 2024 19:08:04 -0000
@@ -108,6 +108,7 @@ const struct pr_usrreqs rip_usrreqs = {
 	.pru_detach	= rip_detach,
 	.pru_lock	= rip_lock,
 	.pru_unlock	= rip_unlock,
+	.pru_locked	= rip_locked,
 	.pru_bind	= rip_bind,
 	.pru_connect	= rip_connect,
 	.pru_disconnect	= rip_disconnect,
@@ -522,6 +523,14 @@ rip_unlock(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 	mtx_leave(&inp->inp_mtx);
+}
+
+int
+rip_locked(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	return mtx_owned(&inp->inp_mtx);
 }
 
 int
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.315
diff -u -p -r1.315 udp_usrreq.c
--- sys/netinet/udp_usrreq.c	21 Jan 2024 01:17:20 -0000	1.315
+++ sys/netinet/udp_usrreq.c	29 Jan 2024 19:08:04 -0000
@@ -127,6 +127,7 @@ const struct pr_usrreqs udp_usrreqs = {
 	.pru_detach	= udp_detach,
 	.pru_lock	= udp_lock,
 	.pru_unlock	= udp_unlock,
+	.pru_locked	= udp_locked,
 	.pru_bind	= udp_bind,
 	.pru_connect	= udp_connect,
 	.pru_disconnect	= udp_disconnect,
@@ -143,6 +144,7 @@ const struct pr_usrreqs udp6_usrreqs = {
 	.pru_detach	= udp_detach,
 	.pru_lock	= udp_lock,
 	.pru_unlock	= udp_unlock,
+	.pru_locked	= udp_locked,
 	.pru_bind	= udp_bind,
 	.pru_connect	= udp_connect,
 	.pru_disconnect	= udp_disconnect,
@@ -1154,6 +1156,14 @@ udp_unlock(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 	mtx_leave(&inp->inp_mtx);
+}
+
+int
+udp_locked(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	return mtx_owned(&inp->inp_mtx);
 }
 
 int
Index: sys/netinet/udp_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/udp_var.h,v
retrieving revision 1.50
diff -u -p -r1.50 udp_var.h
--- sys/netinet/udp_var.h	10 Jan 2024 16:44:30 -0000	1.50
+++ sys/netinet/udp_var.h	29 Jan 2024 19:08:04 -0000
@@ -147,6 +147,7 @@ int	 udp_attach(struct socket *, int, in
 int	 udp_detach(struct socket *);
 void	 udp_lock(struct socket *);
 void	 udp_unlock(struct socket *);
+int	 udp_locked(struct socket *);
 int	 udp_bind(struct socket *, struct mbuf *, struct proc *);
 int	 udp_connect(struct socket *, struct mbuf *);
 int	 udp_disconnect(struct socket *);
Index: sys/netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.91
diff -u -p -r1.91 ip6_divert.c
--- sys/netinet6/ip6_divert.c	1 Jan 2024 18:52:09 -0000	1.91
+++ sys/netinet6/ip6_divert.c	29 Jan 2024 19:08:04 -0000
@@ -69,6 +69,7 @@ const struct pr_usrreqs divert6_usrreqs 
 	.pru_detach	= divert_detach,
 	.pru_lock	= divert_lock,
 	.pru_unlock	= divert_unlock,
+	.pru_locked	= divert_locked,
 	.pru_bind	= divert_bind,
 	.pru_shutdown	= divert_shutdown,
 	.pru_send	= divert6_send,
@@ -254,8 +255,8 @@ divert6_packet(struct mbuf *m, int dir, 
 		div6stat_inc(div6s_fullsock);
 		goto bad;
 	}
-	mtx_leave(&inp->inp_mtx);
 	sorwakeup(so);
+	mtx_leave(&inp->inp_mtx);
 
 	in_pcbunref(inp);
 	return;
Index: sys/netinet6/ip6_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_mroute.c,v
retrieving revision 1.138
diff -u -p -r1.138 ip6_mroute.c
--- sys/netinet6/ip6_mroute.c	6 Dec 2023 09:27:17 -0000	1.138
+++ sys/netinet6/ip6_mroute.c	29 Jan 2024 19:08:04 -0000
@@ -861,12 +861,12 @@ socket6_send(struct socket *so, struct m
 
 		mtx_enter(&inp->inp_mtx);
 		ret = sbappendaddr(so, &so->so_rcv, sin6tosa(src), mm, NULL);
+		if (ret != 0)
+			sorwakeup(so);
 		mtx_leave(&inp->inp_mtx);
 
-		if (ret != 0) {
-			sorwakeup(so);
+		if (ret != 0)
 			return 0;
-		}
 	}
 	m_freem(mm);
 	return -1;
Index: sys/netinet6/ip6_var.h
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_var.h,v
retrieving revision 1.109
diff -u -p -r1.109 ip6_var.h
--- sys/netinet6/ip6_var.h	3 Dec 2023 20:36:24 -0000	1.109
+++ sys/netinet6/ip6_var.h	29 Jan 2024 19:08:04 -0000
@@ -353,6 +353,7 @@ int	rip6_attach(struct socket *, int, in
 int	rip6_detach(struct socket *);
 void	rip6_lock(struct socket *);
 void	rip6_unlock(struct socket *);
+int	rip6_locked(struct socket *);
 int	rip6_bind(struct socket *, struct mbuf *, struct proc *);
 int	rip6_connect(struct socket *, struct mbuf *);
 int	rip6_disconnect(struct socket *);
Index: sys/netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.179
diff -u -p -r1.179 raw_ip6.c
--- sys/netinet6/raw_ip6.c	21 Jan 2024 01:17:20 -0000	1.179
+++ sys/netinet6/raw_ip6.c	29 Jan 2024 19:08:04 -0000
@@ -110,6 +110,7 @@ const struct pr_usrreqs rip6_usrreqs = {
 	.pru_detach	= rip6_detach,
 	.pru_lock	= rip6_lock,
 	.pru_unlock	= rip6_unlock,
+	.pru_locked	= rip6_locked,
 	.pru_bind	= rip6_bind,
 	.pru_connect	= rip6_connect,
 	.pru_disconnect	= rip6_disconnect,
@@ -651,6 +652,14 @@ rip6_unlock(struct socket *so)
 
 	NET_ASSERT_LOCKED();
 	mtx_leave(&inp->inp_mtx);
+}
+
+int
+rip6_locked(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	return mtx_owned(&inp->inp_mtx);
 }
 
 int
Index: sys/sys/mutex.h
===================================================================
RCS file: /cvs/src/sys/sys/mutex.h,v
retrieving revision 1.19
diff -u -p -r1.19 mutex.h
--- sys/sys/mutex.h	1 Dec 2023 14:37:22 -0000	1.19
+++ sys/sys/mutex.h	29 Jan 2024 19:08:04 -0000
@@ -127,6 +127,9 @@ void	mtx_leave(struct mutex *);
 
 #define mtx_init(m, ipl)	mtx_init_flags(m, ipl, NULL, 0)
 
+#define mtx_owned(mtx) \
+	(((mtx)->mtx_owner == curcpu()) || panicstr || db_active)
+
 #ifdef WITNESS
 
 void	_mtx_init_flags(struct mutex *, int, const char *, int,
Index: sys/sys/protosw.h
===================================================================
RCS file: /cvs/src/sys/sys/protosw.h,v
retrieving revision 1.64
diff -u -p -r1.64 protosw.h
--- sys/sys/protosw.h	11 Jan 2024 14:15:12 -0000	1.64
+++ sys/sys/protosw.h	29 Jan 2024 19:08:04 -0000
@@ -69,6 +69,7 @@ struct pr_usrreqs {
 	int	(*pru_detach)(struct socket *);
 	void	(*pru_lock)(struct socket *);
 	void	(*pru_unlock)(struct socket *);
+	int	(*pru_locked)(struct socket *so);
 	int	(*pru_bind)(struct socket *, struct mbuf *, struct proc *);
 	int	(*pru_listen)(struct socket *);
 	int	(*pru_connect)(struct socket *, struct mbuf *);
@@ -294,6 +295,14 @@ pru_unlock(struct socket *so)
 {
 	if (so->so_proto->pr_usrreqs->pru_unlock)
 		(*so->so_proto->pr_usrreqs->pru_unlock)(so);
+}
+
+static inline int
+pru_locked(struct socket *so)
+{
+	if (so->so_proto->pr_usrreqs->pru_locked)
+		return (*so->so_proto->pr_usrreqs->pru_locked)(so);
+	return (0);
 }
 
 static inline int