Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: shared netlock for soclose()
To:
tech@openbsd.org
Date:
Tue, 24 Jun 2025 11:47:28 +0200

Download raw body.

Thread
On Thu, Jun 12, 2025 at 12:41:07AM +0200, Alexander Bluhm wrote:
> On Wed, Jun 11, 2025 at 12:04:01PM +0200, Alexander Bluhm wrote:
> > On Wed, Jun 04, 2025 at 11:45:14PM +0300, Vitaliy Makkoveev wrote:
> > > On Wed, Jun 04, 2025 at 05:09:25PM +0200, Alexander Bluhm wrote:
> > > > On Tue, Jun 03, 2025 at 02:33:03PM +0200, Alexander Bluhm wrote:
> > > > > Hi,
> > > > > 
> > > > > Currently soclose() needs exclusive netlock.  I see deadlocks as
> > > > > tcp_newtcpcb() calls pool_get(&tcpcb_pool) with PR_WAITOK while
> > > > > holding shared netlock.  Running memory allocation in parallel
> > > > > although free needs an exclusive lock is a bad idea.
> > > > > 
> > > > > Solution is to run soclose() in parallel.
> > > > > 
> > > > > Diff consists of four parts:
> > > > > - always hold reference count of socket in inp_socket
> > > > > - inp_socket is not longer protected by special mutex
> > > > > - account when other CPU sets so_pcb to NULL
> > > > > - run soclose() and sofree() with shared netlock
> > > > > - tcp timeout reaper can go away
> > > > > 
> > > > > Please test.  I will split the diff for review.
> 
> Next chunk has been commited, updated diff.

I think this diff contains everything that is needed for unlocking
the soclose() system call.  We have to unlock the socket lock after
looking into so->so_proto.  Then we can switch to shared netlock
in soclose() and sofree().

ok?

bluhm

Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.378 uipc_socket.c
--- kern/uipc_socket.c	23 May 2025 23:41:46 -0000	1.378
+++ kern/uipc_socket.c	23 Jun 2025 12:10:06 -0000
@@ -213,10 +213,7 @@ socreate(int dom, struct socket **aso, i
 	if (error) {
 		so->so_state |= SS_NOFDREF;
 		/* sofree() calls sounlock(). */
-		soref(so);
-		sofree(so, 1);
-		sounlock_shared(so);
-		sorele(so);
+		sofree(so, 0);
 		return (error);
 	}
 	sounlock_shared(so);
@@ -304,7 +301,7 @@ sofree(struct socket *so, int keep_lock)
 
 	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
 		if (!keep_lock)
-			sounlock(so);
+			sounlock_shared(so);
 		return;
 	}
 	if (so->so_head) {
@@ -317,7 +314,7 @@ sofree(struct socket *so, int keep_lock)
 		 */
 		if (so->so_onq == &head->so_q) {
 			if (!keep_lock)
-				sounlock(so);
+				sounlock_shared(so);
 			return;
 		}
 
@@ -344,7 +341,7 @@ sofree(struct socket *so, int keep_lock)
 	}
 
 	if (!keep_lock)
-		sounlock(so);
+		sounlock_shared(so);
 	sorele(so);
 }
 
@@ -368,7 +365,7 @@ soclose(struct socket *so, int flags)
 	struct socket *so2;
 	int error = 0;
 
-	solock(so);
+	solock_shared(so);
 	/* Revoke async IO early. There is a final revocation in sofree(). */
 	sigio_free(&so->so_sigio);
 	if (so->so_state & SS_ISCONNECTED) {
@@ -430,7 +427,7 @@ discard:
 	if (so->so_sp) {
 		struct socket *soback;
 
-		sounlock(so);
+		sounlock_shared(so);
 		/*
 		 * Concurrent sounsplice() locks `sb_mtx' mutexes on
 		 * both `so_snd' and `so_rcv' before unsplice sockets.
@@ -477,7 +474,7 @@ notsplicedback:
 		task_del(sosplice_taskq, &so->so_sp->ssp_task);
 		taskq_barrier(sosplice_taskq);
 
-		solock(so);
+		solock_shared(so);
 	}
 #endif /* SOCKET_SPLICE */
 
Index: kern/uipc_socket2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
diff -u -p -r1.185 uipc_socket2.c
--- kern/uipc_socket2.c	12 Mar 2025 14:08:31 -0000	1.185
+++ kern/uipc_socket2.c	23 Jun 2025 12:09:50 -0000
@@ -409,13 +409,13 @@ sounlock(struct socket *so)
 void
 sounlock_shared(struct socket *so)
 {
-	rw_exit_write(&so->so_lock);
 	switch (so->so_proto->pr_domain->dom_family) {
 	case PF_INET:
 	case PF_INET6:
 		NET_UNLOCK_SHARED();
 		break;
 	}
+	rw_exit_write(&so->so_lock);
 }
 
 void
@@ -427,6 +427,12 @@ sounlock_nonet(struct socket *so)
 void
 sounlock_pair(struct socket *so1, struct socket *so2)
 {
+	switch (so1->so_proto->pr_domain->dom_family) {
+	case PF_INET:
+	case PF_INET6:
+		NET_UNLOCK_SHARED();
+		break;
+	}
 	if (so1 == so2)
 		rw_exit_write(&so1->so_lock);
 	else if (so1 < so2) {
@@ -435,12 +441,6 @@ sounlock_pair(struct socket *so1, struct
 	} else {
 		rw_exit_write(&so1->so_lock);
 		rw_exit_write(&so2->so_lock);
-	}
-	switch (so1->so_proto->pr_domain->dom_family) {
-	case PF_INET:
-	case PF_INET6:
-		NET_UNLOCK_SHARED();
-		break;
 	}
 }