From: Alexander Bluhm Subject: Re: shared netlock for soclose() To: tech@openbsd.org Date: Tue, 24 Jun 2025 11:47:28 +0200 On Thu, Jun 12, 2025 at 12:41:07AM +0200, Alexander Bluhm wrote: > On Wed, Jun 11, 2025 at 12:04:01PM +0200, Alexander Bluhm wrote: > > On Wed, Jun 04, 2025 at 11:45:14PM +0300, Vitaliy Makkoveev wrote: > > > On Wed, Jun 04, 2025 at 05:09:25PM +0200, Alexander Bluhm wrote: > > > > On Tue, Jun 03, 2025 at 02:33:03PM +0200, Alexander Bluhm wrote: > > > > > Hi, > > > > > > > > > > Currently soclose() needs exclusive netlock. I see deadlocks as > > > > > tcp_newtcpcb() calls pool_get(&tcpcb_pool) with PR_WAITOK while > > > > > holding shared netlock. Running memory allocation in parallel > > > > > although free needs an exclusive lock is a bad idea. > > > > > > > > > > Solution is to run soclose() in parallel. > > > > > > > > > > Diff consists of four parts: > > > > > - always hold reference count of socket in inp_socket > > > > > - inp_socket is not longer protected by special mutex > > > > > - account when other CPU sets so_pcb to NULL > > > > > - run soclose() and sofree() with shared netlock > > > > > - tcp timeout reaper can go away > > > > > > > > > > Please test. I will split the diff for review. > > Next chunk has been commited, updated diff. I think this diff contains everything that is needed for unlocking the soclose() system call. We have to unlock the socket lock after looking into so->so_proto. Then we can switch to shared netlock in soclose() and sofree(). ok? bluhm Index: kern/uipc_socket.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v diff -u -p -r1.378 uipc_socket.c --- kern/uipc_socket.c 23 May 2025 23:41:46 -0000 1.378 +++ kern/uipc_socket.c 23 Jun 2025 12:10:06 -0000 @@ -213,10 +213,7 @@ socreate(int dom, struct socket **aso, i if (error) { so->so_state |= SS_NOFDREF; /* sofree() calls sounlock(). */ - soref(so); - sofree(so, 1); - sounlock_shared(so); - sorele(so); + sofree(so, 0); return (error); } sounlock_shared(so); @@ -304,7 +301,7 @@ sofree(struct socket *so, int keep_lock) if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { if (!keep_lock) - sounlock(so); + sounlock_shared(so); return; } if (so->so_head) { @@ -317,7 +314,7 @@ sofree(struct socket *so, int keep_lock) */ if (so->so_onq == &head->so_q) { if (!keep_lock) - sounlock(so); + sounlock_shared(so); return; } @@ -344,7 +341,7 @@ sofree(struct socket *so, int keep_lock) } if (!keep_lock) - sounlock(so); + sounlock_shared(so); sorele(so); } @@ -368,7 +365,7 @@ soclose(struct socket *so, int flags) struct socket *so2; int error = 0; - solock(so); + solock_shared(so); /* Revoke async IO early. There is a final revocation in sofree(). */ sigio_free(&so->so_sigio); if (so->so_state & SS_ISCONNECTED) { @@ -430,7 +427,7 @@ discard: if (so->so_sp) { struct socket *soback; - sounlock(so); + sounlock_shared(so); /* * Concurrent sounsplice() locks `sb_mtx' mutexes on * both `so_snd' and `so_rcv' before unsplice sockets. @@ -477,7 +474,7 @@ notsplicedback: task_del(sosplice_taskq, &so->so_sp->ssp_task); taskq_barrier(sosplice_taskq); - solock(so); + solock_shared(so); } #endif /* SOCKET_SPLICE */ Index: kern/uipc_socket2.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v diff -u -p -r1.185 uipc_socket2.c --- kern/uipc_socket2.c 12 Mar 2025 14:08:31 -0000 1.185 +++ kern/uipc_socket2.c 23 Jun 2025 12:09:50 -0000 @@ -409,13 +409,13 @@ sounlock(struct socket *so) void sounlock_shared(struct socket *so) { - rw_exit_write(&so->so_lock); switch (so->so_proto->pr_domain->dom_family) { case PF_INET: case PF_INET6: NET_UNLOCK_SHARED(); break; } + rw_exit_write(&so->so_lock); } void @@ -427,6 +427,12 @@ sounlock_nonet(struct socket *so) void sounlock_pair(struct socket *so1, struct socket *so2) { + switch (so1->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + NET_UNLOCK_SHARED(); + break; + } if (so1 == so2) rw_exit_write(&so1->so_lock); else if (so1 < so2) { @@ -435,12 +441,6 @@ sounlock_pair(struct socket *so1, struct } else { rw_exit_write(&so1->so_lock); rw_exit_write(&so2->so_lock); - } - switch (so1->so_proto->pr_domain->dom_family) { - case PF_INET: - case PF_INET6: - NET_UNLOCK_SHARED(); - break; } }