From: Alexander Bluhm Subject: Re: shared netlock for soclose() To: tech@openbsd.org Date: Sat, 28 Jun 2025 23:13:36 +0200 On Tue, Jun 24, 2025 at 11:47:28AM +0200, Alexander Bluhm wrote: > On Thu, Jun 12, 2025 at 12:41:07AM +0200, Alexander Bluhm wrote: > > On Wed, Jun 11, 2025 at 12:04:01PM +0200, Alexander Bluhm wrote: > > > On Wed, Jun 04, 2025 at 11:45:14PM +0300, Vitaliy Makkoveev wrote: > > > > On Wed, Jun 04, 2025 at 05:09:25PM +0200, Alexander Bluhm wrote: > > > > > On Tue, Jun 03, 2025 at 02:33:03PM +0200, Alexander Bluhm wrote: > > > > > > Hi, > > > > > > > > > > > > Currently soclose() needs exclusive netlock. I see deadlocks as > > > > > > tcp_newtcpcb() calls pool_get(&tcpcb_pool) with PR_WAITOK while > > > > > > holding shared netlock. Running memory allocation in parallel > > > > > > although free needs an exclusive lock is a bad idea. > > > > > > > > > > > > Solution is to run soclose() in parallel. > > > > > > > > > > > > Diff consists of four parts: > > > > > > - always hold reference count of socket in inp_socket > > > > > > - inp_socket is not longer protected by special mutex > > > > > > - account when other CPU sets so_pcb to NULL > > > > > > - run soclose() and sofree() with shared netlock > > > > > > - tcp timeout reaper can go away > > > > > > > > > > > > Please test. I will split the diff for review. > > > > Next chunk has been commited, updated diff. > > I think this diff contains everything that is needed for unlocking > the soclose() system call. We have to unlock the socket lock after > looking into so->so_proto. Then we can switch to shared netlock > in soclose() and sofree(). > > ok? anyone? > bluhm > > Index: kern/uipc_socket.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v > diff -u -p -r1.378 uipc_socket.c > --- kern/uipc_socket.c 23 May 2025 23:41:46 -0000 1.378 > +++ kern/uipc_socket.c 23 Jun 2025 12:10:06 -0000 > @@ -213,10 +213,7 @@ socreate(int dom, struct socket **aso, i > if (error) { > so->so_state |= SS_NOFDREF; > /* sofree() calls sounlock(). */ > - soref(so); > - sofree(so, 1); > - sounlock_shared(so); > - sorele(so); > + sofree(so, 0); > return (error); > } > sounlock_shared(so); > @@ -304,7 +301,7 @@ sofree(struct socket *so, int keep_lock) > > if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { > if (!keep_lock) > - sounlock(so); > + sounlock_shared(so); > return; > } > if (so->so_head) { > @@ -317,7 +314,7 @@ sofree(struct socket *so, int keep_lock) > */ > if (so->so_onq == &head->so_q) { > if (!keep_lock) > - sounlock(so); > + sounlock_shared(so); > return; > } > > @@ -344,7 +341,7 @@ sofree(struct socket *so, int keep_lock) > } > > if (!keep_lock) > - sounlock(so); > + sounlock_shared(so); > sorele(so); > } > > @@ -368,7 +365,7 @@ soclose(struct socket *so, int flags) > struct socket *so2; > int error = 0; > > - solock(so); > + solock_shared(so); > /* Revoke async IO early. There is a final revocation in sofree(). */ > sigio_free(&so->so_sigio); > if (so->so_state & SS_ISCONNECTED) { > @@ -430,7 +427,7 @@ discard: > if (so->so_sp) { > struct socket *soback; > > - sounlock(so); > + sounlock_shared(so); > /* > * Concurrent sounsplice() locks `sb_mtx' mutexes on > * both `so_snd' and `so_rcv' before unsplice sockets. > @@ -477,7 +474,7 @@ notsplicedback: > task_del(sosplice_taskq, &so->so_sp->ssp_task); > taskq_barrier(sosplice_taskq); > > - solock(so); > + solock_shared(so); > } > #endif /* SOCKET_SPLICE */ > > Index: kern/uipc_socket2.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v > diff -u -p -r1.185 uipc_socket2.c > --- kern/uipc_socket2.c 12 Mar 2025 14:08:31 -0000 1.185 > +++ kern/uipc_socket2.c 23 Jun 2025 12:09:50 -0000 > @@ -409,13 +409,13 @@ sounlock(struct socket *so) > void > sounlock_shared(struct socket *so) > { > - rw_exit_write(&so->so_lock); > switch (so->so_proto->pr_domain->dom_family) { > case PF_INET: > case PF_INET6: > NET_UNLOCK_SHARED(); > break; > } > + rw_exit_write(&so->so_lock); > } > > void > @@ -427,6 +427,12 @@ sounlock_nonet(struct socket *so) > void > sounlock_pair(struct socket *so1, struct socket *so2) > { > + switch (so1->so_proto->pr_domain->dom_family) { > + case PF_INET: > + case PF_INET6: > + NET_UNLOCK_SHARED(); > + break; > + } > if (so1 == so2) > rw_exit_write(&so1->so_lock); > else if (so1 < so2) { > @@ -435,12 +441,6 @@ sounlock_pair(struct socket *so1, struct > } else { > rw_exit_write(&so1->so_lock); > rw_exit_write(&so2->so_lock); > - } > - switch (so1->so_proto->pr_domain->dom_family) { > - case PF_INET: > - case PF_INET6: > - NET_UNLOCK_SHARED(); > - break; > } > } >