Download raw body.
shared netlock for soclose()
On Tue, Jun 24, 2025 at 11:47:28AM +0200, Alexander Bluhm wrote:
> On Thu, Jun 12, 2025 at 12:41:07AM +0200, Alexander Bluhm wrote:
> > On Wed, Jun 11, 2025 at 12:04:01PM +0200, Alexander Bluhm wrote:
> > > On Wed, Jun 04, 2025 at 11:45:14PM +0300, Vitaliy Makkoveev wrote:
> > > > On Wed, Jun 04, 2025 at 05:09:25PM +0200, Alexander Bluhm wrote:
> > > > > On Tue, Jun 03, 2025 at 02:33:03PM +0200, Alexander Bluhm wrote:
> > > > > > Hi,
> > > > > >
> > > > > > Currently soclose() needs exclusive netlock. I see deadlocks as
> > > > > > tcp_newtcpcb() calls pool_get(&tcpcb_pool) with PR_WAITOK while
> > > > > > holding shared netlock. Running memory allocation in parallel
> > > > > > although free needs an exclusive lock is a bad idea.
> > > > > >
> > > > > > Solution is to run soclose() in parallel.
> > > > > >
> > > > > > Diff consists of four parts:
> > > > > > - always hold reference count of socket in inp_socket
> > > > > > - inp_socket is not longer protected by special mutex
> > > > > > - account when other CPU sets so_pcb to NULL
> > > > > > - run soclose() and sofree() with shared netlock
> > > > > > - tcp timeout reaper can go away
> > > > > >
> > > > > > Please test. I will split the diff for review.
> >
> > Next chunk has been commited, updated diff.
>
> I think this diff contains everything that is needed for unlocking
> the soclose() system call. We have to unlock the socket lock after
> looking into so->so_proto. Then we can switch to shared netlock
> in soclose() and sofree().
>
> ok?
anyone?
> bluhm
>
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> diff -u -p -r1.378 uipc_socket.c
> --- kern/uipc_socket.c 23 May 2025 23:41:46 -0000 1.378
> +++ kern/uipc_socket.c 23 Jun 2025 12:10:06 -0000
> @@ -213,10 +213,7 @@ socreate(int dom, struct socket **aso, i
> if (error) {
> so->so_state |= SS_NOFDREF;
> /* sofree() calls sounlock(). */
> - soref(so);
> - sofree(so, 1);
> - sounlock_shared(so);
> - sorele(so);
> + sofree(so, 0);
> return (error);
> }
> sounlock_shared(so);
> @@ -304,7 +301,7 @@ sofree(struct socket *so, int keep_lock)
>
> if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
> if (!keep_lock)
> - sounlock(so);
> + sounlock_shared(so);
> return;
> }
> if (so->so_head) {
> @@ -317,7 +314,7 @@ sofree(struct socket *so, int keep_lock)
> */
> if (so->so_onq == &head->so_q) {
> if (!keep_lock)
> - sounlock(so);
> + sounlock_shared(so);
> return;
> }
>
> @@ -344,7 +341,7 @@ sofree(struct socket *so, int keep_lock)
> }
>
> if (!keep_lock)
> - sounlock(so);
> + sounlock_shared(so);
> sorele(so);
> }
>
> @@ -368,7 +365,7 @@ soclose(struct socket *so, int flags)
> struct socket *so2;
> int error = 0;
>
> - solock(so);
> + solock_shared(so);
> /* Revoke async IO early. There is a final revocation in sofree(). */
> sigio_free(&so->so_sigio);
> if (so->so_state & SS_ISCONNECTED) {
> @@ -430,7 +427,7 @@ discard:
> if (so->so_sp) {
> struct socket *soback;
>
> - sounlock(so);
> + sounlock_shared(so);
> /*
> * Concurrent sounsplice() locks `sb_mtx' mutexes on
> * both `so_snd' and `so_rcv' before unsplice sockets.
> @@ -477,7 +474,7 @@ notsplicedback:
> task_del(sosplice_taskq, &so->so_sp->ssp_task);
> taskq_barrier(sosplice_taskq);
>
> - solock(so);
> + solock_shared(so);
> }
> #endif /* SOCKET_SPLICE */
>
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
> diff -u -p -r1.185 uipc_socket2.c
> --- kern/uipc_socket2.c 12 Mar 2025 14:08:31 -0000 1.185
> +++ kern/uipc_socket2.c 23 Jun 2025 12:09:50 -0000
> @@ -409,13 +409,13 @@ sounlock(struct socket *so)
> void
> sounlock_shared(struct socket *so)
> {
> - rw_exit_write(&so->so_lock);
> switch (so->so_proto->pr_domain->dom_family) {
> case PF_INET:
> case PF_INET6:
> NET_UNLOCK_SHARED();
> break;
> }
> + rw_exit_write(&so->so_lock);
> }
>
> void
> @@ -427,6 +427,12 @@ sounlock_nonet(struct socket *so)
> void
> sounlock_pair(struct socket *so1, struct socket *so2)
> {
> + switch (so1->so_proto->pr_domain->dom_family) {
> + case PF_INET:
> + case PF_INET6:
> + NET_UNLOCK_SHARED();
> + break;
> + }
> if (so1 == so2)
> rw_exit_write(&so1->so_lock);
> else if (so1 < so2) {
> @@ -435,12 +441,6 @@ sounlock_pair(struct socket *so1, struct
> } else {
> rw_exit_write(&so1->so_lock);
> rw_exit_write(&so2->so_lock);
> - }
> - switch (so1->so_proto->pr_domain->dom_family) {
> - case PF_INET:
> - case PF_INET6:
> - NET_UNLOCK_SHARED();
> - break;
> }
> }
>
shared netlock for soclose()