Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: shared netlock for soclose()
To:
tech@openbsd.org
Date:
Sat, 28 Jun 2025 23:13:36 +0200

Download raw body.

Thread
On Tue, Jun 24, 2025 at 11:47:28AM +0200, Alexander Bluhm wrote:
> On Thu, Jun 12, 2025 at 12:41:07AM +0200, Alexander Bluhm wrote:
> > On Wed, Jun 11, 2025 at 12:04:01PM +0200, Alexander Bluhm wrote:
> > > On Wed, Jun 04, 2025 at 11:45:14PM +0300, Vitaliy Makkoveev wrote:
> > > > On Wed, Jun 04, 2025 at 05:09:25PM +0200, Alexander Bluhm wrote:
> > > > > On Tue, Jun 03, 2025 at 02:33:03PM +0200, Alexander Bluhm wrote:
> > > > > > Hi,
> > > > > > 
> > > > > > Currently soclose() needs exclusive netlock.  I see deadlocks as
> > > > > > tcp_newtcpcb() calls pool_get(&tcpcb_pool) with PR_WAITOK while
> > > > > > holding shared netlock.  Running memory allocation in parallel
> > > > > > although free needs an exclusive lock is a bad idea.
> > > > > > 
> > > > > > Solution is to run soclose() in parallel.
> > > > > > 
> > > > > > Diff consists of four parts:
> > > > > > - always hold reference count of socket in inp_socket
> > > > > > - inp_socket is not longer protected by special mutex
> > > > > > - account when other CPU sets so_pcb to NULL
> > > > > > - run soclose() and sofree() with shared netlock
> > > > > > - tcp timeout reaper can go away
> > > > > > 
> > > > > > Please test.  I will split the diff for review.
> > 
> > Next chunk has been commited, updated diff.
> 
> I think this diff contains everything that is needed for unlocking
> the soclose() system call.  We have to unlock the socket lock after
> looking into so->so_proto.  Then we can switch to shared netlock
> in soclose() and sofree().
> 
> ok?

anyone?

> bluhm
> 
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> diff -u -p -r1.378 uipc_socket.c
> --- kern/uipc_socket.c	23 May 2025 23:41:46 -0000	1.378
> +++ kern/uipc_socket.c	23 Jun 2025 12:10:06 -0000
> @@ -213,10 +213,7 @@ socreate(int dom, struct socket **aso, i
>  	if (error) {
>  		so->so_state |= SS_NOFDREF;
>  		/* sofree() calls sounlock(). */
> -		soref(so);
> -		sofree(so, 1);
> -		sounlock_shared(so);
> -		sorele(so);
> +		sofree(so, 0);
>  		return (error);
>  	}
>  	sounlock_shared(so);
> @@ -304,7 +301,7 @@ sofree(struct socket *so, int keep_lock)
>  
>  	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
>  		if (!keep_lock)
> -			sounlock(so);
> +			sounlock_shared(so);
>  		return;
>  	}
>  	if (so->so_head) {
> @@ -317,7 +314,7 @@ sofree(struct socket *so, int keep_lock)
>  		 */
>  		if (so->so_onq == &head->so_q) {
>  			if (!keep_lock)
> -				sounlock(so);
> +				sounlock_shared(so);
>  			return;
>  		}
>  
> @@ -344,7 +341,7 @@ sofree(struct socket *so, int keep_lock)
>  	}
>  
>  	if (!keep_lock)
> -		sounlock(so);
> +		sounlock_shared(so);
>  	sorele(so);
>  }
>  
> @@ -368,7 +365,7 @@ soclose(struct socket *so, int flags)
>  	struct socket *so2;
>  	int error = 0;
>  
> -	solock(so);
> +	solock_shared(so);
>  	/* Revoke async IO early. There is a final revocation in sofree(). */
>  	sigio_free(&so->so_sigio);
>  	if (so->so_state & SS_ISCONNECTED) {
> @@ -430,7 +427,7 @@ discard:
>  	if (so->so_sp) {
>  		struct socket *soback;
>  
> -		sounlock(so);
> +		sounlock_shared(so);
>  		/*
>  		 * Concurrent sounsplice() locks `sb_mtx' mutexes on
>  		 * both `so_snd' and `so_rcv' before unsplice sockets.
> @@ -477,7 +474,7 @@ notsplicedback:
>  		task_del(sosplice_taskq, &so->so_sp->ssp_task);
>  		taskq_barrier(sosplice_taskq);
>  
> -		solock(so);
> +		solock_shared(so);
>  	}
>  #endif /* SOCKET_SPLICE */
>  
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
> diff -u -p -r1.185 uipc_socket2.c
> --- kern/uipc_socket2.c	12 Mar 2025 14:08:31 -0000	1.185
> +++ kern/uipc_socket2.c	23 Jun 2025 12:09:50 -0000
> @@ -409,13 +409,13 @@ sounlock(struct socket *so)
>  void
>  sounlock_shared(struct socket *so)
>  {
> -	rw_exit_write(&so->so_lock);
>  	switch (so->so_proto->pr_domain->dom_family) {
>  	case PF_INET:
>  	case PF_INET6:
>  		NET_UNLOCK_SHARED();
>  		break;
>  	}
> +	rw_exit_write(&so->so_lock);
>  }
>  
>  void
> @@ -427,6 +427,12 @@ sounlock_nonet(struct socket *so)
>  void
>  sounlock_pair(struct socket *so1, struct socket *so2)
>  {
> +	switch (so1->so_proto->pr_domain->dom_family) {
> +	case PF_INET:
> +	case PF_INET6:
> +		NET_UNLOCK_SHARED();
> +		break;
> +	}
>  	if (so1 == so2)
>  		rw_exit_write(&so1->so_lock);
>  	else if (so1 < so2) {
> @@ -435,12 +441,6 @@ sounlock_pair(struct socket *so1, struct
>  	} else {
>  		rw_exit_write(&so1->so_lock);
>  		rw_exit_write(&so2->so_lock);
> -	}
> -	switch (so1->so_proto->pr_domain->dom_family) {
> -	case PF_INET:
> -	case PF_INET6:
> -		NET_UNLOCK_SHARED();
> -		break;
>  	}
>  }
>