Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
OpenBSD tech <tech@openbsd.org>
Date:
Thu, 26 Dec 2024 13:47:45 +0100

Download raw body.

Thread
On Tue, Dec 24, 2024 at 10:21:55PM +0300, Vitaliy Makkoveev wrote:
> This is the updated diff. It uses barriers instead of `ssp_idleto'
> timeout and `ssp_task' task redefinition.

I have tested this diff with my regression, performance, and network
setup.  Works fine.

OK bluhm@

Could you wait one day before commiting this?  Then we have a
snapshot between my and your unlocking diff.  If something goes
wrong, it will be easier to bisect.

> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> diff -u -p -r1.347 uipc_socket.c
> --- sys/kern/uipc_socket.c	19 Dec 2024 22:11:35 -0000	1.347
> +++ sys/kern/uipc_socket.c	24 Dec 2024 19:12:56 -0000
> @@ -62,8 +62,6 @@ int	sosplice(struct socket *, int, off_t
>  void	sounsplice(struct socket *, struct socket *, int);
>  void	soidle(void *);
>  void	sotask(void *);
> -void	soreaper(void *);
> -void	soput(void *);
>  int	somove(struct socket *, int);
>  void	sorflush(struct socket *);
>  
> @@ -327,17 +325,14 @@ sofree(struct socket *so, int keep_lock)
>  			sounlock(head);
>  	}
>  
> -	switch (so->so_proto->pr_domain->dom_family) {
> -	case AF_INET:
> -	case AF_INET6:
> -		if (so->so_proto->pr_type == SOCK_STREAM)
> -			break;
> -		/* FALLTHROUGH */
> -	default:
> +	if (!keep_lock) {
> +		/*
> +		 * sofree() was called from soclose(). Sleep is safe
> +		 * even for tcp(4) sockets.
> +		 */
>  		sounlock(so);
>  		refcnt_finalize(&so->so_refcnt, "sofinal");
>  		solock(so);
> -		break;
>  	}
>  
>  	sigio_free(&so->so_sigio);
> @@ -358,20 +353,20 @@ sofree(struct socket *so, int keep_lock)
>  		(*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
>  	m_purge(so->so_rcv.sb_mb);
>  
> -	if (!keep_lock)
> +	if (!keep_lock) {
>  		sounlock(so);
>  
>  #ifdef SOCKET_SPLICE
> -	if (so->so_sp) {
> -		/* Reuse splice idle, sounsplice() has been called before. */
> -		timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so,
> -		    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
> -		timeout_add(&so->so_sp->ssp_idleto, 0);
> -	} else
> +		if (so->so_sp) {
> +			timeout_del_barrier(&so->so_sp->ssp_idleto);
> +			task_del(sosplice_taskq, &so->so_sp->ssp_task);
> +			taskq_barrier(sosplice_taskq);
> +			pool_put(&sosplice_pool, so->so_sp);
> +		}
>  #endif /* SOCKET_SPLICE */
> -	{
> -		pool_put(&socket_pool, so);
>  	}
> +
> +	pool_put(&socket_pool, so);
>  }
>  
>  static inline uint64_t
> @@ -450,39 +445,10 @@ drop:
>  		}
>  	}
>  discard:
> -	if (so->so_state & SS_NOFDREF)
> -		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
> -	so->so_state |= SS_NOFDREF;
> -
>  #ifdef SOCKET_SPLICE
>  	if (so->so_sp) {
>  		struct socket *soback;
>  
> -		if (so->so_proto->pr_flags & PR_WANTRCVD) {
> -			/*
> -			 * Copy - Paste, but can't relock and sleep in
> -			 * sofree() in tcp(4) case. That's why tcp(4)
> -			 * still rely on solock() for splicing and
> -			 * unsplicing.
> -			 */
> -
> -			if (issplicedback(so)) {
> -				int freeing = SOSP_FREEING_WRITE;
> -
> -				if (so->so_sp->ssp_soback == so)
> -					freeing |= SOSP_FREEING_READ;
> -				sounsplice(so->so_sp->ssp_soback, so, freeing);
> -			}
> -			if (isspliced(so)) {
> -				int freeing = SOSP_FREEING_READ;
> -
> -				if (so == so->so_sp->ssp_socket)
> -					freeing |= SOSP_FREEING_WRITE;
> -				sounsplice(so, so->so_sp->ssp_socket, freeing);
> -			}
> -			goto free;
> -		}
> -
>  		sounlock(so);
>  		mtx_enter(&so->so_snd.sb_mtx);
>  		/*
> @@ -530,8 +496,12 @@ notsplicedback:
>  
>  		solock(so);
>  	}
> -free:
>  #endif /* SOCKET_SPLICE */
> +
> +	if (so->so_state & SS_NOFDREF)
> +		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
> +	so->so_state |= SS_NOFDREF;
> +
>  	/* sofree() calls sounlock(). */
>  	sofree(so, 0);
>  	return (error);
> @@ -1532,8 +1502,7 @@ sosplice(struct socket *so, int fd, off_
>  void
>  sounsplice(struct socket *so, struct socket *sosp, int freeing)
>  {
> -	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
> -		sbassertlocked(&so->so_rcv);
> +	sbassertlocked(&so->so_rcv);
>  	soassertlocked(so);
>  
>  	task_del(sosplice_taskq, &so->so_splicetask);
> @@ -1587,17 +1556,23 @@ sotask(void *arg)
>  	 */
>  
>  	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
> -	if (sockstream)
> -		solock(so);
> -
>  	if (so->so_rcv.sb_flags & SB_SPLICE) {
> -		if (sockstream)
> +		struct socket *sosp = so->so_sp->ssp_socket;
> +
> +		if (sockstream) {
> +			sblock(&sosp->so_snd, SBL_WAIT | SBL_NOINTR);
> +			solock(so);
>  			doyield = 1;
> +		}
> +
>  		somove(so, M_DONTWAIT);
> +
> +		if (sockstream) {
> +			sounlock(so);
> +			sbunlock(&sosp->so_snd);
> +		}
>  	}
>  
> -	if (sockstream)
> -		sounlock(so);
>  	sbunlock(&so->so_rcv);
>  
>  	if (doyield) {
> @@ -1607,32 +1582,6 @@ sotask(void *arg)
>  }
>  
>  /*
> - * The socket splicing task or idle timeout may sleep while grabbing the net
> - * lock.  As sofree() can be called anytime, sotask() or soidle() could access
> - * the socket memory of a freed socket after wakeup.  So delay the pool_put()
> - * after all pending socket splicing tasks or timeouts have finished.  Do this
> - * by scheduling it on the same threads.
> - */
> -void
> -soreaper(void *arg)
> -{
> -	struct socket *so = arg;
> -
> -	/* Reuse splice task, sounsplice() has been called before. */
> -	task_set(&so->so_sp->ssp_task, soput, so);
> -	task_add(sosplice_taskq, &so->so_sp->ssp_task);
> -}
> -
> -void
> -soput(void *arg)
> -{
> -	struct socket *so = arg;
> -
> -	pool_put(&sosplice_pool, so->so_sp);
> -	pool_put(&socket_pool, so);
> -}
> -
> -/*
>   * Move data from receive buffer of spliced source socket to send
>   * buffer of drain socket.  Try to move as much as possible in one
>   * big chunk.  It is a TCP only implementation.
> @@ -1652,8 +1601,10 @@ somove(struct socket *so, int wait)
>  
>  	if (sockdgram)
>  		sbassertlocked(&so->so_rcv);
> -	else
> +	else {
> +		sbassertlocked(&sosp->so_snd);
>  		soassertlocked(so);
> +	}
>  
>  	mtx_enter(&so->so_rcv.sb_mtx);
>  	mtx_enter(&sosp->so_snd.sb_mtx);