Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
Re: Unsplice socket before start the teardown
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
tech@openbsd.org
Date:
Thu, 15 Feb 2024 18:08:38 +0100

Download raw body.

Thread
On Thu, Feb 15, 2024 at 07:29:53PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Feb 15, 2024 at 01:20:15PM +0100, Alexander Bluhm wrote:
> > On Thu, Feb 15, 2024 at 12:28:28PM +0300, Vitaliy Makkoveev wrote:
> > > Use barriers instead of `ssp_idleto' task and timeout(9)
> > > re-initialization to wait concurrent somove() and soidle().
> > 
> > Why do you move the unsplice code before the solinger sleep?
> > 
> > Are you sure that you don't change the semantics?  Is all data still
> > spliced before the connection closes?
> > 
> > bluhm
> > 
> 
> The socket is inaccessible from userland, but SO_LINGER keeps it alive,
> mostly to perform transmission of pending data. The new transmissions
> and the reception are impossible. However, this is not true for spliced
> sockets because they are still accessible by the spliced peer, but not
> forever.
> 
> IIRC, only tcp_ctlinput() will wakeup us, so we leave this loop after
> TCP_LINGERTIME or if the remote peer was disconnected.
> 
> So, for the spliced case, you can receive data and awake somove(), but
> reach `so_linger' timeout and follow the destruction path. Or you could
> pru_send() data to dying `sosp' which already reached `so_linger'
> timeout. You don't know is your spliced peer closed, so I see no
> difference, will it be unspliced now or after TCP_LINGERTIME seconds.
> 
> Since the pru_detach() is not called, the remote also doesn't know that
> socket is closed.
> 
> I wanted to move it from sofree(). Since I see no differences, the idea
> to do pru_detach() after disconnect and unsplice looked good.
> 
> 
> This diff keeps current notation, so the unsplice moved before sofree().

As there are a lot of corner cases that I have fixed in the years
before, I fear that movin code around may change the semantics.  So
I like this versiom much more, I have thrown it on regress machine.

Before sofree and unsplice was called from in_pcbdetach().  Are you
sure that this is no longer necessary?  TCP can get a reset and
call tcp_close().  It has to unsplice then.

> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.320
> diff -u -p -r1.320 uipc_socket.c
> --- sys/kern/uipc_socket.c	12 Feb 2024 22:48:27 -0000	1.320
> +++ sys/kern/uipc_socket.c	15 Feb 2024 16:24:32 -0000
> @@ -62,8 +62,6 @@ int	sosplice(struct socket *, int, off_t
>  void	sounsplice(struct socket *, struct socket *, int);
>  void	soidle(void *);
>  void	sotask(void *);
> -void	soreaper(void *);
> -void	soput(void *);
>  int	somove(struct socket *, int);
>  void	sorflush(struct socket *);
>  
> @@ -250,8 +248,6 @@ solisten(struct socket *so, int backlog)
>  	return (0);
>  }
>  
> -#define SOSP_FREEING_READ	1
> -#define SOSP_FREEING_WRITE	2
>  void
>  sofree(struct socket *so, int keep_lock)
>  {
> @@ -316,36 +312,11 @@ sofree(struct socket *so, int keep_lock)
>  	sigio_free(&so->so_sigio);
>  	klist_free(&so->so_rcv.sb_klist);
>  	klist_free(&so->so_snd.sb_klist);
> -#ifdef SOCKET_SPLICE
> -	if (issplicedback(so)) {
> -		int freeing = SOSP_FREEING_WRITE;
> -
> -		if (so->so_sp->ssp_soback == so)
> -			freeing |= SOSP_FREEING_READ;
> -		sounsplice(so->so_sp->ssp_soback, so, freeing);
> -	}
> -	if (isspliced(so)) {
> -		int freeing = SOSP_FREEING_READ;
> -
> -		if (so == so->so_sp->ssp_socket)
> -			freeing |= SOSP_FREEING_WRITE;
> -		sounsplice(so, so->so_sp->ssp_socket, freeing);
> -	}
> -#endif /* SOCKET_SPLICE */
>  	sbrelease(so, &so->so_snd);
>  	sorflush(so);
>  	if (!keep_lock)
>  		sounlock(so);
> -#ifdef SOCKET_SPLICE
> -	if (so->so_sp) {
> -		/* Reuse splice idle, sounsplice() has been called before. */
> -		timeout_set_proc(&so->so_sp->ssp_idleto, soreaper, so);
> -		timeout_add(&so->so_sp->ssp_idleto, 0);
> -	} else
> -#endif /* SOCKET_SPLICE */
> -	{
> -		pool_put(&socket_pool, so);
> -	}
> +	pool_put(&socket_pool, so);
>  }
>  
>  static inline uint64_t
> @@ -357,6 +328,8 @@ solinger_nsec(struct socket *so)
>  	return SEC_TO_NSEC(so->so_linger);
>  }
>  
> +#define SOSP_FREEING_READ	1
> +#define SOSP_FREEING_WRITE	2
>  /*
>   * Close a socket on last file table reference removal.
>   * Initiate disconnect if connected.
> @@ -436,6 +409,31 @@ discard:
>  	if (so->so_state & SS_NOFDREF)
>  		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
>  	so->so_state |= SS_NOFDREF;
> +
> +#ifdef SOCKET_SPLICE
> +	if (issplicedback(so)) {
> +		int freeing = SOSP_FREEING_WRITE;
> +
> +		if (so->so_sp->ssp_soback == so)
> +			freeing |= SOSP_FREEING_READ;
> +		sounsplice(so->so_sp->ssp_soback, so, freeing);
> +	}
> +	if (isspliced(so)) {
> +		int freeing = SOSP_FREEING_READ;
> +
> +		if (so == so->so_sp->ssp_socket)
> +			freeing |= SOSP_FREEING_WRITE;
> +		sounsplice(so, so->so_sp->ssp_socket, freeing);
> +	}
> +	if (so->so_sp) {
> +		sounlock(so);
> +		taskq_barrier(sosplice_taskq);
> +		timeout_del_barrier(&so->so_sp->ssp_idleto);
> +		pool_put(&sosplice_pool, so->so_sp);
> +		solock(so);
> +	}
> +#endif /* SOCKET_SPLICE */
> +
>  	/* sofree() calls sounlock(). */
>  	sofree(so, 0);
>  	return (error);
> @@ -1441,32 +1439,6 @@ sotask(void *arg)
>  
>  	/* Avoid user land starvation. */
>  	yield();
> -}
> -
> -/*
> - * The socket splicing task or idle timeout may sleep while grabbing the net
> - * lock.  As sofree() can be called anytime, sotask() or soidle() could access
> - * the socket memory of a freed socket after wakeup.  So delay the pool_put()
> - * after all pending socket splicing tasks or timeouts have finished.  Do this
> - * by scheduling it on the same threads.
> - */
> -void
> -soreaper(void *arg)
> -{
> -	struct socket *so = arg;
> -
> -	/* Reuse splice task, sounsplice() has been called before. */
> -	task_set(&so->so_sp->ssp_task, soput, so);
> -	task_add(sosplice_taskq, &so->so_sp->ssp_task);
> -}
> -
> -void
> -soput(void *arg)
> -{
> -	struct socket *so = arg;
> -
> -	pool_put(&sosplice_pool, so->so_sp);
> -	pool_put(&socket_pool, so);
>  }
>  
>  /*