Download raw body.
Unsplice socket before start the teardown
On Thu, Feb 15, 2024 at 07:29:53PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Feb 15, 2024 at 01:20:15PM +0100, Alexander Bluhm wrote:
> > On Thu, Feb 15, 2024 at 12:28:28PM +0300, Vitaliy Makkoveev wrote:
> > > Use barriers instead of `ssp_idleto' task and timeout(9)
> > > re-initialization to wait concurrent somove() and soidle().
> >
> > Why do you move the unsplice code before the solinger sleep?
> >
> > Are you sure that you don't change the semantics? Is all data still
> > spliced before the connection closes?
> >
> > bluhm
> >
>
> The socket is inaccessible from userland, but SO_LINGER keeps it alive,
> mostly to perform transmission of pending data. The new transmissions
> and the reception are impossible. However, this is not true for spliced
> sockets because they are still accessible by the spliced peer, but not
> forever.
>
> IIRC, only tcp_ctlinput() will wakeup us, so we leave this loop after
> TCP_LINGERTIME or if the remote peer was disconnected.
>
> So, for the spliced case, you can receive data and awake somove(), but
> reach `so_linger' timeout and follow the destruction path. Or you could
> pru_send() data to dying `sosp' which already reached `so_linger'
> timeout. You don't know is your spliced peer closed, so I see no
> difference, will it be unspliced now or after TCP_LINGERTIME seconds.
>
> Since the pru_detach() is not called, the remote also doesn't know that
> socket is closed.
>
> I wanted to move it from sofree(). Since I see no differences, the idea
> to do pru_detach() after disconnect and unsplice looked good.
>
>
> This diff keeps current notation, so the unsplice moved before sofree().
As there are a lot of corner cases that I have fixed in the years
before, I fear that movin code around may change the semantics. So
I like this versiom much more, I have thrown it on regress machine.
Before sofree and unsplice was called from in_pcbdetach(). Are you
sure that this is no longer necessary? TCP can get a reset and
call tcp_close(). It has to unsplice then.
> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.320
> diff -u -p -r1.320 uipc_socket.c
> --- sys/kern/uipc_socket.c 12 Feb 2024 22:48:27 -0000 1.320
> +++ sys/kern/uipc_socket.c 15 Feb 2024 16:24:32 -0000
> @@ -62,8 +62,6 @@ int sosplice(struct socket *, int, off_t
> void sounsplice(struct socket *, struct socket *, int);
> void soidle(void *);
> void sotask(void *);
> -void soreaper(void *);
> -void soput(void *);
> int somove(struct socket *, int);
> void sorflush(struct socket *);
>
> @@ -250,8 +248,6 @@ solisten(struct socket *so, int backlog)
> return (0);
> }
>
> -#define SOSP_FREEING_READ 1
> -#define SOSP_FREEING_WRITE 2
> void
> sofree(struct socket *so, int keep_lock)
> {
> @@ -316,36 +312,11 @@ sofree(struct socket *so, int keep_lock)
> sigio_free(&so->so_sigio);
> klist_free(&so->so_rcv.sb_klist);
> klist_free(&so->so_snd.sb_klist);
> -#ifdef SOCKET_SPLICE
> - if (issplicedback(so)) {
> - int freeing = SOSP_FREEING_WRITE;
> -
> - if (so->so_sp->ssp_soback == so)
> - freeing |= SOSP_FREEING_READ;
> - sounsplice(so->so_sp->ssp_soback, so, freeing);
> - }
> - if (isspliced(so)) {
> - int freeing = SOSP_FREEING_READ;
> -
> - if (so == so->so_sp->ssp_socket)
> - freeing |= SOSP_FREEING_WRITE;
> - sounsplice(so, so->so_sp->ssp_socket, freeing);
> - }
> -#endif /* SOCKET_SPLICE */
> sbrelease(so, &so->so_snd);
> sorflush(so);
> if (!keep_lock)
> sounlock(so);
> -#ifdef SOCKET_SPLICE
> - if (so->so_sp) {
> - /* Reuse splice idle, sounsplice() has been called before. */
> - timeout_set_proc(&so->so_sp->ssp_idleto, soreaper, so);
> - timeout_add(&so->so_sp->ssp_idleto, 0);
> - } else
> -#endif /* SOCKET_SPLICE */
> - {
> - pool_put(&socket_pool, so);
> - }
> + pool_put(&socket_pool, so);
> }
>
> static inline uint64_t
> @@ -357,6 +328,8 @@ solinger_nsec(struct socket *so)
> return SEC_TO_NSEC(so->so_linger);
> }
>
> +#define SOSP_FREEING_READ 1
> +#define SOSP_FREEING_WRITE 2
> /*
> * Close a socket on last file table reference removal.
> * Initiate disconnect if connected.
> @@ -436,6 +409,31 @@ discard:
> if (so->so_state & SS_NOFDREF)
> panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
> so->so_state |= SS_NOFDREF;
> +
> +#ifdef SOCKET_SPLICE
> + if (issplicedback(so)) {
> + int freeing = SOSP_FREEING_WRITE;
> +
> + if (so->so_sp->ssp_soback == so)
> + freeing |= SOSP_FREEING_READ;
> + sounsplice(so->so_sp->ssp_soback, so, freeing);
> + }
> + if (isspliced(so)) {
> + int freeing = SOSP_FREEING_READ;
> +
> + if (so == so->so_sp->ssp_socket)
> + freeing |= SOSP_FREEING_WRITE;
> + sounsplice(so, so->so_sp->ssp_socket, freeing);
> + }
> + if (so->so_sp) {
> + sounlock(so);
> + taskq_barrier(sosplice_taskq);
> + timeout_del_barrier(&so->so_sp->ssp_idleto);
> + pool_put(&sosplice_pool, so->so_sp);
> + solock(so);
> + }
> +#endif /* SOCKET_SPLICE */
> +
> /* sofree() calls sounlock(). */
> sofree(so, 0);
> return (error);
> @@ -1441,32 +1439,6 @@ sotask(void *arg)
>
> /* Avoid user land starvation. */
> yield();
> -}
> -
> -/*
> - * The socket splicing task or idle timeout may sleep while grabbing the net
> - * lock. As sofree() can be called anytime, sotask() or soidle() could access
> - * the socket memory of a freed socket after wakeup. So delay the pool_put()
> - * after all pending socket splicing tasks or timeouts have finished. Do this
> - * by scheduling it on the same threads.
> - */
> -void
> -soreaper(void *arg)
> -{
> - struct socket *so = arg;
> -
> - /* Reuse splice task, sounsplice() has been called before. */
> - task_set(&so->so_sp->ssp_task, soput, so);
> - task_add(sosplice_taskq, &so->so_sp->ssp_task);
> -}
> -
> -void
> -soput(void *arg)
> -{
> - struct socket *so = arg;
> -
> - pool_put(&sosplice_pool, so->so_sp);
> - pool_put(&socket_pool, so);
> }
>
> /*
Unsplice socket before start the teardown