From: Alexander Bluhm Subject: Re: Unsplice socket before start the teardown To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Thu, 15 Feb 2024 18:08:38 +0100 On Thu, Feb 15, 2024 at 07:29:53PM +0300, Vitaliy Makkoveev wrote: > On Thu, Feb 15, 2024 at 01:20:15PM +0100, Alexander Bluhm wrote: > > On Thu, Feb 15, 2024 at 12:28:28PM +0300, Vitaliy Makkoveev wrote: > > > Use barriers instead of `ssp_idleto' task and timeout(9) > > > re-initialization to wait concurrent somove() and soidle(). > > > > Why do you move the unsplice code before the solinger sleep? > > > > Are you sure that you don't change the semantics? Is all data still > > spliced before the connection closes? > > > > bluhm > > > > The socket is inaccessible from userland, but SO_LINGER keeps it alive, > mostly to perform transmission of pending data. The new transmissions > and the reception are impossible. However, this is not true for spliced > sockets because they are still accessible by the spliced peer, but not > forever. > > IIRC, only tcp_ctlinput() will wakeup us, so we leave this loop after > TCP_LINGERTIME or if the remote peer was disconnected. > > So, for the spliced case, you can receive data and awake somove(), but > reach `so_linger' timeout and follow the destruction path. Or you could > pru_send() data to dying `sosp' which already reached `so_linger' > timeout. You don't know is your spliced peer closed, so I see no > difference, will it be unspliced now or after TCP_LINGERTIME seconds. > > Since the pru_detach() is not called, the remote also doesn't know that > socket is closed. > > I wanted to move it from sofree(). Since I see no differences, the idea > to do pru_detach() after disconnect and unsplice looked good. > > > This diff keeps current notation, so the unsplice moved before sofree(). As there are a lot of corner cases that I have fixed in the years before, I fear that movin code around may change the semantics. So I like this versiom much more, I have thrown it on regress machine. Before sofree and unsplice was called from in_pcbdetach(). Are you sure that this is no longer necessary? TCP can get a reset and call tcp_close(). It has to unsplice then. > Index: sys/kern/uipc_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.320 > diff -u -p -r1.320 uipc_socket.c > --- sys/kern/uipc_socket.c 12 Feb 2024 22:48:27 -0000 1.320 > +++ sys/kern/uipc_socket.c 15 Feb 2024 16:24:32 -0000 > @@ -62,8 +62,6 @@ int sosplice(struct socket *, int, off_t > void sounsplice(struct socket *, struct socket *, int); > void soidle(void *); > void sotask(void *); > -void soreaper(void *); > -void soput(void *); > int somove(struct socket *, int); > void sorflush(struct socket *); > > @@ -250,8 +248,6 @@ solisten(struct socket *so, int backlog) > return (0); > } > > -#define SOSP_FREEING_READ 1 > -#define SOSP_FREEING_WRITE 2 > void > sofree(struct socket *so, int keep_lock) > { > @@ -316,36 +312,11 @@ sofree(struct socket *so, int keep_lock) > sigio_free(&so->so_sigio); > klist_free(&so->so_rcv.sb_klist); > klist_free(&so->so_snd.sb_klist); > -#ifdef SOCKET_SPLICE > - if (issplicedback(so)) { > - int freeing = SOSP_FREEING_WRITE; > - > - if (so->so_sp->ssp_soback == so) > - freeing |= SOSP_FREEING_READ; > - sounsplice(so->so_sp->ssp_soback, so, freeing); > - } > - if (isspliced(so)) { > - int freeing = SOSP_FREEING_READ; > - > - if (so == so->so_sp->ssp_socket) > - freeing |= SOSP_FREEING_WRITE; > - sounsplice(so, so->so_sp->ssp_socket, freeing); > - } > -#endif /* SOCKET_SPLICE */ > sbrelease(so, &so->so_snd); > sorflush(so); > if (!keep_lock) > sounlock(so); > -#ifdef SOCKET_SPLICE > - if (so->so_sp) { > - /* Reuse splice idle, sounsplice() has been called before. */ > - timeout_set_proc(&so->so_sp->ssp_idleto, soreaper, so); > - timeout_add(&so->so_sp->ssp_idleto, 0); > - } else > -#endif /* SOCKET_SPLICE */ > - { > - pool_put(&socket_pool, so); > - } > + pool_put(&socket_pool, so); > } > > static inline uint64_t > @@ -357,6 +328,8 @@ solinger_nsec(struct socket *so) > return SEC_TO_NSEC(so->so_linger); > } > > +#define SOSP_FREEING_READ 1 > +#define SOSP_FREEING_WRITE 2 > /* > * Close a socket on last file table reference removal. > * Initiate disconnect if connected. > @@ -436,6 +409,31 @@ discard: > if (so->so_state & SS_NOFDREF) > panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); > so->so_state |= SS_NOFDREF; > + > +#ifdef SOCKET_SPLICE > + if (issplicedback(so)) { > + int freeing = SOSP_FREEING_WRITE; > + > + if (so->so_sp->ssp_soback == so) > + freeing |= SOSP_FREEING_READ; > + sounsplice(so->so_sp->ssp_soback, so, freeing); > + } > + if (isspliced(so)) { > + int freeing = SOSP_FREEING_READ; > + > + if (so == so->so_sp->ssp_socket) > + freeing |= SOSP_FREEING_WRITE; > + sounsplice(so, so->so_sp->ssp_socket, freeing); > + } > + if (so->so_sp) { > + sounlock(so); > + taskq_barrier(sosplice_taskq); > + timeout_del_barrier(&so->so_sp->ssp_idleto); > + pool_put(&sosplice_pool, so->so_sp); > + solock(so); > + } > +#endif /* SOCKET_SPLICE */ > + > /* sofree() calls sounlock(). */ > sofree(so, 0); > return (error); > @@ -1441,32 +1439,6 @@ sotask(void *arg) > > /* Avoid user land starvation. */ > yield(); > -} > - > -/* > - * The socket splicing task or idle timeout may sleep while grabbing the net > - * lock. As sofree() can be called anytime, sotask() or soidle() could access > - * the socket memory of a freed socket after wakeup. So delay the pool_put() > - * after all pending socket splicing tasks or timeouts have finished. Do this > - * by scheduling it on the same threads. > - */ > -void > -soreaper(void *arg) > -{ > - struct socket *so = arg; > - > - /* Reuse splice task, sounsplice() has been called before. */ > - task_set(&so->so_sp->ssp_task, soput, so); > - task_add(sosplice_taskq, &so->so_sp->ssp_task); > -} > - > -void > -soput(void *arg) > -{ > - struct socket *so = arg; > - > - pool_put(&sosplice_pool, so->so_sp); > - pool_put(&socket_pool, so); > } > > /*