From: Alexander Bluhm Subject: Re: tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer To: Vitaliy Makkoveev Cc: OpenBSD tech Date: Thu, 26 Dec 2024 13:50:13 +0100 On Thu, Dec 26, 2024 at 02:59:39PM +0300, Vitaliy Makkoveev wrote: > > On 24 Dec 2024, at 22:21, Vitaliy Makkoveev wrote: > > > > This is the updated diff. It uses barriers instead of `ssp_idleto' > > timeout and `ssp_task' task redefinition. > > > > IIRC, this diff which uses barriers instead of task and timeout > reinitialisation is stable? Diff works fine. > Do you prefer to push these unsplicing > only hunks separately or combine it with the so_snd unlocking? Please commit in small steps. This gives better feedback from snapshot users. > > Index: sys/kern/uipc_socket.c > > =================================================================== > > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > > diff -u -p -r1.347 uipc_socket.c > > --- sys/kern/uipc_socket.c 19 Dec 2024 22:11:35 -0000 1.347 > > +++ sys/kern/uipc_socket.c 24 Dec 2024 19:12:56 -0000 > > @@ -62,8 +62,6 @@ int sosplice(struct socket *, int, off_t > > void sounsplice(struct socket *, struct socket *, int); > > void soidle(void *); > > void sotask(void *); > > -void soreaper(void *); > > -void soput(void *); > > int somove(struct socket *, int); > > void sorflush(struct socket *); > > > > @@ -327,17 +325,14 @@ sofree(struct socket *so, int keep_lock) > > sounlock(head); > > } > > > > - switch (so->so_proto->pr_domain->dom_family) { > > - case AF_INET: > > - case AF_INET6: > > - if (so->so_proto->pr_type == SOCK_STREAM) > > - break; > > - /* FALLTHROUGH */ > > - default: > > + if (!keep_lock) { > > + /* > > + * sofree() was called from soclose(). Sleep is safe > > + * even for tcp(4) sockets. > > + */ > > sounlock(so); > > refcnt_finalize(&so->so_refcnt, "sofinal"); > > solock(so); > > - break; > > } > > > > sigio_free(&so->so_sigio); > > @@ -358,20 +353,20 @@ sofree(struct socket *so, int keep_lock) > > (*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb); > > m_purge(so->so_rcv.sb_mb); > > > > - if (!keep_lock) > > + if (!keep_lock) { > > sounlock(so); > > > > #ifdef SOCKET_SPLICE > > - if (so->so_sp) { > > - /* Reuse splice idle, sounsplice() has been called before. */ > > - timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so, > > - KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); > > - timeout_add(&so->so_sp->ssp_idleto, 0); > > - } else > > + if (so->so_sp) { > > + timeout_del_barrier(&so->so_sp->ssp_idleto); > > + task_del(sosplice_taskq, &so->so_sp->ssp_task); > > + taskq_barrier(sosplice_taskq); > > + pool_put(&sosplice_pool, so->so_sp); > > + } > > #endif /* SOCKET_SPLICE */ > > - { > > - pool_put(&socket_pool, so); > > } > > + > > + pool_put(&socket_pool, so); > > } > > > > static inline uint64_t > > @@ -450,39 +445,10 @@ drop: > > } > > } > > discard: > > - if (so->so_state & SS_NOFDREF) > > - panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); > > - so->so_state |= SS_NOFDREF; > > - > > #ifdef SOCKET_SPLICE > > if (so->so_sp) { > > struct socket *soback; > > > > - if (so->so_proto->pr_flags & PR_WANTRCVD) { > > - /* > > - * Copy - Paste, but can't relock and sleep in > > - * sofree() in tcp(4) case. That's why tcp(4) > > - * still rely on solock() for splicing and > > - * unsplicing. > > - */ > > - > > - if (issplicedback(so)) { > > - int freeing = SOSP_FREEING_WRITE; > > - > > - if (so->so_sp->ssp_soback == so) > > - freeing |= SOSP_FREEING_READ; > > - sounsplice(so->so_sp->ssp_soback, so, freeing); > > - } > > - if (isspliced(so)) { > > - int freeing = SOSP_FREEING_READ; > > - > > - if (so == so->so_sp->ssp_socket) > > - freeing |= SOSP_FREEING_WRITE; > > - sounsplice(so, so->so_sp->ssp_socket, freeing); > > - } > > - goto free; > > - } > > - > > sounlock(so); > > mtx_enter(&so->so_snd.sb_mtx); > > /* > > @@ -530,8 +496,12 @@ notsplicedback: > > > > solock(so); > > } > > -free: > > #endif /* SOCKET_SPLICE */ > > + > > + if (so->so_state & SS_NOFDREF) > > + panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); > > + so->so_state |= SS_NOFDREF; > > + > > /* sofree() calls sounlock(). */ > > sofree(so, 0); > > return (error); > > @@ -1532,8 +1502,7 @@ sosplice(struct socket *so, int fd, off_ > > void > > sounsplice(struct socket *so, struct socket *sosp, int freeing) > > { > > - if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0) > > - sbassertlocked(&so->so_rcv); > > + sbassertlocked(&so->so_rcv); > > soassertlocked(so); > > > > task_del(sosplice_taskq, &so->so_splicetask); > > @@ -1587,17 +1556,23 @@ sotask(void *arg) > > */ > > > > sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR); > > - if (sockstream) > > - solock(so); > > - > > if (so->so_rcv.sb_flags & SB_SPLICE) { > > - if (sockstream) > > + struct socket *sosp = so->so_sp->ssp_socket; > > + > > + if (sockstream) { > > + sblock(&sosp->so_snd, SBL_WAIT | SBL_NOINTR); > > + solock(so); > > doyield = 1; > > + } > > + > > somove(so, M_DONTWAIT); > > + > > + if (sockstream) { > > + sounlock(so); > > + sbunlock(&sosp->so_snd); > > + } > > } > > > > - if (sockstream) > > - sounlock(so); > > sbunlock(&so->so_rcv); > > > > if (doyield) { > > @@ -1607,32 +1582,6 @@ sotask(void *arg) > > } > > > > /* > > - * The socket splicing task or idle timeout may sleep while grabbing the net > > - * lock. As sofree() can be called anytime, sotask() or soidle() could access > > - * the socket memory of a freed socket after wakeup. So delay the pool_put() > > - * after all pending socket splicing tasks or timeouts have finished. Do this > > - * by scheduling it on the same threads. > > - */ > > -void > > -soreaper(void *arg) > > -{ > > - struct socket *so = arg; > > - > > - /* Reuse splice task, sounsplice() has been called before. */ > > - task_set(&so->so_sp->ssp_task, soput, so); > > - task_add(sosplice_taskq, &so->so_sp->ssp_task); > > -} > > - > > -void > > -soput(void *arg) > > -{ > > - struct socket *so = arg; > > - > > - pool_put(&sosplice_pool, so->so_sp); > > - pool_put(&socket_pool, so); > > -} > > - > > -/* > > * Move data from receive buffer of spliced source socket to send > > * buffer of drain socket. Try to move as much as possible in one > > * big chunk. It is a TCP only implementation. > > @@ -1652,8 +1601,10 @@ somove(struct socket *so, int wait) > > > > if (sockdgram) > > sbassertlocked(&so->so_rcv); > > - else > > + else { > > + sbassertlocked(&sosp->so_snd); > > soassertlocked(so); > > + } > > > > mtx_enter(&so->so_rcv.sb_mtx); > > mtx_enter(&sosp->so_snd.sb_mtx); >