From: Alexander Bluhm Subject: Re: tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer To: Vitaliy Makkoveev Cc: OpenBSD tech Date: Thu, 26 Dec 2024 13:47:45 +0100 On Tue, Dec 24, 2024 at 10:21:55PM +0300, Vitaliy Makkoveev wrote: > This is the updated diff. It uses barriers instead of `ssp_idleto' > timeout and `ssp_task' task redefinition. I have tested this diff with my regression, performance, and network setup. Works fine. OK bluhm@ Could you wait one day before commiting this? Then we have a snapshot between my and your unlocking diff. If something goes wrong, it will be easier to bisect. > Index: sys/kern/uipc_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > diff -u -p -r1.347 uipc_socket.c > --- sys/kern/uipc_socket.c 19 Dec 2024 22:11:35 -0000 1.347 > +++ sys/kern/uipc_socket.c 24 Dec 2024 19:12:56 -0000 > @@ -62,8 +62,6 @@ int sosplice(struct socket *, int, off_t > void sounsplice(struct socket *, struct socket *, int); > void soidle(void *); > void sotask(void *); > -void soreaper(void *); > -void soput(void *); > int somove(struct socket *, int); > void sorflush(struct socket *); > > @@ -327,17 +325,14 @@ sofree(struct socket *so, int keep_lock) > sounlock(head); > } > > - switch (so->so_proto->pr_domain->dom_family) { > - case AF_INET: > - case AF_INET6: > - if (so->so_proto->pr_type == SOCK_STREAM) > - break; > - /* FALLTHROUGH */ > - default: > + if (!keep_lock) { > + /* > + * sofree() was called from soclose(). Sleep is safe > + * even for tcp(4) sockets. > + */ > sounlock(so); > refcnt_finalize(&so->so_refcnt, "sofinal"); > solock(so); > - break; > } > > sigio_free(&so->so_sigio); > @@ -358,20 +353,20 @@ sofree(struct socket *so, int keep_lock) > (*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb); > m_purge(so->so_rcv.sb_mb); > > - if (!keep_lock) > + if (!keep_lock) { > sounlock(so); > > #ifdef SOCKET_SPLICE > - if (so->so_sp) { > - /* Reuse splice idle, sounsplice() has been called before. */ > - timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so, > - KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); > - timeout_add(&so->so_sp->ssp_idleto, 0); > - } else > + if (so->so_sp) { > + timeout_del_barrier(&so->so_sp->ssp_idleto); > + task_del(sosplice_taskq, &so->so_sp->ssp_task); > + taskq_barrier(sosplice_taskq); > + pool_put(&sosplice_pool, so->so_sp); > + } > #endif /* SOCKET_SPLICE */ > - { > - pool_put(&socket_pool, so); > } > + > + pool_put(&socket_pool, so); > } > > static inline uint64_t > @@ -450,39 +445,10 @@ drop: > } > } > discard: > - if (so->so_state & SS_NOFDREF) > - panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); > - so->so_state |= SS_NOFDREF; > - > #ifdef SOCKET_SPLICE > if (so->so_sp) { > struct socket *soback; > > - if (so->so_proto->pr_flags & PR_WANTRCVD) { > - /* > - * Copy - Paste, but can't relock and sleep in > - * sofree() in tcp(4) case. That's why tcp(4) > - * still rely on solock() for splicing and > - * unsplicing. > - */ > - > - if (issplicedback(so)) { > - int freeing = SOSP_FREEING_WRITE; > - > - if (so->so_sp->ssp_soback == so) > - freeing |= SOSP_FREEING_READ; > - sounsplice(so->so_sp->ssp_soback, so, freeing); > - } > - if (isspliced(so)) { > - int freeing = SOSP_FREEING_READ; > - > - if (so == so->so_sp->ssp_socket) > - freeing |= SOSP_FREEING_WRITE; > - sounsplice(so, so->so_sp->ssp_socket, freeing); > - } > - goto free; > - } > - > sounlock(so); > mtx_enter(&so->so_snd.sb_mtx); > /* > @@ -530,8 +496,12 @@ notsplicedback: > > solock(so); > } > -free: > #endif /* SOCKET_SPLICE */ > + > + if (so->so_state & SS_NOFDREF) > + panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); > + so->so_state |= SS_NOFDREF; > + > /* sofree() calls sounlock(). */ > sofree(so, 0); > return (error); > @@ -1532,8 +1502,7 @@ sosplice(struct socket *so, int fd, off_ > void > sounsplice(struct socket *so, struct socket *sosp, int freeing) > { > - if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0) > - sbassertlocked(&so->so_rcv); > + sbassertlocked(&so->so_rcv); > soassertlocked(so); > > task_del(sosplice_taskq, &so->so_splicetask); > @@ -1587,17 +1556,23 @@ sotask(void *arg) > */ > > sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR); > - if (sockstream) > - solock(so); > - > if (so->so_rcv.sb_flags & SB_SPLICE) { > - if (sockstream) > + struct socket *sosp = so->so_sp->ssp_socket; > + > + if (sockstream) { > + sblock(&sosp->so_snd, SBL_WAIT | SBL_NOINTR); > + solock(so); > doyield = 1; > + } > + > somove(so, M_DONTWAIT); > + > + if (sockstream) { > + sounlock(so); > + sbunlock(&sosp->so_snd); > + } > } > > - if (sockstream) > - sounlock(so); > sbunlock(&so->so_rcv); > > if (doyield) { > @@ -1607,32 +1582,6 @@ sotask(void *arg) > } > > /* > - * The socket splicing task or idle timeout may sleep while grabbing the net > - * lock. As sofree() can be called anytime, sotask() or soidle() could access > - * the socket memory of a freed socket after wakeup. So delay the pool_put() > - * after all pending socket splicing tasks or timeouts have finished. Do this > - * by scheduling it on the same threads. > - */ > -void > -soreaper(void *arg) > -{ > - struct socket *so = arg; > - > - /* Reuse splice task, sounsplice() has been called before. */ > - task_set(&so->so_sp->ssp_task, soput, so); > - task_add(sosplice_taskq, &so->so_sp->ssp_task); > -} > - > -void > -soput(void *arg) > -{ > - struct socket *so = arg; > - > - pool_put(&sosplice_pool, so->so_sp); > - pool_put(&socket_pool, so); > -} > - > -/* > * Move data from receive buffer of spliced source socket to send > * buffer of drain socket. Try to move as much as possible in one > * big chunk. It is a TCP only implementation. > @@ -1652,8 +1601,10 @@ somove(struct socket *so, int wait) > > if (sockdgram) > sbassertlocked(&so->so_rcv); > - else > + else { > + sbassertlocked(&sosp->so_snd); > soassertlocked(so); > + } > > mtx_enter(&so->so_rcv.sb_mtx); > mtx_enter(&sosp->so_snd.sb_mtx);