From: Vitaliy Makkoveev Subject: Re: tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer To: Alexander Bluhm Cc: OpenBSD tech Date: Thu, 26 Dec 2024 18:43:29 +0300 > On 26 Dec 2024, at 15:47, Alexander Bluhm wrote: > > On Tue, Dec 24, 2024 at 10:21:55PM +0300, Vitaliy Makkoveev wrote: >> This is the updated diff. It uses barriers instead of `ssp_idleto' >> timeout and `ssp_task' task redefinition. > > I have tested this diff with my regression, performance, and network > setup. Works fine. > > OK bluhm@ > > Could you wait one day before commiting this? Then we have a > snapshot between my and your unlocking diff. If something goes > wrong, it will be easier to bisect. Thanks for testing! Will commit this weekend or Monday. > >> Index: sys/kern/uipc_socket.c >> =================================================================== >> RCS file: /cvs/src/sys/kern/uipc_socket.c,v >> diff -u -p -r1.347 uipc_socket.c >> --- sys/kern/uipc_socket.c 19 Dec 2024 22:11:35 -0000 1.347 >> +++ sys/kern/uipc_socket.c 24 Dec 2024 19:12:56 -0000 >> @@ -62,8 +62,6 @@ int sosplice(struct socket *, int, off_t >> void sounsplice(struct socket *, struct socket *, int); >> void soidle(void *); >> void sotask(void *); >> -void soreaper(void *); >> -void soput(void *); >> int somove(struct socket *, int); >> void sorflush(struct socket *); >> >> @@ -327,17 +325,14 @@ sofree(struct socket *so, int keep_lock) >> sounlock(head); >> } >> >> - switch (so->so_proto->pr_domain->dom_family) { >> - case AF_INET: >> - case AF_INET6: >> - if (so->so_proto->pr_type == SOCK_STREAM) >> - break; >> - /* FALLTHROUGH */ >> - default: >> + if (!keep_lock) { >> + /* >> + * sofree() was called from soclose(). Sleep is safe >> + * even for tcp(4) sockets. >> + */ >> sounlock(so); >> refcnt_finalize(&so->so_refcnt, "sofinal"); >> solock(so); >> - break; >> } >> >> sigio_free(&so->so_sigio); >> @@ -358,20 +353,20 @@ sofree(struct socket *so, int keep_lock) >> (*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb); >> m_purge(so->so_rcv.sb_mb); >> >> - if (!keep_lock) >> + if (!keep_lock) { >> sounlock(so); >> >> #ifdef SOCKET_SPLICE >> - if (so->so_sp) { >> - /* Reuse splice idle, sounsplice() has been called before. */ >> - timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so, >> - KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); >> - timeout_add(&so->so_sp->ssp_idleto, 0); >> - } else >> + if (so->so_sp) { >> + timeout_del_barrier(&so->so_sp->ssp_idleto); >> + task_del(sosplice_taskq, &so->so_sp->ssp_task); >> + taskq_barrier(sosplice_taskq); >> + pool_put(&sosplice_pool, so->so_sp); >> + } >> #endif /* SOCKET_SPLICE */ >> - { >> - pool_put(&socket_pool, so); >> } >> + >> + pool_put(&socket_pool, so); >> } >> >> static inline uint64_t >> @@ -450,39 +445,10 @@ drop: >> } >> } >> discard: >> - if (so->so_state & SS_NOFDREF) >> - panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); >> - so->so_state |= SS_NOFDREF; >> - >> #ifdef SOCKET_SPLICE >> if (so->so_sp) { >> struct socket *soback; >> >> - if (so->so_proto->pr_flags & PR_WANTRCVD) { >> - /* >> - * Copy - Paste, but can't relock and sleep in >> - * sofree() in tcp(4) case. That's why tcp(4) >> - * still rely on solock() for splicing and >> - * unsplicing. >> - */ >> - >> - if (issplicedback(so)) { >> - int freeing = SOSP_FREEING_WRITE; >> - >> - if (so->so_sp->ssp_soback == so) >> - freeing |= SOSP_FREEING_READ; >> - sounsplice(so->so_sp->ssp_soback, so, freeing); >> - } >> - if (isspliced(so)) { >> - int freeing = SOSP_FREEING_READ; >> - >> - if (so == so->so_sp->ssp_socket) >> - freeing |= SOSP_FREEING_WRITE; >> - sounsplice(so, so->so_sp->ssp_socket, freeing); >> - } >> - goto free; >> - } >> - >> sounlock(so); >> mtx_enter(&so->so_snd.sb_mtx); >> /* >> @@ -530,8 +496,12 @@ notsplicedback: >> >> solock(so); >> } >> -free: >> #endif /* SOCKET_SPLICE */ >> + >> + if (so->so_state & SS_NOFDREF) >> + panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); >> + so->so_state |= SS_NOFDREF; >> + >> /* sofree() calls sounlock(). */ >> sofree(so, 0); >> return (error); >> @@ -1532,8 +1502,7 @@ sosplice(struct socket *so, int fd, off_ >> void >> sounsplice(struct socket *so, struct socket *sosp, int freeing) >> { >> - if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0) >> - sbassertlocked(&so->so_rcv); >> + sbassertlocked(&so->so_rcv); >> soassertlocked(so); >> >> task_del(sosplice_taskq, &so->so_splicetask); >> @@ -1587,17 +1556,23 @@ sotask(void *arg) >> */ >> >> sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR); >> - if (sockstream) >> - solock(so); >> - >> if (so->so_rcv.sb_flags & SB_SPLICE) { >> - if (sockstream) >> + struct socket *sosp = so->so_sp->ssp_socket; >> + >> + if (sockstream) { >> + sblock(&sosp->so_snd, SBL_WAIT | SBL_NOINTR); >> + solock(so); >> doyield = 1; >> + } >> + >> somove(so, M_DONTWAIT); >> + >> + if (sockstream) { >> + sounlock(so); >> + sbunlock(&sosp->so_snd); >> + } >> } >> >> - if (sockstream) >> - sounlock(so); >> sbunlock(&so->so_rcv); >> >> if (doyield) { >> @@ -1607,32 +1582,6 @@ sotask(void *arg) >> } >> >> /* >> - * The socket splicing task or idle timeout may sleep while grabbing the net >> - * lock. As sofree() can be called anytime, sotask() or soidle() could access >> - * the socket memory of a freed socket after wakeup. So delay the pool_put() >> - * after all pending socket splicing tasks or timeouts have finished. Do this >> - * by scheduling it on the same threads. >> - */ >> -void >> -soreaper(void *arg) >> -{ >> - struct socket *so = arg; >> - >> - /* Reuse splice task, sounsplice() has been called before. */ >> - task_set(&so->so_sp->ssp_task, soput, so); >> - task_add(sosplice_taskq, &so->so_sp->ssp_task); >> -} >> - >> -void >> -soput(void *arg) >> -{ >> - struct socket *so = arg; >> - >> - pool_put(&sosplice_pool, so->so_sp); >> - pool_put(&socket_pool, so); >> -} >> - >> -/* >> * Move data from receive buffer of spliced source socket to send >> * buffer of drain socket. Try to move as much as possible in one >> * big chunk. It is a TCP only implementation. >> @@ -1652,8 +1601,10 @@ somove(struct socket *so, int wait) >> >> if (sockdgram) >> sbassertlocked(&so->so_rcv); >> - else >> + else { >> + sbassertlocked(&sosp->so_snd); >> soassertlocked(so); >> + } >> >> mtx_enter(&so->so_rcv.sb_mtx); >> mtx_enter(&sosp->so_snd.sb_mtx); >