Download raw body.
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
On Thu, Dec 26, 2024 at 02:59:39PM +0300, Vitaliy Makkoveev wrote:
> > On 24 Dec 2024, at 22:21, Vitaliy Makkoveev <mvs@openbsd.org> wrote:
> >
> > This is the updated diff. It uses barriers instead of `ssp_idleto'
> > timeout and `ssp_task' task redefinition.
> >
>
> IIRC, this diff which uses barriers instead of task and timeout
> reinitialisation is stable?
Diff works fine.
> Do you prefer to push these unsplicing
> only hunks separately or combine it with the so_snd unlocking?
Please commit in small steps. This gives better feedback from
snapshot users.
> > Index: sys/kern/uipc_socket.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> > diff -u -p -r1.347 uipc_socket.c
> > --- sys/kern/uipc_socket.c 19 Dec 2024 22:11:35 -0000 1.347
> > +++ sys/kern/uipc_socket.c 24 Dec 2024 19:12:56 -0000
> > @@ -62,8 +62,6 @@ int sosplice(struct socket *, int, off_t
> > void sounsplice(struct socket *, struct socket *, int);
> > void soidle(void *);
> > void sotask(void *);
> > -void soreaper(void *);
> > -void soput(void *);
> > int somove(struct socket *, int);
> > void sorflush(struct socket *);
> >
> > @@ -327,17 +325,14 @@ sofree(struct socket *so, int keep_lock)
> > sounlock(head);
> > }
> >
> > - switch (so->so_proto->pr_domain->dom_family) {
> > - case AF_INET:
> > - case AF_INET6:
> > - if (so->so_proto->pr_type == SOCK_STREAM)
> > - break;
> > - /* FALLTHROUGH */
> > - default:
> > + if (!keep_lock) {
> > + /*
> > + * sofree() was called from soclose(). Sleep is safe
> > + * even for tcp(4) sockets.
> > + */
> > sounlock(so);
> > refcnt_finalize(&so->so_refcnt, "sofinal");
> > solock(so);
> > - break;
> > }
> >
> > sigio_free(&so->so_sigio);
> > @@ -358,20 +353,20 @@ sofree(struct socket *so, int keep_lock)
> > (*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
> > m_purge(so->so_rcv.sb_mb);
> >
> > - if (!keep_lock)
> > + if (!keep_lock) {
> > sounlock(so);
> >
> > #ifdef SOCKET_SPLICE
> > - if (so->so_sp) {
> > - /* Reuse splice idle, sounsplice() has been called before. */
> > - timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so,
> > - KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
> > - timeout_add(&so->so_sp->ssp_idleto, 0);
> > - } else
> > + if (so->so_sp) {
> > + timeout_del_barrier(&so->so_sp->ssp_idleto);
> > + task_del(sosplice_taskq, &so->so_sp->ssp_task);
> > + taskq_barrier(sosplice_taskq);
> > + pool_put(&sosplice_pool, so->so_sp);
> > + }
> > #endif /* SOCKET_SPLICE */
> > - {
> > - pool_put(&socket_pool, so);
> > }
> > +
> > + pool_put(&socket_pool, so);
> > }
> >
> > static inline uint64_t
> > @@ -450,39 +445,10 @@ drop:
> > }
> > }
> > discard:
> > - if (so->so_state & SS_NOFDREF)
> > - panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
> > - so->so_state |= SS_NOFDREF;
> > -
> > #ifdef SOCKET_SPLICE
> > if (so->so_sp) {
> > struct socket *soback;
> >
> > - if (so->so_proto->pr_flags & PR_WANTRCVD) {
> > - /*
> > - * Copy - Paste, but can't relock and sleep in
> > - * sofree() in tcp(4) case. That's why tcp(4)
> > - * still rely on solock() for splicing and
> > - * unsplicing.
> > - */
> > -
> > - if (issplicedback(so)) {
> > - int freeing = SOSP_FREEING_WRITE;
> > -
> > - if (so->so_sp->ssp_soback == so)
> > - freeing |= SOSP_FREEING_READ;
> > - sounsplice(so->so_sp->ssp_soback, so, freeing);
> > - }
> > - if (isspliced(so)) {
> > - int freeing = SOSP_FREEING_READ;
> > -
> > - if (so == so->so_sp->ssp_socket)
> > - freeing |= SOSP_FREEING_WRITE;
> > - sounsplice(so, so->so_sp->ssp_socket, freeing);
> > - }
> > - goto free;
> > - }
> > -
> > sounlock(so);
> > mtx_enter(&so->so_snd.sb_mtx);
> > /*
> > @@ -530,8 +496,12 @@ notsplicedback:
> >
> > solock(so);
> > }
> > -free:
> > #endif /* SOCKET_SPLICE */
> > +
> > + if (so->so_state & SS_NOFDREF)
> > + panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
> > + so->so_state |= SS_NOFDREF;
> > +
> > /* sofree() calls sounlock(). */
> > sofree(so, 0);
> > return (error);
> > @@ -1532,8 +1502,7 @@ sosplice(struct socket *so, int fd, off_
> > void
> > sounsplice(struct socket *so, struct socket *sosp, int freeing)
> > {
> > - if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
> > - sbassertlocked(&so->so_rcv);
> > + sbassertlocked(&so->so_rcv);
> > soassertlocked(so);
> >
> > task_del(sosplice_taskq, &so->so_splicetask);
> > @@ -1587,17 +1556,23 @@ sotask(void *arg)
> > */
> >
> > sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
> > - if (sockstream)
> > - solock(so);
> > -
> > if (so->so_rcv.sb_flags & SB_SPLICE) {
> > - if (sockstream)
> > + struct socket *sosp = so->so_sp->ssp_socket;
> > +
> > + if (sockstream) {
> > + sblock(&sosp->so_snd, SBL_WAIT | SBL_NOINTR);
> > + solock(so);
> > doyield = 1;
> > + }
> > +
> > somove(so, M_DONTWAIT);
> > +
> > + if (sockstream) {
> > + sounlock(so);
> > + sbunlock(&sosp->so_snd);
> > + }
> > }
> >
> > - if (sockstream)
> > - sounlock(so);
> > sbunlock(&so->so_rcv);
> >
> > if (doyield) {
> > @@ -1607,32 +1582,6 @@ sotask(void *arg)
> > }
> >
> > /*
> > - * The socket splicing task or idle timeout may sleep while grabbing the net
> > - * lock. As sofree() can be called anytime, sotask() or soidle() could access
> > - * the socket memory of a freed socket after wakeup. So delay the pool_put()
> > - * after all pending socket splicing tasks or timeouts have finished. Do this
> > - * by scheduling it on the same threads.
> > - */
> > -void
> > -soreaper(void *arg)
> > -{
> > - struct socket *so = arg;
> > -
> > - /* Reuse splice task, sounsplice() has been called before. */
> > - task_set(&so->so_sp->ssp_task, soput, so);
> > - task_add(sosplice_taskq, &so->so_sp->ssp_task);
> > -}
> > -
> > -void
> > -soput(void *arg)
> > -{
> > - struct socket *so = arg;
> > -
> > - pool_put(&sosplice_pool, so->so_sp);
> > - pool_put(&socket_pool, so);
> > -}
> > -
> > -/*
> > * Move data from receive buffer of spliced source socket to send
> > * buffer of drain socket. Try to move as much as possible in one
> > * big chunk. It is a TCP only implementation.
> > @@ -1652,8 +1601,10 @@ somove(struct socket *so, int wait)
> >
> > if (sockdgram)
> > sbassertlocked(&so->so_rcv);
> > - else
> > + else {
> > + sbassertlocked(&sosp->so_snd);
> > soassertlocked(so);
> > + }
> >
> > mtx_enter(&so->so_rcv.sb_mtx);
> > mtx_enter(&sosp->so_snd.sb_mtx);
>
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer