Download raw body.
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
> On 26 Dec 2024, at 15:47, Alexander Bluhm <bluhm@openbsd.org> wrote:
>
> On Tue, Dec 24, 2024 at 10:21:55PM +0300, Vitaliy Makkoveev wrote:
>> This is the updated diff. It uses barriers instead of `ssp_idleto'
>> timeout and `ssp_task' task redefinition.
>
> I have tested this diff with my regression, performance, and network
> setup. Works fine.
>
> OK bluhm@
>
> Could you wait one day before commiting this? Then we have a
> snapshot between my and your unlocking diff. If something goes
> wrong, it will be easier to bisect.
Thanks for testing! Will commit this weekend or Monday.
>
>> Index: sys/kern/uipc_socket.c
>> ===================================================================
>> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
>> diff -u -p -r1.347 uipc_socket.c
>> --- sys/kern/uipc_socket.c 19 Dec 2024 22:11:35 -0000 1.347
>> +++ sys/kern/uipc_socket.c 24 Dec 2024 19:12:56 -0000
>> @@ -62,8 +62,6 @@ int sosplice(struct socket *, int, off_t
>> void sounsplice(struct socket *, struct socket *, int);
>> void soidle(void *);
>> void sotask(void *);
>> -void soreaper(void *);
>> -void soput(void *);
>> int somove(struct socket *, int);
>> void sorflush(struct socket *);
>>
>> @@ -327,17 +325,14 @@ sofree(struct socket *so, int keep_lock)
>> sounlock(head);
>> }
>>
>> - switch (so->so_proto->pr_domain->dom_family) {
>> - case AF_INET:
>> - case AF_INET6:
>> - if (so->so_proto->pr_type == SOCK_STREAM)
>> - break;
>> - /* FALLTHROUGH */
>> - default:
>> + if (!keep_lock) {
>> + /*
>> + * sofree() was called from soclose(). Sleep is safe
>> + * even for tcp(4) sockets.
>> + */
>> sounlock(so);
>> refcnt_finalize(&so->so_refcnt, "sofinal");
>> solock(so);
>> - break;
>> }
>>
>> sigio_free(&so->so_sigio);
>> @@ -358,20 +353,20 @@ sofree(struct socket *so, int keep_lock)
>> (*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
>> m_purge(so->so_rcv.sb_mb);
>>
>> - if (!keep_lock)
>> + if (!keep_lock) {
>> sounlock(so);
>>
>> #ifdef SOCKET_SPLICE
>> - if (so->so_sp) {
>> - /* Reuse splice idle, sounsplice() has been called before. */
>> - timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so,
>> - KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
>> - timeout_add(&so->so_sp->ssp_idleto, 0);
>> - } else
>> + if (so->so_sp) {
>> + timeout_del_barrier(&so->so_sp->ssp_idleto);
>> + task_del(sosplice_taskq, &so->so_sp->ssp_task);
>> + taskq_barrier(sosplice_taskq);
>> + pool_put(&sosplice_pool, so->so_sp);
>> + }
>> #endif /* SOCKET_SPLICE */
>> - {
>> - pool_put(&socket_pool, so);
>> }
>> +
>> + pool_put(&socket_pool, so);
>> }
>>
>> static inline uint64_t
>> @@ -450,39 +445,10 @@ drop:
>> }
>> }
>> discard:
>> - if (so->so_state & SS_NOFDREF)
>> - panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
>> - so->so_state |= SS_NOFDREF;
>> -
>> #ifdef SOCKET_SPLICE
>> if (so->so_sp) {
>> struct socket *soback;
>>
>> - if (so->so_proto->pr_flags & PR_WANTRCVD) {
>> - /*
>> - * Copy - Paste, but can't relock and sleep in
>> - * sofree() in tcp(4) case. That's why tcp(4)
>> - * still rely on solock() for splicing and
>> - * unsplicing.
>> - */
>> -
>> - if (issplicedback(so)) {
>> - int freeing = SOSP_FREEING_WRITE;
>> -
>> - if (so->so_sp->ssp_soback == so)
>> - freeing |= SOSP_FREEING_READ;
>> - sounsplice(so->so_sp->ssp_soback, so, freeing);
>> - }
>> - if (isspliced(so)) {
>> - int freeing = SOSP_FREEING_READ;
>> -
>> - if (so == so->so_sp->ssp_socket)
>> - freeing |= SOSP_FREEING_WRITE;
>> - sounsplice(so, so->so_sp->ssp_socket, freeing);
>> - }
>> - goto free;
>> - }
>> -
>> sounlock(so);
>> mtx_enter(&so->so_snd.sb_mtx);
>> /*
>> @@ -530,8 +496,12 @@ notsplicedback:
>>
>> solock(so);
>> }
>> -free:
>> #endif /* SOCKET_SPLICE */
>> +
>> + if (so->so_state & SS_NOFDREF)
>> + panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
>> + so->so_state |= SS_NOFDREF;
>> +
>> /* sofree() calls sounlock(). */
>> sofree(so, 0);
>> return (error);
>> @@ -1532,8 +1502,7 @@ sosplice(struct socket *so, int fd, off_
>> void
>> sounsplice(struct socket *so, struct socket *sosp, int freeing)
>> {
>> - if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
>> - sbassertlocked(&so->so_rcv);
>> + sbassertlocked(&so->so_rcv);
>> soassertlocked(so);
>>
>> task_del(sosplice_taskq, &so->so_splicetask);
>> @@ -1587,17 +1556,23 @@ sotask(void *arg)
>> */
>>
>> sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
>> - if (sockstream)
>> - solock(so);
>> -
>> if (so->so_rcv.sb_flags & SB_SPLICE) {
>> - if (sockstream)
>> + struct socket *sosp = so->so_sp->ssp_socket;
>> +
>> + if (sockstream) {
>> + sblock(&sosp->so_snd, SBL_WAIT | SBL_NOINTR);
>> + solock(so);
>> doyield = 1;
>> + }
>> +
>> somove(so, M_DONTWAIT);
>> +
>> + if (sockstream) {
>> + sounlock(so);
>> + sbunlock(&sosp->so_snd);
>> + }
>> }
>>
>> - if (sockstream)
>> - sounlock(so);
>> sbunlock(&so->so_rcv);
>>
>> if (doyield) {
>> @@ -1607,32 +1582,6 @@ sotask(void *arg)
>> }
>>
>> /*
>> - * The socket splicing task or idle timeout may sleep while grabbing the net
>> - * lock. As sofree() can be called anytime, sotask() or soidle() could access
>> - * the socket memory of a freed socket after wakeup. So delay the pool_put()
>> - * after all pending socket splicing tasks or timeouts have finished. Do this
>> - * by scheduling it on the same threads.
>> - */
>> -void
>> -soreaper(void *arg)
>> -{
>> - struct socket *so = arg;
>> -
>> - /* Reuse splice task, sounsplice() has been called before. */
>> - task_set(&so->so_sp->ssp_task, soput, so);
>> - task_add(sosplice_taskq, &so->so_sp->ssp_task);
>> -}
>> -
>> -void
>> -soput(void *arg)
>> -{
>> - struct socket *so = arg;
>> -
>> - pool_put(&sosplice_pool, so->so_sp);
>> - pool_put(&socket_pool, so);
>> -}
>> -
>> -/*
>> * Move data from receive buffer of spliced source socket to send
>> * buffer of drain socket. Try to move as much as possible in one
>> * big chunk. It is a TCP only implementation.
>> @@ -1652,8 +1601,10 @@ somove(struct socket *so, int wait)
>>
>> if (sockdgram)
>> sbassertlocked(&so->so_rcv);
>> - else
>> + else {
>> + sbassertlocked(&sosp->so_snd);
>> soassertlocked(so);
>> + }
>>
>> mtx_enter(&so->so_rcv.sb_mtx);
>> mtx_enter(&sosp->so_snd.sb_mtx);
>
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer