Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
OpenBSD tech <tech@openbsd.org>
Date:
Thu, 26 Dec 2024 13:50:13 +0100

Download raw body.

Thread
  • Vitaliy Makkoveev:

    tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer

    • Alexander Bluhm:

      tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer

  • Alexander Bluhm:

    tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer

  • On Thu, Dec 26, 2024 at 02:59:39PM +0300, Vitaliy Makkoveev wrote:
    > > On 24 Dec 2024, at 22:21, Vitaliy Makkoveev <mvs@openbsd.org> wrote:
    > > 
    > > This is the updated diff. It uses barriers instead of `ssp_idleto'
    > > timeout and `ssp_task' task redefinition.
    > > 
    > 
    > IIRC, this diff which uses barriers instead of task and timeout
    > reinitialisation is stable?
    
    Diff works fine.  
    
    > Do you prefer to push these unsplicing
    > only hunks separately or combine it with the so_snd unlocking?
    
    Please commit in small steps.  This gives better feedback from
    snapshot users.
    
    > > Index: sys/kern/uipc_socket.c
    > > ===================================================================
    > > RCS file: /cvs/src/sys/kern/uipc_socket.c,v
    > > diff -u -p -r1.347 uipc_socket.c
    > > --- sys/kern/uipc_socket.c	19 Dec 2024 22:11:35 -0000	1.347
    > > +++ sys/kern/uipc_socket.c	24 Dec 2024 19:12:56 -0000
    > > @@ -62,8 +62,6 @@ int	sosplice(struct socket *, int, off_t
    > > void	sounsplice(struct socket *, struct socket *, int);
    > > void	soidle(void *);
    > > void	sotask(void *);
    > > -void	soreaper(void *);
    > > -void	soput(void *);
    > > int	somove(struct socket *, int);
    > > void	sorflush(struct socket *);
    > > 
    > > @@ -327,17 +325,14 @@ sofree(struct socket *so, int keep_lock)
    > > 			sounlock(head);
    > > 	}
    > > 
    > > -	switch (so->so_proto->pr_domain->dom_family) {
    > > -	case AF_INET:
    > > -	case AF_INET6:
    > > -		if (so->so_proto->pr_type == SOCK_STREAM)
    > > -			break;
    > > -		/* FALLTHROUGH */
    > > -	default:
    > > +	if (!keep_lock) {
    > > +		/*
    > > +		 * sofree() was called from soclose(). Sleep is safe
    > > +		 * even for tcp(4) sockets.
    > > +		 */
    > > 		sounlock(so);
    > > 		refcnt_finalize(&so->so_refcnt, "sofinal");
    > > 		solock(so);
    > > -		break;
    > > 	}
    > > 
    > > 	sigio_free(&so->so_sigio);
    > > @@ -358,20 +353,20 @@ sofree(struct socket *so, int keep_lock)
    > > 		(*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
    > > 	m_purge(so->so_rcv.sb_mb);
    > > 
    > > -	if (!keep_lock)
    > > +	if (!keep_lock) {
    > > 		sounlock(so);
    > > 
    > > #ifdef SOCKET_SPLICE
    > > -	if (so->so_sp) {
    > > -		/* Reuse splice idle, sounsplice() has been called before. */
    > > -		timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so,
    > > -		    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
    > > -		timeout_add(&so->so_sp->ssp_idleto, 0);
    > > -	} else
    > > +		if (so->so_sp) {
    > > +			timeout_del_barrier(&so->so_sp->ssp_idleto);
    > > +			task_del(sosplice_taskq, &so->so_sp->ssp_task);
    > > +			taskq_barrier(sosplice_taskq);
    > > +			pool_put(&sosplice_pool, so->so_sp);
    > > +		}
    > > #endif /* SOCKET_SPLICE */
    > > -	{
    > > -		pool_put(&socket_pool, so);
    > > 	}
    > > +
    > > +	pool_put(&socket_pool, so);
    > > }
    > > 
    > > static inline uint64_t
    > > @@ -450,39 +445,10 @@ drop:
    > > 		}
    > > 	}
    > > discard:
    > > -	if (so->so_state & SS_NOFDREF)
    > > -		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
    > > -	so->so_state |= SS_NOFDREF;
    > > -
    > > #ifdef SOCKET_SPLICE
    > > 	if (so->so_sp) {
    > > 		struct socket *soback;
    > > 
    > > -		if (so->so_proto->pr_flags & PR_WANTRCVD) {
    > > -			/*
    > > -			 * Copy - Paste, but can't relock and sleep in
    > > -			 * sofree() in tcp(4) case. That's why tcp(4)
    > > -			 * still rely on solock() for splicing and
    > > -			 * unsplicing.
    > > -			 */
    > > -
    > > -			if (issplicedback(so)) {
    > > -				int freeing = SOSP_FREEING_WRITE;
    > > -
    > > -				if (so->so_sp->ssp_soback == so)
    > > -					freeing |= SOSP_FREEING_READ;
    > > -				sounsplice(so->so_sp->ssp_soback, so, freeing);
    > > -			}
    > > -			if (isspliced(so)) {
    > > -				int freeing = SOSP_FREEING_READ;
    > > -
    > > -				if (so == so->so_sp->ssp_socket)
    > > -					freeing |= SOSP_FREEING_WRITE;
    > > -				sounsplice(so, so->so_sp->ssp_socket, freeing);
    > > -			}
    > > -			goto free;
    > > -		}
    > > -
    > > 		sounlock(so);
    > > 		mtx_enter(&so->so_snd.sb_mtx);
    > > 		/*
    > > @@ -530,8 +496,12 @@ notsplicedback:
    > > 
    > > 		solock(so);
    > > 	}
    > > -free:
    > > #endif /* SOCKET_SPLICE */
    > > +
    > > +	if (so->so_state & SS_NOFDREF)
    > > +		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
    > > +	so->so_state |= SS_NOFDREF;
    > > +
    > > 	/* sofree() calls sounlock(). */
    > > 	sofree(so, 0);
    > > 	return (error);
    > > @@ -1532,8 +1502,7 @@ sosplice(struct socket *so, int fd, off_
    > > void
    > > sounsplice(struct socket *so, struct socket *sosp, int freeing)
    > > {
    > > -	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
    > > -		sbassertlocked(&so->so_rcv);
    > > +	sbassertlocked(&so->so_rcv);
    > > 	soassertlocked(so);
    > > 
    > > 	task_del(sosplice_taskq, &so->so_splicetask);
    > > @@ -1587,17 +1556,23 @@ sotask(void *arg)
    > > 	 */
    > > 
    > > 	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
    > > -	if (sockstream)
    > > -		solock(so);
    > > -
    > > 	if (so->so_rcv.sb_flags & SB_SPLICE) {
    > > -		if (sockstream)
    > > +		struct socket *sosp = so->so_sp->ssp_socket;
    > > +
    > > +		if (sockstream) {
    > > +			sblock(&sosp->so_snd, SBL_WAIT | SBL_NOINTR);
    > > +			solock(so);
    > > 			doyield = 1;
    > > +		}
    > > +
    > > 		somove(so, M_DONTWAIT);
    > > +
    > > +		if (sockstream) {
    > > +			sounlock(so);
    > > +			sbunlock(&sosp->so_snd);
    > > +		}
    > > 	}
    > > 
    > > -	if (sockstream)
    > > -		sounlock(so);
    > > 	sbunlock(&so->so_rcv);
    > > 
    > > 	if (doyield) {
    > > @@ -1607,32 +1582,6 @@ sotask(void *arg)
    > > }
    > > 
    > > /*
    > > - * The socket splicing task or idle timeout may sleep while grabbing the net
    > > - * lock.  As sofree() can be called anytime, sotask() or soidle() could access
    > > - * the socket memory of a freed socket after wakeup.  So delay the pool_put()
    > > - * after all pending socket splicing tasks or timeouts have finished.  Do this
    > > - * by scheduling it on the same threads.
    > > - */
    > > -void
    > > -soreaper(void *arg)
    > > -{
    > > -	struct socket *so = arg;
    > > -
    > > -	/* Reuse splice task, sounsplice() has been called before. */
    > > -	task_set(&so->so_sp->ssp_task, soput, so);
    > > -	task_add(sosplice_taskq, &so->so_sp->ssp_task);
    > > -}
    > > -
    > > -void
    > > -soput(void *arg)
    > > -{
    > > -	struct socket *so = arg;
    > > -
    > > -	pool_put(&sosplice_pool, so->so_sp);
    > > -	pool_put(&socket_pool, so);
    > > -}
    > > -
    > > -/*
    > >  * Move data from receive buffer of spliced source socket to send
    > >  * buffer of drain socket.  Try to move as much as possible in one
    > >  * big chunk.  It is a TCP only implementation.
    > > @@ -1652,8 +1601,10 @@ somove(struct socket *so, int wait)
    > > 
    > > 	if (sockdgram)
    > > 		sbassertlocked(&so->so_rcv);
    > > -	else
    > > +	else {
    > > +		sbassertlocked(&sosp->so_snd);
    > > 		soassertlocked(so);
    > > +	}
    > > 
    > > 	mtx_enter(&so->so_rcv.sb_mtx);
    > > 	mtx_enter(&sosp->so_snd.sb_mtx);
    > 
    
    
    
  • Vitaliy Makkoveev:

    tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer

  • Alexander Bluhm:

    tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer