Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: ref count spliced sockets
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Wed, 23 Jul 2025 21:43:20 +0300

Download raw body.

Thread
On Wed, Jul 23, 2025 at 02:44:23PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> Protect the socket in the splicing pointer by reference counting.
> I think this is cleaner to have a proper reference than temporarily
> incrementing the refcount during unsplice.
> 
> ok?
> 

Sure. The only place where we need to take extra reference is the
soclose() where receiving socket is spliced with our dying. That's
why we need to lock it's `so_rcv' buffer.

        if (so->so_sp) {
                struct socket *soback;

                sounlock_shared(so);
                /*
                 * Concurrent sounsplice() locks `sb_mtx' mutexes on
                 * both `so_snd' and `so_rcv' before unsplice sockets.
                 */
                mtx_enter(&so->so_snd.sb_mtx);
                soback = soref(so->so_sp->ssp_soback);
                mtx_leave(&so->so_snd.sb_mtx);

                if (soback == NULL)
                        goto notsplicedback;
                /* ... 
                sblock(&soback->so_rcv, SBL_WAIT | SBL_NOINTR);

> bluhm
> 
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> diff -u -p -r1.382 uipc_socket.c
> --- kern/uipc_socket.c	21 Jul 2025 20:36:41 -0000	1.382
> +++ kern/uipc_socket.c	22 Jul 2025 20:59:14 -0000
> @@ -460,14 +460,11 @@ discard:
>  notsplicedback:
>  		sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
>  		if (isspliced(so)) {
> -			struct socket *sosp;
>  			int freeing = SOSP_FREEING_READ;
>  
>  			if (so == so->so_sp->ssp_socket)
>  				freeing |= SOSP_FREEING_WRITE;
> -			sosp = soref(so->so_sp->ssp_socket);
>  			sounsplice(so, so->so_sp->ssp_socket, freeing);
> -			sorele(sosp);
>  		}
>  		sbunlock(&so->so_rcv);
>  
> @@ -1307,11 +1304,9 @@ sosplice(struct socket *so, int fd, off_
>  	if (fd < 0) {
>  		if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
>  			return (error);
> -		if (so->so_sp && so->so_sp->ssp_socket) {
> -			sosp = soref(so->so_sp->ssp_socket);
> +		if (so->so_sp && so->so_sp->ssp_socket)
>  			sounsplice(so, so->so_sp->ssp_socket, 0);
> -			sorele(sosp);
> -		} else
> +		else
>  			error = EPROTO;
>  		sbunlock(&so->so_rcv);
>  		return (error);
> @@ -1409,8 +1404,8 @@ sosplice(struct socket *so, int fd, off_
>  	/* Splice so and sosp together. */
>  	mtx_enter(&so->so_rcv.sb_mtx);
>  	mtx_enter(&sosp->so_snd.sb_mtx);
> -	so->so_sp->ssp_socket = sosp;
> -	sosp->so_sp->ssp_soback = so;
> +	so->so_sp->ssp_socket = soref(sosp);
> +	sosp->so_sp->ssp_soback = soref(so);
>  	mtx_leave(&sosp->so_snd.sb_mtx);
>  	mtx_leave(&so->so_rcv.sb_mtx);
>  
> @@ -1448,6 +1443,8 @@ sounsplice(struct socket *so, struct soc
>  	mtx_enter(&sosp->so_snd.sb_mtx);
>  	so->so_rcv.sb_flags &= ~SB_SPLICE;
>  	sosp->so_snd.sb_flags &= ~SB_SPLICE;
> +	KASSERT(so->so_sp->ssp_socket == sosp);
> +	KASSERT(sosp->so_sp->ssp_soback == so);
>  	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
>  	mtx_leave(&sosp->so_snd.sb_mtx);
>  	mtx_leave(&so->so_rcv.sb_mtx);
> @@ -1473,6 +1470,9 @@ sounsplice(struct socket *so, struct soc
>  			sowwakeup(sosp);
>  		sounlock_shared(sosp);
>  	}
> +
> +	sorele(sosp);
> +	sorele(so);
>  }
>  
>  void
> @@ -1482,12 +1482,8 @@ soidle(void *arg)
>  
>  	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
>  	if (so->so_rcv.sb_flags & SB_SPLICE) {
> -		struct socket *sosp;
> -
>  		WRITE_ONCE(so->so_error, ETIMEDOUT);
> -		sosp = soref(so->so_sp->ssp_socket);
>  		sounsplice(so, so->so_sp->ssp_socket, 0);
> -		sorele(sosp);
>  	}
>  	sbunlock(&so->so_rcv);
>  }
> @@ -1853,10 +1849,7 @@ somove(struct socket *so, int wait)
>  		sbunlock(&so->so_snd);
>  
>  	if (unsplice) {
> -		soref(sosp);
>  		sounsplice(so, sosp, 0);
> -		sorele(sosp);
> -
>  		return (0);
>  	}
>  	if (timerisset(&so->so_idletv)) {
>