From: Alexander Bluhm <bluhm@openbsd.org>
Subject: Re: Remove `head' socket relocking from sonewconn()
To: Vitaliy Makkoveev <mvs@openbsd.org>
Cc: tech@openbsd.org
Date: Wed, 10 Apr 2024 13:39:00 +0200

On Tue, Apr 09, 2024 at 03:42:04PM +0300, Vitaliy Makkoveev wrote:
> uipc_attach() releases solock() because solock() must be taken after
> the `unp_gc_lock' rwlock(9), which protects the `unp_link' list. For
> the same reason, the listening `head' socket also has to be unlocked
> while sonewconn() calls uipc_attach(). This can now be reworked
> because the `so_rcv' sockbuf relies on the `sb_mtx' mutex(9).
> 
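To make the ordering concrete, this is roughly what the attach path had
to do before the diff (a simplified sketch; the exact code is in the
uipc_attach() hunk below):

	/*
	 * solock() is held on entry, but `unp_gc_lock' had to be taken
	 * before solock(), so the socket lock was dropped and re-taken
	 * around the `unp_link' insertion.
	 */
	sounlock(so);
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);
	solock(so);

Once unp_gc() stops taking solock() while holding `unp_gc_lock' (see
the unp_gc() hunk), that ordering constraint disappears, and both the
re-lock above and the `head' unlock in sonewconn() can go.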
> The last `unp_link' foreach loop within unp_gc() discards sockets
> previously marked UNP_GCDEAD. These sockets are not accessed from
> userland. The only exception is sosend() threads of connected sending
> peers, but they only sbappend*() mbuf(9) chains to `so_rcv'. So it is
> enough to unlink the mbuf(9) chain with `sb_mtx' held and discard it
> locklessly.
> 
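Spelled out, the discard then only needs `sb_mtx' (this is the same
code as in the unp_gc() hunk below, shown here next to the reasoning):

	struct sockbuf *sb = &unp->unp_socket->so_rcv;
	struct mbuf *m;

	/* Unhook the chain and reset the sockbuf under `sb_mtx'. */
	mtx_enter(&sb->sb_mtx);
	m = sb->sb_mb;
	memset(&sb->sb_startzero, 0,
	    (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
	sb->sb_timeo_nsecs = INFSLP;
	mtx_leave(&sb->sb_mtx);

	/* The chain is private now; drop file references and free it. */
	unp_scan(m, unp_discard);
	m_purge(m);

Any concurrent sosend() peer appends under the same `sb_mtx', so once
the chain is unhooked nothing else can reach it.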
> Please note, the existing SS_NEWCONN_WAIT logic was never used: the
> listening unix(4) socket is protected from concurrent unp_detach() by
> the vnode(9) lock, yet `head' was relocked every time.
> 
> This diff conflicts with my "Don't take solock() in soreceive..." [1]
> diff, but since that one is stuck I want to commit this one first.
> 
> 1. https://marc.info/?t=171191840300002&r=1&w=2

passed regress with witness

OK bluhm@
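
For a quick hand test on top of regress, something like the following
exercises sonewconn() through listen(2)/connect(2)/accept(2) on an
AF_UNIX socket (just a throwaway example, the socket path is arbitrary):

	#include <sys/socket.h>
	#include <sys/un.h>

	#include <err.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		struct sockaddr_un sun;
		int lsock, csock, asock;

		memset(&sun, 0, sizeof(sun));
		sun.sun_family = AF_UNIX;
		strlcpy(sun.sun_path, "/tmp/sonewconn.sock",
		    sizeof(sun.sun_path));
		unlink(sun.sun_path);

		if ((lsock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1)
			err(1, "socket");
		if (bind(lsock, (struct sockaddr *)&sun, sizeof(sun)) == -1)
			err(1, "bind");
		if (listen(lsock, 5) == -1)
			err(1, "listen");

		if ((csock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1)
			err(1, "socket");
		if (connect(csock, (struct sockaddr *)&sun,
		    sizeof(sun)) == -1)
			err(1, "connect");

		/* The new connection waits on so_q until accept(2). */
		if ((asock = accept(lsock, NULL, NULL)) == -1)
			err(1, "accept");
		printf("accepted fd %d\n", asock);

		close(asock);
		close(csock);
		close(lsock);
		unlink(sun.sun_path);
		return 0;
	}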

> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.327
> diff -u -p -r1.327 uipc_socket.c
> --- sys/kern/uipc_socket.c	2 Apr 2024 14:23:15 -0000	1.327
> +++ sys/kern/uipc_socket.c	9 Apr 2024 10:01:52 -0000
> @@ -65,6 +65,7 @@ void	sotask(void *);
>  void	soreaper(void *);
>  void	soput(void *);
>  int	somove(struct socket *, int);
> +void	sorflush(struct socket *);
>  
>  void	filt_sordetach(struct knote *kn);
>  int	filt_soread(struct knote *kn, long hint);
> @@ -413,15 +414,6 @@ drop:
>  	}
>  	if (so->so_options & SO_ACCEPTCONN) {
>  		int persocket = solock_persocket(so);
> -
> -		if (persocket) {
> -			/* Wait concurrent sonewconn() threads. */
> -			while (so->so_newconn > 0) {
> -				so->so_state |= SS_NEWCONN_WAIT;
> -				sosleep_nsec(so, &so->so_newconn, PSOCK,
> -				    "newcon", INFSLP);
> -			}
> -		}
>  
>  		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
>  			if (persocket)
> Index: sys/kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.147
> diff -u -p -r1.147 uipc_socket2.c
> --- sys/kern/uipc_socket2.c	31 Mar 2024 13:50:00 -0000	1.147
> +++ sys/kern/uipc_socket2.c	9 Apr 2024 10:01:52 -0000
> @@ -179,7 +179,7 @@ sonewconn(struct socket *head, int conns
>  {
>  	struct socket *so;
>  	int persocket = solock_persocket(head);
> -	int error;
> +	int soqueue = connstatus ? 1 : 0;
>  
>  	/*
>  	 * XXXSMP as long as `so' and `head' share the same lock, we
> @@ -232,41 +232,13 @@ sonewconn(struct socket *head, int conns
>  
>  	sigio_copy(&so->so_sigio, &head->so_sigio);
>  
> -	soqinsque(head, so, 0);
> -
> -	/*
> -	 * We need to unlock `head' because PCB layer could release
> -	 * solock() to enforce desired lock order.
> -	 */
> -	if (persocket) {
> -		head->so_newconn++;
> -		sounlock(head);
> -	}
> -
> -	error = pru_attach(so, 0, wait);
> -
> -	if (persocket) {
> -		sounlock(so);
> -		solock(head);
> -		solock(so);
> -
> -		if ((head->so_newconn--) == 0) {
> -			if ((head->so_state & SS_NEWCONN_WAIT) != 0) {
> -				head->so_state &= ~SS_NEWCONN_WAIT;
> -				wakeup(&head->so_newconn);
> -			}
> -		}
> -	}
> -
> -	if (error) {
> -		soqremque(so, 0);
> +	soqinsque(head, so, soqueue);
> +	if (pru_attach(so, 0, wait) != 0) {
> +		soqremque(so, soqueue);
>  		goto fail;
>  	}
> -
>  	if (connstatus) {
>  		so->so_state |= connstatus;
> -		soqremque(so, 0);
> -		soqinsque(head, so, 1);
>  		sorwakeup(head);
>  		wakeup(&head->so_timeo);
>  	}
> Index: sys/kern/uipc_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.203
> diff -u -p -r1.203 uipc_usrreq.c
> --- sys/kern/uipc_usrreq.c	26 Mar 2024 09:46:47 -0000	1.203
> +++ sys/kern/uipc_usrreq.c	9 Apr 2024 10:01:52 -0000
> @@ -293,14 +293,10 @@ uipc_attach(struct socket *so, int proto
>  	so->so_pcb = unp;
>  	getnanotime(&unp->unp_ctime);
>  
> -	/*
> -	 * Enforce `unp_gc_lock' -> `solock()' lock order.
> -	 */
> -	sounlock(so);
>  	rw_enter_write(&unp_gc_lock);
>  	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
>  	rw_exit_write(&unp_gc_lock);
> -	solock(so);
> +
>  	return (0);
>  }
>  
> @@ -753,7 +749,6 @@ unp_detach(struct unpcb *unp)
>  	unp->unp_vnode = NULL;
>  
>  	/*
> -	 * Enforce `unp_gc_lock' -> `solock()' lock order.
>  	 * Enforce `i_lock' -> `solock()' lock order.
>  	 */
>  	sounlock(so);
> @@ -1443,16 +1438,26 @@ unp_gc(void *arg __unused)
>  	if (nunref) {
>  		LIST_FOREACH(unp, &unp_head, unp_link) {
>  			if (unp->unp_gcflags & UNP_GCDEAD) {
> +				struct sockbuf *sb = &unp->unp_socket->so_rcv;
> +				struct mbuf *m;
> +
>  				/*
>  				 * This socket could still be connected
>  				 * and if so it's `so_rcv' is still
>  				 * accessible by concurrent PRU_SEND
>  				 * thread.
>  				 */
> -				so = unp->unp_socket;
> -				solock(so);
> -				sorflush(so);
> -				sounlock(so);
> +
> +				mtx_enter(&sb->sb_mtx);
> +				m = sb->sb_mb;
> +				memset(&sb->sb_startzero, 0,
> +				    (caddr_t)&sb->sb_endzero -
> +				        (caddr_t)&sb->sb_startzero);
> +				sb->sb_timeo_nsecs = INFSLP;
> +				mtx_leave(&sb->sb_mtx);
> +
> +				unp_scan(m, unp_discard);
> +				m_purge(m);
>  			}
>  		}
>  	}
> Index: sys/sys/socketvar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.127
> diff -u -p -r1.127 socketvar.h
> --- sys/sys/socketvar.h	27 Mar 2024 22:47:53 -0000	1.127
> +++ sys/sys/socketvar.h	9 Apr 2024 10:01:52 -0000
> @@ -86,7 +86,6 @@ struct socket {
>  	short	so_q0len;		/* partials on so_q0 */
>  	short	so_qlen;		/* number of connections on so_q */
>  	short	so_qlimit;		/* max number queued connections */
> -	u_long	so_newconn;		/* # of pending sonewconn() threads */
>  	short	so_timeo;		/* connection timeout */
>  	u_long	so_oobmark;		/* chars to oob mark */
>  	u_int	so_error;		/* error affecting connection */
> @@ -169,8 +168,7 @@ struct socket {
>  #define	SS_CONNECTOUT		0x1000	/* connect, not accept, at this end */
>  #define	SS_ISSENDING		0x2000	/* hint for lower layer */
>  #define	SS_DNS			0x4000	/* created using SOCK_DNS socket(2) */
> -#define	SS_NEWCONN_WAIT		0x8000	/* waiting sonewconn() relock */
> -#define	SS_YP			0x10000	/* created using ypconnect(2) */
> +#define	SS_YP			0x8000	/* created using ypconnect(2) */
>  
>  #ifdef _KERNEL
>  
> @@ -400,7 +398,6 @@ int	sosend(struct socket *, struct mbuf 
>  	    struct mbuf *, struct mbuf *, int);
>  int	sosetopt(struct socket *, int, int, struct mbuf *);
>  int	soshutdown(struct socket *, int);
> -void	sorflush(struct socket *);
>  void	sowakeup(struct socket *, struct sockbuf *);
>  void	sorwakeup(struct socket *);
>  void	sowwakeup(struct socket *);