Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: Unsplice socket before start the teardown
To:
Alexander Bluhm <alexander.bluhm@gmx.net>
Cc:
tech@openbsd.org
Date:
Fri, 16 Feb 2024 11:44:43 +0300

Download raw body.

Thread
Sorry for this broken diff. I was blindly sure in tcp_detach() behaviour
and didn't tested this version. I did the (*pr_usrreq)(), so it's
strange that I forgot about tcp(4).

This diff could be fixed by moving SS_NOFDREF bit setting after the
spliced socket barriers. So if concurrent thread would kill this tcp(4)
socket, it only destroys PCB, but left `so' alive.

I wanted to move unsplicing from sofree() because the unlocked sblock()
should be called before solock(). Since the solock() is held within
sofree(), it should be released, so I want to remove re-locking from the
stack. With the unlocked sblock() the solock() is missing in the
soreceive() path(), so sblock(so->so_rcv) used to serialize with spliced
socket paths.

This is the fixed diff. The regress/sys/kern/sosplice/ performed without
regressions. I'm fine to delay this diff until tcp(4) or udp(4)
soreceive() unlocking.

Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.320
diff -u -p -r1.320 uipc_socket.c
--- sys/kern/uipc_socket.c	12 Feb 2024 22:48:27 -0000	1.320
+++ sys/kern/uipc_socket.c	16 Feb 2024 08:39:43 -0000
@@ -62,8 +62,6 @@ int	sosplice(struct socket *, int, off_t
 void	sounsplice(struct socket *, struct socket *, int);
 void	soidle(void *);
 void	sotask(void *);
-void	soreaper(void *);
-void	soput(void *);
 int	somove(struct socket *, int);
 void	sorflush(struct socket *);
 
@@ -250,8 +248,6 @@ solisten(struct socket *so, int backlog)
 	return (0);
 }
 
-#define SOSP_FREEING_READ	1
-#define SOSP_FREEING_WRITE	2
 void
 sofree(struct socket *so, int keep_lock)
 {
@@ -316,36 +312,11 @@ sofree(struct socket *so, int keep_lock)
 	sigio_free(&so->so_sigio);
 	klist_free(&so->so_rcv.sb_klist);
 	klist_free(&so->so_snd.sb_klist);
-#ifdef SOCKET_SPLICE
-	if (issplicedback(so)) {
-		int freeing = SOSP_FREEING_WRITE;
-
-		if (so->so_sp->ssp_soback == so)
-			freeing |= SOSP_FREEING_READ;
-		sounsplice(so->so_sp->ssp_soback, so, freeing);
-	}
-	if (isspliced(so)) {
-		int freeing = SOSP_FREEING_READ;
-
-		if (so == so->so_sp->ssp_socket)
-			freeing |= SOSP_FREEING_WRITE;
-		sounsplice(so, so->so_sp->ssp_socket, freeing);
-	}
-#endif /* SOCKET_SPLICE */
 	sbrelease(so, &so->so_snd);
 	sorflush(so);
 	if (!keep_lock)
 		sounlock(so);
-#ifdef SOCKET_SPLICE
-	if (so->so_sp) {
-		/* Reuse splice idle, sounsplice() has been called before. */
-		timeout_set_proc(&so->so_sp->ssp_idleto, soreaper, so);
-		timeout_add(&so->so_sp->ssp_idleto, 0);
-	} else
-#endif /* SOCKET_SPLICE */
-	{
-		pool_put(&socket_pool, so);
-	}
+	pool_put(&socket_pool, so);
 }
 
 static inline uint64_t
@@ -357,6 +328,8 @@ solinger_nsec(struct socket *so)
 	return SEC_TO_NSEC(so->so_linger);
 }
 
+#define SOSP_FREEING_READ	1
+#define SOSP_FREEING_WRITE	2
 /*
  * Close a socket on last file table reference removal.
  * Initiate disconnect if connected.
@@ -433,9 +406,39 @@ drop:
 		}
 	}
 discard:
+#ifdef SOCKET_SPLICE
+	if (issplicedback(so)) {
+		int freeing = SOSP_FREEING_WRITE;
+
+		if (so->so_sp->ssp_soback == so)
+			freeing |= SOSP_FREEING_READ;
+		sounsplice(so->so_sp->ssp_soback, so, freeing);
+	}
+	if (isspliced(so)) {
+		int freeing = SOSP_FREEING_READ;
+
+		if (so == so->so_sp->ssp_socket)
+			freeing |= SOSP_FREEING_WRITE;
+		sounsplice(so, so->so_sp->ssp_socket, freeing);
+	}
+	if (so->so_sp) {
+		/*
+		 * Safe to unlock. Concurrent soclose() thread
+		 * will destroy PCB but left `so' alive because
+		 * SS_NOFDREF bit is clean.
+		 */
+		sounlock(so);
+		taskq_barrier(sosplice_taskq);
+		timeout_del_barrier(&so->so_sp->ssp_idleto);
+		pool_put(&sosplice_pool, so->so_sp);
+		solock(so);
+	}
+#endif /* SOCKET_SPLICE */
+
 	if (so->so_state & SS_NOFDREF)
 		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
 	so->so_state |= SS_NOFDREF;
+
 	/* sofree() calls sounlock(). */
 	sofree(so, 0);
 	return (error);
@@ -1441,32 +1444,6 @@ sotask(void *arg)
 
 	/* Avoid user land starvation. */
 	yield();
-}
-
-/*
- * The socket splicing task or idle timeout may sleep while grabbing the net
- * lock.  As sofree() can be called anytime, sotask() or soidle() could access
- * the socket memory of a freed socket after wakeup.  So delay the pool_put()
- * after all pending socket splicing tasks or timeouts have finished.  Do this
- * by scheduling it on the same threads.
- */
-void
-soreaper(void *arg)
-{
-	struct socket *so = arg;
-
-	/* Reuse splice task, sounsplice() has been called before. */
-	task_set(&so->so_sp->ssp_task, soput, so);
-	task_add(sosplice_taskq, &so->so_sp->ssp_task);
-}
-
-void
-soput(void *arg)
-{
-	struct socket *so = arg;
-
-	pool_put(&sosplice_pool, so->so_sp);
-	pool_put(&socket_pool, so);
 }
 
 /*