Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: use softnet for socket splicing
To:
David Gwynne <david@gwynne.id.au>
Cc:
tech@openbsd.org
Date:
Wed, 30 Jul 2025 23:24:48 +0200

Download raw body.

Thread
On Sat, Jul 26, 2025 at 02:02:11PM +1000, David Gwynne wrote:
> On Fri, Jul 25, 2025 at 02:07:57PM +0200, Alexander Bluhm wrote:
> > Hi,
> > 
> > Currently socket splicing runs on one dedicated kernel thread.  This
> > design is from a time when softnet was still a soft interrupt.
> 
> how time flies.
> 
> > Now with multiple softnet threads, I want to retire the sosplice
> > thread.  Instead call sotask() with the softnet task queue.  For
> > that I have to pass the queue down in struct netstack.  Basically
> > sorwakeup() and sowwakeup() get an additional argument.  If netstack
> > and softnet are available, I use this specific task queue.  Otherwise
> > softnet thread 0 is sufficient.  The hot path receiving packets
> > will distribute them over all softnet threads.
> > 
> > Keeping the same softnet means that we take a bunch of packets from
> > the network driver, do input processing, store them in socket
> > buffers.  Then the same thread handles the splicing task, calls
> > somove() and does output processing.  There is no concurrent locking
> > or scheduling, ideally packets stay on the same CPU.  Before I had
> > a yield() in sotask() to allow accumulation of packets.  With the
> > new design this is no longer necessary.
> > 
> > As we run on softnet task queue and add splice tasks there, task
> > barrier causes deadlock.  I replaced them with reference count in
> > task_add(), task_del(), and sotask().
> 
> hmm. you should be able to call taskq_barrier() from a task that's
> running on the relevant taskq. i added it in src/sys/kern/kern_task.c
> r1.28 for drm.

I did see deadlocks in my previous diff where I tried that.
But maybe my old diff had some other bug.  

> 
> also, this idiom:
> 
> +			if (task_add(soback->so_splicequeue,
> +			    &soback->so_splicetask))
> +				soref(soback);
> 
> is unsafe.
> 
> you have to assume that it's possible (regardless of how unlikely it is)
> that the task will run as soon as it's added, and before that soref is
> called. if you're holding the only ref before the task_add call, the
> rele in the task itself can drop the count to zero and destroy the thing.
> the safe pattern is:
> 
> +			soref(soback);
> +			if (!task_add(soback->so_splicequeue,
> +			    &soback->so_splicetask))
> +				sorele(soback);
> 
> bouncing the refcnt around sucks though, which is why barriers can be
> useful. alternatively, you can set it up so you have an extra ref to
> mitigate some of the extra counting.

An alternative could be to use a splicing list instead of a task for
the same softnet thread.  I use a socket queue on netstack for this.
As the queue is always used by the same thread, no locking is needed.

Performance of a socket queue or splicing task on the same softnet
does not make much difference.  I kept the dedicated task thread
for all the cases where no netstack is available.  It is never
called in the hot path.  We may remove that later.

I am still undecided whether the socket queue or the splice task is the
better solution.  In both cases they should run on the same softnet
thread.

bluhm

Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.385 uipc_socket.c
--- kern/uipc_socket.c	25 Jul 2025 08:58:44 -0000	1.385
+++ kern/uipc_socket.c	30 Jul 2025 09:34:47 -0000
@@ -52,6 +52,9 @@
 #include <sys/time.h>
 #include <sys/refcnt.h>
 
+#include <net/if.h>
+#include <net/if_var.h>
+
 #ifdef DDB
 #include <machine/db_machdep.h>
 #endif
@@ -62,6 +65,7 @@ int	sosplice(struct socket *, int, off_t
 void	sounsplice(struct socket *, struct socket *, int);
 void	soidle(void *);
 void	sotask(void *);
+void	sosp_insertq(struct socket *, struct netstack *);
 int	somove(struct socket *, int);
 void	sorflush(struct socket *);
 
@@ -125,6 +129,8 @@ struct rwlock sosplice_lock = RWLOCK_INI
 #define so_spliceidletv	so_sp->ssp_idletv
 #define so_spliceidleto	so_sp->ssp_idleto
 #define so_splicetask	so_sp->ssp_task
+#define so_spliceqhead	so_sp->ssp_qhead
+#define so_spliceqentry	so_sp->ssp_qentry
 #endif
 
 void
@@ -475,8 +481,7 @@ notsplicedback:
 		sbunlock(&so->so_rcv);
 
 		timeout_del_barrier(&so->so_spliceidleto);
-		task_del(sosplice_taskq, &so->so_splicetask);
-		taskq_barrier(sosplice_taskq);
+		taskq_del_barrier(sosplice_taskq, &so->so_splicetask);
 
 		solock_shared(so);
 	}
@@ -1441,8 +1446,8 @@ sounsplice(struct socket *so, struct soc
 	mtx_leave(&sosp->so_snd.sb_mtx);
 	mtx_leave(&so->so_rcv.sb_mtx);
 
-	task_del(sosplice_taskq, &so->so_splicetask);
 	timeout_del(&so->so_spliceidleto);
+	task_del(sosplice_taskq, &so->so_splicetask);
 
 	/* Do not wakeup a socket that is about to be freed. */
 	if ((freeing & SOSP_FREEING_READ) == 0) {
@@ -1453,13 +1458,13 @@ sounsplice(struct socket *so, struct soc
 		readable = so->so_qlen || soreadable(so);
 		mtx_leave(&so->so_rcv.sb_mtx);
 		if (readable)
-			sorwakeup(so);
+			sorwakeup(so, NULL);
 		sounlock_shared(so);
 	}
 	if ((freeing & SOSP_FREEING_WRITE) == 0) {
 		solock_shared(sosp);
 		if (sowriteable(sosp))
-			sowwakeup(sosp);
+			sowwakeup(sosp, NULL);
 		sounlock_shared(sosp);
 	}
 
@@ -1484,20 +1489,51 @@ void
 sotask(void *arg)
 {
 	struct socket *so = arg;
-	int doyield = 0;
 
 	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
-	if (so->so_rcv.sb_flags & SB_SPLICE) {
-		if (so->so_proto->pr_flags & PR_WANTRCVD)
-			doyield = 1;
+	if (so->so_rcv.sb_flags & SB_SPLICE)
 		somove(so, M_DONTWAIT);
-	}
 	sbunlock(&so->so_rcv);
+}
+
+void
+sosp_processq(struct netstack *ns)
+{
+	struct socket *so;
 
-	if (doyield) {
-		/* Avoid user land starvation. */
-		yield();
+	/*
+	 * Socket queue is not locked as insert and process run on same
+	 * softnet thread.  so_spliceqhead is atomic to prevent double insert.
+	 * New entries can be added while processing queue.  Mbuf ph_loopcnt
+	 * prevents endless looping.
+	 */
+	while ((so = TAILQ_FIRST(&ns->ns_spliceq)) != NULL) {
+		KASSERT(so->so_spliceqhead == &ns->ns_spliceq);
+		TAILQ_REMOVE(so->so_spliceqhead, so, so_spliceqentry);
+		membar_exit();
+		WRITE_ONCE(so->so_spliceqhead, NULL);
+		sotask(so);
+		sorele(so);
+	}
+}
+
+void
+sosp_insertq(struct socket *so, struct netstack *ns)
+{
+	if (ns == NULL) {
+		/* no network stack available, use task instead */
+		task_add(sosplice_taskq, &so->so_splicetask);
+		return;
+	}
+	if (atomic_cas_ptr(&so->so_spliceqhead, NULL, &ns->ns_spliceq) ==
+	    NULL) {
+		/* not queued yet, add to current softnet task */
+		membar_enter_after_atomic();
+		soref(so);
+		TAILQ_INSERT_TAIL(so->so_spliceqhead, so, so_spliceqentry);
+		return;
 	}
+	/* some other softnet task will process this socket on its queue */
 }
 
 /*
@@ -1853,13 +1889,13 @@ somove(struct socket *so, int wait)
 #endif /* SOCKET_SPLICE */
 
 void
-sorwakeup(struct socket *so)
+sorwakeup(struct socket *so, struct netstack *ns)
 {
 #ifdef SOCKET_SPLICE
 	if (so->so_proto->pr_flags & PR_SPLICE) {
 		mtx_enter(&so->so_rcv.sb_mtx);
 		if (so->so_rcv.sb_flags & SB_SPLICE)
-			task_add(sosplice_taskq, &so->so_splicetask);
+			sosp_insertq(so, ns);
 		if (isspliced(so)) {
 			mtx_leave(&so->so_rcv.sb_mtx);
 			return;
@@ -1873,14 +1909,13 @@ sorwakeup(struct socket *so)
 }
 
 void
-sowwakeup(struct socket *so)
+sowwakeup(struct socket *so, struct netstack *ns)
 {
 #ifdef SOCKET_SPLICE
 	if (so->so_proto->pr_flags & PR_SPLICE) {
 		mtx_enter(&so->so_snd.sb_mtx);
 		if (so->so_snd.sb_flags & SB_SPLICE)
-			task_add(sosplice_taskq,
-			    &so->so_sp->ssp_soback->so_splicetask);
+			sosp_insertq(so->so_sp->ssp_soback, ns);
 		if (issplicedback(so)) {
 			mtx_leave(&so->so_snd.sb_mtx);
 			return;
Index: kern/uipc_socket2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
diff -u -p -r1.186 uipc_socket2.c
--- kern/uipc_socket2.c	14 Jul 2025 21:47:26 -0000	1.186
+++ kern/uipc_socket2.c	28 Jul 2025 09:10:37 -0000
@@ -113,15 +113,15 @@ soisconnected(struct socket *so)
 
 		soqremque(so, 0);
 		soqinsque(head, so, 1);
-		sorwakeup(head);
+		sorwakeup(head, NULL);
 		wakeup_one(&head->so_timeo);
 
 		sounlock(head);
 		sorele(head);
 	} else {
 		wakeup(&so->so_timeo);
-		sorwakeup(so);
-		sowwakeup(so);
+		sorwakeup(so, NULL);
+		sowwakeup(so, NULL);
 	}
 }
 
@@ -141,8 +141,8 @@ soisdisconnecting(struct socket *so)
 	mtx_leave(&so->so_snd.sb_mtx);
 
 	wakeup(&so->so_timeo);
-	sowwakeup(so);
-	sorwakeup(so);
+	sowwakeup(so, NULL);
+	sorwakeup(so, NULL);
 }
 
 void
@@ -162,8 +162,8 @@ soisdisconnected(struct socket *so)
 	so->so_state |= SS_ISDISCONNECTED;
 
 	wakeup(&so->so_timeo);
-	sowwakeup(so);
-	sorwakeup(so);
+	sowwakeup(so, NULL);
+	sorwakeup(so, NULL);
 }
 
 /*
@@ -233,7 +233,7 @@ sonewconn(struct socket *head, int conns
 	}
 	if (connstatus) {
 		so->so_state |= connstatus;
-		sorwakeup(head);
+		sorwakeup(head, NULL);
 		wakeup(&head->so_timeo);
 	}
 
@@ -308,7 +308,7 @@ socantsendmore(struct socket *so)
 	mtx_enter(&so->so_snd.sb_mtx);
 	so->so_snd.sb_state |= SS_CANTSENDMORE;
 	mtx_leave(&so->so_snd.sb_mtx);
-	sowwakeup(so);
+	sowwakeup(so, NULL);
 }
 
 void
@@ -317,7 +317,7 @@ socantrcvmore(struct socket *so)
 	mtx_enter(&so->so_rcv.sb_mtx);
 	so->so_rcv.sb_state |= SS_CANTRCVMORE;
 	mtx_leave(&so->so_rcv.sb_mtx);
-	sorwakeup(so);
+	sorwakeup(so, NULL);
 }
 
 void
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_usrreq.c,v
diff -u -p -r1.220 uipc_usrreq.c
--- kern/uipc_usrreq.c	12 Jun 2025 20:37:58 -0000	1.220
+++ kern/uipc_usrreq.c	28 Jul 2025 09:10:37 -0000
@@ -502,7 +502,7 @@ uipc_rcvd(struct socket *so)
 	so2->so_snd.sb_cc = so->so_rcv.sb_cc;
 	mtx_leave(&so2->so_snd.sb_mtx);
 	mtx_leave(&so->so_rcv.sb_mtx);
-	sowwakeup(so2);
+	sowwakeup(so2, NULL);
 }
 
 int
@@ -568,7 +568,7 @@ uipc_send(struct socket *so, struct mbuf
 	mtx_leave(&so2->so_rcv.sb_mtx);
 
 	if (dowakeup)
-		sorwakeup(so2);
+		sorwakeup(so2, NULL);
 
 	m = NULL;
 
@@ -636,7 +636,7 @@ uipc_dgram_send(struct socket *so, struc
 	mtx_leave(&so2->so_rcv.sb_mtx);
 
 	if (dowakeup)
-		sorwakeup(so2);
+		sorwakeup(so2, NULL);
 	if (nam)
 		unp_disconnect(unp);
 
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
diff -u -p -r1.740 if.c
--- net/if.c	21 Jul 2025 20:36:41 -0000	1.740
+++ net/if.c	30 Jul 2025 08:19:06 -0000
@@ -973,6 +973,7 @@ if_input_process(struct ifnet *ifp, stru
 {
 	struct mbuf *m;
 	struct softnet *sn;
+	struct netstack *ns;
 
 	if (ml_empty(ml))
 		return;
@@ -988,22 +989,28 @@ if_input_process(struct ifnet *ifp, stru
 	 */
 
 	sn = net_sn(idx);
-	ml_init(&sn->sn_netstack.ns_tcp_ml);
+	ns = &sn->sn_netstack;
+	ml_init(&ns->ns_tcp_ml);
 #ifdef INET6
-	ml_init(&sn->sn_netstack.ns_tcp6_ml);
+	ml_init(&ns->ns_tcp6_ml);
 #endif
+	TAILQ_INIT(&ns->ns_spliceq);
 
 	NET_LOCK_SHARED();
 
 	while ((m = ml_dequeue(ml)) != NULL)
-		(*ifp->if_input)(ifp, m, &sn->sn_netstack);
+		(*ifp->if_input)(ifp, m, ns);
 
-	tcp_input_mlist(&sn->sn_netstack.ns_tcp_ml, AF_INET);
+	tcp_input_mlist(&ns->ns_tcp_ml, AF_INET, ns);
 #ifdef INET6
-	tcp_input_mlist(&sn->sn_netstack.ns_tcp6_ml, AF_INET6);
+	tcp_input_mlist(&ns->ns_tcp6_ml, AF_INET6, ns);
 #endif
 
 	NET_UNLOCK_SHARED();
+
+#ifdef SOCKET_SPLICE
+	sosp_processq(ns);
+#endif
 }
 
 void
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
diff -u -p -r1.303 if_ethersubr.c
--- net/if_ethersubr.c	7 Jul 2025 02:28:50 -0000	1.303
+++ net/if_ethersubr.c	28 Jul 2025 09:10:37 -0000
@@ -2108,7 +2108,7 @@ ether_frm_recv(struct socket *so, struct
 		return;
 	}
 
-	sorwakeup(so);
+	sorwakeup(so, NULL);
 }
 
 static struct mbuf *
Index: net/if_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
diff -u -p -r1.139 if_var.h
--- net/if_var.h	19 Jul 2025 16:40:40 -0000	1.139
+++ net/if_var.h	28 Jul 2025 12:47:39 -0000
@@ -91,10 +91,13 @@ struct ifnet;
 struct task;
 struct cpumem;
 
+TAILQ_HEAD(soqueue, socket);
+
 struct netstack {
-	struct route		ns_route;
-	struct mbuf_list	ns_tcp_ml;
-	struct mbuf_list	ns_tcp6_ml;
+	struct route		 ns_route;
+	struct mbuf_list	 ns_tcp_ml;
+	struct mbuf_list	 ns_tcp6_ml;
+	struct soqueue		 ns_spliceq;
 };
 
 /*
Index: net/pfkeyv2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
diff -u -p -r1.270 pfkeyv2.c
--- net/pfkeyv2.c	7 Jul 2025 02:28:50 -0000	1.270
+++ net/pfkeyv2.c	28 Jul 2025 09:10:37 -0000
@@ -457,7 +457,7 @@ pfkey_sendup(struct pkpcb *kp, struct mb
 		return (ENOBUFS);
 	}
 
-	sorwakeup(so);
+	sorwakeup(so, NULL);
 	return (0);
 }
 
Index: net/rtsock.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
diff -u -p -r1.386 rtsock.c
--- net/rtsock.c	15 Jul 2025 09:55:49 -0000	1.386
+++ net/rtsock.c	28 Jul 2025 09:10:37 -0000
@@ -485,7 +485,7 @@ rtm_senddesync(struct socket *so)
 
 		if (ret != 0) {
 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
-			sorwakeup(rop->rop_socket);
+			sorwakeup(rop->rop_socket, NULL);
 			return;
 		}
 		m_freem(desync_mbuf);
@@ -612,7 +612,7 @@ rtm_sendup(struct socket *so, struct mbu
 		return (ENOBUFS);
 	}
 
-	sorwakeup(so);
+	sorwakeup(so, NULL);
 	return (0);
 }
 
Index: netinet/ip_divert.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v
diff -u -p -r1.107 ip_divert.c
--- netinet/ip_divert.c	8 Jul 2025 00:47:41 -0000	1.107
+++ netinet/ip_divert.c	28 Jul 2025 09:10:37 -0000
@@ -237,7 +237,7 @@ divert_packet(struct mbuf *m, int dir, u
 		goto bad;
 	}
 	mtx_leave(&so->so_rcv.sb_mtx);
-	sorwakeup(so);
+	sorwakeup(so, NULL);
 
 	in_pcbunref(inp);
 	return;
Index: netinet/ip_mroute.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_mroute.c,v
diff -u -p -r1.150 ip_mroute.c
--- netinet/ip_mroute.c	19 Jul 2025 16:40:40 -0000	1.150
+++ netinet/ip_mroute.c	28 Jul 2025 09:10:37 -0000
@@ -1129,7 +1129,7 @@ socket_send(struct socket *so, struct mb
 		mtx_leave(&so->so_rcv.sb_mtx);
 
 		if (ret != 0) {
-			sorwakeup(so);
+			sorwakeup(so, NULL);
 			return (0);
 		}
 	}
Index: netinet/raw_ip.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
diff -u -p -r1.167 raw_ip.c
--- netinet/raw_ip.c	8 Jul 2025 00:47:41 -0000	1.167
+++ netinet/raw_ip.c	28 Jul 2025 09:10:37 -0000
@@ -115,7 +115,7 @@ const struct pr_usrreqs rip_usrreqs = {
 };
 
 void    rip_sbappend(struct inpcb *, struct mbuf *, struct ip *,
-	    struct sockaddr_in *);
+	    struct sockaddr_in *, struct netstack *);
 
 /*
  * Initialize raw connection block q.
@@ -195,7 +195,7 @@ rip_input(struct mbuf **mp, int *offp, i
 
 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 			if (n != NULL)
-				rip_sbappend(last, n, ip, &ripsrc);
+				rip_sbappend(last, n, ip, &ripsrc, ns);
 			in_pcbunref(last);
 
 			mtx_enter(&rawcbtable.inpt_mtx);
@@ -222,7 +222,7 @@ rip_input(struct mbuf **mp, int *offp, i
 		return IPPROTO_DONE;
 	}
 
-	rip_sbappend(last, m, ip, &ripsrc);
+	rip_sbappend(last, m, ip, &ripsrc, ns);
 	in_pcbunref(last);
 
 	return IPPROTO_DONE;
@@ -230,7 +230,7 @@ rip_input(struct mbuf **mp, int *offp, i
 
 void
 rip_sbappend(struct inpcb *inp, struct mbuf *m, struct ip *ip,
-    struct sockaddr_in *ripsrc)
+    struct sockaddr_in *ripsrc, struct netstack *ns)
 {
 	struct socket *so = inp->inp_socket;
 	struct mbuf *opts = NULL;
@@ -249,7 +249,7 @@ rip_sbappend(struct inpcb *inp, struct m
 		m_freem(opts);
 		ipstat_inc(ips_noproto);
 	} else
-		sorwakeup(so);
+		sorwakeup(so, ns);
 }
 
 /*
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.457 tcp_input.c
--- netinet/tcp_input.c	24 Jul 2025 21:34:07 -0000	1.457
+++ netinet/tcp_input.c	28 Jul 2025 09:10:37 -0000
@@ -174,9 +174,10 @@ do { \
 	if_put(ifp); \
 } while (0)
 
-int	 tcp_input_solocked(struct mbuf **, int *, int, int, struct socket **);
+int	 tcp_input_solocked(struct mbuf **, int *, int, int, struct socket **,
+	    struct netstack *);
 int	 tcp_mss_adv(struct rtentry *, int);
-int	 tcp_flush_queue(struct tcpcb *);
+int	 tcp_flush_queue(struct tcpcb *, struct netstack *);
 void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_newreno_partialack(struct tcpcb *, struct tcphdr *);
 
@@ -208,7 +209,8 @@ struct syn_cache *syn_cache_lookup(const
  */
 
 int
-tcp_reass(struct tcpcb *tp, struct tcphdr *th, struct mbuf *m, int *tlen)
+tcp_reass(struct tcpcb *tp, struct tcphdr *th, struct mbuf *m, int *tlen,
+    struct netstack *ns)
 {
 	struct tcpqent *p, *q, *nq, *tiqe;
 
@@ -303,11 +305,11 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 	if (th->th_seq != tp->rcv_nxt)
 		return (0);
 
-	return (tcp_flush_queue(tp));
+	return (tcp_flush_queue(tp, ns));
 }
 
 int
-tcp_flush_queue(struct tcpcb *tp)
+tcp_flush_queue(struct tcpcb *tp, struct netstack *ns)
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
 	struct tcpqent *q, *nq;
@@ -342,7 +344,7 @@ tcp_flush_queue(struct tcpcb *tp)
 		q = nq;
 	} while (q != NULL && q->tcpqe_tcp->th_seq == tp->rcv_nxt);
 	tp->t_flags |= TF_BLOCKOUTPUT;
-	sorwakeup(so);
+	sorwakeup(so, ns);
 	tp->t_flags &= ~TF_BLOCKOUTPUT;
 	return (flags);
 }
@@ -351,7 +353,7 @@ int
 tcp_input(struct mbuf **mp, int *offp, int proto, int af, struct netstack *ns)
 {
 	if (ns == NULL)
-		return tcp_input_solocked(mp, offp, proto, af, NULL);
+		return tcp_input_solocked(mp, offp, proto, af, NULL, ns);
 	(*mp)->m_pkthdr.ph_cookie = (void *)(long)(*offp);
 	switch (af) {
 	case AF_INET:
@@ -370,7 +372,7 @@ tcp_input(struct mbuf **mp, int *offp, i
 }
 
 void
-tcp_input_mlist(struct mbuf_list *ml, int af)
+tcp_input_mlist(struct mbuf_list *ml, int af, struct netstack *ns)
 {
 	struct socket *so = NULL;
 	struct mbuf *m;
@@ -380,7 +382,7 @@ tcp_input_mlist(struct mbuf_list *ml, in
 
 		off = (long)m->m_pkthdr.ph_cookie;
 		m->m_pkthdr.ph_cookie = NULL;
-		nxt = tcp_input_solocked(&m, &off, IPPROTO_TCP, af, &so);
+		nxt = tcp_input_solocked(&m, &off, IPPROTO_TCP, af, &so, ns);
 		KASSERT(nxt == IPPROTO_DONE);
 	}
 
@@ -393,7 +395,7 @@ tcp_input_mlist(struct mbuf_list *ml, in
  */
 int
 tcp_input_solocked(struct mbuf **mp, int *offp, int proto, int af,
-    struct socket **solocked)
+    struct socket **solocked, struct netstack *ns)
 {
 	struct mbuf *m = *mp;
 	int iphlen = *offp;
@@ -1075,7 +1077,7 @@ findpcb:
 				tcp_update_sndspace(tp);
 				if (sb_notify(&so->so_snd)) {
 					tp->t_flags |= TF_BLOCKOUTPUT;
-					sowwakeup(so);
+					sowwakeup(so, ns);
 					tp->t_flags &= ~TF_BLOCKOUTPUT;
 				}
 				if (so->so_snd.sb_cc ||
@@ -1131,7 +1133,7 @@ findpcb:
 				mtx_leave(&so->so_rcv.sb_mtx);
 			}
 			tp->t_flags |= TF_BLOCKOUTPUT;
-			sorwakeup(so);
+			sorwakeup(so, ns);
 			tp->t_flags &= ~TF_BLOCKOUTPUT;
 			if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
 				(void) tcp_output(tp);
@@ -1264,7 +1266,7 @@ findpcb:
 				tp->snd_scale = tp->requested_s_scale;
 				tp->rcv_scale = tp->request_r_scale;
 			}
-			tcp_flush_queue(tp);
+			tcp_flush_queue(tp, ns);
 
 			/*
 			 * if we didn't have to retransmit the SYN,
@@ -1553,7 +1555,7 @@ trimthenstep6:
 			tp->rcv_scale = tp->request_r_scale;
 			tiwin = th->th_win << tp->snd_scale;
 		}
-		tcp_flush_queue(tp);
+		tcp_flush_queue(tp, ns);
 		tp->snd_wl1 = th->th_seq - 1;
 		/* fall into ... */
 
@@ -1835,7 +1837,7 @@ trimthenstep6:
 		tcp_update_sndspace(tp);
 		if (sb_notify(&so->so_snd)) {
 			tp->t_flags |= TF_BLOCKOUTPUT;
-			sowwakeup(so);
+			sowwakeup(so, ns);
 			tp->t_flags &= ~TF_BLOCKOUTPUT;
 		}
 
@@ -2051,11 +2053,11 @@ dodata:							/* XXX */
 				mtx_leave(&so->so_rcv.sb_mtx);
 			}
 			tp->t_flags |= TF_BLOCKOUTPUT;
-			sorwakeup(so);
+			sorwakeup(so, ns);
 			tp->t_flags &= ~TF_BLOCKOUTPUT;
 		} else {
 			m_adj(m, hdroptlen);
-			tiflags = tcp_reass(tp, th, m, &tlen);
+			tiflags = tcp_reass(tp, th, m, &tlen, ns);
 			tp->t_flags |= TF_ACKNOW;
 		}
 		if (tp->sack_enable)
Index: netinet/tcp_subr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
diff -u -p -r1.216 tcp_subr.c
--- netinet/tcp_subr.c	18 Jul 2025 08:39:14 -0000	1.216
+++ netinet/tcp_subr.c	28 Jul 2025 09:10:37 -0000
@@ -589,8 +589,8 @@ tcp_notify(struct inpcb *inp, int error)
 	else
 		tp->t_softerror = error;
 	wakeup((caddr_t) &so->so_timeo);
-	sorwakeup(so);
-	sowwakeup(so);
+	sorwakeup(so, NULL);
+	sowwakeup(so, NULL);
 }
 
 #ifdef INET6
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.195 tcp_var.h
--- netinet/tcp_var.h	18 Jun 2025 16:15:46 -0000	1.195
+++ netinet/tcp_var.h	28 Jul 2025 09:10:37 -0000
@@ -725,7 +725,7 @@ int	 tcp_dooptions(struct tcpcb *, u_cha
 		struct mbuf *, int, struct tcp_opt_info *, u_int, uint64_t);
 void	 tcp_init(void);
 int	 tcp_input(struct mbuf **, int *, int, int, struct netstack *);
-void	 tcp_input_mlist(struct mbuf_list *, int);
+void	 tcp_input_mlist(struct mbuf_list *, int, struct netstack *);
 int	 tcp_mss(struct tcpcb *, int);
 void	 tcp_mss_update(struct tcpcb *);
 void	 tcp_softlro_glue(struct mbuf_list *, struct mbuf *, struct ifnet *);
@@ -744,7 +744,8 @@ int	 tcp_softtso_chop(struct mbuf_list *
 int	 tcp_if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *,
 	    struct rtentry *, uint32_t, u_int);
 void	 tcp_pulloutofband(struct socket *, u_int, struct mbuf *, int);
-int	 tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *);
+int	 tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *,
+	    struct netstack *);
 void	 tcp_rscale(struct tcpcb *, u_long);
 void	 tcp_respond(struct tcpcb *, caddr_t, struct tcphdr *, tcp_seq,
 		tcp_seq, int, u_int, uint64_t);
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.349 udp_usrreq.c
--- netinet/udp_usrreq.c	18 Jul 2025 08:39:14 -0000	1.349
+++ netinet/udp_usrreq.c	28 Jul 2025 09:10:37 -0000
@@ -709,7 +709,7 @@ udp_sbappend(struct inpcb *inp, struct m
 	}
 	mtx_leave(&so->so_rcv.sb_mtx);
 
-	sorwakeup(so);
+	sorwakeup(so, ns);
 }
 
 /*
@@ -720,8 +720,8 @@ void
 udp_notify(struct inpcb *inp, int errno)
 {
 	inp->inp_socket->so_error = errno;
-	sorwakeup(inp->inp_socket);
-	sowwakeup(inp->inp_socket);
+	sorwakeup(inp->inp_socket, NULL);
+	sowwakeup(inp->inp_socket, NULL);
 }
 
 #ifdef INET6
Index: netinet6/ip6_divert.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_divert.c,v
diff -u -p -r1.108 ip6_divert.c
--- netinet6/ip6_divert.c	8 Jul 2025 00:47:41 -0000	1.108
+++ netinet6/ip6_divert.c	28 Jul 2025 09:10:37 -0000
@@ -235,7 +235,7 @@ divert6_packet(struct mbuf *m, int dir, 
 		goto bad;
 	}
 	mtx_leave(&so->so_rcv.sb_mtx);
-	sorwakeup(so);
+	sorwakeup(so, NULL);
 
 	in_pcbunref(inp);
 	return;
Index: netinet6/ip6_mroute.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_mroute.c,v
diff -u -p -r1.153 ip6_mroute.c
--- netinet6/ip6_mroute.c	25 Jul 2025 22:24:06 -0000	1.153
+++ netinet6/ip6_mroute.c	28 Jul 2025 09:10:37 -0000
@@ -906,7 +906,7 @@ socket6_send(struct socket *so, struct m
 		mtx_leave(&so->so_rcv.sb_mtx);
 
 		if (ret != 0) {
-			sorwakeup(so);
+			sorwakeup(so, NULL);
 			return 0;
 		}
 	}
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
diff -u -p -r1.194 raw_ip6.c
--- netinet6/raw_ip6.c	8 Jul 2025 00:47:41 -0000	1.194
+++ netinet6/raw_ip6.c	28 Jul 2025 09:10:37 -0000
@@ -114,7 +114,7 @@ const struct pr_usrreqs rip6_usrreqs = {
 };
 
 void	rip6_sbappend(struct inpcb *, struct mbuf *, struct ip6_hdr *, int,
-	    struct sockaddr_in6 *);
+	    struct sockaddr_in6 *, struct netstack *);
 
 /*
  * Initialize raw connection block queue.
@@ -229,7 +229,8 @@ rip6_input(struct mbuf **mp, int *offp, 
 
 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 			if (n != NULL)
-				rip6_sbappend(last, n, ip6, *offp, &rip6src);
+				rip6_sbappend(last, n, ip6, *offp, &rip6src,
+				    ns);
 			in_pcbunref(last);
 
 			mtx_enter(&rawin6pcbtable.inpt_mtx);
@@ -262,7 +263,7 @@ rip6_input(struct mbuf **mp, int *offp, 
 		return IPPROTO_DONE;
 	}
 
-	rip6_sbappend(last, m, ip6, *offp, &rip6src);
+	rip6_sbappend(last, m, ip6, *offp, &rip6src, ns);
 	in_pcbunref(last);
 
 	return IPPROTO_DONE;
@@ -270,7 +271,7 @@ rip6_input(struct mbuf **mp, int *offp, 
 
 void
 rip6_sbappend(struct inpcb *inp, struct mbuf *m, struct ip6_hdr *ip6, int hlen,
-    struct sockaddr_in6 *rip6src)
+    struct sockaddr_in6 *rip6src, struct netstack *ns)
 {
 	struct socket *so = inp->inp_socket;
 	struct mbuf *opts = NULL;
@@ -291,7 +292,7 @@ rip6_sbappend(struct inpcb *inp, struct 
 		m_freem(opts);
 		rip6stat_inc(rip6s_fullsock);
 	} else
-		sorwakeup(so);
+		sorwakeup(so, ns);
 }
 
 void
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
diff -u -p -r1.159 socketvar.h
--- sys/socketvar.h	25 Jul 2025 08:58:44 -0000	1.159
+++ sys/socketvar.h	29 Jul 2025 14:21:02 -0000
@@ -50,6 +50,7 @@ typedef	__socklen_t	socklen_t;	/* length
 #endif
 
 TAILQ_HEAD(soqhead, socket);
+struct soqueue;
 
 /*
  * Locks used to protect global data and struct members:
@@ -74,6 +75,8 @@ struct sosplice {
 	struct	timeval ssp_idletv;	/* [I] idle timeout */
 	struct	timeout ssp_idleto;
 	struct	task ssp_task;		/* task for somove */
+	struct	soqueue *ssp_qhead;	/* [a] softnet queue in netstack */
+	TAILQ_ENTRY(socket) ssp_qentry;	/* softnet queue instead of task */
 };
 
 /*
@@ -249,6 +252,7 @@ int	sodisconnect(struct socket *);
 struct socket *soalloc(const struct protosw *, int);
 void	sofree(struct socket *, int);
 void	sorele(struct socket *);
+void	sosp_processq(struct netstack *);
 int	sogetopt(struct socket *, int, int, struct mbuf *);
 void	sohasoutofband(struct socket *);
 void	soisconnected(struct socket *);
@@ -267,8 +271,8 @@ int	sosend(struct socket *, struct mbuf 
 int	sosetopt(struct socket *, int, int, struct mbuf *);
 int	soshutdown(struct socket *, int);
 void	sowakeup(struct socket *, struct sockbuf *);
-void	sorwakeup(struct socket *);
-void	sowwakeup(struct socket *);
+void	sorwakeup(struct socket *, struct netstack *);
+void	sowwakeup(struct socket *, struct netstack *);
 int	sockargs(struct mbuf **, const void *, size_t, int);
 
 int	sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);