From: Alexander Bluhm Subject: Re: use softnet for socket splicing To: tech@openbsd.org Date: Tue, 23 Sep 2025 23:29:29 +0200 Any chance that we can get this committed before tree lock? bluhm Index: kern/uipc_socket.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v diff -u -p -r1.385 uipc_socket.c --- kern/uipc_socket.c 25 Jul 2025 08:58:44 -0000 1.385 +++ kern/uipc_socket.c 23 Sep 2025 20:17:14 -0000 @@ -52,6 +52,8 @@ #include #include +#include + #ifdef DDB #include #endif @@ -117,14 +119,13 @@ int sominconn = SOMINCONN; struct pool socket_pool; #ifdef SOCKET_SPLICE struct pool sosplice_pool; -struct taskq *sosplice_taskq; -struct rwlock sosplice_lock = RWLOCK_INITIALIZER("sosplicelk"); #define so_splicelen so_sp->ssp_len #define so_splicemax so_sp->ssp_max #define so_spliceidletv so_sp->ssp_idletv #define so_spliceidleto so_sp->ssp_idleto #define so_splicetask so_sp->ssp_task +#define so_splicequeue so_sp->ssp_queue #endif void @@ -433,6 +434,7 @@ discard: #ifdef SOCKET_SPLICE if (so->so_sp) { struct socket *soback; + struct taskq *spq, *spqback; sounlock_shared(so); /* @@ -461,7 +463,6 @@ discard: sounsplice(so->so_sp->ssp_soback, so, freeing); } sbunlock(&soback->so_rcv); - sorele(soback); notsplicedback: sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR); @@ -474,9 +475,16 @@ notsplicedback: } sbunlock(&so->so_rcv); - timeout_del_barrier(&so->so_spliceidleto); - task_del(sosplice_taskq, &so->so_splicetask); - taskq_barrier(sosplice_taskq); + timeout_barrier(&so->so_spliceidleto); + spq = READ_ONCE(so->so_splicequeue); + if (spq != NULL) + taskq_barrier(spq); + if (soback != NULL) { + spqback = READ_ONCE(soback->so_splicequeue); + if (spqback != NULL && spqback != spq) + taskq_barrier(spqback); + sorele(soback); + } solock_shared(so); } @@ -1290,7 +1298,6 @@ sosplice(struct socket *so, int fd, off_ { struct file *fp; struct socket *sosp; - struct taskq *tq; int error = 0; if 
((so->so_proto->pr_flags & PR_SPLICE) == 0) @@ -1312,25 +1319,6 @@ sosplice(struct socket *so, int fd, off_ return (error); } - if (sosplice_taskq == NULL) { - rw_enter_write(&sosplice_lock); - if (sosplice_taskq == NULL) { - tq = taskq_create("sosplice", 1, IPL_SOFTNET, - TASKQ_MPSAFE); - if (tq == NULL) { - rw_exit_write(&sosplice_lock); - return (ENOMEM); - } - /* Ensure the taskq is fully visible to other CPUs. */ - membar_producer(); - sosplice_taskq = tq; - } - rw_exit_write(&sosplice_lock); - } else { - /* Ensure the taskq is fully visible on this CPU. */ - membar_consumer(); - } - /* Find sosp, the drain socket where data will be spliced into. */ if ((error = getsock(curproc, fd, &fp)) != 0) return (error); @@ -1435,15 +1423,15 @@ sounsplice(struct socket *so, struct soc mtx_enter(&sosp->so_snd.sb_mtx); so->so_rcv.sb_flags &= ~SB_SPLICE; sosp->so_snd.sb_flags &= ~SB_SPLICE; + timeout_del(&so->so_spliceidleto); + if (so->so_splicequeue != NULL) + task_del(so->so_splicequeue, &so->so_splicetask); KASSERT(so->so_sp->ssp_socket == sosp); KASSERT(sosp->so_sp->ssp_soback == so); so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL; mtx_leave(&sosp->so_snd.sb_mtx); mtx_leave(&so->so_rcv.sb_mtx); - task_del(sosplice_taskq, &so->so_splicetask); - timeout_del(&so->so_spliceidleto); - /* Do not wakeup a socket that is about to be freed. */ if ((freeing & SOSP_FREEING_READ) == 0) { int readable; @@ -1484,20 +1472,11 @@ void sotask(void *arg) { struct socket *so = arg; - int doyield = 0; sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR); - if (so->so_rcv.sb_flags & SB_SPLICE) { - if (so->so_proto->pr_flags & PR_WANTRCVD) - doyield = 1; + if (so->so_rcv.sb_flags & SB_SPLICE) somove(so, M_DONTWAIT); - } sbunlock(&so->so_rcv); - - if (doyield) { - /* Avoid user land starvation. 
*/ - yield(); - } } /* @@ -1858,8 +1837,11 @@ sorwakeup(struct socket *so) #ifdef SOCKET_SPLICE if (so->so_proto->pr_flags & PR_SPLICE) { mtx_enter(&so->so_rcv.sb_mtx); - if (so->so_rcv.sb_flags & SB_SPLICE) - task_add(sosplice_taskq, &so->so_splicetask); + if (so->so_rcv.sb_flags & SB_SPLICE) { + atomic_cas_ptr(&so->so_splicequeue, NULL, + net_tq(pru_flowid(so))); + task_add(so->so_splicequeue, &so->so_splicetask); + } if (isspliced(so)) { mtx_leave(&so->so_rcv.sb_mtx); return; @@ -1878,9 +1860,14 @@ sowwakeup(struct socket *so) #ifdef SOCKET_SPLICE if (so->so_proto->pr_flags & PR_SPLICE) { mtx_enter(&so->so_snd.sb_mtx); - if (so->so_snd.sb_flags & SB_SPLICE) - task_add(sosplice_taskq, - &so->so_sp->ssp_soback->so_splicetask); + if (so->so_snd.sb_flags & SB_SPLICE) { + struct socket *soback = so->so_sp->ssp_soback; + + atomic_cas_ptr(&soback->so_splicequeue, NULL, + net_tq(pru_flowid(soback))); + task_add(soback->so_splicequeue, + &soback->so_splicetask); + } if (issplicedback(so)) { mtx_leave(&so->so_snd.sb_mtx); return; Index: netinet/in_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.320 in_pcb.c --- netinet/in_pcb.c 14 Jul 2025 21:53:46 -0000 1.320 +++ netinet/in_pcb.c 23 Sep 2025 20:16:02 -0000 @@ -779,6 +779,17 @@ in_peeraddr(struct socket *so, struct mb return (0); } +int +in_flowid(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + if (inp == NULL) + return (0); + return (inp->inp_flowid); +} + /* * Pass some notification to all connections of a protocol * associated with address dst. 
The "usual action" will be Index: netinet/in_pcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v diff -u -p -r1.171 in_pcb.h --- netinet/in_pcb.h 14 Jul 2025 09:01:52 -0000 1.171 +++ netinet/in_pcb.h 23 Sep 2025 20:16:02 -0000 @@ -357,6 +357,7 @@ void in_setpeeraddr(struct inpcb *, str void in_setsockaddr(struct inpcb *, struct mbuf *); int in_sockaddr(struct socket *, struct mbuf *); int in_peeraddr(struct socket *, struct mbuf *); +int in_flowid(struct socket *); int in_baddynamic(u_int16_t, u_int16_t); int in_rootonly(u_int16_t, u_int16_t); int in_pcbselsrc(struct in_addr *, const struct sockaddr_in *, Index: netinet/tcp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v diff -u -p -r1.252 tcp_usrreq.c --- netinet/tcp_usrreq.c 8 Jul 2025 00:47:41 -0000 1.252 +++ netinet/tcp_usrreq.c 23 Sep 2025 20:16:02 -0000 @@ -131,6 +131,7 @@ const struct pr_usrreqs tcp_usrreqs = { .pru_control = in_control, .pru_sockaddr = tcp_sockaddr, .pru_peeraddr = tcp_peeraddr, + .pru_flowid = in_flowid, }; #ifdef INET6 @@ -152,6 +153,7 @@ const struct pr_usrreqs tcp6_usrreqs = { .pru_control = in6_control, .pru_sockaddr = tcp_sockaddr, .pru_peeraddr = tcp_peeraddr, + .pru_flowid = in_flowid, }; #endif Index: netinet/udp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v diff -u -p -r1.349 udp_usrreq.c --- netinet/udp_usrreq.c 18 Jul 2025 08:39:14 -0000 1.349 +++ netinet/udp_usrreq.c 23 Sep 2025 20:16:02 -0000 @@ -136,6 +136,7 @@ const struct pr_usrreqs udp_usrreqs = { .pru_control = in_control, .pru_sockaddr = in_sockaddr, .pru_peeraddr = in_peeraddr, + .pru_flowid = in_flowid, }; #ifdef INET6 @@ -150,6 +151,7 @@ const struct pr_usrreqs udp6_usrreqs = { .pru_control = in6_control, .pru_sockaddr = in6_sockaddr, 
.pru_peeraddr = in6_peeraddr, + .pru_flowid = in_flowid, }; #endif Index: sys/protosw.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v diff -u -p -r1.72 protosw.h --- sys/protosw.h 2 Mar 2025 21:28:32 -0000 1.72 +++ sys/protosw.h 23 Sep 2025 20:16:02 -0000 @@ -86,6 +86,7 @@ struct pr_usrreqs { struct mbuf *); int (*pru_sockaddr)(struct socket *, struct mbuf *); int (*pru_peeraddr)(struct socket *, struct mbuf *); + int (*pru_flowid)(struct socket *); int (*pru_connect2)(struct socket *, struct socket *); }; @@ -392,6 +393,12 @@ static inline int pru_peeraddr(struct socket *so, struct mbuf *addr) { return (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, addr); +} + +static inline int +pru_flowid(struct socket *so) +{ + return (*so->so_proto->pr_usrreqs->pru_flowid)(so); } static inline int Index: sys/socketvar.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v diff -u -p -r1.159 socketvar.h --- sys/socketvar.h 25 Jul 2025 08:58:44 -0000 1.159 +++ sys/socketvar.h 23 Sep 2025 20:16:02 -0000 @@ -74,6 +74,7 @@ struct sosplice { struct timeval ssp_idletv; /* [I] idle timeout */ struct timeout ssp_idleto; struct task ssp_task; /* task for somove */ + struct taskq *ssp_queue; /* [a] softnet queue where we add */ }; /*