From: Vitaliy Makkoveev Subject: Mark `so_rcv' sockbuf of udp(4) sockets as SB_OWNLOCK To: tech@openbsd.org Date: Fri, 29 Mar 2024 12:16:30 +0300 sbappend*() and soreceive() of SB_MTXLOCK marked sockets uses `sb_mtx' mutex(9) for protection, meanwhile buffer usage check and corresponding sbwait() sleep still serialized by solock(). Mark udp(4) as SB_OWNLOCK to avoid solock() serialisation and rely to `sb_mtx' mutex(9). The `sb_state' and `sb_flags' modifications must be protected by `sb_mtx' too. soo_ioctl() and socantrcvmore() hunks were missed for unix(4) sockets, but they are common for all sockets. divert(4) and raw sockets also needs to be converted. I can include conversion to this diff too. sbwait() modification was tested with blocking recvfrom(2) test programs. Index: sys/kern/sys_socket.c =================================================================== RCS file: /cvs/src/sys/kern/sys_socket.c,v retrieving revision 1.62 diff -u -p -r1.62 sys_socket.c --- sys/kern/sys_socket.c 26 Mar 2024 09:46:47 -0000 1.62 +++ sys/kern/sys_socket.c 29 Mar 2024 08:16:50 -0000 @@ -91,6 +91,7 @@ soo_ioctl(struct file *fp, u_long cmd, c case FIOASYNC: solock(so); + mtx_enter(&so->so_rcv.sb_mtx); if (*(int *)data) { so->so_rcv.sb_flags |= SB_ASYNC; so->so_snd.sb_flags |= SB_ASYNC; @@ -98,6 +99,7 @@ soo_ioctl(struct file *fp, u_long cmd, c so->so_rcv.sb_flags &= ~SB_ASYNC; so->so_snd.sb_flags &= ~SB_ASYNC; } + mtx_leave(&so->so_rcv.sb_mtx); sounlock(so); break; Index: sys/kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.323 diff -u -p -r1.323 uipc_socket.c --- sys/kern/uipc_socket.c 27 Mar 2024 22:47:53 -0000 1.323 +++ sys/kern/uipc_socket.c 29 Mar 2024 08:16:50 -0000 @@ -155,6 +155,8 @@ soalloc(const struct protosw *prp, int w case AF_INET6: switch (prp->pr_type) { case SOCK_DGRAM: + so->so_rcv.sb_flags |= SB_OWNLOCK; + /* FALLTHROUGH */ case SOCK_RAW: so->so_rcv.sb_flags |= SB_MTXLOCK; break; @@ -1392,7 +1394,9 @@ sosplice(struct socket *so, int fd, off_ * we sleep, the socket buffers are not marked as spliced yet. */ if (somove(so, M_WAIT)) { + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_flags |= SB_SPLICE; + mtx_leave(&so->so_rcv.sb_mtx); sosp->so_snd.sb_flags |= SB_SPLICE; } @@ -1420,7 +1424,9 @@ sounsplice(struct socket *so, struct soc task_del(sosplice_taskq, &so->so_splicetask); timeout_del(&so->so_idleto); sosp->so_snd.sb_flags &= ~SB_SPLICE; + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_flags &= ~SB_SPLICE; + mtx_leave(&so->so_rcv.sb_mtx); so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL; /* Do not wakeup a socket that is about to be freed. */ if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so)) @@ -1678,6 +1684,7 @@ somove(struct socket *so, int wait) pru_rcvd(so); /* Receive buffer did shrink by len bytes, adjust oob. */ + mtx_enter(&so->so_rcv.sb_mtx); rcvstate = so->so_rcv.sb_state; so->so_rcv.sb_state &= ~SS_RCVATMARK; oobmark = so->so_oobmark; @@ -1688,6 +1695,7 @@ somove(struct socket *so, int wait) if (oobmark >= len) oobmark = 0; } + mtx_leave(&so->so_rcv.sb_mtx); /* * Handle oob data. If any malloc fails, ignore error. Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.146 diff -u -p -r1.146 uipc_socket2.c --- sys/kern/uipc_socket2.c 27 Mar 2024 22:47:53 -0000 1.146 +++ sys/kern/uipc_socket2.c 29 Mar 2024 08:16:50 -0000 @@ -351,7 +351,9 @@ void socantrcvmore(struct socket *so) { soassertlocked(so); + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_state |= SS_CANTRCVMORE; + mtx_leave(&so->so_rcv.sb_mtx); sorwakeup(so); } Index: sys/nfs/nfs_socket.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_socket.c,v retrieving revision 1.146 diff -u -p -r1.146 nfs_socket.c --- sys/nfs/nfs_socket.c 22 Mar 2024 07:15:04 -0000 1.146 +++ sys/nfs/nfs_socket.c 29 Mar 2024 08:16:50 -0000 @@ -371,7 +371,9 @@ nfs_connect(struct nfsmount *nmp, struct error = soreserve(so, sndreserve, rcvreserve); if (error) goto bad_locked; + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_flags |= SB_NOINTR; + mtx_leave(&so->so_rcv.sb_mtx); so->so_snd.sb_flags |= SB_NOINTR; sounlock(so); Index: sys/nfs/nfs_syscalls.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_syscalls.c,v retrieving revision 1.122 diff -u -p -r1.122 nfs_syscalls.c --- sys/nfs/nfs_syscalls.c 22 Mar 2024 07:15:04 -0000 1.122 +++ sys/nfs/nfs_syscalls.c 29 Mar 2024 08:16:51 -0000 @@ -297,12 +297,12 @@ nfssvc_addsock(struct file *fp, struct m m_freem(m); } solock(so); - so->so_rcv.sb_flags &= ~SB_NOINTR; mtx_enter(&so->so_rcv.sb_mtx); + so->so_rcv.sb_flags &= ~SB_NOINTR; so->so_rcv.sb_timeo_nsecs = INFSLP; mtx_leave(&so->so_rcv.sb_mtx); - so->so_snd.sb_flags &= ~SB_NOINTR; mtx_enter(&so->so_snd.sb_mtx); + so->so_snd.sb_flags &= ~SB_NOINTR; so->so_snd.sb_timeo_nsecs = INFSLP; mtx_leave(&so->so_snd.sb_mtx); sounlock(so);