From: Alexander Bluhm Subject: Re: Mark `so_rcv' sockbuf of udp(4) sockets as SB_OWNLOCK To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Sun, 31 Mar 2024 15:27:28 +0200 On Fri, Mar 29, 2024 at 12:16:30PM +0300, Vitaliy Makkoveev wrote: > sbappend*() and soreceive() of SB_MTXLOCK marked sockets uses `sb_mtx' > mutex(9) for protection, meanwhile buffer usage check and corresponding > sbwait() sleep still serialized by solock(). Mark udp(4) as SB_OWNLOCK > to avoid solock() serialisation and rely to `sb_mtx' mutex(9). The > `sb_state' and `sb_flags' modifications must be protected by `sb_mtx' > too. soo_ioctl() and socantrcvmore() hunks were missed for unix(4) > sockets, but they are common for all sockets. > > divert(4) and raw sockets also needs to be converted. I can include > conversion to this diff too. > > sbwait() modification was tested with blocking recvfrom(2) test > programs. passed regress, OK bluhm@ > Index: sys/kern/sys_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/sys_socket.c,v > retrieving revision 1.62 > diff -u -p -r1.62 sys_socket.c > --- sys/kern/sys_socket.c 26 Mar 2024 09:46:47 -0000 1.62 > +++ sys/kern/sys_socket.c 29 Mar 2024 08:16:50 -0000 > @@ -91,6 +91,7 @@ soo_ioctl(struct file *fp, u_long cmd, c > > case FIOASYNC: > solock(so); > + mtx_enter(&so->so_rcv.sb_mtx); > if (*(int *)data) { > so->so_rcv.sb_flags |= SB_ASYNC; > so->so_snd.sb_flags |= SB_ASYNC; > @@ -98,6 +99,7 @@ soo_ioctl(struct file *fp, u_long cmd, c > so->so_rcv.sb_flags &= ~SB_ASYNC; > so->so_snd.sb_flags &= ~SB_ASYNC; > } > + mtx_leave(&so->so_rcv.sb_mtx); > sounlock(so); > break; > > Index: sys/kern/uipc_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.323 > diff -u -p -r1.323 uipc_socket.c > --- sys/kern/uipc_socket.c 27 Mar 2024 22:47:53 -0000 1.323 > +++ sys/kern/uipc_socket.c 29 Mar 2024 08:16:50 -0000 > @@ -155,6 +155,8 @@ soalloc(const struct protosw *prp, int w > case AF_INET6: > switch (prp->pr_type) { > case SOCK_DGRAM: > + so->so_rcv.sb_flags |= SB_OWNLOCK; > + /* FALLTHROUGH */ > case SOCK_RAW: > so->so_rcv.sb_flags |= SB_MTXLOCK; > break; > @@ -1392,7 +1394,9 @@ sosplice(struct socket *so, int fd, off_ > * we sleep, the socket buffers are not marked as spliced yet. > */ > if (somove(so, M_WAIT)) { > + mtx_enter(&so->so_rcv.sb_mtx); > so->so_rcv.sb_flags |= SB_SPLICE; > + mtx_leave(&so->so_rcv.sb_mtx); > sosp->so_snd.sb_flags |= SB_SPLICE; > } > > @@ -1420,7 +1424,9 @@ sounsplice(struct socket *so, struct soc > task_del(sosplice_taskq, &so->so_splicetask); > timeout_del(&so->so_idleto); > sosp->so_snd.sb_flags &= ~SB_SPLICE; > + mtx_enter(&so->so_rcv.sb_mtx); > so->so_rcv.sb_flags &= ~SB_SPLICE; > + mtx_leave(&so->so_rcv.sb_mtx); > so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL; > /* Do not wakeup a socket that is about to be freed. */ > if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so)) > @@ -1678,6 +1684,7 @@ somove(struct socket *so, int wait) > pru_rcvd(so); > > /* Receive buffer did shrink by len bytes, adjust oob. */ > + mtx_enter(&so->so_rcv.sb_mtx); > rcvstate = so->so_rcv.sb_state; > so->so_rcv.sb_state &= ~SS_RCVATMARK; > oobmark = so->so_oobmark; > @@ -1688,6 +1695,7 @@ somove(struct socket *so, int wait) > if (oobmark >= len) > oobmark = 0; > } > + mtx_leave(&so->so_rcv.sb_mtx); > > /* > * Handle oob data. If any malloc fails, ignore error. > Index: sys/kern/uipc_socket2.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket2.c,v > retrieving revision 1.146 > diff -u -p -r1.146 uipc_socket2.c > --- sys/kern/uipc_socket2.c 27 Mar 2024 22:47:53 -0000 1.146 > +++ sys/kern/uipc_socket2.c 29 Mar 2024 08:16:50 -0000 > @@ -351,7 +351,9 @@ void > socantrcvmore(struct socket *so) > { > soassertlocked(so); > + mtx_enter(&so->so_rcv.sb_mtx); > so->so_rcv.sb_state |= SS_CANTRCVMORE; > + mtx_leave(&so->so_rcv.sb_mtx); > sorwakeup(so); > } > > Index: sys/nfs/nfs_socket.c > =================================================================== > RCS file: /cvs/src/sys/nfs/nfs_socket.c,v > retrieving revision 1.146 > diff -u -p -r1.146 nfs_socket.c > --- sys/nfs/nfs_socket.c 22 Mar 2024 07:15:04 -0000 1.146 > +++ sys/nfs/nfs_socket.c 29 Mar 2024 08:16:50 -0000 > @@ -371,7 +371,9 @@ nfs_connect(struct nfsmount *nmp, struct > error = soreserve(so, sndreserve, rcvreserve); > if (error) > goto bad_locked; > + mtx_enter(&so->so_rcv.sb_mtx); > so->so_rcv.sb_flags |= SB_NOINTR; > + mtx_leave(&so->so_rcv.sb_mtx); > so->so_snd.sb_flags |= SB_NOINTR; > sounlock(so); > > Index: sys/nfs/nfs_syscalls.c > =================================================================== > RCS file: /cvs/src/sys/nfs/nfs_syscalls.c,v > retrieving revision 1.122 > diff -u -p -r1.122 nfs_syscalls.c > --- sys/nfs/nfs_syscalls.c 22 Mar 2024 07:15:04 -0000 1.122 > +++ sys/nfs/nfs_syscalls.c 29 Mar 2024 08:16:51 -0000 > @@ -297,12 +297,12 @@ nfssvc_addsock(struct file *fp, struct m > m_freem(m); > } > solock(so); > - so->so_rcv.sb_flags &= ~SB_NOINTR; > mtx_enter(&so->so_rcv.sb_mtx); > + so->so_rcv.sb_flags &= ~SB_NOINTR; > so->so_rcv.sb_timeo_nsecs = INFSLP; > mtx_leave(&so->so_rcv.sb_mtx); > - so->so_snd.sb_flags &= ~SB_NOINTR; > mtx_enter(&so->so_snd.sb_mtx); > + so->so_snd.sb_flags &= ~SB_NOINTR; > so->so_snd.sb_timeo_nsecs = INFSLP; > mtx_leave(&so->so_snd.sb_mtx); > sounlock(so);