From: Vitaliy Makkoveev Subject: Use `sb_mtx' mutex(9) for the `sb_flags' protection To: Alexander Bluhm , tech@openbsd.org Date: Tue, 13 Feb 2024 15:37:40 +0300 I want to do this before go forward. Temporary, sockbuf structure members modification relies on `sb_mtx' mutex(9) only while shared solock() held, meanwhile the rest relies on exclusive solock(). Make the `sb_flags' protection consistent as it already done for `sb_timeo_nsecs' and `sb_klist'. The SB_SPLICE flag modification rests the special case. The `sb_mtx' mutex(9) only keeps `sb_flags' consistent while SB_SPLICE flag modifying. The socket splice paths serialization still rests with netlock. Index: sys/kern/sys_socket.c =================================================================== RCS file: /cvs/src/sys/kern/sys_socket.c,v retrieving revision 1.61 diff -u -p -r1.61 sys_socket.c --- sys/kern/sys_socket.c 15 Apr 2023 13:18:28 -0000 1.61 +++ sys/kern/sys_socket.c 13 Feb 2024 11:22:33 -0000 @@ -90,15 +90,18 @@ soo_ioctl(struct file *fp, u_long cmd, c break; case FIOASYNC: - solock(so); - if (*(int *)data) { + mtx_enter(&so->so_rcv.sb_mtx); + if (*(int *)data) so->so_rcv.sb_flags |= SB_ASYNC; - so->so_snd.sb_flags |= SB_ASYNC; - } else { + else so->so_rcv.sb_flags &= ~SB_ASYNC; + mtx_leave(&so->so_rcv.sb_mtx); + mtx_enter(&so->so_snd.sb_mtx); + if (*(int *)data) + so->so_snd.sb_flags |= SB_ASYNC; + else so->so_snd.sb_flags &= ~SB_ASYNC; - } - sounlock(so); + mtx_leave(&so->so_snd.sb_mtx); break; case FIONREAD: Index: sys/kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.320 diff -u -p -r1.320 uipc_socket.c --- sys/kern/uipc_socket.c 12 Feb 2024 22:48:27 -0000 1.320 +++ sys/kern/uipc_socket.c 13 Feb 2024 11:22:33 -0000 @@ -1373,8 +1373,13 @@ sosplice(struct socket *so, int fd, off_ * we sleep, the socket buffers are not marked as spliced yet. */ if (somove(so, M_WAIT)) { + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_flags |= SB_SPLICE; + mtx_leave(&so->so_rcv.sb_mtx); + + mtx_enter(&sosp->so_snd.sb_mtx); sosp->so_snd.sb_flags |= SB_SPLICE; + mtx_leave(&sosp->so_snd.sb_mtx); } release: @@ -1400,8 +1405,15 @@ sounsplice(struct socket *so, struct soc task_del(sosplice_taskq, &so->so_splicetask); timeout_del(&so->so_idleto); + + mtx_enter(&sosp->so_snd.sb_mtx); sosp->so_snd.sb_flags &= ~SB_SPLICE; + mtx_leave(&sosp->so_snd.sb_mtx); + + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_flags &= ~SB_SPLICE; + mtx_leave(&so->so_rcv.sb_mtx); + so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL; /* Do not wakeup a socket that is about to be freed. */ if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so)) Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.144 diff -u -p -r1.144 uipc_socket2.c --- sys/kern/uipc_socket2.c 12 Feb 2024 22:48:27 -0000 1.144 +++ sys/kern/uipc_socket2.c 13 Feb 2024 11:22:33 -0000 @@ -521,12 +521,13 @@ int sbwait(struct socket *so, struct sockbuf *sb) { uint64_t timeo_nsecs; - int prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH; + int prio; soassertlocked(so); mtx_enter(&sb->sb_mtx); timeo_nsecs = sb->sb_timeo_nsecs; + prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH; sb->sb_flags |= SB_WAIT; mtx_leave(&sb->sb_mtx); Index: sys/nfs/nfs_socket.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_socket.c,v retrieving revision 1.145 diff -u -p -r1.145 nfs_socket.c --- sys/nfs/nfs_socket.c 5 Feb 2024 20:21:39 -0000 1.145 +++ sys/nfs/nfs_socket.c 13 Feb 2024 11:22:33 -0000 @@ -369,11 +369,17 @@ nfs_connect(struct nfsmount *nmp, struct } solock(so); error = soreserve(so, sndreserve, rcvreserve); + sounlock(so); if (error) - goto bad_locked; + goto bad; + + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_flags |= SB_NOINTR; + mtx_leave(&so->so_rcv.sb_mtx); + + mtx_enter(&so->so_snd.sb_mtx); so->so_snd.sb_flags |= SB_NOINTR; - sounlock(so); + mtx_leave(&so->so_snd.sb_mtx); m_freem(mopt); m_freem(nam); Index: sys/nfs/nfs_syscalls.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_syscalls.c,v retrieving revision 1.121 diff -u -p -r1.121 nfs_syscalls.c --- sys/nfs/nfs_syscalls.c 5 Feb 2024 20:21:39 -0000 1.121 +++ sys/nfs/nfs_syscalls.c 13 Feb 2024 11:22:33 -0000 @@ -275,16 +275,17 @@ nfssvc_addsock(struct file *fp, struct m sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); m_freem(m); } - solock(so); - so->so_rcv.sb_flags &= ~SB_NOINTR; + mtx_enter(&so->so_rcv.sb_mtx); + so->so_rcv.sb_flags &= ~SB_NOINTR; so->so_rcv.sb_timeo_nsecs = INFSLP; mtx_leave(&so->so_rcv.sb_mtx); - so->so_snd.sb_flags &= ~SB_NOINTR; + mtx_enter(&so->so_snd.sb_mtx); + so->so_snd.sb_flags &= ~SB_NOINTR; so->so_snd.sb_timeo_nsecs = INFSLP; mtx_leave(&so->so_snd.sb_mtx); - sounlock(so); + if (tslp) slp = tslp; else { Index: sys/sys/socketvar.h =================================================================== RCS file: /cvs/src/sys/sys/socketvar.h,v retrieving revision 1.124 diff -u -p -r1.124 socketvar.h --- sys/sys/socketvar.h 12 Feb 2024 22:48:27 -0000 1.124 +++ sys/sys/socketvar.h 13 Feb 2024 11:22:33 -0000 @@ -49,6 +49,11 @@ typedef __socklen_t socklen_t; /* length type for network syscalls */ #endif +/* + * Locks used to protect struct members: + * m sb_mtx + */ + TAILQ_HEAD(soqhead, socket); /* @@ -120,12 +125,12 @@ struct socket { struct mbuf *sb_mbtail; /* the last mbuf in the chain */ struct mbuf *sb_lastrecord;/* first mbuf of last record in socket buffer */ - short sb_flags; /* flags, see below */ + short sb_flags; /* [m] flags, see below */ /* End area that is zeroed on flush. */ #define sb_endzero sb_flags short sb_state; /* socket state on sockbuf */ - uint64_t sb_timeo_nsecs;/* timeout for read/write */ - struct klist sb_klist; /* process selecting read/write */ + uint64_t sb_timeo_nsecs;/* [m] timeout for read/write */ + struct klist sb_klist; /* [m] process selecting read/write */ } so_rcv, so_snd; #define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ #define SB_LOCK 0x01 /* lock on data queue */ @@ -221,8 +226,6 @@ static inline int sb_notify(struct socket *so, struct sockbuf *sb) { int rv; - - soassertlocked(so); mtx_enter(&sb->sb_mtx); rv = ((sb->sb_flags & (SB_WAIT|SB_ASYNC|SB_SPLICE)) != 0 ||