Download raw body.
Use `sb_mtx' mutex(9) for the `sb_flags' protection
On Tue, Feb 13, 2024 at 03:37:40PM +0300, Vitaliy Makkoveev wrote:
> I want to do this before go forward.
>
> Temporary, sockbuf structure members modification relies on `sb_mtx'
> mutex(9) only while shared solock() held, meanwhile the rest relies on
> exclusive solock(). Make the `sb_flags' protection consistent as it
> already done for `sb_timeo_nsecs' and `sb_klist'.
>
> The SB_SPLICE flag modification rests the special case. The `sb_mtx'
> mutex(9) only keeps `sb_flags' consistent while SB_SPLICE flag modifying.
> The socket splice paths serialization still rests with netlock.
Is is a good idea to rip apart the atomicity of so_rcv.sb_flags and
so_snd.sb_flags? Either the socket is async or it is not. Now you
have sockets that are async on the reviece side, but not on the
send side.
Also for socket splicing so->so_rcv.sb_flags and sosp->so_snd.sb_flags
should be consistent. For now, exclusive net lock keeps things
together. But when you introduce a mutex it should not only look
at individual fields, but also the context.
short sb_flags and short sb_state live in the same integer. I am
not sure if we may have different locking strategies for them. At
least the alpha CPU may modify one of them with a common 32 access.
bluhm
> Index: sys/kern/sys_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_socket.c,v
> retrieving revision 1.61
> diff -u -p -r1.61 sys_socket.c
> --- sys/kern/sys_socket.c 15 Apr 2023 13:18:28 -0000 1.61
> +++ sys/kern/sys_socket.c 13 Feb 2024 11:22:33 -0000
> @@ -90,15 +90,18 @@ soo_ioctl(struct file *fp, u_long cmd, c
> break;
>
> case FIOASYNC:
> - solock(so);
> - if (*(int *)data) {
> + mtx_enter(&so->so_rcv.sb_mtx);
> + if (*(int *)data)
> so->so_rcv.sb_flags |= SB_ASYNC;
> - so->so_snd.sb_flags |= SB_ASYNC;
> - } else {
> + else
> so->so_rcv.sb_flags &= ~SB_ASYNC;
> + mtx_leave(&so->so_rcv.sb_mtx);
> + mtx_enter(&so->so_snd.sb_mtx);
> + if (*(int *)data)
> + so->so_snd.sb_flags |= SB_ASYNC;
> + else
> so->so_snd.sb_flags &= ~SB_ASYNC;
> - }
> - sounlock(so);
> + mtx_leave(&so->so_snd.sb_mtx);
> break;
>
> case FIONREAD:
> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.320
> diff -u -p -r1.320 uipc_socket.c
> --- sys/kern/uipc_socket.c 12 Feb 2024 22:48:27 -0000 1.320
> +++ sys/kern/uipc_socket.c 13 Feb 2024 11:22:33 -0000
> @@ -1373,8 +1373,13 @@ sosplice(struct socket *so, int fd, off_
> * we sleep, the socket buffers are not marked as spliced yet.
> */
> if (somove(so, M_WAIT)) {
> + mtx_enter(&so->so_rcv.sb_mtx);
> so->so_rcv.sb_flags |= SB_SPLICE;
> + mtx_leave(&so->so_rcv.sb_mtx);
> +
> + mtx_enter(&sosp->so_snd.sb_mtx);
> sosp->so_snd.sb_flags |= SB_SPLICE;
> + mtx_leave(&sosp->so_snd.sb_mtx);
> }
>
> release:
> @@ -1400,8 +1405,15 @@ sounsplice(struct socket *so, struct soc
>
> task_del(sosplice_taskq, &so->so_splicetask);
> timeout_del(&so->so_idleto);
> +
> + mtx_enter(&sosp->so_snd.sb_mtx);
> sosp->so_snd.sb_flags &= ~SB_SPLICE;
> + mtx_leave(&sosp->so_snd.sb_mtx);
> +
> + mtx_enter(&so->so_rcv.sb_mtx);
> so->so_rcv.sb_flags &= ~SB_SPLICE;
> + mtx_leave(&so->so_rcv.sb_mtx);
> +
> so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
> /* Do not wakeup a socket that is about to be freed. */
> if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
> Index: sys/kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.144
> diff -u -p -r1.144 uipc_socket2.c
> --- sys/kern/uipc_socket2.c 12 Feb 2024 22:48:27 -0000 1.144
> +++ sys/kern/uipc_socket2.c 13 Feb 2024 11:22:33 -0000
> @@ -521,12 +521,13 @@ int
> sbwait(struct socket *so, struct sockbuf *sb)
> {
> uint64_t timeo_nsecs;
> - int prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
> + int prio;
>
> soassertlocked(so);
>
> mtx_enter(&sb->sb_mtx);
> timeo_nsecs = sb->sb_timeo_nsecs;
> + prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
> sb->sb_flags |= SB_WAIT;
> mtx_leave(&sb->sb_mtx);
>
> Index: sys/nfs/nfs_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/nfs/nfs_socket.c,v
> retrieving revision 1.145
> diff -u -p -r1.145 nfs_socket.c
> --- sys/nfs/nfs_socket.c 5 Feb 2024 20:21:39 -0000 1.145
> +++ sys/nfs/nfs_socket.c 13 Feb 2024 11:22:33 -0000
> @@ -369,11 +369,17 @@ nfs_connect(struct nfsmount *nmp, struct
> }
> solock(so);
> error = soreserve(so, sndreserve, rcvreserve);
> + sounlock(so);
> if (error)
> - goto bad_locked;
> + goto bad;
> +
> + mtx_enter(&so->so_rcv.sb_mtx);
> so->so_rcv.sb_flags |= SB_NOINTR;
> + mtx_leave(&so->so_rcv.sb_mtx);
> +
> + mtx_enter(&so->so_snd.sb_mtx);
> so->so_snd.sb_flags |= SB_NOINTR;
> - sounlock(so);
> + mtx_leave(&so->so_snd.sb_mtx);
>
> m_freem(mopt);
> m_freem(nam);
> Index: sys/nfs/nfs_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/nfs/nfs_syscalls.c,v
> retrieving revision 1.121
> diff -u -p -r1.121 nfs_syscalls.c
> --- sys/nfs/nfs_syscalls.c 5 Feb 2024 20:21:39 -0000 1.121
> +++ sys/nfs/nfs_syscalls.c 13 Feb 2024 11:22:33 -0000
> @@ -275,16 +275,17 @@ nfssvc_addsock(struct file *fp, struct m
> sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
> m_freem(m);
> }
> - solock(so);
> - so->so_rcv.sb_flags &= ~SB_NOINTR;
> +
> mtx_enter(&so->so_rcv.sb_mtx);
> + so->so_rcv.sb_flags &= ~SB_NOINTR;
> so->so_rcv.sb_timeo_nsecs = INFSLP;
> mtx_leave(&so->so_rcv.sb_mtx);
> - so->so_snd.sb_flags &= ~SB_NOINTR;
> +
> mtx_enter(&so->so_snd.sb_mtx);
> + so->so_snd.sb_flags &= ~SB_NOINTR;
> so->so_snd.sb_timeo_nsecs = INFSLP;
> mtx_leave(&so->so_snd.sb_mtx);
> - sounlock(so);
> +
> if (tslp)
> slp = tslp;
> else {
> Index: sys/sys/socketvar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.124
> diff -u -p -r1.124 socketvar.h
> --- sys/sys/socketvar.h 12 Feb 2024 22:48:27 -0000 1.124
> +++ sys/sys/socketvar.h 13 Feb 2024 11:22:33 -0000
> @@ -49,6 +49,11 @@
> typedef __socklen_t socklen_t; /* length type for network syscalls */
> #endif
>
> +/*
> + * Locks used to protect struct members:
> + * m sb_mtx
> + */
> +
> TAILQ_HEAD(soqhead, socket);
>
> /*
> @@ -120,12 +125,12 @@ struct socket {
> struct mbuf *sb_mbtail; /* the last mbuf in the chain */
> struct mbuf *sb_lastrecord;/* first mbuf of last record in
> socket buffer */
> - short sb_flags; /* flags, see below */
> + short sb_flags; /* [m] flags, see below */
> /* End area that is zeroed on flush. */
> #define sb_endzero sb_flags
> short sb_state; /* socket state on sockbuf */
> - uint64_t sb_timeo_nsecs;/* timeout for read/write */
> - struct klist sb_klist; /* process selecting read/write */
> + uint64_t sb_timeo_nsecs;/* [m] timeout for read/write */
> + struct klist sb_klist; /* [m] process selecting read/write */
> } so_rcv, so_snd;
> #define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */
> #define SB_LOCK 0x01 /* lock on data queue */
> @@ -221,8 +226,6 @@ static inline int
> sb_notify(struct socket *so, struct sockbuf *sb)
> {
> int rv;
> -
> - soassertlocked(so);
>
> mtx_enter(&sb->sb_mtx);
> rv = ((sb->sb_flags & (SB_WAIT|SB_ASYNC|SB_SPLICE)) != 0 ||
Use `sb_mtx' mutex(9) for the `sb_flags' protection