Download raw body.
Use `sb_mtx' to protect `so_rcv' receive buffer of unix(4) sockets
Use `sb_mtx' to protect `so_rcv' receive buffer of unix(4) sockets
On Sat, Mar 23, 2024 at 10:00:35PM +0300, Vitaliy Makkoveev wrote:
> This makes re-locking unnecessary in the uipc_*send() paths, because
> it's enough to lock one socket to prevent peer from concurrent
> disconnection. As the little bonus unix(4) one socket can perform
> simultaneous transmission and reception with one exception for
> uipc_rcvd() which still requires the re-lock for connection oriented
> sockets.
>
> A few words about filt_soread() and filt_soexcept(). The socket lock is
> not held while they called from uipc_*send() through sorwakeup().
> However, the unlocked access to the `so_options', `so_state' and
> `so_error' is fine.
>
> In the uipc_*send() paths the socket lock is still held on sending
> socket, so the receiving peer can't be disconnected. It also can't be or
> became listening socket, so SO_ACCEPTCONN is not set and can't be set
> concurrently as the SS_ISDISCONNECTED bit. The SS_ISCONNECTED bit is
> always set and can't be cleared concurrently. `so_error' is set on the
> peer sockets only by unp_detach(), which also can't be called
> concurrently on sending socket.
>
> This is also true for filt_fiforead() and filt_fifoexcept(). For other
> callers like kevent(2) or doaccept() the socket lock is still held.
Passed regress with witness on i386.
OK bluhm@
> Index: sys/kern/sys_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_socket.c,v
> retrieving revision 1.61
> diff -u -p -r1.61 sys_socket.c
> --- sys/kern/sys_socket.c 15 Apr 2023 13:18:28 -0000 1.61
> +++ sys/kern/sys_socket.c 23 Mar 2024 18:59:11 -0000
> @@ -144,9 +144,11 @@ soo_stat(struct file *fp, struct stat *u
> memset(ub, 0, sizeof (*ub));
> ub->st_mode = S_IFSOCK;
> solock(so);
> + mtx_enter(&so->so_rcv.sb_mtx);
> if ((so->so_rcv.sb_state & SS_CANTRCVMORE) == 0 ||
> so->so_rcv.sb_cc != 0)
> ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH;
> + mtx_leave(&so->so_rcv.sb_mtx);
> if ((so->so_snd.sb_state & SS_CANTSENDMORE) == 0)
> ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
> ub->st_uid = so->so_euid;
> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.321
> diff -u -p -r1.321 uipc_socket.c
> --- sys/kern/uipc_socket.c 22 Mar 2024 17:34:11 -0000 1.321
> +++ sys/kern/uipc_socket.c 23 Mar 2024 18:59:11 -0000
> @@ -160,6 +160,9 @@ soalloc(const struct protosw *prp, int w
> break;
> }
> break;
> + case AF_UNIX:
> + so->so_rcv.sb_flags |= SB_MTXLOCK;
> + break;
> }
>
> return (so);
> @@ -987,8 +990,11 @@ dontblock:
> * Dispose of any SCM_RIGHTS message that went
> * through the read path rather than recv.
> */
> - if (pr->pr_domain->dom_dispose)
> + if (pr->pr_domain->dom_dispose) {
> + sb_mtx_unlock(&so->so_rcv);
> pr->pr_domain->dom_dispose(cm);
> + sb_mtx_lock(&so->so_rcv);
> + }
> m_free(cm);
> }
> }
> @@ -1173,8 +1179,11 @@ dontblock:
> }
> SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
> SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
> - if (pr->pr_flags & PR_WANTRCVD)
> + if (pr->pr_flags & PR_WANTRCVD) {
> + sb_mtx_unlock(&so->so_rcv);
> pru_rcvd(so);
> + sb_mtx_lock(&so->so_rcv);
> + }
> }
> if (orig_resid == uio->uio_resid && orig_resid &&
> (flags & MSG_EOR) == 0 &&
> @@ -1233,10 +1242,10 @@ sorflush(struct socket *so)
> /* with SBL_WAIT and SLB_NOINTR sblock() must not fail */
> KASSERT(error == 0);
> socantrcvmore(so);
> + mtx_enter(&sb->sb_mtx);
> m = sb->sb_mb;
> memset(&sb->sb_startzero, 0,
> (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
> - mtx_enter(&sb->sb_mtx);
> sb->sb_timeo_nsecs = INFSLP;
> mtx_leave(&sb->sb_mtx);
> sbunlock(so, sb);
> @@ -1757,7 +1766,8 @@ somove(struct socket *so, int wait)
> void
> sorwakeup(struct socket *so)
> {
> - soassertlocked_readonly(so);
> + if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
> + soassertlocked_readonly(so);
>
> #ifdef SOCKET_SPLICE
> if (so->so_rcv.sb_flags & SB_SPLICE) {
> @@ -1877,6 +1887,8 @@ sosetopt(struct socket *so, int level, i
> cnt = 1;
>
> solock(so);
> + mtx_enter(&sb->sb_mtx);
> +
> switch (optname) {
> case SO_SNDBUF:
> case SO_RCVBUF:
> @@ -1898,7 +1910,10 @@ sosetopt(struct socket *so, int level, i
> sb->sb_hiwat : cnt;
> break;
> }
> +
> + mtx_leave(&sb->sb_mtx);
> sounlock(so);
> +
> break;
> }
>
> @@ -2169,13 +2184,6 @@ sofilt_unlock(struct socket *so, struct
> }
> }
>
> -static inline void
> -sofilt_assert_locked(struct socket *so, struct sockbuf *sb)
> -{
> - MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
> - soassertlocked_readonly(so);
> -}
> -
> int
> soo_kqfilter(struct file *fp, struct knote *kn)
> {
> @@ -2218,9 +2226,14 @@ filt_soread(struct knote *kn, long hint)
> struct socket *so = kn->kn_fp->f_data;
> int rv = 0;
>
> - sofilt_assert_locked(so, &so->so_rcv);
> + MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
> + if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
> + soassertlocked_readonly(so);
>
> if (so->so_options & SO_ACCEPTCONN) {
> + if (so->so_rcv.sb_flags & SB_MTXLOCK)
> + soassertlocked_readonly(so);
> +
> kn->kn_data = so->so_qlen;
> rv = (kn->kn_data != 0);
>
> @@ -2275,7 +2288,8 @@ filt_sowrite(struct knote *kn, long hint
> struct socket *so = kn->kn_fp->f_data;
> int rv;
>
> - sofilt_assert_locked(so, &so->so_snd);
> + MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
> + soassertlocked_readonly(so);
>
> kn->kn_data = sbspace(so, &so->so_snd);
> if (so->so_snd.sb_state & SS_CANTSENDMORE) {
> @@ -2306,7 +2320,9 @@ filt_soexcept(struct knote *kn, long hin
> struct socket *so = kn->kn_fp->f_data;
> int rv = 0;
>
> - sofilt_assert_locked(so, &so->so_rcv);
> + MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
> + if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
> + soassertlocked_readonly(so);
>
> #ifdef SOCKET_SPLICE
> if (isspliced(so)) {
> Index: sys/kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.144
> diff -u -p -r1.144 uipc_socket2.c
> --- sys/kern/uipc_socket2.c 12 Feb 2024 22:48:27 -0000 1.144
> +++ sys/kern/uipc_socket2.c 23 Mar 2024 18:59:11 -0000
> @@ -142,7 +142,9 @@ soisdisconnecting(struct socket *so)
> soassertlocked(so);
> so->so_state &= ~SS_ISCONNECTING;
> so->so_state |= SS_ISDISCONNECTING;
> + mtx_enter(&so->so_rcv.sb_mtx);
> so->so_rcv.sb_state |= SS_CANTRCVMORE;
> + mtx_leave(&so->so_rcv.sb_mtx);
> so->so_snd.sb_state |= SS_CANTSENDMORE;
> wakeup(&so->so_timeo);
> sowwakeup(so);
> @@ -155,7 +157,9 @@ soisdisconnected(struct socket *so)
> soassertlocked(so);
> so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
> so->so_state |= SS_ISDISCONNECTED;
> + mtx_enter(&so->so_rcv.sb_mtx);
> so->so_rcv.sb_state |= SS_CANTRCVMORE;
> + mtx_leave(&so->so_rcv.sb_mtx);
> so->so_snd.sb_state |= SS_CANTSENDMORE;
> wakeup(&so->so_timeo);
> sowwakeup(so);
> @@ -219,9 +223,10 @@ sonewconn(struct socket *head, int conns
> mtx_enter(&head->so_snd.sb_mtx);
> so->so_snd.sb_timeo_nsecs = head->so_snd.sb_timeo_nsecs;
> mtx_leave(&head->so_snd.sb_mtx);
> +
> + mtx_enter(&head->so_rcv.sb_mtx);
> so->so_rcv.sb_wat = head->so_rcv.sb_wat;
> so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
> - mtx_enter(&head->so_rcv.sb_mtx);
> so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs;
> mtx_leave(&head->so_rcv.sb_mtx);
>
> @@ -651,16 +656,22 @@ soreserve(struct socket *so, u_long sndc
>
> if (sbreserve(so, &so->so_snd, sndcc))
> goto bad;
> - if (sbreserve(so, &so->so_rcv, rcvcc))
> - goto bad2;
> so->so_snd.sb_wat = sndcc;
> - so->so_rcv.sb_wat = rcvcc;
> - if (so->so_rcv.sb_lowat == 0)
> - so->so_rcv.sb_lowat = 1;
> if (so->so_snd.sb_lowat == 0)
> so->so_snd.sb_lowat = MCLBYTES;
> if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
> so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
> +
> + mtx_enter(&so->so_rcv.sb_mtx);
> + if (sbreserve(so, &so->so_rcv, rcvcc)) {
> + mtx_leave(&so->so_rcv.sb_mtx);
> + goto bad2;
> + }
> + so->so_rcv.sb_wat = rcvcc;
> + if (so->so_rcv.sb_lowat == 0)
> + so->so_rcv.sb_lowat = 1;
> + mtx_leave(&so->so_rcv.sb_mtx);
> +
> return (0);
> bad2:
> sbrelease(so, &so->so_snd);
> @@ -676,8 +687,7 @@ bad:
> int
> sbreserve(struct socket *so, struct sockbuf *sb, u_long cc)
> {
> - KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
> - soassertlocked(so);
> + sbmtxassertlocked(so, sb);
>
> if (cc == 0 || cc > sb_max)
> return (1);
> @@ -818,7 +828,7 @@ sbappend(struct socket *so, struct sockb
> if (m == NULL)
> return;
>
> - soassertlocked(so);
> + sbmtxassertlocked(so, sb);
> SBLASTRECORDCHK(sb, "sbappend 1");
>
> if ((n = sb->sb_lastrecord) != NULL) {
> @@ -899,8 +909,7 @@ sbappendrecord(struct socket *so, struct
> {
> struct mbuf *m;
>
> - KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
> - soassertlocked(so);
> + sbmtxassertlocked(so, sb);
>
> if (m0 == NULL)
> return;
> @@ -984,6 +993,8 @@ sbappendcontrol(struct socket *so, struc
> struct mbuf *m, *mlast, *n;
> int eor = 0, space = 0;
>
> + sbmtxassertlocked(so, sb);
> +
> if (control == NULL)
> panic("sbappendcontrol");
> for (m = control; ; m = m->m_next) {
> @@ -1109,8 +1120,7 @@ sbdrop(struct socket *so, struct sockbuf
> struct mbuf *m, *mn;
> struct mbuf *next;
>
> - KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
> - soassertlocked(so);
> + sbmtxassertlocked(so, sb);
>
> next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
> while (len > 0) {
> Index: sys/kern/uipc_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.202
> diff -u -p -r1.202 uipc_usrreq.c
> --- sys/kern/uipc_usrreq.c 22 Mar 2024 17:34:11 -0000 1.202
> +++ sys/kern/uipc_usrreq.c 23 Mar 2024 18:59:11 -0000
> @@ -489,8 +489,10 @@ uipc_rcvd(struct socket *so)
> * Adjust backpressure on sender
> * and wakeup any waiting to write.
> */
> + mtx_enter(&so->so_rcv.sb_mtx);
> so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
> so2->so_snd.sb_cc = so->so_rcv.sb_cc;
> + mtx_leave(&so->so_rcv.sb_mtx);
> sowwakeup(so2);
> sounlock(so2);
> }
> @@ -499,8 +501,9 @@ int
> uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
> struct mbuf *control)
> {
> + struct unpcb *unp = sotounpcb(so);
> struct socket *so2;
> - int error = 0;
> + int error = 0, dowakeup = 0;
>
> if (control) {
> sounlock(so);
> @@ -514,21 +517,24 @@ uipc_send(struct socket *so, struct mbuf
> error = EPIPE;
> goto dispose;
> }
> - if ((so2 = unp_solock_peer(so)) == NULL) {
> + if (unp->unp_conn == NULL) {
> error = ENOTCONN;
> goto dispose;
> }
>
> + so2 = unp->unp_conn->unp_socket;
> +
> /*
> * Send to paired receive port, and then raise
> * send buffer counts to maintain backpressure.
> * Wake up readers.
> */
> + mtx_enter(&so2->so_rcv.sb_mtx);
> if (control) {
> if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
> control = NULL;
> } else {
> - sounlock(so2);
> + mtx_leave(&so2->so_rcv.sb_mtx);
> error = ENOBUFS;
> goto dispose;
> }
> @@ -539,9 +545,12 @@ uipc_send(struct socket *so, struct mbuf
> so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
> so->so_snd.sb_cc = so2->so_rcv.sb_cc;
> if (so2->so_rcv.sb_cc > 0)
> + dowakeup = 1;
> + mtx_leave(&so2->so_rcv.sb_mtx);
> +
> + if (dowakeup)
> sorwakeup(so2);
>
> - sounlock(so2);
> m = NULL;
>
> dispose:
> @@ -563,7 +572,7 @@ uipc_dgram_send(struct socket *so, struc
> struct unpcb *unp = sotounpcb(so);
> struct socket *so2;
> const struct sockaddr *from;
> - int error = 0;
> + int error = 0, dowakeup = 0;
>
> if (control) {
> sounlock(so);
> @@ -583,7 +592,7 @@ uipc_dgram_send(struct socket *so, struc
> goto dispose;
> }
>
> - if ((so2 = unp_solock_peer(so)) == NULL) {
> + if (unp->unp_conn == NULL) {
> if (nam != NULL)
> error = ECONNREFUSED;
> else
> @@ -591,20 +600,24 @@ uipc_dgram_send(struct socket *so, struc
> goto dispose;
> }
>
> + so2 = unp->unp_conn->unp_socket;
> +
> if (unp->unp_addr)
> from = mtod(unp->unp_addr, struct sockaddr *);
> else
> from = &sun_noname;
> +
> + mtx_enter(&so2->so_rcv.sb_mtx);
> if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
> - sorwakeup(so2);
> + dowakeup = 1;
> m = NULL;
> control = NULL;
> } else
> error = ENOBUFS;
> + mtx_leave(&so2->so_rcv.sb_mtx);
>
> - if (so2 != so)
> - sounlock(so2);
> -
> + if (dowakeup)
> + sorwakeup(so2);
> if (nam)
> unp_disconnect(unp);
>
> @@ -1390,9 +1403,9 @@ unp_gc(void *arg __unused)
> if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
> continue;
> so = unp->unp_socket;
> - solock(so);
> + mtx_enter(&so->so_rcv.sb_mtx);
> unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
> - sounlock(so);
> + mtx_leave(&so->so_rcv.sb_mtx);
> }
>
> /*
> @@ -1414,9 +1427,9 @@ unp_gc(void *arg __unused)
> unp->unp_gcflags &= ~UNP_GCDEAD;
>
> so = unp->unp_socket;
> - solock(so);
> + mtx_enter(&so->so_rcv.sb_mtx);
> unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
> - sounlock(so);
> + mtx_leave(&so->so_rcv.sb_mtx);
>
> KASSERT(nunref > 0);
> nunref--;
> Index: sys/miscfs/fifofs/fifo_vnops.c
> ===================================================================
> RCS file: /cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v
> retrieving revision 1.103
> diff -u -p -r1.103 fifo_vnops.c
> --- sys/miscfs/fifofs/fifo_vnops.c 3 Feb 2024 22:50:09 -0000 1.103
> +++ sys/miscfs/fifofs/fifo_vnops.c 23 Mar 2024 18:59:11 -0000
> @@ -201,7 +201,9 @@ fifo_open(void *v)
> if (fip->fi_writers == 1) {
> solock(rso);
> rso->so_state &= ~SS_ISDISCONNECTED;
> + mtx_enter(&rso->so_rcv.sb_mtx);
> rso->so_rcv.sb_state &= ~SS_CANTRCVMORE;
> + mtx_leave(&rso->so_rcv.sb_mtx);
> sounlock(rso);
> if (fip->fi_readers > 0)
> wakeup(&fip->fi_readers);
> @@ -518,7 +520,6 @@ filt_fiforead(struct knote *kn, long hin
> struct socket *so = kn->kn_hook;
> int rv;
>
> - soassertlocked(so);
> MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
>
> kn->kn_data = so->so_rcv.sb_cc;
> @@ -574,7 +575,6 @@ filt_fifoexcept(struct knote *kn, long h
> struct socket *so = kn->kn_hook;
> int rv = 0;
>
> - soassertlocked(so);
> MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
>
> if (kn->kn_flags & __EV_POLL) {
> Index: sys/sys/socketvar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.125
> diff -u -p -r1.125 socketvar.h
> --- sys/sys/socketvar.h 22 Mar 2024 17:34:11 -0000 1.125
> +++ sys/sys/socketvar.h 23 Mar 2024 18:59:11 -0000
> @@ -242,7 +242,10 @@ sb_notify(struct socket *so, struct sock
> static inline long
> sbspace(struct socket *so, struct sockbuf *sb)
> {
> - soassertlocked_readonly(so);
> + if (sb->sb_flags & SB_MTXLOCK)
> + sbmtxassertlocked(so, sb);
> + else
> + soassertlocked_readonly(so);
>
> return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
> }
Use `sb_mtx' to protect `so_rcv' receive buffer of unix(4) sockets
Use `sb_mtx' to protect `so_rcv' receive buffer of unix(4) sockets