Download raw body.
Don't take solock() in sosend() and soreceive() paths for unix(4) sockets
Don't take solock() in sosend() and soreceive() paths for unix(4) sockets
Don't take solock() in sosend() and soreceive() paths for unix(4) sockets
Don't take solock() in sosend() and soreceive() paths for unix(4) sockets
On Fri, May 03, 2024 at 02:10:49AM +0300, Vitaliy Makkoveev wrote:
> On Fri, May 03, 2024 at 12:26:32AM +0200, Alexander Bluhm wrote:
> > On Wed, May 01, 2024 at 01:01:22AM +0300, Vitaliy Makkoveev wrote:
> > > On Tue, Apr 30, 2024 at 11:01:04PM +0300, Vitaliy Makkoveev wrote:
> > > > Use `sb_mtx' mutex(9) and `sb_lock' rwlock(9) to protect `so_snd' and
> > > > `so_rcv' of unix(4) sockets.
> > > >
> > > > The transmission of unix(4) sockets is already half-unlocked because
> > > > the connected peer is not locked by solock() during sbappend*() call. Since
> > > > the `so_snd' is protected by `sb_mtx' mutex(9) the re-locking is not
> > > > required in uipc_rcvd() too.
> > > >
> > > > SB_OWNLOCK became redundant with SB_MTXLOCK, so remove it. SB_MTXLOCK
> > > > was kept because checks against SB_MTXLOCK within sb*() routines look
> > > > more consistent to me.
> > > >
> > > > Please note, the unlocked peer `so2' of unix(4) can't be disconnected
> > > > while solock() is held on `so'. That's why unlocked sorwakeup() and
> > > > sowwakeup() are fine, corresponding paths will never be followed.
> > > >
> > >
> > > Add forgotten fifofs chunks. Against previous, this diff does direct
> > > `so_rcv' dispose and cleanup in sofree(). This socket is almost dead
> > > and unlinked from everywhere, including the spliced peer. So a concurrent
> > > sotask() thread will just exit. This is required to solve inode and so_rcv
> > > lock ordering. Also this removes re-locking from sofree() for all
> > > sockets.
> >
> > Passed regress with witness.
> >
> > One question, otherwise OK bluhm@
> >
> > [skip]
> >
> > You change the order of EPIPE and ENOTCONN error checks here.
> > The difference is relevant, EPIPE may signal a SIGPIPE.
> >
> > When writing into a socket that stops working, the error should be
> > EPIPE. If there are two processes, A is writing to B, and B
> > disconnects the socket, then A should get an EPIPE.
> >
> > Do you change the behavior?
> >
> > In sosend() we have a SS_CANTSENDMORE check with EPIPE error.
> > So maybe the uipc_send() errors are irrelevant.
> >
>
> Yes, I change behavior. I had concerns, so I checked the FreeBSD version of
> uipc_send() and found that they do "so->so_state & SS_ISCONNECTED" check
> before "so->so_snd.sb_state & SBS_CANTSENDMORE". But sosend_generic()
> has the different order. Since this code exists for a long time, I found
> this order not significant.
>
> However, our fifofs is the only place where we do direct SS_CANTSENDMORE
> flag modifications. We could keep solock() around them for serialization
> with uipc_send() and keep existing checks order.
>
I checked other BSDs. As you can see, FreeBSD, OSX and DragonFlyBSD
return ENOTCONN before EPIPE. NetBSD doesn't check SS_CANTSENDMORE
and only returns ENOTCONN.
So which way do we prefer?
1. NetBSD
unp_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
struct mbuf *control, struct lwp *l)
{
struct unpcb *unp = sotounpcb(so);
int error = 0;
u_int newhiwat;
struct socket *so2;
KASSERT(solocked(so));
KASSERT(unp != NULL);
KASSERT(m != NULL);
/*
* Note: unp_internalize() rejects any control message
* other than SCM_RIGHTS, and only allows one. This
* has the side-effect of preventing a caller from
* forging SCM_CREDS.
*/
if (control) {
sounlock(so);
error = unp_internalize(&control);
solock(so);
if (error != 0) {
m_freem(control);
m_freem(m);
return error;
}
}
switch (so->so_type) {
case SOCK_DGRAM: {
/* SKIP */
break;
}
case SOCK_SEQPACKET: /* FALLTHROUGH */
case SOCK_STREAM:
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
if (unp->unp_conn == NULL) {
error = ENOTCONN;
break;
}
so2 = unp->unp_conn->unp_socket;
KASSERT(solocked2(so, so2));
if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
/*
* Credentials are passed only once on
* SOCK_STREAM and SOCK_SEQPACKET.
*/
unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
control = unp_addsockcred(l, control);
}
if (unp->unp_conn->unp_flags & UNP_OWANTCRED) {
/*
* Credentials are passed only once on
* SOCK_STREAM and SOCK_SEQPACKET.
*/
unp->unp_conn->unp_flags &= ~UNP_OWANTCRED;
MODULE_HOOK_CALL(uipc_unp_70_hook, (curlwp, control),
stub_compat_70_unp_addsockcred(curlwp, control),
control);
}
/*
* Send to paired receive port, and then reduce
* send buffer hiwater marks to maintain backpressure.
* Wake up readers.
*/
if (control) {
if (sbappendcontrol(rcv, m, control) != 0)
control = NULL;
} else {
2. MacOS
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
struct mbuf *control, proc_t p)
{
int error = 0;
struct unpcb *unp = sotounpcb(so);
struct socket *so2;
int32_t len = m_pktlen(m);
if (unp == 0) {
error = EINVAL;
goto release;
}
if (flags & PRUS_OOB) {
error = EOPNOTSUPP;
goto release;
}
if (control) {
/* release lock to avoid deadlock (4436174) */
socket_unlock(so, 0);
error = unp_internalize(control, p);
socket_lock(so, 0);
if (error) {
goto release;
}
}
switch (so->so_type) {
case SOCK_DGRAM:
{
/* SKIP */
break;
}
case SOCK_STREAM: {
int didreceive = 0;
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
/* Connect if not connected yet. */
/*
* Note: A better implementation would complain
* if not equal to the peer's address.
*/
if ((so->so_state & SS_ISCONNECTED) == 0) {
if (nam) {
error = unp_connect(so, nam, p);
if (error) {
so->so_state &= ~SS_ISCONNECTING;
break; /* XXX */
}
} else {
error = ENOTCONN;
break;
}
}
if (so->so_state & SS_CANTSENDMORE) {
error = EPIPE;
break;
}
3. DragonFlyBSD
uipc_send(netmsg_t msg)
{
struct unpcb *unp, *unp2;
struct socket *so;
struct socket *so2;
struct mbuf *control;
struct mbuf *m;
int error = 0;
so = msg->base.nm_so;
control = msg->send.nm_control;
m = msg->send.nm_m;
/*
* so_pcb is only modified with both the global and the unp
* pool token held.
*/
so = msg->base.nm_so;
unp = unp_getsocktoken(so);
if (!UNP_ISATTACHED(unp)) {
error = EINVAL;
goto release;
}
if (msg->send.nm_flags & PRUS_OOB) {
error = EOPNOTSUPP;
goto release;
}
wakeup_start_delayed();
if (control && (error = unp_internalize(control, msg->send.nm_td)))
goto release;
switch (so->so_type) {
case SOCK_DGRAM:
{
/* SKIP */
break;
}
case SOCK_STREAM:
case SOCK_SEQPACKET:
/* Connect if not connected yet. */
/*
* Note: A better implementation would complain
* if not equal to the peer's address.
*/
if (unp->unp_conn == NULL) {
if (msg->send.nm_addr) {
error = unp_connect(so,
msg->send.nm_addr,
msg->send.nm_td);
if (error)
break; /* XXX */
}
/*
* NOTE:
* unp_conn still could be NULL, even if the
* above unp_connect() succeeds; since the
* current unp's token could be released due
* to blocking operations after unp_conn is
* assigned.
*/
if (unp->unp_conn == NULL) {
error = ENOTCONN;
break;
}
}
if (so->so_state & SS_CANTSENDMORE) {
error = EPIPE;
break;
}
Don't take solock() in sosend() and soreceive() paths for unix(4) sockets
Don't take solock() in sosend() and soreceive() paths for unix(4) sockets
Don't take solock() in sosend() and soreceive() paths for unix(4) sockets
Don't take solock() in sosend() and soreceive() paths for unix(4) sockets