Download raw body.
sys/nfs: fix TCP socket use after reconnect
On Tue, 21 Apr 2026 12:55:46 +0200,
Kirill A. Korinsky <kirill@korins.ky> wrote:
>
> tech@,
>
> There is a race in the NFS/TCP client when several requests are active
> and the connection stalls or times out. One thread enters nfs_receive(),
> takes the current socket pointer, drops nfs_sndlock(), and waits in
> soreceive(). Another thread on the same mount may then enter
> nfs_reconnect(), decide the connection is dead, and close that same
> socket.
>
> The problem is that the first thread may still use the old socket after
> reconnect has already replaced it. Under heavy parallel NFS/TCP load,
> that can wedge reconnect or socket shutdown paths.
>
> Suggested fix is simple: take a socket reference before the receive path
> uses the socket, and release it only after that path is done.
>
> This is not a strict lock order deadlock. It starts as a socket lifetime
> bug, then can show up as an apparent deadlock if the wedged path still
> owns kernel_lock. On a 2 CPU machine that is enough for a full hang: one
> CPU owns kernel_lock, the other spins waiting for it.
>
> If system has more than 2 CPU, it will survive, but NFS share will be
> locked, and on reboot system hangs with "sync devices".
>
> I encountered it few times per week when mount NFS share to laptop via
> wifi with options: soft,intr,tcp,-x=2
>
> I use this diff for more than a week now, and haven't seen NFS issues so
> far, it actually works quite good. Yes, some times IO returns invalid
> command and I see in dmesg: short receive ... from nfs server ...;
> but it is a kind of expected behaviour.
>
Now I use it almost a month, I haven't saw anymore stucked NFS via wifi
which was quite usual before.
I think it worth to be commited, anyone willing to OK?
Index: sys/nfs/nfs_socket.c
===================================================================
RCS file: /home/cvs/src/sys/nfs/nfs_socket.c,v
diff -u -p -r1.156 nfs_socket.c
--- sys/nfs/nfs_socket.c 16 Feb 2025 16:05:07 -0000 1.156
+++ sys/nfs/nfs_socket.c 10 Apr 2026 21:13:27 -0000
@@ -525,7 +525,7 @@ nfs_send(struct socket *so, struct mbuf
int
nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp)
{
- struct socket *so;
+ struct socket *so = NULL;
struct uio auio;
struct iovec aio;
struct mbuf *m;
@@ -577,6 +577,11 @@ tryagain:
}
goto tryagain;
}
+ /*
+ * Keep the socket alive while using the snapshot taken
+ * under nfs_sndlock(), even if another thread reconnects.
+ */
+ soref(so);
while (rep->r_flags & R_MUSTRESEND) {
m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
nfsstats.rpcretries++;
@@ -586,9 +591,13 @@ tryagain:
if (error) {
if (error == EINTR || error == ERESTART ||
(error = nfs_reconnect(rep)) != 0) {
+ sorele(so);
+ so = NULL;
nfs_sndunlock(&rep->r_nmp->nm_flag);
return (error);
}
+ sorele(so);
+ so = NULL;
goto tryagain;
}
}
@@ -608,8 +617,10 @@ tryagain:
error = soreceive(so, NULL, &auio, NULL, NULL,
&rcvflg, 0);
if (error == EWOULDBLOCK && rep) {
- if (rep->r_flags & R_SOFTTERM)
- return (EINTR);
+ if (rep->r_flags & R_SOFTTERM) {
+ error = EINTR;
+ goto errout;
+ }
/*
* looks like the server died after it
* received the request, make sure
@@ -678,8 +689,10 @@ tryagain:
&rcvflg, 0);
m_freem(control);
if (error == EWOULDBLOCK && rep) {
- if (rep->r_flags & R_SOFTTERM)
- return (EINTR);
+ if (rep->r_flags & R_SOFTTERM) {
+ error = EINTR;
+ goto errout;
+ }
}
} while (error == EWOULDBLOCK ||
(!error && *mp == NULL && control));
@@ -690,6 +703,10 @@ tryagain:
len -= auio.uio_resid;
}
errout:
+ if (so != NULL) {
+ sorele(so);
+ so = NULL;
+ }
if (error && error != EINTR && error != ERESTART) {
m_freemp(mp);
if (error != EPIPE)
--
wbr, Kirill
sys/nfs: fix TCP socket use after reconnect