Download raw body.
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
On Tue, Dec 24, 2024 at 07:59:38PM +0300, Vitaliy Makkoveev wrote:
> > On 24 Dec 2024, at 19:49, Alexander Bluhm <alexander.bluhm@gmx.net> wrote:
> >
> > On Sat, Dec 21, 2024 at 08:16:05AM +0300, Vitaliy Makkoveev wrote:
> >> This diff only unlocks sosend() path, all the rest is still locked
> >> exclusively.
> >>
> >> Even for tcp(4) case, sosend() only checks `so_snd' free space and
> >> sleeps if necessary, actual buffer handling happening within exclusively
> >> locked PCB layer. In the PCB layer we don't need to protect read-only
> >> access to `so_snd' because corresponding read-write access is serialized
> >> by socket lock.
> >>
> >> sosend() needs to be serialized with somove(), so sotask() takes
> >> sblock() on `so_snd' of spliced socket. This discards previous
> >> protection of tcp(4) spliced sockets where solock() was used to prevent
> >> concurrent unsplicing. This scheme was used to avoid sleep in sofree().
> >>
> >> However, sleep is sofree() is possible. We have two cases:
> >>
> >> 1. Socket was not yet accept(2)ed. Such sockets can't be accessed from
> >> the userland and can't be spliced. sofree() could be called only from
> >> PCB layer and don't need to sleep.
> >>
> >> 2. Socket was accepted. While called form PCB layer, sofree() ignores it
> >> because SS_NOFDREF bit is not set. Socket remains spliced, but without
> >> PCB. Sockets without PCB can't be accessed from PCB layer, only from
> >> userland. So, soclose()/sofree() thread needs to wait concurrent
> >> soclose()/sofree() thread for spliced socket as is is already done for
> >> udp(4) case. In such case it is safe to release solock() and sleep
> >> within sofree().
> >>
> >> Note, I intentionally left all "if (dosolock)" dances as is.
> >
> > While running your diff though my performnce tests, I got this panic
> > twice. I have never seen it before, so I guess it is related.
> >
> > panic: softclock: invalid to_clock: 19668320
> > Stopped at db_enter+0x14: popq %rbp
> > TID PID UID PRFLAGS PFLAGS CPU COMMAND
> > 120146 91986 89 0x1100012 0 1 relayd
> > 65124 13759 89 0x1100012 0 3 relayd
> > 33160 19438 0 0x14000 0x200 2 softnet0
> > db_enter() at db_enter+0x14
> > panic(ffffffff82883be3) at panic+0xdd
> > softclock(0) at softclock+0x1d5
> > softintr_dispatch(0) at softintr_dispatch+0xe6
> > Xsoftclock() at Xsoftclock+0x27
> > acpicpu_idle() at acpicpu_idle+0x2b9
> > sched_idle(ffffffff83593ff0) at sched_idle+0x298
> > end trace frame: 0x0, count: 8
> > https://www.openbsd.org/ddb.html describes the minimum info required in bug
> > reports. Insufficient info makes it difficult to find and fix bugs.
> >
> > ddb{0}> x/s version
> > version: OpenBSD 7.6-current (GENERIC.MP) #cvs : D2024.12.23.00.00.00: Tue Dec 24 14:53:39 CET 2024
> > root@ot15.obsd-lab.genua.de:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> >
> > ddb{0}> show panic
> > *cpu0: softclock: invalid to_clock: 19668320
>
> Interesting. Which test triggers this? I suspect sosplice.
This diff contains only sosplice() relared hunks. Regardless `so_snd'
unlocking, our current sosend() releases solock() around uiomove(), so
concurrent somove() thread could break us. This diff prevents this by
taking sblock() on `so_snd' of spliced socket.
I performed couple of kern/sosplice runs without any issue. Alexander,
could you test it too? I suspect the problem could be in timeout
reinitialisation in sofree(), but I can't trigger it during regress run.
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.347 uipc_socket.c
--- sys/kern/uipc_socket.c 19 Dec 2024 22:11:35 -0000 1.347
+++ sys/kern/uipc_socket.c 24 Dec 2024 17:45:57 -0000
@@ -327,17 +327,14 @@ sofree(struct socket *so, int keep_lock)
sounlock(head);
}
- switch (so->so_proto->pr_domain->dom_family) {
- case AF_INET:
- case AF_INET6:
- if (so->so_proto->pr_type == SOCK_STREAM)
- break;
- /* FALLTHROUGH */
- default:
+ if (!keep_lock) {
+ /*
+ * sofree() was called from soclose(). Sleep is safe
+ * even for tcp(4) sockets.
+ */
sounlock(so);
refcnt_finalize(&so->so_refcnt, "sofinal");
solock(so);
- break;
}
sigio_free(&so->so_sigio);
@@ -458,31 +455,6 @@ discard:
if (so->so_sp) {
struct socket *soback;
- if (so->so_proto->pr_flags & PR_WANTRCVD) {
- /*
- * Copy - Paste, but can't relock and sleep in
- * sofree() in tcp(4) case. That's why tcp(4)
- * still rely on solock() for splicing and
- * unsplicing.
- */
-
- if (issplicedback(so)) {
- int freeing = SOSP_FREEING_WRITE;
-
- if (so->so_sp->ssp_soback == so)
- freeing |= SOSP_FREEING_READ;
- sounsplice(so->so_sp->ssp_soback, so, freeing);
- }
- if (isspliced(so)) {
- int freeing = SOSP_FREEING_READ;
-
- if (so == so->so_sp->ssp_socket)
- freeing |= SOSP_FREEING_WRITE;
- sounsplice(so, so->so_sp->ssp_socket, freeing);
- }
- goto free;
- }
-
sounlock(so);
mtx_enter(&so->so_snd.sb_mtx);
/*
@@ -530,7 +502,6 @@ notsplicedback:
solock(so);
}
-free:
#endif /* SOCKET_SPLICE */
/* sofree() calls sounlock(). */
sofree(so, 0);
@@ -1532,8 +1503,7 @@ sosplice(struct socket *so, int fd, off_
void
sounsplice(struct socket *so, struct socket *sosp, int freeing)
{
- if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
- sbassertlocked(&so->so_rcv);
+ sbassertlocked(&so->so_rcv);
soassertlocked(so);
task_del(sosplice_taskq, &so->so_splicetask);
@@ -1587,17 +1557,23 @@ sotask(void *arg)
*/
sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
- if (sockstream)
- solock(so);
-
if (so->so_rcv.sb_flags & SB_SPLICE) {
- if (sockstream)
+ struct socket *sosp = so->so_sp->ssp_socket;
+
+ if (sockstream) {
+ sblock(&sosp->so_snd, SBL_WAIT | SBL_NOINTR);
+ solock(so);
doyield = 1;
+ }
+
somove(so, M_DONTWAIT);
+
+ if (sockstream) {
+ sounlock(so);
+ sbunlock(&sosp->so_snd);
+ }
}
- if (sockstream)
- sounlock(so);
sbunlock(&so->so_rcv);
if (doyield) {
@@ -1652,8 +1628,10 @@ somove(struct socket *so, int wait)
if (sockdgram)
sbassertlocked(&so->so_rcv);
- else
+ else {
+ sbassertlocked(&sosp->so_snd);
soassertlocked(so);
+ }
mtx_enter(&so->so_rcv.sb_mtx);
mtx_enter(&sosp->so_snd.sb_mtx);
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer