Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
retire TCP flag TF_BLOCKOUTPUT
To:
tech@openbsd.org
Date:
Wed, 10 Sep 2025 23:05:34 +0200

Download raw body.

Thread
Hi,

In these commits I introduced the TF_BLOCKOUTPUT flag.

----------------------------
revision 1.240
date: 2011/01/07 17:50:42;  author: bluhm;  state: Exp;  lines: +24 -12;
Add socket option SO_SPLICE to splice together two TCP sockets.
The data received on the source socket will automatically be sent
on the drain socket.  This allows to write relay daemons with zero
data copy.
ok markus@
----------------------------
revision 1.351
date: 2017/11/08 20:19:58;  author: bluhm;  state: Exp;  lines: +13 -1;  commitid: 66vXWlesN8Tu2Rfn;
The TF_BLOCKOUTPUT flag is set around all sorwakeup() and sowwakeup()
calls in tcp_input().  When I added this code for socket splicing,
I have missed that they may be called indirectly through functions.
Although not strictly necessary since we have the sosplice thread,
put that flag consistently when we want to prevent that tcp_output()
is called in the middle of tcp_input().  As soisconnected(),
soisdisconnected(), and socantrcvmore() call the wakeup functions
from tcp_input(), set the TF_BLOCKOUTPUT flag around them.
OK visa@
----------------------------

It was needed to prevent calls from tcp_input() via sowakeup() to
tcp_output().  Now we always use a socket splicing thread running
tcp_output() deferred.  Also socket lock prevents the that anything
runs in concurrently.  So TF_BLOCKOUTPUT is not needed anymore.

Without this diff I have also seen dangling TCP sockets in CLOSING
state after running regress/sys/netinet/tcpthread.  For some reason
their TCP timers were not set.  With this change the problem is
gone.

ok?

bluhm

Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.461 tcp_input.c
--- netinet/tcp_input.c	18 Aug 2025 13:54:01 -0000	1.461
+++ netinet/tcp_input.c	10 Sep 2025 18:37:57 -0000
@@ -344,9 +344,7 @@ tcp_flush_queue(struct tcpcb *tp)
 		pool_put(&tcpqe_pool, q);
 		q = nq;
 	} while (q != NULL && q->tcpqe_tcp->th_seq == tp->rcv_nxt);
-	tp->t_flags |= TF_BLOCKOUTPUT;
 	sorwakeup(so);
-	tp->t_flags &= ~TF_BLOCKOUTPUT;
 	return (flags);
 }
 
@@ -1080,9 +1078,7 @@ findpcb:
 
 				tcp_update_sndspace(tp);
 				if (sb_notify(&so->so_snd)) {
-					tp->t_flags |= TF_BLOCKOUTPUT;
 					sowwakeup(so);
-					tp->t_flags &= ~TF_BLOCKOUTPUT;
 				}
 				if (so->so_snd.sb_cc ||
 				    tp->t_flags & TF_NEEDOUTPUT)
@@ -1136,9 +1132,7 @@ findpcb:
 				sbappendstream(&so->so_rcv, m);
 				mtx_leave(&so->so_rcv.sb_mtx);
 			}
-			tp->t_flags |= TF_BLOCKOUTPUT;
 			sorwakeup(so);
-			tp->t_flags &= ~TF_BLOCKOUTPUT;
 			if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
 				(void) tcp_output(tp);
 			if (solocked != NULL)
@@ -1258,9 +1252,7 @@ findpcb:
 
 		if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
 			tcpstat_inc(tcps_connects);
-			tp->t_flags |= TF_BLOCKOUTPUT;
 			soisconnected(so);
-			tp->t_flags &= ~TF_BLOCKOUTPUT;
 			tp->t_state = TCPS_ESTABLISHED;
 			TCP_TIMER_ARM(tp, TCPT_KEEP,
 			    atomic_load_int(&tcp_keepidle));
@@ -1547,9 +1539,7 @@ trimthenstep6:
 	 */
 	case TCPS_SYN_RECEIVED:
 		tcpstat_inc(tcps_connects);
-		tp->t_flags |= TF_BLOCKOUTPUT;
 		soisconnected(so);
-		tp->t_flags &= ~TF_BLOCKOUTPUT;
 		tp->t_state = TCPS_ESTABLISHED;
 		TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepidle));
 		/* Do window scaling? */
@@ -1839,11 +1829,8 @@ trimthenstep6:
 		}
 
 		tcp_update_sndspace(tp);
-		if (sb_notify(&so->so_snd)) {
-			tp->t_flags |= TF_BLOCKOUTPUT;
+		if (sb_notify(&so->so_snd))
 			sowwakeup(so);
-			tp->t_flags &= ~TF_BLOCKOUTPUT;
-		}
 
 		/*
 		 * If we had a pending ICMP message that referred to data
@@ -1889,9 +1876,7 @@ trimthenstep6:
 				if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
 					int maxidle;
 
-					tp->t_flags |= TF_BLOCKOUTPUT;
 					soisdisconnected(so);
-					tp->t_flags &= ~TF_BLOCKOUTPUT;
 					maxidle = TCPTV_KEEPCNT *
 					    atomic_load_int(&tcp_keepidle);
 					TCP_TIMER_ARM(tp, TCPT_2MSL, maxidle);
@@ -1911,9 +1896,7 @@ trimthenstep6:
 				tp->t_state = TCPS_TIME_WAIT;
 				tcp_canceltimers(tp);
 				TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL);
-				tp->t_flags |= TF_BLOCKOUTPUT;
 				soisdisconnected(so);
-				tp->t_flags &= ~TF_BLOCKOUTPUT;
 			}
 			break;
 
@@ -2056,9 +2039,7 @@ dodata:							/* XXX */
 				sbappendstream(&so->so_rcv, m);
 				mtx_leave(&so->so_rcv.sb_mtx);
 			}
-			tp->t_flags |= TF_BLOCKOUTPUT;
 			sorwakeup(so);
-			tp->t_flags &= ~TF_BLOCKOUTPUT;
 		} else {
 			m_adj(m, hdroptlen);
 			tiflags = tcp_reass(tp, th, m, &tlen);
@@ -2091,9 +2072,7 @@ dodata:							/* XXX */
 	 */
 	if ((tiflags & TH_FIN) && TCPS_HAVEESTABLISHED(tp->t_state)) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
-			tp->t_flags |= TF_BLOCKOUTPUT;
 			socantrcvmore(so);
-			tp->t_flags &= ~TF_BLOCKOUTPUT;
 			tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt++;
 		}
@@ -2123,9 +2102,7 @@ dodata:							/* XXX */
 			tp->t_state = TCPS_TIME_WAIT;
 			tcp_canceltimers(tp);
 			TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL);
-			tp->t_flags |= TF_BLOCKOUTPUT;
 			soisdisconnected(so);
-			tp->t_flags &= ~TF_BLOCKOUTPUT;
 			break;
 
 		/*
Index: netinet/tcp_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
diff -u -p -r1.156 tcp_output.c
--- netinet/tcp_output.c	8 Jul 2025 00:47:41 -0000	1.156
+++ netinet/tcp_output.c	10 Sep 2025 18:37:57 -0000
@@ -210,12 +210,6 @@ tcp_output(struct tcpcb *tp)
 #endif
 	int tso;
 
-	if (tp->t_flags & TF_BLOCKOUTPUT) {
-		tp->t_flags |= TF_NEEDOUTPUT;
-		return (0);
-	} else
-		tp->t_flags &= ~TF_NEEDOUTPUT;
-
 #if defined(TCP_SIGNATURE) && defined(DIAGNOSTIC)
 	if (tp->sack_enable && (tp->t_flags & TF_SIGNATURE))
 		return (EINVAL);
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.195 tcp_var.h
--- netinet/tcp_var.h	18 Jun 2025 16:15:46 -0000	1.195
+++ netinet/tcp_var.h	10 Sep 2025 18:37:57 -0000
@@ -96,7 +96,6 @@ struct tcpcb {
 #define TF_LASTIDLE	0x00100000U	/* no outstanding ACK on last send */
 #define TF_PMTUD_PEND	0x00400000U	/* Path MTU Discovery pending */
 #define TF_NEEDOUTPUT	0x00800000U	/* call tcp_output after tcp_input */
-#define TF_BLOCKOUTPUT	0x01000000U	/* avert tcp_output during tcp_input */
 #define TF_NOPUSH	0x02000000U	/* don't push */
 #define TF_TMR_REXMT	0x04000000U	/* retransmit timer armed */
 #define TF_TMR_PERSIST	0x08000000U	/* retransmit persistence timer armed */