Index | Thread | Search

From:
Florian Obser <florian@openbsd.org>
Subject:
Re: retire TCP flag TF_BLOCKOUTPUT
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Tue, 16 Sep 2025 11:29:01 +0200

Download raw body.

Thread
It does what it says on the lid and compiles.

OK florian


On 2025-09-15 20:21 +02, Alexander Bluhm <bluhm@openbsd.org> wrote:
> On Wed, Sep 10, 2025 at 11:05:34PM +0200, Alexander Bluhm wrote:
>> Hi,
>> 
>> In these commits I introduced the TF_BLOCKOUTPUT flag.
>> 
>> ----------------------------
>> revision 1.240
>> date: 2011/01/07 17:50:42;  author: bluhm;  state: Exp;  lines: +24 -12;
>> Add socket option SO_SPLICE to splice together two TCP sockets.
>> The data received on the source socket will automatically be sent
>> on the drain socket.  This allows to write relay daemons with zero
>> data copy.
>> ok markus@
>> ----------------------------
>> revision 1.351
>> date: 2017/11/08 20:19:58;  author: bluhm;  state: Exp;  lines: +13 -1;  commitid: 66vXWlesN8Tu2Rfn;
>> The TF_BLOCKOUTPUT flag is set around all sorwakeup() and sowwakeup()
>> calls in tcp_input().  When I added this code for socket splicing,
>> I have missed that they may be called indirectly through functions.
>> Although not strictly necessary since we have the sosplice thread,
>> put that flag consistently when we want to prevent that tcp_output()
>> is called in the middle of tcp_input().  As soisconnected(),
>> soisdisconnected(), and socantrcvmore() call the wakeup functions
>> from tcp_input(), set the TF_BLOCKOUTPUT flag around them.
>> OK visa@
>> ----------------------------
>> 
>> It was needed to prevent calls from tcp_input() via sowakeup() to
>> tcp_output().  Now we always use a socket splicing thread running
>> tcp_output() deferred.  Also socket lock prevents the that anything
>> runs in concurrently.  So TF_BLOCKOUTPUT is not needed anymore.
>> 
>> Without this diff I have also seen dangling TCP sockets in CLOSING
>> state after running regress/sys/netinet/tcpthread.  For some reason
>> their TCP timers were not set.  With this change the problem is
>> gone.
>> 
>> ok?
>
> Anyone?  It is a diff with - only :-)
>
> bluhm
>
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.461 tcp_input.c
> --- netinet/tcp_input.c	18 Aug 2025 13:54:01 -0000	1.461
> +++ netinet/tcp_input.c	15 Sep 2025 18:18:00 -0000
> @@ -344,9 +344,7 @@ tcp_flush_queue(struct tcpcb *tp)
>  		pool_put(&tcpqe_pool, q);
>  		q = nq;
>  	} while (q != NULL && q->tcpqe_tcp->th_seq == tp->rcv_nxt);
> -	tp->t_flags |= TF_BLOCKOUTPUT;
>  	sorwakeup(so);
> -	tp->t_flags &= ~TF_BLOCKOUTPUT;
>  	return (flags);
>  }
>  
> @@ -1079,11 +1077,8 @@ findpcb:
>  					TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
>  
>  				tcp_update_sndspace(tp);
> -				if (sb_notify(&so->so_snd)) {
> -					tp->t_flags |= TF_BLOCKOUTPUT;
> +				if (sb_notify(&so->so_snd))
>  					sowwakeup(so);
> -					tp->t_flags &= ~TF_BLOCKOUTPUT;
> -				}
>  				if (so->so_snd.sb_cc ||
>  				    tp->t_flags & TF_NEEDOUTPUT)
>  					(void) tcp_output(tp);
> @@ -1136,9 +1131,7 @@ findpcb:
>  				sbappendstream(&so->so_rcv, m);
>  				mtx_leave(&so->so_rcv.sb_mtx);
>  			}
> -			tp->t_flags |= TF_BLOCKOUTPUT;
>  			sorwakeup(so);
> -			tp->t_flags &= ~TF_BLOCKOUTPUT;
>  			if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
>  				(void) tcp_output(tp);
>  			if (solocked != NULL)
> @@ -1258,9 +1251,7 @@ findpcb:
>  
>  		if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
>  			tcpstat_inc(tcps_connects);
> -			tp->t_flags |= TF_BLOCKOUTPUT;
>  			soisconnected(so);
> -			tp->t_flags &= ~TF_BLOCKOUTPUT;
>  			tp->t_state = TCPS_ESTABLISHED;
>  			TCP_TIMER_ARM(tp, TCPT_KEEP,
>  			    atomic_load_int(&tcp_keepidle));
> @@ -1547,9 +1538,7 @@ trimthenstep6:
>  	 */
>  	case TCPS_SYN_RECEIVED:
>  		tcpstat_inc(tcps_connects);
> -		tp->t_flags |= TF_BLOCKOUTPUT;
>  		soisconnected(so);
> -		tp->t_flags &= ~TF_BLOCKOUTPUT;
>  		tp->t_state = TCPS_ESTABLISHED;
>  		TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepidle));
>  		/* Do window scaling? */
> @@ -1839,11 +1828,8 @@ trimthenstep6:
>  		}
>  
>  		tcp_update_sndspace(tp);
> -		if (sb_notify(&so->so_snd)) {
> -			tp->t_flags |= TF_BLOCKOUTPUT;
> +		if (sb_notify(&so->so_snd))
>  			sowwakeup(so);
> -			tp->t_flags &= ~TF_BLOCKOUTPUT;
> -		}
>  
>  		/*
>  		 * If we had a pending ICMP message that referred to data
> @@ -1889,9 +1875,7 @@ trimthenstep6:
>  				if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
>  					int maxidle;
>  
> -					tp->t_flags |= TF_BLOCKOUTPUT;
>  					soisdisconnected(so);
> -					tp->t_flags &= ~TF_BLOCKOUTPUT;
>  					maxidle = TCPTV_KEEPCNT *
>  					    atomic_load_int(&tcp_keepidle);
>  					TCP_TIMER_ARM(tp, TCPT_2MSL, maxidle);
> @@ -1911,9 +1895,7 @@ trimthenstep6:
>  				tp->t_state = TCPS_TIME_WAIT;
>  				tcp_canceltimers(tp);
>  				TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL);
> -				tp->t_flags |= TF_BLOCKOUTPUT;
>  				soisdisconnected(so);
> -				tp->t_flags &= ~TF_BLOCKOUTPUT;
>  			}
>  			break;
>  
> @@ -2056,9 +2038,7 @@ dodata:							/* XXX */
>  				sbappendstream(&so->so_rcv, m);
>  				mtx_leave(&so->so_rcv.sb_mtx);
>  			}
> -			tp->t_flags |= TF_BLOCKOUTPUT;
>  			sorwakeup(so);
> -			tp->t_flags &= ~TF_BLOCKOUTPUT;
>  		} else {
>  			m_adj(m, hdroptlen);
>  			tiflags = tcp_reass(tp, th, m, &tlen);
> @@ -2091,9 +2071,7 @@ dodata:							/* XXX */
>  	 */
>  	if ((tiflags & TH_FIN) && TCPS_HAVEESTABLISHED(tp->t_state)) {
>  		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
> -			tp->t_flags |= TF_BLOCKOUTPUT;
>  			socantrcvmore(so);
> -			tp->t_flags &= ~TF_BLOCKOUTPUT;
>  			tp->t_flags |= TF_ACKNOW;
>  			tp->rcv_nxt++;
>  		}
> @@ -2123,9 +2101,7 @@ dodata:							/* XXX */
>  			tp->t_state = TCPS_TIME_WAIT;
>  			tcp_canceltimers(tp);
>  			TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL);
> -			tp->t_flags |= TF_BLOCKOUTPUT;
>  			soisdisconnected(so);
> -			tp->t_flags &= ~TF_BLOCKOUTPUT;
>  			break;
>  
>  		/*
> Index: netinet/tcp_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
> diff -u -p -r1.156 tcp_output.c
> --- netinet/tcp_output.c	8 Jul 2025 00:47:41 -0000	1.156
> +++ netinet/tcp_output.c	15 Sep 2025 18:17:22 -0000
> @@ -210,12 +210,6 @@ tcp_output(struct tcpcb *tp)
>  #endif
>  	int tso;
>  
> -	if (tp->t_flags & TF_BLOCKOUTPUT) {
> -		tp->t_flags |= TF_NEEDOUTPUT;
> -		return (0);
> -	} else
> -		tp->t_flags &= ~TF_NEEDOUTPUT;
> -
>  #if defined(TCP_SIGNATURE) && defined(DIAGNOSTIC)
>  	if (tp->sack_enable && (tp->t_flags & TF_SIGNATURE))
>  		return (EINVAL);
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.195 tcp_var.h
> --- netinet/tcp_var.h	18 Jun 2025 16:15:46 -0000	1.195
> +++ netinet/tcp_var.h	15 Sep 2025 18:17:22 -0000
> @@ -96,7 +96,6 @@ struct tcpcb {
>  #define TF_LASTIDLE	0x00100000U	/* no outstanding ACK on last send */
>  #define TF_PMTUD_PEND	0x00400000U	/* Path MTU Discovery pending */
>  #define TF_NEEDOUTPUT	0x00800000U	/* call tcp_output after tcp_input */
> -#define TF_BLOCKOUTPUT	0x01000000U	/* avert tcp_output during tcp_input */
>  #define TF_NOPUSH	0x02000000U	/* don't push */
>  #define TF_TMR_REXMT	0x04000000U	/* retransmit timer armed */
>  #define TF_TMR_PERSIST	0x08000000U	/* retransmit persistence timer armed */
>

-- 
In my defence, I have been left unsupervised.