Index | Thread | Search

From:
Vitaliy Makkoveev <otto@bsdbox.dev>
Subject:
Re: unlock tcp timer
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Tue, 14 Jan 2025 03:02:48 +0300

Download raw body.

Thread

> On 14 Jan 2025, at 02:48, Vitaliy Makkoveev <otto@bsdbox.dev> wrote:
> 
>> On 14 Jan 2025, at 02:01, Alexander Bluhm <bluhm@openbsd.org> wrote:
>> 
>> Hi,
>> 
>> TCP timers can run with shared netlock and socket lock.  Use common
>> tcp_timer_enter() and tcp_timer_leave() that lock the socket and
>> do refcounting.  Then incpb and socket always exist.
>> 
>> ok?
>> 
> 
> ok mvs
> 

Please wait, tcp_timer_enter() should check against *so, not so.

+tcp_timer_enter(struct inpcb *inp, struct socket **so, struct tcpcb **tp,
+    u_int timer)
+{
+	KASSERT(timer < TCPT_NTIMERS);
+
+	NET_LOCK_SHARED();
+	*so = in_pcbsolock_ref(inp);
+	if (so == NULL) {
+		*tp = NULL;
+		return -1;
+	}



>> bluhm
>> 
>> Index: netinet/tcp_timer.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
>> diff -u -p -r1.80 tcp_timer.c
>> --- netinet/tcp_timer.c	5 Jan 2025 12:18:48 -0000	1.80
>> +++ netinet/tcp_timer.c	13 Jan 2025 22:54:18 -0000
>> @@ -106,6 +106,36 @@ tcp_timer_init(void)
>> 		tcp_delack_msecs = TCP_DELACK_MSECS;
>> }
>> 
>> +static inline int
>> +tcp_timer_enter(struct inpcb *inp, struct socket **so, struct tcpcb **tp,
>> +    u_int timer)
>> +{
>> +	KASSERT(timer < TCPT_NTIMERS);
>> +
>> +	NET_LOCK_SHARED();
>> +	*so = in_pcbsolock_ref(inp);
>> +	if (so == NULL) {
>> +		*tp = NULL;
>> +		return -1;
>> +	}
>> +	*tp = intotcpcb(inp);
>> +	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
>> +	if (*tp == NULL || !ISSET((*tp)->t_flags, TF_TIMER << timer) ||
>> +	    timeout_pending(&(*tp)->t_timer[timer]))
>> +		return -1;
>> +	CLR((*tp)->t_flags, TF_TIMER << timer);
>> +
>> +	return 0;
>> +}
>> +
>> +static inline void
>> +tcp_timer_leave(struct inpcb *inp, struct socket *so)
>> +{
>> +	in_pcbsounlock_rele(inp, so);
>> +	NET_UNLOCK_SHARED();
>> +	in_pcbunref(inp);
>> +}
>> +
>> /*
>> * Callout to process delayed ACKs for a TCPCB.
>> */
>> @@ -113,6 +143,7 @@ void
>> tcp_timer_delack(void *arg)
>> {
>> 	struct inpcb *inp = arg;
>> +	struct socket *so;
>> 	struct tcpcb *otp = NULL, *tp;
>> 	short ostate;
>> 
>> @@ -121,15 +152,10 @@ tcp_timer_delack(void *arg)
>> 	 * for whatever reason, it will restart the delayed
>> 	 * ACK callout.
>> 	 */
>> -	NET_LOCK();
>> -	tp = intotcpcb(inp);
>> -	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
>> -	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) ||
>> -	    timeout_pending(&tp->t_timer[TCPT_DELACK]))
>> +	if (tcp_timer_enter(inp, &so, &tp, TCPT_DELACK))
>> 		goto out;
>> -	CLR(tp->t_flags, TF_TMR_DELACK);
>> 
>> -	if (inp->inp_socket->so_options & SO_DEBUG) {
>> +	if (so->so_options & SO_DEBUG) {
>> 		otp = tp;
>> 		ostate = tp->t_state;
>> 	}
>> @@ -138,8 +164,7 @@ tcp_timer_delack(void *arg)
>> 	if (otp)
>> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0);
>> out:
>> -	NET_UNLOCK();
>> -	in_pcbunref(inp);
>> +	tcp_timer_leave(inp, so);
>> }
>> 
>> /*
>> @@ -199,19 +224,15 @@ void
>> tcp_timer_rexmt(void *arg)
>> {
>> 	struct inpcb *inp = arg;
>> +	struct socket *so;
>> 	struct tcpcb *otp = NULL, *tp;
>> -	uint32_t rto;
>> 	short ostate;
>> +	uint32_t rto;
>> 
>> -	NET_LOCK();
>> -	tp = intotcpcb(inp);
>> -	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
>> -	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) ||
>> -	    timeout_pending(&tp->t_timer[TCPT_REXMT]))
>> +	if (tcp_timer_enter(inp, &so, &tp, TCPT_REXMT))
>> 		goto out;
>> -	CLR(tp->t_flags, TF_TMR_REXMT);
>> 
>> -	if ((tp->t_flags & TF_PMTUD_PEND) && inp &&
>> +	if ((tp->t_flags & TF_PMTUD_PEND) &&
>> 	    SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
>> 	    SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) {
>> 		struct sockaddr_in sin;
>> @@ -249,7 +270,7 @@ tcp_timer_rexmt(void *arg)
>> 		    tp->t_softerror : ETIMEDOUT);
>> 		goto out;
>> 	}
>> -	if (inp->inp_socket->so_options & SO_DEBUG) {
>> +	if (so->so_options & SO_DEBUG) {
>> 		otp = tp;
>> 		ostate = tp->t_state;
>> 	}
>> @@ -270,13 +291,13 @@ tcp_timer_rexmt(void *arg)
>> 	 * lots more sophisticated searching to find the right
>> 	 * value here...
>> 	 */
>> -	if (ip_mtudisc && inp &&
>> +	if (ip_mtudisc &&
>> 	    TCPS_HAVEESTABLISHED(tp->t_state) &&
>> 	    tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
>> 		struct rtentry *rt = NULL;
>> 
>> 		/* No data to send means path mtu is not a problem */
>> -		if (!inp->inp_socket->so_snd.sb_cc)
>> +		if (!READ_ONCE(so->so_snd.sb_cc))
>> 			goto leave;
>> 
>> 		rt = in_pcbrtentry(inp);
>> @@ -391,31 +412,26 @@ tcp_timer_rexmt(void *arg)
>> 	if (otp)
>> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0);
>> out:
>> -	NET_UNLOCK();
>> -	in_pcbunref(inp);
>> +	tcp_timer_leave(inp, so);
>> }
>> 
>> void
>> tcp_timer_persist(void *arg)
>> {
>> 	struct inpcb *inp = arg;
>> +	struct socket *so;
>> 	struct tcpcb *otp = NULL, *tp;
>> -	uint32_t rto;
>> 	short ostate;
>> 	uint64_t now;
>> +	uint32_t rto;
>> 
>> -	NET_LOCK();
>> -	tp = intotcpcb(inp);
>> -	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
>> -	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) ||
>> -	    timeout_pending(&tp->t_timer[TCPT_PERSIST]))
>> +	if (tcp_timer_enter(inp, &so, &tp, TCPT_PERSIST))
>> 		goto out;
>> -	CLR(tp->t_flags, TF_TMR_PERSIST);
>> 
>> 	if (TCP_TIMER_ISARMED(tp, TCPT_REXMT))
>> 		goto out;
>> 
>> -	if (inp->inp_socket->so_options & SO_DEBUG) {
>> +	if (so->so_options & SO_DEBUG) {
>> 		otp = tp;
>> 		ostate = tp->t_state;
>> 	}
>> @@ -445,26 +461,21 @@ tcp_timer_persist(void *arg)
>> 	if (otp)
>> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0);
>> out:
>> -	NET_UNLOCK();
>> -	in_pcbunref(inp);
>> +	tcp_timer_leave(inp, so);
>> }
>> 
>> void
>> tcp_timer_keep(void *arg)
>> {
>> 	struct inpcb *inp = arg;
>> +	struct socket *so;
>> 	struct tcpcb *otp = NULL, *tp;
>> 	short ostate;
>> 
>> -	NET_LOCK();
>> -	tp = intotcpcb(inp);
>> -	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
>> -	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) ||
>> -	    timeout_pending(&tp->t_timer[TCPT_KEEP]))
>> +	if (tcp_timer_enter(inp, &so, &tp, TCPT_KEEP))
>> 		goto out;
>> -	CLR(tp->t_flags, TF_TMR_KEEP);
>> 
>> -	if (inp->inp_socket->so_options & SO_DEBUG) {
>> +	if (so->so_options & SO_DEBUG) {
>> 		otp = tp;
>> 		ostate = tp->t_state;
>> 	}
>> @@ -475,7 +486,7 @@ tcp_timer_keep(void *arg)
>> 		goto out;
>> 	}
>> 	if ((atomic_load_int(&tcp_always_keepalive) ||
>> -	    inp->inp_socket->so_options & SO_KEEPALIVE) &&
>> +	    so->so_options & SO_KEEPALIVE) &&
>> 	    tp->t_state <= TCPS_CLOSING) {
>> 		int maxidle;
>> 		uint64_t now;
>> @@ -509,28 +520,23 @@ tcp_timer_keep(void *arg)
>> 	if (otp)
>> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0);
>> out:
>> -	NET_UNLOCK();
>> -	in_pcbunref(inp);
>> +	tcp_timer_leave(inp, so);
>> }
>> 
>> void
>> tcp_timer_2msl(void *arg)
>> {
>> 	struct inpcb *inp = arg;
>> +	struct socket *so;
>> 	struct tcpcb *otp = NULL, *tp;
>> 	short ostate;
>> -	int maxidle;
>> 	uint64_t now;
>> +	int maxidle;
>> 
>> -	NET_LOCK();
>> -	tp = intotcpcb(inp);
>> -	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
>> -	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) ||
>> -	    timeout_pending(&tp->t_timer[TCPT_2MSL]))
>> +	if (tcp_timer_enter(inp, &so, &tp, TCPT_2MSL))
>> 		goto out;
>> -	CLR(tp->t_flags, TF_TMR_2MSL);
>> 
>> -	if (inp->inp_socket->so_options & SO_DEBUG) {
>> +	if (so->so_options & SO_DEBUG) {
>> 		otp = tp;
>> 		ostate = tp->t_state;
>> 	}
>> @@ -546,8 +552,7 @@ tcp_timer_2msl(void *arg)
>> 	if (otp)
>> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0);
>> out:
>> -	NET_UNLOCK();
>> -	in_pcbunref(inp);
>> +	tcp_timer_leave(inp, so);
>> }
>> 
>> void
>> 
>