From: Vitaliy Makkoveev Subject: Re: unlock tcp timer To: Alexander Bluhm Cc: tech@openbsd.org Date: Tue, 14 Jan 2025 03:02:48 +0300 > On 14 Jan 2025, at 02:48, Vitaliy Makkoveev wrote: > >> On 14 Jan 2025, at 02:01, Alexander Bluhm wrote: >> >> Hi, >> >> TCP timers can run with shared netlock and socket lock. Use common >> tcp_timer_enter() and tcp_timer_leave() that lock the socket and >> do refcounting. Then incpb and socket always exist. >> >> ok? >> > > ok mvs > Please wait, tcp_timer_enter() should check against *so, not so. +tcp_timer_enter(struct inpcb *inp, struct socket **so, struct tcpcb **tp, + u_int timer) +{ + KASSERT(timer < TCPT_NTIMERS); + + NET_LOCK_SHARED(); + *so = in_pcbsolock_ref(inp); + if (so == NULL) { + *tp = NULL; + return -1; + } >> bluhm >> >> Index: netinet/tcp_timer.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v >> diff -u -p -r1.80 tcp_timer.c >> --- netinet/tcp_timer.c 5 Jan 2025 12:18:48 -0000 1.80 >> +++ netinet/tcp_timer.c 13 Jan 2025 22:54:18 -0000 >> @@ -106,6 +106,36 @@ tcp_timer_init(void) >> tcp_delack_msecs = TCP_DELACK_MSECS; >> } >> >> +static inline int >> +tcp_timer_enter(struct inpcb *inp, struct socket **so, struct tcpcb **tp, >> + u_int timer) >> +{ >> + KASSERT(timer < TCPT_NTIMERS); >> + >> + NET_LOCK_SHARED(); >> + *so = in_pcbsolock_ref(inp); >> + if (so == NULL) { >> + *tp = NULL; >> + return -1; >> + } >> + *tp = intotcpcb(inp); >> + /* Ignore canceled timeouts or timeouts that have been rescheduled. */ >> + if (*tp == NULL || !ISSET((*tp)->t_flags, TF_TIMER << timer) || >> + timeout_pending(&(*tp)->t_timer[timer])) >> + return -1; >> + CLR((*tp)->t_flags, TF_TIMER << timer); >> + >> + return 0; >> +} >> + >> +static inline void >> +tcp_timer_leave(struct inpcb *inp, struct socket *so) >> +{ >> + in_pcbsounlock_rele(inp, so); >> + NET_UNLOCK_SHARED(); >> + in_pcbunref(inp); >> +} >> + >> /* >> * Callout to process delayed ACKs for a TCPCB. >> */ >> @@ -113,6 +143,7 @@ void >> tcp_timer_delack(void *arg) >> { >> struct inpcb *inp = arg; >> + struct socket *so; >> struct tcpcb *otp = NULL, *tp; >> short ostate; >> >> @@ -121,15 +152,10 @@ tcp_timer_delack(void *arg) >> * for whatever reason, it will restart the delayed >> * ACK callout. >> */ >> - NET_LOCK(); >> - tp = intotcpcb(inp); >> - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ >> - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) || >> - timeout_pending(&tp->t_timer[TCPT_DELACK])) >> + if (tcp_timer_enter(inp, &so, &tp, TCPT_DELACK)) >> goto out; >> - CLR(tp->t_flags, TF_TMR_DELACK); >> >> - if (inp->inp_socket->so_options & SO_DEBUG) { >> + if (so->so_options & SO_DEBUG) { >> otp = tp; >> ostate = tp->t_state; >> } >> @@ -138,8 +164,7 @@ tcp_timer_delack(void *arg) >> if (otp) >> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0); >> out: >> - NET_UNLOCK(); >> - in_pcbunref(inp); >> + tcp_timer_leave(inp, so); >> } >> >> /* >> @@ -199,19 +224,15 @@ void >> tcp_timer_rexmt(void *arg) >> { >> struct inpcb *inp = arg; >> + struct socket *so; >> struct tcpcb *otp = NULL, *tp; >> - uint32_t rto; >> short ostate; >> + uint32_t rto; >> >> - NET_LOCK(); >> - tp = intotcpcb(inp); >> - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ >> - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) || >> - timeout_pending(&tp->t_timer[TCPT_REXMT])) >> + if (tcp_timer_enter(inp, &so, &tp, TCPT_REXMT)) >> goto out; >> - CLR(tp->t_flags, TF_TMR_REXMT); >> >> - if ((tp->t_flags & TF_PMTUD_PEND) && inp && >> + if ((tp->t_flags & TF_PMTUD_PEND) && >> SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && >> SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) { >> struct sockaddr_in sin; >> @@ -249,7 +270,7 @@ tcp_timer_rexmt(void *arg) >> tp->t_softerror : ETIMEDOUT); >> goto out; >> } >> - if (inp->inp_socket->so_options & SO_DEBUG) { >> + if (so->so_options & SO_DEBUG) { >> otp = tp; >> ostate = tp->t_state; >> } >> @@ -270,13 +291,13 @@ tcp_timer_rexmt(void *arg) >> * lots more sophisticated searching to find the right >> * value here... >> */ >> - if (ip_mtudisc && inp && >> + if (ip_mtudisc && >> TCPS_HAVEESTABLISHED(tp->t_state) && >> tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { >> struct rtentry *rt = NULL; >> >> /* No data to send means path mtu is not a problem */ >> - if (!inp->inp_socket->so_snd.sb_cc) >> + if (!READ_ONCE(so->so_snd.sb_cc)) >> goto leave; >> >> rt = in_pcbrtentry(inp); >> @@ -391,31 +412,26 @@ tcp_timer_rexmt(void *arg) >> if (otp) >> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0); >> out: >> - NET_UNLOCK(); >> - in_pcbunref(inp); >> + tcp_timer_leave(inp, so); >> } >> >> void >> tcp_timer_persist(void *arg) >> { >> struct inpcb *inp = arg; >> + struct socket *so; >> struct tcpcb *otp = NULL, *tp; >> - uint32_t rto; >> short ostate; >> uint64_t now; >> + uint32_t rto; >> >> - NET_LOCK(); >> - tp = intotcpcb(inp); >> - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ >> - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) || >> - timeout_pending(&tp->t_timer[TCPT_PERSIST])) >> + if (tcp_timer_enter(inp, &so, &tp, TCPT_PERSIST)) >> goto out; >> - CLR(tp->t_flags, TF_TMR_PERSIST); >> >> if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) >> goto out; >> >> - if (inp->inp_socket->so_options & SO_DEBUG) { >> + if (so->so_options & SO_DEBUG) { >> otp = tp; >> ostate = tp->t_state; >> } >> @@ -445,26 +461,21 @@ tcp_timer_persist(void *arg) >> if (otp) >> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0); >> out: >> - NET_UNLOCK(); >> - in_pcbunref(inp); >> + tcp_timer_leave(inp, so); >> } >> >> void >> tcp_timer_keep(void *arg) >> { >> struct inpcb *inp = arg; >> + struct socket *so; >> struct tcpcb *otp = NULL, *tp; >> short ostate; >> >> - NET_LOCK(); >> - tp = intotcpcb(inp); >> - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ >> - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) || >> - timeout_pending(&tp->t_timer[TCPT_KEEP])) >> + if (tcp_timer_enter(inp, &so, &tp, TCPT_KEEP)) >> goto out; >> - CLR(tp->t_flags, TF_TMR_KEEP); >> >> - if (inp->inp_socket->so_options & SO_DEBUG) { >> + if (so->so_options & SO_DEBUG) { >> otp = tp; >> ostate = tp->t_state; >> } >> @@ -475,7 +486,7 @@ tcp_timer_keep(void *arg) >> goto out; >> } >> if ((atomic_load_int(&tcp_always_keepalive) || >> - inp->inp_socket->so_options & SO_KEEPALIVE) && >> + so->so_options & SO_KEEPALIVE) && >> tp->t_state <= TCPS_CLOSING) { >> int maxidle; >> uint64_t now; >> @@ -509,28 +520,23 @@ tcp_timer_keep(void *arg) >> if (otp) >> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0); >> out: >> - NET_UNLOCK(); >> - in_pcbunref(inp); >> + tcp_timer_leave(inp, so); >> } >> >> void >> tcp_timer_2msl(void *arg) >> { >> struct inpcb *inp = arg; >> + struct socket *so; >> struct tcpcb *otp = NULL, *tp; >> short ostate; >> - int maxidle; >> uint64_t now; >> + int maxidle; >> >> - NET_LOCK(); >> - tp = intotcpcb(inp); >> - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ >> - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) || >> - timeout_pending(&tp->t_timer[TCPT_2MSL])) >> + if (tcp_timer_enter(inp, &so, &tp, TCPT_2MSL)) >> goto out; >> - CLR(tp->t_flags, TF_TMR_2MSL); >> >> - if (inp->inp_socket->so_options & SO_DEBUG) { >> + if (so->so_options & SO_DEBUG) { >> otp = tp; >> ostate = tp->t_state; >> } >> @@ -546,8 +552,7 @@ tcp_timer_2msl(void *arg) >> if (otp) >> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0); >> out: >> - NET_UNLOCK(); >> - in_pcbunref(inp); >> + tcp_timer_leave(inp, so); >> } >> >> void >> >