From: Claudio Jeker Subject: Re: domain protocol timer unlock To: Alexander Bluhm Cc: tech@openbsd.org Date: Fri, 17 May 2024 21:14:31 +0200 On Fri, May 17, 2024 at 08:23:50PM +0200, Alexander Bluhm wrote: > anyone? > > On Wed, Apr 24, 2024 at 11:38:22AM +0200, Alexander Bluhm wrote: > > Hi, > > > > The protocol slow and fast timer do not need kernel lock. Either > > they have their own mutex or grab exclusive net lock. > > > > icmp6_error() is MP safe, use shared net lock in frag6 tiemeout. > > > > Lockless access to globals in igmp and mld6 timers should use read > > once. > > > > ok? > > > > bluhm > > > > Index: kern/uipc_domain.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_domain.c,v > > diff -u -p -r1.65 uipc_domain.c > > --- kern/uipc_domain.c 11 Jan 2024 14:15:11 -0000 1.65 > > +++ kern/uipc_domain.c 23 Apr 2024 18:06:32 -0000 > > @@ -90,8 +90,10 @@ domaininit(void) > > max_linkhdr = 64; > > > > max_hdr = max_linkhdr + max_protohdr; > > - timeout_set_proc(&pffast_timeout, pffasttimo, &pffast_timeout); > > - timeout_set_proc(&pfslow_timeout, pfslowtimo, &pfslow_timeout); > > + timeout_set_flags(&pffast_timeout, pffasttimo, &pffast_timeout, > > + KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); > > + timeout_set_flags(&pfslow_timeout, pfslowtimo, &pfslow_timeout, > > + KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); > > timeout_add(&pffast_timeout, 1); > > timeout_add(&pfslow_timeout, 1); > > } > > Index: netinet/igmp.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp.c,v > > diff -u -p -r1.83 igmp.c > > --- netinet/igmp.c 16 Sep 2023 09:33:27 -0000 1.83 > > +++ netinet/igmp.c 23 Apr 2024 18:09:06 -0000 > > @@ -533,7 +533,7 @@ igmp_fasttimo(void) > > * lock intentionally. In case it is not set due to MP races, we may > > * miss to check the timers. Then run the loop at next fast timeout. > > */ > > - if (!igmp_timers_are_running) > > + if (!READ_ONCE(igmp_timers_are_running)) > > return; This can't be correct. I know what you want but that is not the right way of doing this check. Which daemon are you removing by using READ_ONCE() here? > > > > NET_LOCK(); > > Index: netinet6/frag6.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/frag6.c,v > > diff -u -p -r1.88 frag6.c > > --- netinet6/frag6.c 26 Mar 2024 23:48:49 -0000 1.88 > > +++ netinet6/frag6.c 23 Apr 2024 17:54:50 -0000 > > @@ -130,7 +130,8 @@ frag6_input(struct mbuf **mp, int *offp, > > > > /* jumbo payload can't contain a fragment header */ > > if (ip6->ip6_plen == 0) { > > - icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); > > + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, > > + offset); > > return IPPROTO_DONE; > > } > > > > @@ -544,10 +545,10 @@ frag6_freef(struct ip6q *q6) > > ip6->ip6_src = q6->ip6q_src; > > ip6->ip6_dst = q6->ip6q_dst; > > > > - NET_LOCK(); > > + NET_LOCK_SHARED(); > > icmp6_error(m, ICMP6_TIME_EXCEEDED, > > ICMP6_TIME_EXCEED_REASSEMBLY, 0); > > - NET_UNLOCK(); > > + NET_UNLOCK_SHARED(); > > } else > > m_freem(m); > > pool_put(&ip6af_pool, af6); > > Index: netinet6/icmp6.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/icmp6.c,v > > diff -u -p -r1.252 icmp6.c > > --- netinet6/icmp6.c 21 Apr 2024 17:32:10 -0000 1.252 > > +++ netinet6/icmp6.c 23 Apr 2024 18:08:32 -0000 > > @@ -1198,7 +1198,6 @@ icmp6_reflect(struct mbuf **mp, size_t o > > void > > icmp6_fasttimo(void) > > { > > - > > mld6_fasttimeo(); > > } > > > > Index: netinet6/mld6.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/mld6.c,v > > diff -u -p -r1.62 mld6.c > > --- netinet6/mld6.c 13 Feb 2024 12:22:09 -0000 1.62 > > +++ netinet6/mld6.c 23 Apr 2024 18:06:48 -0000 > > @@ -340,7 +340,7 @@ mld6_fasttimeo(void) > > * lock intentionally. In case it is not set due to MP races, we may > > * miss to check the timers. Then run the loop at next fast timeout. > > */ > > - if (!mld6_timers_are_running) > > + if (!READ_ONCE(mld6_timers_are_running)) > > return; > > > > NET_LOCK(); Same here. These constant timeouts are just horrible. We now have a timeout API that can handle timeouts at scale and we don't need to tick every 200ms for nothing. -- :wq Claudio