From: Nick Owens Subject: Re: relayd reload race crash To: bugs@openbsd.org, tech@openbsd.org Cc: rafael@sizeofvoid.org Date: Sat, 28 Feb 2026 22:59:22 -0800 On Tue, Feb 10, 2026 at 04:34:08PM -0800, Nick Owens wrote: > On Tue, Feb 03, 2026 at 10:15:20PM -0800, Nick Owens wrote: > > On Wed, Feb 04, 2026 at 03:51:11PM +1000, Paul W. Rankin wrote: > > > > On 04/02/2026 1:58 PM AEST mischief () offblast ! org wrote: > > > > this was brought up by rnkn on IRC, > > > > > > I'm just looping myself in here. Thanks. > > > > > > > one approach is to return negative cko_tlen back to rsae_send_imsg. this > > might result in some broken tls connections, but it's better than taking > > down all of relayd. > > > > a better fix would probably be to temporarily stop processing rsa ops > > altogether while reload is happening, but i'm not sure of a good > > approach for that. > > > > Index: ca.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/relayd/ca.c,v > > diff -u -p -r1.45 ca.c > > --- ca.c 21 Nov 2024 13:21:34 -0000 1.45 > > +++ ca.c 4 Feb 2026 06:03:33 -0000 > > @@ -234,9 +234,15 @@ ca_dispatch_relay(int fd, struct privsep > > fatalx("%s: invalid relay proc", __func__); > > if (IMSG_DATA_SIZE(imsg) != (sizeof(cko) + cko.cko_flen)) > > fatalx("%s: invalid key operation", __func__); > > - if ((pkey = pkey_find(env, cko.cko_hash)) == NULL) > > - fatalx("%s: invalid relay hash '%s'", > > + if ((pkey = pkey_find(env, cko.cko_hash)) == NULL) { > > + log_warnx("%s: invalid relay hash '%s'", > > __func__, cko.cko_hash); > > + cko.cko_tlen = -1; > > + if (proc_compose_imsg(env->sc_ps, PROC_RELAY, cko.cko_proc, > > + imsg->hdr.type, -1, -1, &cko, sizeof(cko)) == -1) > > + log_warn("%s: proc_composev_imsg", __func__); > > + break; > > + } > > if ((rsa = EVP_PKEY_get1_RSA(pkey)) == NULL) > > fatalx("%s: invalid relay key", __func__); > > > > > > ping. 
here is an alternate approach: stop processing imsgs from PROC_RELAY by removing their events during reload. when testing, i still had client connections get empty replies, get closed early, or have the rsa op time out; however, relayd no longer crashes. ping again. i hope this will be fixed for the next release. > > > diff --git a/usr.sbin/relayd/ca.c b/usr.sbin/relayd/ca.c > index e54259c5971..89f7f48685e 100644 > --- a/usr.sbin/relayd/ca.c > +++ b/usr.sbin/relayd/ca.c > @@ -204,9 +204,11 @@ ca_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg) > break; > case IMSG_CTL_START: > ca_launch(); > + proc_unblock(p->p_ps, PROC_RELAY); > break; > case IMSG_CTL_RESET: > config_getreset(env, imsg); > + proc_block(p->p_ps, PROC_RELAY); > break; > default: > return -1; > diff --git a/usr.sbin/relayd/proc.c b/usr.sbin/relayd/proc.c > index 3eb00aa0381..56a49120cb4 100644 > --- a/usr.sbin/relayd/proc.c > +++ b/usr.sbin/relayd/proc.c > @@ -682,6 +682,38 @@ proc_dispatch_null(int fd, struct privsep_proc *p, struct imsg *imsg) > return (-1); > } > > +void > +proc_block(struct privsep *ps, enum privsep_procid id) > +{ > + struct imsgev *iev; > + int n, m; > + > + n = -1; > + > + proc_range(ps, id, &n, &m); > + > + for(; n < m; n++){ > + iev = &ps->ps_ievs[id][n]; > + event_del(&iev->ev); > + } > +} > + > +void > +proc_unblock(struct privsep *ps, enum privsep_procid id) > +{ > + struct imsgev *iev; > + int n, m; > + > + n = -1; > + > + proc_range(ps, id, &n, &m); > + > + for(; n < m; n++){ > + iev = &ps->ps_ievs[id][n]; > + event_add(&iev->ev, NULL); > + } > +} > + > /* > * imsg helper functions > */ > diff --git a/usr.sbin/relayd/relayd.h b/usr.sbin/relayd/relayd.h > index 3b5c3987f93..697c3bd8b48 100644 > --- a/usr.sbin/relayd/relayd.h > +++ b/usr.sbin/relayd/relayd.h > @@ -1451,6 +1451,8 @@ int imsg_compose_event(struct imsgev *, uint16_t, uint32_t, > pid_t, int, void *, uint16_t); > int imsg_composev_event(struct imsgev *, uint16_t, uint32_t, > pid_t, int, const 
struct iovec *, int); > +void proc_block(struct privsep *, enum privsep_procid); > +void proc_unblock(struct privsep *, enum privsep_procid); > > /* config.c */ > int config_init(struct relayd *);