Download raw body.
[patch 2 of 3] iscsid(8): avoid double free & race during cleanup
[patch 2 of 3] iscsid(8): avoid double free & race during cleanup
[patch 2 of 3] iscsid(8): avoid double free & race during cleanup
On Fri, Jan 17, 2025 at 06:04:32PM +1030, Jack Burton wrote:
> On Thu, 16 Jan 2025 21:27:34 +0100
> Claudio Jeker <cjeker@diehard.n-r-g.com> wrote:
>
> > On Thu, Jan 16, 2025 at 01:37:13PM +1030, Jack Burton wrote:
> > > On Wed, 15 Jan 2025 14:46:02 +0100
> > > Claudio Jeker <cjeker@diehard.n-r-g.com> wrote:
> > > > On Tue, Dec 31, 2024 at 06:33:08PM +1030, Jack Burton wrote:
> > > > > Fixing that one unmasked a latent race condition during
> > > > > shutdown. Depending on whether session reestablishment attempts
> > > > > result in any new connections entering the login phase before
> > > > > ISCSI_EXIT_WAIT expires, EITHER conn_free() would get called
> > > > > multiple times on the same connection OR there'd be an outright
> > > > > access after free.
> > > > >
> > > > > To avoid adding extra states to the fsm, simply check
> > > > > exit_rounds (which gets set in shutdown_cb) before freeing
> > > > > connections in sess_do_free(): if it's set then we're about to
> > > > > exit anyway, so it doesn't matter that some connections may get
> > > > > reallocated without freeing the old ones.
> > > >
> > > > Uhm. I don't understand how conn_free() can be called multiple
> > > > times.
> > >
> > > sess_do_free() calls conn_free() in the loop for each connection in
> > > the TAILQ. Then sess_do_conn_fail() calls conn_free() because
> > > connection state is CONN_FREE.
> > >
> > > I don't really understand why sess_do_conn_fail() is getting called
> > > at all, as after sess_do_free() the current session state should be
> > > SESS_FREE (not SESS_RUNNING) ... but somehow it does get called.
> > >
> >
> > Ah, I see. The problem is that sessev has another pointer to the
> > connection and the sessev is not removed in conn_free().
> > I need to reread the code to see how we can fix that mess.
> > But what happens is that we have a event queued with a pointer to a
> > connection that was already freed.
>
> Thanks, that's just the clue I needed!
>
> You're right, it is a bit of a mess. Not your fault though: most of
> that mess is necessary if we want to keep the fsm reasonably true to the
> standard.
>
> Simplest way I could think of to avoid the double free (*without* the sledge-hammer approach of my original patch #2) was to keep track
> of any one-shot event queued for each connection, dequeuing it (if any)
> in conn_free().
>
> [I am of course assuming that no connection ever gets more than one
> one-shot event pending at the same time ... but in my testing I have not
> been able to produce any scenario that violates that assumption.]
>
> The patch below does just that. With this patch applied I get clean
> shutdowns on SIGTERM, regardless of which target(s) iscsid is connected
> to and regardless of whether each of them is in login, running, or
> failed when I send the signal.
>
> Is this a reasonable approach for the time being?
>
I took the liberty and cleaned up this a bit further. I added a struct
sessev to the session and connection structs and use those for the
session_fsm. So now a pending sessev event can be removed in
connection_free.
What you think about that? Can you test this against the non-netbsd
targets?
--
:wq Claudio
Index: connection.c
===================================================================
RCS file: /cvs/src/usr.sbin/iscsid/connection.c,v
diff -u -p -r1.22 connection.c
--- connection.c 16 Jan 2025 16:19:39 -0000 1.22
+++ connection.c 20 Jan 2025 10:40:02 -0000
@@ -71,6 +71,10 @@ conn_new(struct session *s, struct conne
c->his = iscsi_conn_defaults;
c->active = iscsi_conn_defaults;
+ c->sev.sess = s;
+ c->sev.conn = c;
+ evtimer_set(&c->sev.ev, session_fsm_callback, &c->sev);
+
TAILQ_INIT(&c->pdu_w);
TAILQ_INIT(&c->tasks);
TAILQ_INSERT_TAIL(&s->connections, c, entry);
@@ -113,6 +117,7 @@ conn_free(struct connection *c)
pdu_readbuf_free(&c->prbuf);
pdu_free_queue(&c->pdu_w);
+ event_del(&c->sev.ev);
event_del(&c->ev);
event_del(&c->wev);
if (c->fd != -1)
@@ -435,7 +440,7 @@ c_do_connect(struct connection *c, enum
if (c->fd == -1) {
log_warnx("connect(%s), lost socket",
log_sockaddr(&c->config.TargetAddr));
- session_fsm(c->session, SESS_EV_CONN_FAIL, c, 0);
+ session_fsm(&c->sev, SESS_EV_CONN_FAIL, 0);
return CONN_FREE;
}
if (c->config.LocalAddr.ss_len != 0) {
@@ -443,7 +448,7 @@ c_do_connect(struct connection *c, enum
c->config.LocalAddr.ss_len) == -1) {
log_warn("bind(%s)",
log_sockaddr(&c->config.LocalAddr));
- session_fsm(c->session, SESS_EV_CONN_FAIL, c, 0);
+ session_fsm(&c->sev, SESS_EV_CONN_FAIL, 0);
return CONN_FREE;
}
}
@@ -456,7 +461,7 @@ c_do_connect(struct connection *c, enum
} else {
log_warn("connect(%s)",
log_sockaddr(&c->config.TargetAddr));
- session_fsm(c->session, SESS_EV_CONN_FAIL, c, 0);
+ session_fsm(&c->sev, SESS_EV_CONN_FAIL, 0);
return CONN_FREE;
}
}
@@ -477,7 +482,7 @@ int
c_do_loggedin(struct connection *c, enum c_event ev)
{
iscsi_merge_conn_params(&c->active, &c->mine, &c->his);
- session_fsm(c->session, SESS_EV_CONN_LOGGED_IN, c, 0);
+ session_fsm(&c->sev, SESS_EV_CONN_LOGGED_IN, 0);
return CONN_LOGGED_IN;
}
@@ -525,7 +530,7 @@ c_do_fail(struct connection *c, enum c_e
taskq_cleanup(&c->tasks);
/* session will take care of cleaning up the mess */
- session_fsm(c->session, SESS_EV_CONN_FAIL, c, 0);
+ session_fsm(&c->sev, SESS_EV_CONN_FAIL, 0);
if (ev == CONN_EV_FREE || c->state & CONN_NEVER_LOGGED_IN)
return CONN_FREE;
Index: initiator.c
===================================================================
RCS file: /cvs/src/usr.sbin/iscsid/initiator.c,v
diff -u -p -r1.16 initiator.c
--- initiator.c 16 Jan 2025 16:19:39 -0000 1.16
+++ initiator.c 20 Jan 2025 10:42:20 -0000
@@ -512,10 +512,10 @@ initiator_logout_cb(struct connection *c
case ISCSI_LOGOUT_RESP_SUCCESS:
if (tl->reason == ISCSI_LOGOUT_CLOSE_SESS) {
conn_fsm(c, CONN_EV_LOGGED_OUT);
- session_fsm(c->session, SESS_EV_CLOSED, NULL, 0);
+ session_fsm(&c->session->sev, SESS_EV_CLOSED, 0);
} else {
conn_fsm(tl->c, CONN_EV_LOGGED_OUT);
- session_fsm(c->session, SESS_EV_CONN_CLOSED, tl->c, 0);
+ session_fsm(&tl->c->sev, SESS_EV_CONN_CLOSED, 0);
}
break;
case ISCSI_LOGOUT_RESP_UNKN_CID:
Index: iscsid.c
===================================================================
RCS file: /cvs/src/usr.sbin/iscsid/iscsid.c,v
diff -u -p -r1.22 iscsid.c
--- iscsid.c 16 Apr 2021 14:37:06 -0000 1.22
+++ iscsid.c 20 Jan 2025 10:42:40 -0000
@@ -259,7 +259,7 @@ iscsid_ctrl_dispatch(void *ch, struct pd
session_config(s, sc);
if (s->state == SESS_INIT)
- session_fsm(s, SESS_EV_START, NULL, 0);
+ session_fsm(&s->sev, SESS_EV_START, 0);
control_compose(ch, CTRL_SUCCESS, NULL, 0);
break;
Index: iscsid.h
===================================================================
RCS file: /cvs/src/usr.sbin/iscsid/iscsid.h,v
diff -u -p -r1.20 iscsid.h
--- iscsid.h 16 Jan 2025 16:19:39 -0000 1.20
+++ iscsid.h 20 Jan 2025 10:39:04 -0000
@@ -226,6 +226,7 @@ struct initiator {
};
struct sessev {
+ struct event ev;
struct session *sess;
struct connection *conn;
enum s_event event;
@@ -233,6 +234,7 @@ struct sessev {
struct session {
TAILQ_ENTRY(session) entry;
+ struct sessev sev;
struct connection_head connections;
struct taskq tasks;
struct session_config config;
@@ -264,6 +266,7 @@ struct session_poll {
struct connection {
struct event ev;
struct event wev;
+ struct sessev sev;
TAILQ_ENTRY(connection) entry;
struct connection_params mine;
struct connection_params his;
@@ -349,8 +352,8 @@ void session_config(struct session *, st
void session_task_issue(struct session *, struct task *);
void session_logout_issue(struct session *, struct task *);
void session_schedule(struct session *);
-void session_fsm(struct session *, enum s_event, struct connection *,
- unsigned int);
+void session_fsm(struct sessev *, enum s_event, unsigned int);
+void session_fsm_callback(int, short, void *);
void conn_new(struct session *, struct connection_config *);
void conn_free(struct connection *);
Index: session.c
===================================================================
RCS file: /cvs/src/usr.sbin/iscsid/session.c,v
diff -u -p -r1.10 session.c
--- session.c 16 Jan 2025 16:17:32 -0000 1.10
+++ session.c 20 Jan 2025 10:43:33 -0000
@@ -35,7 +35,6 @@
#include "iscsid.h"
#include "log.h"
-void session_fsm_callback(int, short, void *);
int sess_do_start(struct session *, struct sessev *);
int sess_do_conn_loggedin(struct session *, struct sessev *);
int sess_do_conn_fail(struct session *, struct sessev *);
@@ -75,6 +74,9 @@ session_new(struct initiator *i, u_int8_
s->initiator = i;
s->state = SESS_INIT;
+ s->sev.sess = s;
+ evtimer_set(&s->sev.ev, session_fsm_callback, &s->sev);
+
if (st == SESSION_TYPE_DISCOVERY)
s->target = 0;
else
@@ -204,25 +206,20 @@ session_schedule(struct session *s)
* The session FSM runs from a callback so that the connection FSM can finish.
*/
void
-session_fsm(struct session *s, enum s_event ev, struct connection *c,
- unsigned int timeout)
+session_fsm(struct sessev *sev, enum s_event event, unsigned int timeout)
{
+ struct session *s = sev->sess;
struct timeval tv;
- struct sessev *sev;
log_debug("session_fsm[%s]: %s ev %s timeout %d",
s->config.SessionName, sess_state(s->state),
- sess_event(ev), timeout);
+ sess_event(event), timeout);
- if ((sev = malloc(sizeof(*sev))) == NULL)
- fatal("session_fsm");
- sev->conn = c;
- sev->sess = s;
- sev->event = ev;
+ sev->event = event;
timerclear(&tv);
tv.tv_sec = timeout;
- if (event_once(-1, EV_TIMEOUT, session_fsm_callback, sev, &tv) == -1)
+ if (evtimer_add(&sev->ev, &tv) == -1)
fatal("session_fsm");
}
@@ -276,8 +273,6 @@ session_fsm_callback(int fd, short event
sess_state(s->state), sess_event(sev->event));
fatalx("bjork bjork bjork");
}
- free(sev);
-log_debug("sess_fsm: done");
}
int
@@ -360,7 +355,7 @@ sess_do_conn_fail(struct session *s, str
state = SESS_LOGGED_IN;
}
- session_fsm(s, SESS_EV_START, NULL, s->holdTimer);
+ session_fsm(&s->sev, SESS_EV_START, s->holdTimer);
/* exponential back-off on constant failure */
if (s->holdTimer < ISCSID_HOLD_TIME_MAX)
s->holdTimer = s->holdTimer ? s->holdTimer * 2 : 1;
[patch 2 of 3] iscsid(8): avoid double free & race during cleanup
[patch 2 of 3] iscsid(8): avoid double free & race during cleanup
[patch 2 of 3] iscsid(8): avoid double free & race during cleanup