From: Claudio Jeker Subject: bgpd: split session.c in two To: tech@openbsd.org Date: Tue, 25 Feb 2025 15:31:29 +0100 session.c is too big and I would like to reuse code for another project. So I decided to split out the BGP protocol bits into session_bgp.c and session.c holds the rest of the code (especially the imsg bits). Sorry the diff is huge and hard to review. -- :wq Claudio Index: Makefile =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/Makefile,v diff -u -p -r1.40 Makefile --- Makefile 20 Feb 2025 19:47:31 -0000 1.40 +++ Makefile 24 Feb 2025 15:56:57 -0000 @@ -1,17 +1,47 @@ # $OpenBSD: Makefile,v 1.40 2025/02/20 19:47:31 claudio Exp $ PROG= bgpd -SRCS= bgpd.c session.c log.c logmsg.c parse.y config.c monotime.c \ - rde.c rde_rib.c rde_decide.c rde_prefix.c mrt.c kroute.c control.c \ - pfkey.c rde_update.c rde_attr.c rde_community.c printconf.c \ - rde_filter.c rde_sets.c rde_aspa.c rde_trie.c pftable.c name2id.c \ - util.c carp.c timer.c rde_peer.c rtr.c rtr_proto.c flowspec.c +SRCS= bgpd.c +SRCS+= carp.c +SRCS+= config.c +SRCS+= control.c +SRCS+= flowspec.c +SRCS+= kroute.c +SRCS+= log.c +SRCS+= logmsg.c +SRCS+= monotime.c +SRCS+= mrt.c +SRCS+= name2id.c +SRCS+= parse.y +SRCS+= pfkey.c +SRCS+= pftable.c +SRCS+= printconf.c +SRCS+= rde.c +SRCS+= rde_aspa.c +SRCS+= rde_attr.c +SRCS+= rde_community.c +SRCS+= rde_decide.c +SRCS+= rde_filter.c +SRCS+= rde_peer.c +SRCS+= rde_prefix.c +SRCS+= rde_rib.c +SRCS+= rde_sets.c +SRCS+= rde_trie.c +SRCS+= rde_update.c +SRCS+= rtr.c +SRCS+= rtr_proto.c +SRCS+= session.c +SRCS+= session_bgp.c +SRCS+= timer.c +SRCS+= util.c + CFLAGS+= -Wall -I${.CURDIR} CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes CFLAGS+= -Wmissing-declarations CFLAGS+= -Wshadow -Wpointer-arith -Wcast-qual CFLAGS+= -Wsign-compare YFLAGS= + LDADD+= -lutil DPADD+= ${LIBUTIL} MAN= bgpd.8 bgpd.conf.5 Index: session.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/session.c,v diff -u -p -r1.518 session.c --- session.c 20 Feb 2025 19:47:31 -0000 1.518 +++ session.c 25 Feb 2025 14:12:24 -0000 @@ -60,47 +60,17 @@ void session_sighdlr(int); int setup_listeners(u_int *); void init_peer(struct peer *, struct bgpd_config *); -void start_timer_holdtime(struct peer *); -void start_timer_sendholdtime(struct peer *); -void start_timer_keepalive(struct peer *); -void session_close_connection(struct peer *); -void change_state(struct peer *, enum session_state, enum session_events); int session_setup_socket(struct peer *); void session_accept(int); -int session_connect(struct peer *); -void session_tcp_established(struct peer *); -int session_capa_add(struct ibuf *, uint8_t, uint8_t); -struct ibuf *session_newmsg(enum msg_type, uint16_t); -void session_sendmsg(struct ibuf *, struct peer *, enum msg_type); -void session_open(struct peer *); -void session_keepalive(struct peer *); -void session_update(struct peer *, struct ibuf *); -void session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *); -void session_notification_data(struct peer *, uint8_t, uint8_t, void *, - size_t); -void session_rrefresh(struct peer *, uint8_t, uint8_t); int session_graceful_restart(struct peer *); int session_graceful_stop(struct peer *); -int session_dispatch_msg(struct pollfd *, struct peer *); -void session_process_msg(struct peer *); -struct ibuf *parse_header(struct ibuf *, void *, int *); -int parse_open(struct peer *, struct ibuf *); -int parse_update(struct peer *, struct ibuf *); -int parse_rrefresh(struct peer *, struct ibuf *); -void parse_notification(struct peer *, struct ibuf *); -int parse_capabilities(struct peer *, struct ibuf *, uint32_t *); -int capa_neg_calc(struct peer *); void session_dispatch_imsg(struct imsgbuf *, int, u_int *); -void session_up(struct peer *); -void session_down(struct peer *); int imsg_rde(int, uint32_t, void *, uint16_t); -void session_demote(struct peer *, int); void merge_peers(struct bgpd_config *, struct bgpd_config *); -int la_cmp(struct listen_addr *, struct listen_addr *); -void session_template_clone(struct peer *, struct sockaddr *, - uint32_t, uint32_t); -int session_match_mask(struct peer *, struct bgpd_addr *); +void session_template_clone(struct peer *, struct sockaddr *, + uint32_t, uint32_t); +int session_match_mask(struct peer *, struct bgpd_addr *); static struct bgpd_config *conf, *nconf; static struct imsgbuf *ibuf_rde; @@ -116,11 +86,6 @@ u_int peer_cnt; struct mrt_head mrthead; monotime_t pauseaccept; -static const uint8_t marker[MSGSIZE_HEADER_MARKER] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -}; - static inline int peer_compare(const struct peer *a, const struct peer *b) { @@ -429,6 +394,26 @@ session_main(int debug, int verbose) timeout = nextaction; } + /* check if peer needs throttling or not */ + if (!p->throttled && + msgbuf_queuelen(p->wbuf) > SESS_MSG_HIGH_MARK) { + if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == + -1) + log_peer_warn(&p->conf, + "imsg_compose XOFF"); + else + p->throttled = 1; + } + if (p->throttled && + msgbuf_queuelen(p->wbuf) < SESS_MSG_LOW_MARK) { + if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == + -1) + log_peer_warn(&p->conf, + "imsg_compose XON"); + else + p->throttled = 0; + } + /* are we waiting for a write? */ events = POLLIN; if (msgbuf_queuelen(p->wbuf) > 0 || @@ -600,6 +585,8 @@ init_peer(struct peer *p, struct bgpd_co p->conf.holdtime = c->holdtime; if (p->conf.min_holdtime == 0) p->conf.min_holdtime = c->min_holdtime; + p->connectretry = c->connectretry; + p->local_bgpid = c->bgpid; peer_cnt++; @@ -620,397 +607,85 @@ init_peer(struct peer *p, struct bgpd_co session_demote(p, +1); } -void -bgp_fsm(struct peer *peer, enum session_events event, struct ibuf *msg) +int +session_dispatch_msg(struct pollfd *pfd, struct peer *p) { - switch (peer->state) { - case STATE_NONE: - /* nothing */ - break; - case STATE_IDLE: - switch (event) { - case EVNT_START: - timer_stop(&peer->timers, Timer_Hold); - timer_stop(&peer->timers, Timer_SendHold); - timer_stop(&peer->timers, Timer_Keepalive); - timer_stop(&peer->timers, Timer_IdleHold); - - if (!peer->depend_ok) - timer_stop(&peer->timers, Timer_ConnectRetry); - else if (peer->passive || peer->conf.passive || - peer->conf.template) { - change_state(peer, STATE_ACTIVE, event); - timer_stop(&peer->timers, Timer_ConnectRetry); - } else { - change_state(peer, STATE_CONNECT, event); - timer_set(&peer->timers, Timer_ConnectRetry, - conf->connectretry); - session_connect(peer); + socklen_t len; + int error; + + if (p->state == STATE_CONNECT) { + if (pfd->revents & POLLOUT) { + if (pfd->revents & POLLIN) { + /* error occurred */ + len = sizeof(error); + if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, + &error, &len) == -1 || error) { + if (error) + errno = error; + if (errno != p->lasterr) { + log_peer_warn(&p->conf, + "socket error"); + p->lasterr = errno; + } + bgp_fsm(p, EVNT_CON_OPENFAIL, NULL); + return (1); + } } - peer->passive = 0; - break; - case EVNT_STOP: - timer_stop(&peer->timers, Timer_IdleHold); - break; - default: - /* ignore */ - break; - } - break; - case STATE_CONNECT: - switch (event) { - case EVNT_START: - /* ignore */ - break; - case EVNT_CON_OPEN: - session_tcp_established(peer); - session_open(peer); - timer_stop(&peer->timers, Timer_ConnectRetry); - peer->holdtime = INTERVAL_HOLD_INITIAL; - start_timer_holdtime(peer); - change_state(peer, STATE_OPENSENT, event); - break; - case EVNT_CON_OPENFAIL: - timer_set(&peer->timers, Timer_ConnectRetry, - conf->connectretry); - session_close_connection(peer); - change_state(peer, STATE_ACTIVE, event); - break; - case EVNT_TIMER_CONNRETRY: - timer_set(&peer->timers, Timer_ConnectRetry, - conf->connectretry); - session_connect(peer); - break; - default: - change_state(peer, STATE_IDLE, event); - break; - } - break; - case STATE_ACTIVE: - switch (event) { - case EVNT_START: - /* ignore */ - break; - case EVNT_CON_OPEN: - session_tcp_established(peer); - session_open(peer); - timer_stop(&peer->timers, Timer_ConnectRetry); - peer->holdtime = INTERVAL_HOLD_INITIAL; - start_timer_holdtime(peer); - change_state(peer, STATE_OPENSENT, event); - break; - case EVNT_CON_OPENFAIL: - timer_set(&peer->timers, Timer_ConnectRetry, - conf->connectretry); - session_close_connection(peer); - change_state(peer, STATE_ACTIVE, event); - break; - case EVNT_TIMER_CONNRETRY: - timer_set(&peer->timers, Timer_ConnectRetry, - peer->holdtime); - change_state(peer, STATE_CONNECT, event); - session_connect(peer); - break; - default: - change_state(peer, STATE_IDLE, event); - break; - } - break; - case STATE_OPENSENT: - switch (event) { - case EVNT_START: - /* ignore */ - break; - case EVNT_STOP: - change_state(peer, STATE_IDLE, event); - break; - case EVNT_CON_CLOSED: - session_close_connection(peer); - timer_set(&peer->timers, Timer_ConnectRetry, - conf->connectretry); - change_state(peer, STATE_ACTIVE, event); - break; - case EVNT_CON_FATAL: - change_state(peer, STATE_IDLE, event); - break; - case EVNT_TIMER_HOLDTIME: - session_notification(peer, ERR_HOLDTIMEREXPIRED, - 0, NULL); - change_state(peer, STATE_IDLE, event); - break; - case EVNT_TIMER_SENDHOLD: - session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, - 0, NULL); - change_state(peer, STATE_IDLE, event); - break; - case EVNT_RCVD_OPEN: - /* parse_open calls change_state itself on failure */ - if (parse_open(peer, msg)) - break; - session_keepalive(peer); - change_state(peer, STATE_OPENCONFIRM, event); - break; - case EVNT_RCVD_NOTIFICATION: - parse_notification(peer, msg); - break; - default: - session_notification(peer, - ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL); - change_state(peer, STATE_IDLE, event); - break; + bgp_fsm(p, EVNT_CON_OPEN, NULL); + return (1); } - break; - case STATE_OPENCONFIRM: - switch (event) { - case EVNT_START: - /* ignore */ - break; - case EVNT_STOP: - change_state(peer, STATE_IDLE, event); - break; - case EVNT_CON_CLOSED: - case EVNT_CON_FATAL: - change_state(peer, STATE_IDLE, event); - break; - case EVNT_TIMER_HOLDTIME: - session_notification(peer, ERR_HOLDTIMEREXPIRED, - 0, NULL); - change_state(peer, STATE_IDLE, event); - break; - case EVNT_TIMER_SENDHOLD: - session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, - 0, NULL); - change_state(peer, STATE_IDLE, event); - break; - case EVNT_TIMER_KEEPALIVE: - session_keepalive(peer); - break; - case EVNT_RCVD_KEEPALIVE: - start_timer_holdtime(peer); - change_state(peer, STATE_ESTABLISHED, event); - break; - case EVNT_RCVD_NOTIFICATION: - parse_notification(peer, msg); - break; - default: - session_notification(peer, - ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL); - change_state(peer, STATE_IDLE, event); - break; + if (pfd->revents & POLLHUP) { + bgp_fsm(p, EVNT_CON_OPENFAIL, NULL); + return (1); } - break; - case STATE_ESTABLISHED: - switch (event) { - case EVNT_START: - /* ignore */ - break; - case EVNT_STOP: - change_state(peer, STATE_IDLE, event); - break; - case EVNT_CON_CLOSED: - case EVNT_CON_FATAL: - change_state(peer, STATE_IDLE, event); - break; - case EVNT_TIMER_HOLDTIME: - session_notification(peer, ERR_HOLDTIMEREXPIRED, - 0, NULL); - change_state(peer, STATE_IDLE, event); - break; - case EVNT_TIMER_SENDHOLD: - session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, - 0, NULL); - change_state(peer, STATE_IDLE, event); - break; - case EVNT_TIMER_KEEPALIVE: - session_keepalive(peer); - break; - case EVNT_RCVD_KEEPALIVE: - start_timer_holdtime(peer); - break; - case EVNT_RCVD_UPDATE: - start_timer_holdtime(peer); - if (parse_update(peer, msg)) - change_state(peer, STATE_IDLE, event); - else - start_timer_holdtime(peer); - break; - case EVNT_RCVD_NOTIFICATION: - parse_notification(peer, msg); - break; - default: - session_notification(peer, - ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL); - change_state(peer, STATE_IDLE, event); - break; + if (pfd->revents & (POLLERR|POLLNVAL)) { + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return (1); } - break; + return (0); } -} - -void -start_timer_holdtime(struct peer *peer) -{ - if (peer->holdtime > 0) - timer_set(&peer->timers, Timer_Hold, peer->holdtime); - else - timer_stop(&peer->timers, Timer_Hold); -} - -void -start_timer_sendholdtime(struct peer *peer) -{ - uint16_t holdtime = INTERVAL_HOLD; - - if (peer->holdtime > INTERVAL_HOLD) - holdtime = peer->holdtime; - - if (peer->holdtime > 0) - timer_set(&peer->timers, Timer_SendHold, holdtime); - else - timer_stop(&peer->timers, Timer_SendHold); -} - -void -start_timer_keepalive(struct peer *peer) -{ - if (peer->holdtime > 0) - timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); - else - timer_stop(&peer->timers, Timer_Keepalive); -} -void -session_close_connection(struct peer *peer) -{ - if (peer->fd != -1) { - close(peer->fd); - pauseaccept = monotime_clear(); + if (pfd->revents & POLLHUP) { + bgp_fsm(p, EVNT_CON_CLOSED, NULL); + return (1); + } + if (pfd->revents & (POLLERR|POLLNVAL)) { + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return (1); } - peer->fd = -1; -} - -void -change_state(struct peer *peer, enum session_state state, - enum session_events event) -{ - switch (state) { - case STATE_IDLE: - /* carp demotion first. new peers handled in init_peer */ - if (peer->state == STATE_ESTABLISHED && - peer->conf.demote_group[0] && !peer->demoted) - session_demote(peer, +1); - - /* - * try to write out what's buffered (maybe a notification), - * don't bother if it fails - */ - if (peer->state >= STATE_OPENSENT && - msgbuf_queuelen(peer->wbuf) > 0) - ibuf_write(peer->fd, peer->wbuf); - /* - * we must start the timer for the next EVNT_START - * if we are coming here due to an error and the - * session was not established successfully before, the - * starttimerinterval needs to be exponentially increased - */ - if (peer->IdleHoldTime == 0) - peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; - peer->holdtime = INTERVAL_HOLD_INITIAL; - timer_stop(&peer->timers, Timer_ConnectRetry); - timer_stop(&peer->timers, Timer_Keepalive); - timer_stop(&peer->timers, Timer_Hold); - timer_stop(&peer->timers, Timer_SendHold); - timer_stop(&peer->timers, Timer_IdleHold); - timer_stop(&peer->timers, Timer_IdleHoldReset); - session_close_connection(peer); - msgbuf_clear(peer->wbuf); - peer->rpending = 0; - memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); - if (!peer->template) - imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, - peer->conf.id, 0, -1, NULL, 0); - - if (peer->state == STATE_ESTABLISHED) { - if (peer->capa.neg.grestart.restart == 2 && - (event == EVNT_CON_CLOSED || - event == EVNT_CON_FATAL || - (peer->capa.neg.grestart.grnotification && - (event == EVNT_RCVD_GRACE_NOTIFICATION || - event == EVNT_TIMER_HOLDTIME || - event == EVNT_TIMER_SENDHOLD)))) { - /* don't punish graceful restart */ - timer_set(&peer->timers, Timer_IdleHold, 0); - session_graceful_restart(peer); - } else if (event != EVNT_STOP) { - timer_set(&peer->timers, Timer_IdleHold, - peer->IdleHoldTime); - if (event != EVNT_NONE && - peer->IdleHoldTime < MAX_IDLE_HOLD/2) - peer->IdleHoldTime *= 2; - session_down(peer); - } else { - session_down(peer); - } - } else if (event != EVNT_STOP) { - timer_set(&peer->timers, Timer_IdleHold, - peer->IdleHoldTime); - if (event != EVNT_NONE && - peer->IdleHoldTime < MAX_IDLE_HOLD / 2) - peer->IdleHoldTime *= 2; + if (pfd->revents & POLLOUT && msgbuf_queuelen(p->wbuf) > 0) { + if (ibuf_write(p->fd, p->wbuf) == -1) { + if (errno == EPIPE) + log_peer_warnx(&p->conf, "Connection closed"); + else + log_peer_warn(&p->conf, "write error"); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return (1); } + p->stats.last_write = getmonotime(); + start_timer_sendholdtime(p); + if (!(pfd->revents & POLLIN)) + return (1); + } - if (peer->state == STATE_NONE || - peer->state == STATE_ESTABLISHED) { - /* initialize capability negotiation structures */ - memcpy(&peer->capa.ann, &peer->conf.capabilities, - sizeof(peer->capa.ann)); - } - break; - case STATE_CONNECT: - if (peer->state == STATE_ESTABLISHED && - peer->capa.neg.grestart.restart == 2) { - /* do the graceful restart dance */ - session_graceful_restart(peer); - peer->holdtime = INTERVAL_HOLD_INITIAL; - timer_stop(&peer->timers, Timer_ConnectRetry); - timer_stop(&peer->timers, Timer_Keepalive); - timer_stop(&peer->timers, Timer_Hold); - timer_stop(&peer->timers, Timer_SendHold); - timer_stop(&peer->timers, Timer_IdleHold); - timer_stop(&peer->timers, Timer_IdleHoldReset); - session_close_connection(peer); - msgbuf_clear(peer->wbuf); - memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); + if (p->fd != -1 && pfd->revents & POLLIN) { + switch (ibuf_read(p->fd, p->wbuf)) { + case -1: + if (p->state == STATE_IDLE) + /* error already handled before */ + return (1); + log_peer_warn(&p->conf, "read error"); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return (1); + case 0: + bgp_fsm(p, EVNT_CON_CLOSED, NULL); + return (1); } - break; - case STATE_ACTIVE: - if (!peer->template) - imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, - peer->conf.id, 0, -1, NULL, 0); - break; - case STATE_OPENSENT: - break; - case STATE_OPENCONFIRM: - break; - case STATE_ESTABLISHED: - timer_set(&peer->timers, Timer_IdleHoldReset, - peer->IdleHoldTime); - if (peer->demoted) - timer_set(&peer->timers, Timer_CarpUndemote, - INTERVAL_HOLD_DEMOTED); - session_up(peer); - break; - default: /* something seriously fucked */ - break; + p->stats.last_read = getmonotime(); + return (1); } - - log_statechange(peer, state, event); - - session_mrt_dump_state(peer, peer->state, state); - - peer->prev_state = peer->state; - peer->state = state; + return (0); } void @@ -1047,7 +722,7 @@ session_accept(int listenfd) (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { if (p->fd != -1) { if (p->state == STATE_CONNECT) - session_close_connection(p); + session_close(p); else { close(connfd); return; @@ -1136,15 +811,17 @@ session_connect(struct peer *peer) sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len); if (connect(peer->fd, sa, sa_len) == -1) { - if (errno != EINPROGRESS) { - if (errno != peer->lasterr) - log_peer_warn(&peer->conf, "connect"); - peer->lasterr = errno; - bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL); - return (-1); - } - } else - bgp_fsm(peer, EVNT_CON_OPEN, NULL); + if (errno == EINPROGRESS) + return (0); + + if (errno != peer->lasterr) + log_peer_warn(&peer->conf, "connect"); + peer->lasterr = errno; + bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL); + return (-1); + } + + bgp_fsm(peer, EVNT_CON_OPEN, NULL); return (0); } @@ -1243,6 +920,16 @@ session_setup_socket(struct peer *p) return (0); } +void +session_close(struct peer *peer) +{ + if (peer->fd != -1) { + close(peer->fd); + pauseaccept = monotime_clear(); + } + peer->fd = -1; +} + /* * compare the bgpd_addr with the sockaddr by converting the latter into * a bgpd_addr. Return true if the two are equal, including any scope @@ -1256,7 +943,7 @@ sa_equal(struct bgpd_addr *ba, struct so return (memcmp(ba, &bb, sizeof(*ba)) == 0); } -static void +void get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote, struct bgpd_addr *alt, unsigned int *scope) { @@ -1339,1679 +1026,95 @@ get_alternate_addr(struct bgpd_addr *loc freeifaddrs(ifap); } -void -session_tcp_established(struct peer *peer) -{ - struct sockaddr_storage ss; - socklen_t len; - - len = sizeof(ss); - if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) - log_warn("getsockname"); - sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); - len = sizeof(ss); - if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) - log_warn("getpeername"); - sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); - - get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt, - &peer->if_scope); -} - int -session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len) +session_hanlde_update(struct peer *peer, struct ibuf *msg) { - int errs = 0; - - errs += ibuf_add_n8(opb, capa_code); - errs += ibuf_add_n8(opb, capa_len); - return (errs); + /* + * we pass the message verbatim to the rde. + * in case of errors the whole session is reset with a + * notification anyway, we only need to know the peer + */ + if (imsg_rde(IMSG_UPDATE, peer->conf.id, ibuf_data(msg), + ibuf_size(msg)) == -1) + return (-1); + return (0); } -static int -session_capa_add_mp(struct ibuf *buf, uint8_t aid) +int +session_handle_rrefresh(struct peer *peer, struct route_refresh *rr) { - uint16_t afi; - uint8_t safi; - int errs = 0; - - if (aid2afi(aid, &afi, &safi) == -1) { - log_warn("%s: bad AID", __func__); + if (imsg_rde(IMSG_REFRESH, peer->conf.id, rr, sizeof(*rr)) == -1) return (-1); - } - - errs += ibuf_add_n16(buf, afi); - errs += ibuf_add_zero(buf, 1); - errs += ibuf_add_n8(buf, safi); - - return (errs); + return (0); } -static int -session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags) +int +session_graceful_restart(struct peer *p) { - int errs = 0; - uint16_t afi; - uint8_t safi; + uint8_t i; + uint16_t staletime = conf->staletime; - if (aid2afi(aid, &afi, &safi)) { - log_warn("%s: bad AID", __func__); - return (-1); - } + if (p->conf.staletime) + staletime = p->conf.staletime; - errs += ibuf_add_n16(b, afi); - errs += ibuf_add_n8(b, safi); - errs += ibuf_add_n8(b, flags); + /* RFC 8538: enforce configurable upper bound of the stale timer */ + if (staletime > p->capa.neg.grestart.timeout) + staletime = p->capa.neg.grestart.timeout; + timer_set(&p->timers, Timer_RestartTimeout, staletime); - return (errs); + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { + if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, + &i, sizeof(i)) == -1) + return -1; + log_peer_warnx(&p->conf, + "graceful restart of %s, keeping routes", + aid2str(i)); + p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; + } else if (p->capa.neg.mp[i]) { + if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id, + &i, sizeof(i)) == -1) + return -1; + log_peer_warnx(&p->conf, + "graceful restart of %s, flushing routes", + aid2str(i)); + } + } + return 0; } -static int -session_capa_add_ext_nh(struct ibuf *b, uint8_t aid) +int +session_graceful_stop(struct peer *p) { - int errs = 0; - uint16_t afi; - uint8_t safi; + uint8_t i; - if (aid2afi(aid, &afi, &safi)) { - log_warn("%s: bad AID", __func__); - return (-1); + for (i = AID_MIN; i < AID_MAX; i++) { + /* + * Only flush if the peer is restarting and the timeout fired. + * In all other cases the session was already flushed when the + * session went down or when the new open message was parsed. + */ + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) + if (session_graceful_flush(p, i, "time-out") == -1) + return -1; + p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; } - - errs += ibuf_add_n16(b, afi); - errs += ibuf_add_n16(b, safi); - errs += ibuf_add_n16(b, AFI_IPv6); - - return (errs); + return 0; } -struct ibuf * -session_newmsg(enum msg_type msgtype, uint16_t len) +int +session_graceful_flush(struct peer *p, uint8_t aid, const char *why) { - struct ibuf *buf; - int errs = 0; - - if ((buf = ibuf_open(len)) == NULL) - return (NULL); - - errs += ibuf_add(buf, marker, sizeof(marker)); - errs += ibuf_add_n16(buf, len); - errs += ibuf_add_n8(buf, msgtype); - - if (errs) { - ibuf_free(buf); - return (NULL); - } - - return (buf); -} + log_peer_warnx(&p->conf, "graceful restart of %s, %s, flushing", + aid2str(aid), why); + if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, &aid, sizeof(aid)) == -1) + return -1; + return 0; +} void -session_sendmsg(struct ibuf *msg, struct peer *p, enum msg_type msgtype) -{ - session_mrt_dump_bgp_msg(p, msg, msgtype, DIR_OUT); - - ibuf_close(p->wbuf, msg); - if (!p->throttled && msgbuf_queuelen(p->wbuf) > SESS_MSG_HIGH_MARK) { - if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) - log_peer_warn(&p->conf, "imsg_compose XOFF"); - else - p->throttled = 1; - } -} - -/* - * Translate between internal roles and the value expected by RFC 9234. - */ -static uint8_t -role2capa(enum role role) -{ - switch (role) { - case ROLE_CUSTOMER: - return CAPA_ROLE_CUSTOMER; - case ROLE_PROVIDER: - return CAPA_ROLE_PROVIDER; - case ROLE_RS: - return CAPA_ROLE_RS; - case ROLE_RS_CLIENT: - return CAPA_ROLE_RS_CLIENT; - case ROLE_PEER: - return CAPA_ROLE_PEER; - default: - fatalx("Unsupported role for role capability"); - } -} - -static enum role -capa2role(uint8_t val) -{ - switch (val) { - case CAPA_ROLE_PROVIDER: - return ROLE_PROVIDER; - case CAPA_ROLE_RS: - return ROLE_RS; - case CAPA_ROLE_RS_CLIENT: - return ROLE_RS_CLIENT; - case CAPA_ROLE_CUSTOMER: - return ROLE_CUSTOMER; - case CAPA_ROLE_PEER: - return ROLE_PEER; - default: - return ROLE_NONE; - } -} - -void -session_open(struct peer *p) -{ - struct ibuf *buf, *opb; - size_t len, optparamlen; - uint8_t i; - int errs = 0, extlen = 0; - int mpcapa = 0; - - - if ((opb = ibuf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN - 6)) == - NULL) { - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - /* multiprotocol extensions, RFC 4760 */ - for (i = AID_MIN; i < AID_MAX; i++) - if (p->capa.ann.mp[i]) { /* 4 bytes data */ - errs += session_capa_add(opb, CAPA_MP, 4); - errs += session_capa_add_mp(opb, i); - mpcapa++; - } - - /* route refresh, RFC 2918 */ - if (p->capa.ann.refresh) /* no data */ - errs += session_capa_add(opb, CAPA_REFRESH, 0); - - /* extended nexthop encoding, RFC 8950 */ - if (p->capa.ann.ext_nh[AID_INET]) { - uint8_t enhlen = 0; - - if (p->capa.ann.mp[AID_INET]) - enhlen += 6; - if (p->capa.ann.mp[AID_VPN_IPv4]) - enhlen += 6; - errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen); - if (p->capa.ann.mp[AID_INET]) - errs += session_capa_add_ext_nh(opb, AID_INET); - if (p->capa.ann.mp[AID_VPN_IPv4]) - errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4); - } - - /* extended message support, RFC 8654 */ - if (p->capa.ann.ext_msg) /* no data */ - errs += session_capa_add(opb, CAPA_EXT_MSG, 0); - - /* BGP open policy, RFC 9234, only for ebgp sessions */ - if (p->conf.ebgp && p->capa.ann.policy && - p->conf.role != ROLE_NONE && - (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] || - mpcapa == 0)) { - errs += session_capa_add(opb, CAPA_ROLE, 1); - errs += ibuf_add_n8(opb, role2capa(p->conf.role)); - } - - /* graceful restart and End-of-RIB marker, RFC 4724 */ - if (p->capa.ann.grestart.restart) { - int rst = 0; - uint16_t hdr = 0; - - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) - rst++; - } - - /* Only set the R-flag if no graceful restart is ongoing */ - if (!rst) - hdr |= CAPA_GR_R_FLAG; - if (p->capa.ann.grestart.grnotification) - hdr |= CAPA_GR_N_FLAG; - errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); - errs += ibuf_add_n16(opb, hdr); - } - - /* 4-bytes AS numbers, RFC6793 */ - if (p->capa.ann.as4byte) { /* 4 bytes data */ - errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t)); - errs += ibuf_add_n32(opb, p->conf.local_as); - } - - /* advertisement of multiple paths, RFC7911 */ - if (p->capa.ann.add_path[AID_MIN]) { /* variable */ - uint8_t aplen; - - if (mpcapa) - aplen = 4 * mpcapa; - else /* AID_INET */ - aplen = 4; - errs += session_capa_add(opb, CAPA_ADD_PATH, aplen); - if (mpcapa) { - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.ann.mp[i]) { - errs += session_capa_add_afi(opb, - i, p->capa.ann.add_path[i] & - CAPA_AP_MASK); - } - } - } else { /* AID_INET */ - errs += session_capa_add_afi(opb, AID_INET, - p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK); - } - } - - /* enhanced route-refresh, RFC7313 */ - if (p->capa.ann.enhanced_rr) /* no data */ - errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0); - - if (errs) { - ibuf_free(opb); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - optparamlen = ibuf_size(opb); - len = MSGSIZE_OPEN_MIN + optparamlen; - if (optparamlen == 0) { - /* nothing */ - } else if (optparamlen + 2 >= 255) { - /* RFC9072: use 255 as magic size and request extra header */ - optparamlen = 255; - extlen = 1; - /* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */ - len += 2 * 3; - } else { - /* regular capabilities header */ - optparamlen += 2; - len += 2; - } - - if ((buf = session_newmsg(BGP_OPEN, len)) == NULL) { - ibuf_free(opb); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - errs += ibuf_add_n8(buf, 4); - errs += ibuf_add_n16(buf, p->conf.local_short_as); - errs += ibuf_add_n16(buf, p->conf.holdtime); - /* is already in network byte order */ - errs += ibuf_add_n32(buf, conf->bgpid); - errs += ibuf_add_n8(buf, optparamlen); - - if (extlen) { - /* RFC9072 extra header which spans over the capabilities hdr */ - errs += ibuf_add_n8(buf, OPT_PARAM_EXT_LEN); - errs += ibuf_add_n16(buf, ibuf_size(opb) + 1 + 2); - } - - if (optparamlen) { - errs += ibuf_add_n8(buf, OPT_PARAM_CAPABILITIES); - - if (extlen) { - /* RFC9072: 2-byte extended length */ - errs += ibuf_add_n16(buf, ibuf_size(opb)); - } else { - errs += ibuf_add_n8(buf, ibuf_size(opb)); - } - errs += ibuf_add_ibuf(buf, opb); - } - - ibuf_free(opb); - - if (errs) { - ibuf_free(buf); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - session_sendmsg(buf, p, BGP_OPEN); - p->stats.msg_sent_open++; -} - -void -session_keepalive(struct peer *p) -{ - struct ibuf *buf; - - if ((buf = session_newmsg(BGP_KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL) { - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - session_sendmsg(buf, p, BGP_KEEPALIVE); - start_timer_keepalive(p); - p->stats.msg_sent_keepalive++; -} - -void -session_update(struct peer *p, struct ibuf *ibuf) -{ - struct ibuf *buf; - size_t len, maxsize = MAX_PKTSIZE; - - if (p->state != STATE_ESTABLISHED) - return; - - if (p->capa.neg.ext_msg) - maxsize = MAX_EXT_PKTSIZE; - len = ibuf_size(ibuf); - if (len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER || - len > maxsize - MSGSIZE_HEADER) { - log_peer_warnx(&p->conf, "bad UPDATE from RDE"); - return; - } - - if ((buf = session_newmsg(BGP_UPDATE, MSGSIZE_HEADER + len)) == NULL) { - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - if (ibuf_add_ibuf(buf, ibuf)) { - ibuf_free(buf); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - session_sendmsg(buf, p, BGP_UPDATE); - start_timer_keepalive(p); - p->stats.msg_sent_update++; -} - -/* Return 1 if a hard reset should be issued, 0 for a graceful notification */ -static int -session_req_hard_reset(enum err_codes errcode, uint8_t subcode) -{ - switch (errcode) { - case ERR_HEADER: - case ERR_OPEN: - case ERR_UPDATE: - case ERR_FSM: - case ERR_RREFRESH: - /* - * Protocol errors trigger a hard reset. The peer - * is not trustworthy and so there is no realistic - * hope that forwarding can continue. - */ - break; - case ERR_HOLDTIMEREXPIRED: - case ERR_SENDHOLDTIMEREXPIRED: - /* Keep forwarding and hope the other side is back soon. */ - return 0; - case ERR_CEASE: - switch (subcode) { - case ERR_CEASE_CONN_REJECT: - case ERR_CEASE_OTHER_CHANGE: - case ERR_CEASE_COLLISION: - case ERR_CEASE_RSRC_EXHAUST: - /* Per RFC8538 suggestion make these graceful. */ - return 0; - } - break; - } - return 1; -} - -void -session_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode, - void *data, size_t datalen) -{ - struct ibuf ibuf; - - ibuf_from_buffer(&ibuf, data, datalen); - session_notification(p, errcode, subcode, &ibuf); -} - -void -session_notification(struct peer *p, uint8_t errcode, uint8_t subcode, - struct ibuf *ibuf) -{ - struct ibuf *buf; - const char *reason = "sending"; - int errs = 0, need_hard_reset = 0; - size_t datalen = 0; - - switch (p->state) { - case STATE_OPENSENT: - case STATE_OPENCONFIRM: - case STATE_ESTABLISHED: - break; - default: - /* session not open, no need to send notification */ - log_notification(p, errcode, subcode, ibuf, "dropping"); - return; - } - - if (p->capa.neg.grestart.grnotification) { - if (session_req_hard_reset(errcode, subcode)) { - need_hard_reset = 1; - datalen += 2; - reason = "sending hard-reset"; - } else { - reason = "sending graceful"; - } - } - - log_notification(p, errcode, subcode, ibuf, reason); - - /* cap to maximum size */ - if (ibuf != NULL) { - if (ibuf_size(ibuf) > - MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN - datalen) { - log_peer_warnx(&p->conf, - "oversized notification, data trunkated"); - ibuf_truncate(ibuf, MAX_PKTSIZE - - MSGSIZE_NOTIFICATION_MIN - datalen); - } - datalen += ibuf_size(ibuf); - } - - if ((buf = session_newmsg(BGP_NOTIFICATION, - MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - if (need_hard_reset) { - errs += ibuf_add_n8(buf, ERR_CEASE); - errs += ibuf_add_n8(buf, ERR_CEASE_HARD_RESET); - } - - errs += ibuf_add_n8(buf, errcode); - errs += ibuf_add_n8(buf, subcode); - - if (ibuf != NULL) - errs += ibuf_add_ibuf(buf, ibuf); - - if (errs) { - ibuf_free(buf); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - session_sendmsg(buf, p, BGP_NOTIFICATION); - p->stats.msg_sent_notification++; - p->stats.last_sent_errcode = errcode; - p->stats.last_sent_suberr = subcode; -} - -int -session_neighbor_rrefresh(struct peer *p) -{ - uint8_t i; - - if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) - return (-1); - - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.neg.mp[i] != 0) - session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); - } - - return (0); -} - -void -session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype) -{ - struct ibuf *buf; - int errs = 0; - uint16_t afi; - uint8_t safi; - - switch (subtype) { - case ROUTE_REFRESH_REQUEST: - p->stats.refresh_sent_req++; - break; - case ROUTE_REFRESH_BEGIN_RR: - case ROUTE_REFRESH_END_RR: - /* requires enhanced route refresh */ - if (!p->capa.neg.enhanced_rr) - return; - if (subtype == ROUTE_REFRESH_BEGIN_RR) - p->stats.refresh_sent_borr++; - else - p->stats.refresh_sent_eorr++; - break; - default: - fatalx("session_rrefresh: bad subtype %d", subtype); - } - - if (aid2afi(aid, &afi, &safi) == -1) - fatalx("session_rrefresh: bad afi/safi pair"); - - if ((buf = session_newmsg(BGP_RREFRESH, MSGSIZE_RREFRESH)) == NULL) { - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - errs += ibuf_add_n16(buf, afi); - errs += ibuf_add_n8(buf, subtype); - errs += ibuf_add_n8(buf, safi); - - if (errs) { - ibuf_free(buf); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return; - } - - session_sendmsg(buf, p, BGP_RREFRESH); - p->stats.msg_sent_rrefresh++; -} - -int -session_graceful_restart(struct peer *p) -{ - uint8_t i; - uint16_t staletime = conf->staletime; - - if (p->conf.staletime) - staletime = p->conf.staletime; - - /* RFC 8538: enforce configurable upper bound of the stale timer */ - if (staletime > p->capa.neg.grestart.timeout) - staletime = p->capa.neg.grestart.timeout; - timer_set(&p->timers, Timer_RestartTimeout, staletime); - - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { - if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, - &i, sizeof(i)) == -1) - return (-1); - log_peer_warnx(&p->conf, - "graceful restart of %s, keeping routes", - aid2str(i)); - p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; - } else if (p->capa.neg.mp[i]) { - if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id, - &i, sizeof(i)) == -1) - return (-1); - log_peer_warnx(&p->conf, - "graceful restart of %s, flushing routes", - aid2str(i)); - } - } - return (0); -} - -int -session_graceful_stop(struct peer *p) -{ - uint8_t i; - - for (i = AID_MIN; i < AID_MAX; i++) { - /* - * Only flush if the peer is restarting and the timeout fired. - * In all other cases the session was already flushed when the - * session went down or when the new open message was parsed. - */ - if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { - log_peer_warnx(&p->conf, "graceful restart of %s, " - "time-out, flushing", aid2str(i)); - if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, - &i, sizeof(i)) == -1) - return (-1); - } - p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; - } - return (0); -} - -int -session_dispatch_msg(struct pollfd *pfd, struct peer *p) -{ - socklen_t len; - int error; - - if (p->state == STATE_CONNECT) { - if (pfd->revents & POLLOUT) { - if (pfd->revents & POLLIN) { - /* error occurred */ - len = sizeof(error); - if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, - &error, &len) == -1 || error) { - if (error) - errno = error; - if (errno != p->lasterr) { - log_peer_warn(&p->conf, - "socket error"); - p->lasterr = errno; - } - bgp_fsm(p, EVNT_CON_OPENFAIL, NULL); - return (1); - } - } - bgp_fsm(p, EVNT_CON_OPEN, NULL); - return (1); - } - if (pfd->revents & POLLHUP) { - bgp_fsm(p, EVNT_CON_OPENFAIL, NULL); - return (1); - } - if (pfd->revents & (POLLERR|POLLNVAL)) { - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return (1); - } - return (0); - } - - if (pfd->revents & POLLHUP) { - bgp_fsm(p, EVNT_CON_CLOSED, NULL); - return (1); - } - if (pfd->revents & (POLLERR|POLLNVAL)) { - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return (1); - } - - if (pfd->revents & POLLOUT && msgbuf_queuelen(p->wbuf) > 0) { - if (ibuf_write(p->fd, p->wbuf) == -1) { - if (errno == EPIPE) - log_peer_warnx(&p->conf, "Connection closed"); - else - log_peer_warn(&p->conf, "write error"); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return (1); - } - p->stats.last_write = getmonotime(); - start_timer_sendholdtime(p); - if (p->throttled && - msgbuf_queuelen(p->wbuf) < SESS_MSG_LOW_MARK) { - if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) - log_peer_warn(&p->conf, "imsg_compose XON"); - else - p->throttled = 0; - } - if (!(pfd->revents & POLLIN)) - return (1); - } - - if (p->fd != -1 && pfd->revents & POLLIN) { - switch (ibuf_read(p->fd, p->wbuf)) { - case -1: - if (p->state == STATE_IDLE) - /* error already handled before */ - return (1); - log_peer_warn(&p->conf, "read error"); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - return (1); - case 0: - bgp_fsm(p, EVNT_CON_CLOSED, NULL); - return (1); - } - p->stats.last_read = getmonotime(); - return (1); - } - return (0); -} - -void -session_process_msg(struct peer *p) -{ - struct ibuf *msg; - int processed = 0; - uint8_t msgtype; - - p->rpending = 0; - if (p->wbuf == NULL) - return; - - /* - * session might drop to IDLE -> all buffers are flushed - */ - while ((msg = msgbuf_get(p->wbuf)) != NULL) { - /* skip msg header and extract type */ - if (ibuf_skip(msg, MSGSIZE_HEADER_MARKER) == -1 || - ibuf_skip(msg, sizeof(uint16_t)) == -1 || - ibuf_get_n8(msg, &msgtype) == -1) { - log_peer_warn(&p->conf, "process message failed"); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - ibuf_free(msg); - return; - } - ibuf_rewind(msg); - - session_mrt_dump_bgp_msg(p, msg, msgtype, DIR_IN); - - ibuf_skip(msg, MSGSIZE_HEADER); - - switch (msgtype) { - case BGP_OPEN: - bgp_fsm(p, EVNT_RCVD_OPEN, msg); - p->stats.msg_rcvd_open++; - break; - case BGP_UPDATE: - bgp_fsm(p, EVNT_RCVD_UPDATE, msg); - p->stats.msg_rcvd_update++; - break; - case BGP_NOTIFICATION: - bgp_fsm(p, EVNT_RCVD_NOTIFICATION, msg); - p->stats.msg_rcvd_notification++; - break; - case BGP_KEEPALIVE: - bgp_fsm(p, EVNT_RCVD_KEEPALIVE, msg); - p->stats.msg_rcvd_keepalive++; - break; - case BGP_RREFRESH: - parse_rrefresh(p, msg); - p->stats.msg_rcvd_rrefresh++; - break; - default: /* cannot happen */ - session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE, - &msgtype, 1); - log_peer_warnx(&p->conf, - "received message with unknown type %u", msgtype); - bgp_fsm(p, EVNT_CON_FATAL, NULL); - } - ibuf_free(msg); - if (++processed > MSG_PROCESS_LIMIT) { - p->rpending = 1; - break; - } - } -} - -struct ibuf * -parse_header(struct ibuf *msg, void *arg, int *fd) -{ - struct peer *peer = arg; - struct ibuf *b; - u_char m[MSGSIZE_HEADER_MARKER]; - uint16_t len, maxlen = MAX_PKTSIZE; - uint8_t type; - - if (ibuf_get(msg, m, sizeof(m)) == -1 || - ibuf_get_n16(msg, &len) == -1 || - ibuf_get_n8(msg, &type) == -1) - return (NULL); - /* caller MUST make sure we are getting 19 bytes! */ - if (memcmp(m, marker, sizeof(marker))) { - log_peer_warnx(&peer->conf, "sync error"); - session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL); - bgp_fsm(peer, EVNT_CON_FATAL, NULL); - errno = EINVAL; - return (NULL); - } - - if (peer->capa.ann.ext_msg) - maxlen = MAX_EXT_PKTSIZE; - - if (len < MSGSIZE_HEADER || len > maxlen) { - log_peer_warnx(&peer->conf, - "received message: illegal length: %u byte", len); - goto badlen; - } - - switch (type) { - case BGP_OPEN: - if (len < MSGSIZE_OPEN_MIN || len > MAX_PKTSIZE) { - log_peer_warnx(&peer->conf, - "received OPEN: illegal len: %u byte", len); - goto badlen; - } - break; - case BGP_NOTIFICATION: - if (len < MSGSIZE_NOTIFICATION_MIN) { - log_peer_warnx(&peer->conf, - "received NOTIFICATION: illegal len: %u byte", len); - goto badlen; - } - break; - case BGP_UPDATE: - if (len < MSGSIZE_UPDATE_MIN) { - log_peer_warnx(&peer->conf, - "received UPDATE: illegal len: %u byte", len); - goto badlen; - } - break; - case BGP_KEEPALIVE: - if (len != MSGSIZE_KEEPALIVE) { - log_peer_warnx(&peer->conf, - "received KEEPALIVE: illegal len: %u byte", len); - goto badlen; - } - break; - case BGP_RREFRESH: - if (len < MSGSIZE_RREFRESH_MIN) { - log_peer_warnx(&peer->conf, - "received RREFRESH: illegal len: %u byte", len); - goto badlen; - } - break; - default: - log_peer_warnx(&peer->conf, - "received msg with unknown type %u", type); - session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE, - &type, sizeof(type)); - bgp_fsm(peer, EVNT_CON_FATAL, NULL); - errno = EINVAL; - return (NULL); - } - - if ((b = ibuf_open(len)) == NULL) - return (NULL); - return (b); - - badlen: - len = htons(len); - session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, - &len, sizeof(len)); - bgp_fsm(peer, EVNT_CON_FATAL, NULL); - errno = ERANGE; - return (NULL); -} - -int -parse_open(struct peer *peer, struct ibuf *msg) -{ - uint8_t version, rversion; - uint16_t short_as; - uint16_t holdtime; - uint32_t as, bgpid; - uint8_t optparamlen; - - if (ibuf_get_n8(msg, &version) == -1 || - ibuf_get_n16(msg, &short_as) == -1 || - ibuf_get_n16(msg, &holdtime) == -1 || - ibuf_get_n32(msg, &bgpid) == -1 || - ibuf_get_n8(msg, &optparamlen) == -1) - goto bad_len; - - if (version != BGP_VERSION) { - log_peer_warnx(&peer->conf, - "peer wants unrecognized version %u", version); - if (version > BGP_VERSION) - rversion = version - BGP_VERSION; - else - rversion = BGP_VERSION; - session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION, - &rversion, sizeof(rversion)); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - - as = peer->short_as = short_as; - if (as == 0) { - log_peer_warnx(&peer->conf, - "peer requests unacceptable AS %u", as); - session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - - if (holdtime != 0 && holdtime < peer->conf.min_holdtime) { - log_peer_warnx(&peer->conf, - "peer requests unacceptable holdtime %u", holdtime); - session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - - if (holdtime < peer->conf.holdtime) - peer->holdtime = holdtime; - else - peer->holdtime = peer->conf.holdtime; - - /* check bgpid for validity - just disallow 0 */ - if (bgpid == 0) { - log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable"); - session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - peer->remote_bgpid = bgpid; - - if (optparamlen != 0) { - struct ibuf oparams, op; - uint8_t ext_type, op_type; - uint16_t ext_len, op_len; - - ibuf_from_ibuf(&oparams, msg); - - /* check for RFC9072 encoding */ - if (ibuf_get_n8(&oparams, &ext_type) == -1) - goto bad_len; - if (ext_type == OPT_PARAM_EXT_LEN) { - if (ibuf_get_n16(&oparams, &ext_len) == -1) - goto bad_len; - /* skip RFC9072 header */ - if (ibuf_skip(msg, 3) == -1) - goto bad_len; - } else { - ext_len = optparamlen; - ibuf_rewind(&oparams); - } - - if (ibuf_truncate(&oparams, ext_len) == -1 || - ibuf_skip(msg, ext_len) == -1) - goto bad_len; - - while (ibuf_size(&oparams) > 0) { - if (ibuf_get_n8(&oparams, &op_type) == -1) - goto bad_len; - - if (ext_type == OPT_PARAM_EXT_LEN) { - if (ibuf_get_n16(&oparams, &op_len) == -1) - goto bad_len; - } else { - uint8_t tmp; - if (ibuf_get_n8(&oparams, &tmp) == -1) - goto bad_len; - op_len = tmp; - } - - if (ibuf_get_ibuf(&oparams, op_len, &op) == -1) - goto bad_len; - - switch (op_type) { - case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ - if (parse_capabilities(peer, &op, &as) == -1) { - session_notification(peer, ERR_OPEN, 0, - NULL); - change_state(peer, STATE_IDLE, - EVNT_RCVD_OPEN); - return (-1); - } - break; - case OPT_PARAM_AUTH: /* deprecated */ - default: - /* - * unsupported type - * the RFCs tell us to leave the data section - * empty and notify the peer with ERR_OPEN, - * ERR_OPEN_OPT. How the peer should know - * _which_ optional parameter we don't support - * is beyond me. - */ - log_peer_warnx(&peer->conf, - "received OPEN message with unsupported " - "optional parameter: type %u", op_type); - session_notification(peer, ERR_OPEN, - ERR_OPEN_OPT, NULL); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - } - } - - if (ibuf_size(msg) != 0) { - bad_len: - log_peer_warnx(&peer->conf, - "corrupt OPEN message received: length mismatch"); - session_notification(peer, ERR_OPEN, 0, NULL); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - - /* - * if remote-as is zero and it's a cloned neighbor, accept any - * but only on the first connect, after that the remote-as needs - * to remain the same. - */ - if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { - peer->conf.remote_as = as; - peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); - if (!peer->conf.ebgp) - /* force enforce_as off for iBGP sessions */ - peer->conf.enforce_as = ENFORCE_AS_OFF; - } - - if (peer->conf.remote_as != as) { - log_peer_warnx(&peer->conf, "peer sent wrong AS %s", - log_as(as)); - session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - - /* on iBGP sessions check for bgpid collision */ - if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) { - struct in_addr ina; - ina.s_addr = htonl(bgpid); - log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours", - inet_ntoa(ina)); - session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - - if (capa_neg_calc(peer) == -1) { - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - - return (0); -} - -int -parse_update(struct peer *peer, struct ibuf *msg) -{ - /* - * we pass the message verbatim to the rde. - * in case of errors the whole session is reset with a - * notification anyway, we only need to know the peer - */ - if (imsg_rde(IMSG_UPDATE, peer->conf.id, ibuf_data(msg), - ibuf_size(msg)) == -1) - return (-1); - - return (0); -} - -int -parse_rrefresh(struct peer *peer, struct ibuf *msg) -{ - struct route_refresh rr; - uint16_t afi, datalen; - uint8_t aid, safi, subtype; - - datalen = ibuf_size(msg) + MSGSIZE_HEADER; - - if (ibuf_get_n16(msg, &afi) == -1 || - ibuf_get_n8(msg, &subtype) == -1 || - ibuf_get_n8(msg, &safi) == -1) { - /* minimum size checked in session_process_msg() */ - fatalx("%s: message too small", __func__); - } - - /* check subtype if peer announced enhanced route refresh */ - if (peer->capa.neg.enhanced_rr) { - switch (subtype) { - case ROUTE_REFRESH_REQUEST: - /* no ORF support, so no oversized RREFRESH msgs */ - if (datalen != MSGSIZE_RREFRESH) { - log_peer_warnx(&peer->conf, - "received RREFRESH: illegal len: %u byte", - datalen); - datalen = htons(datalen); - session_notification_data(peer, ERR_HEADER, - ERR_HDR_LEN, &datalen, sizeof(datalen)); - bgp_fsm(peer, EVNT_CON_FATAL, NULL); - return (-1); - } - peer->stats.refresh_rcvd_req++; - break; - case ROUTE_REFRESH_BEGIN_RR: - case ROUTE_REFRESH_END_RR: - /* special handling for RFC7313 */ - if (datalen != MSGSIZE_RREFRESH) { - log_peer_warnx(&peer->conf, - "received RREFRESH: illegal len: %u byte", - datalen); - ibuf_rewind(msg); - session_notification(peer, ERR_RREFRESH, - ERR_RR_INV_LEN, msg); - bgp_fsm(peer, EVNT_CON_FATAL, NULL); - return (-1); - } - if (subtype == ROUTE_REFRESH_BEGIN_RR) - peer->stats.refresh_rcvd_borr++; - else - peer->stats.refresh_rcvd_eorr++; - break; - default: - log_peer_warnx(&peer->conf, "peer sent bad refresh, " - "bad subtype %d", subtype); - return (0); - } - } else { - /* force subtype to default */ - subtype = ROUTE_REFRESH_REQUEST; - peer->stats.refresh_rcvd_req++; - } - - /* afi/safi unchecked - unrecognized values will be ignored anyway */ - if (afi2aid(afi, safi, &aid) == -1) { - log_peer_warnx(&peer->conf, "peer sent bad refresh, " - "invalid afi/safi pair"); - return (0); - } - - if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) { - log_peer_warnx(&peer->conf, "peer sent unexpected refresh"); - return (0); - } - - rr.aid = aid; - rr.subtype = subtype; - - if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1) - return (-1); - - return (0); -} - -void -parse_notification(struct peer *peer, struct ibuf *msg) -{ - const char *reason = "received"; - uint8_t errcode, subcode; - uint8_t reason_len; - enum session_events event = EVNT_RCVD_NOTIFICATION; - - if (ibuf_get_n8(msg, &errcode) == -1 || - ibuf_get_n8(msg, &subcode) == -1) { - log_peer_warnx(&peer->conf, "received bad notification"); - goto done; - } - - /* RFC8538: check for hard-reset or graceful notification */ - if (peer->capa.neg.grestart.grnotification) { - if (errcode == ERR_CEASE && subcode == ERR_CEASE_HARD_RESET) { - if (ibuf_get_n8(msg, &errcode) == -1 || - ibuf_get_n8(msg, &subcode) == -1) { - log_peer_warnx(&peer->conf, - "received bad hard-reset notification"); - goto done; - } - reason = "received hard-reset"; - } else { - reason = "received graceful"; - event = EVNT_RCVD_GRACE_NOTIFICATION; - } - } - - peer->errcnt++; - peer->stats.last_rcvd_errcode = errcode; - peer->stats.last_rcvd_suberr = subcode; - - log_notification(peer, errcode, subcode, msg, reason); - - CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX); - memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason)); - if (errcode == ERR_CEASE && - (subcode == ERR_CEASE_ADMIN_DOWN || - subcode == ERR_CEASE_ADMIN_RESET)) { - /* check if shutdown reason is included */ - if (ibuf_get_n8(msg, &reason_len) != -1 && reason_len != 0) { - if (ibuf_get(msg, peer->stats.last_reason, - reason_len) == -1) - log_peer_warnx(&peer->conf, - "received truncated shutdown reason"); - } - } - -done: - change_state(peer, STATE_IDLE, event); -} - -int -parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as) -{ - struct ibuf capabuf; - uint16_t afi, nhafi, gr_header; - uint8_t capa_code, capa_len; - uint8_t safi, aid, role, flags; - - while (ibuf_size(buf) > 0) { - if (ibuf_get_n8(buf, &capa_code) == -1 || - ibuf_get_n8(buf, &capa_len) == -1) { - log_peer_warnx(&peer->conf, "Bad capabilities attr " - "length: too short"); - return (-1); - } - if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) { - log_peer_warnx(&peer->conf, - "Received bad capabilities attr length: " - "len %zu smaller than capa_len %u", - ibuf_size(buf), capa_len); - return (-1); - } - - switch (capa_code) { - case CAPA_MP: /* RFC 4760 */ - if (capa_len != 4 || - ibuf_get_n16(&capabuf, &afi) == -1 || - ibuf_skip(&capabuf, 1) == -1 || - ibuf_get_n8(&capabuf, &safi) == -1) { - log_peer_warnx(&peer->conf, - "Received bad multi protocol capability"); - break; - } - if (afi2aid(afi, safi, &aid) == -1) { - log_peer_warnx(&peer->conf, - "Received multi protocol capability: " - " unknown AFI %u, safi %u pair", - afi, safi); - peer->capa.peer.mp[AID_UNSPEC] = 1; - break; - } - peer->capa.peer.mp[aid] = 1; - break; - case CAPA_REFRESH: - peer->capa.peer.refresh = 1; - break; - case CAPA_EXT_NEXTHOP: - while (ibuf_size(&capabuf) > 0) { - uint16_t tmp16; - if (ibuf_get_n16(&capabuf, &afi) == -1 || - ibuf_get_n16(&capabuf, &tmp16) == -1 || - ibuf_get_n16(&capabuf, &nhafi) == -1) { - log_peer_warnx(&peer->conf, - "Received bad %s capability", - log_capability(CAPA_EXT_NEXTHOP)); - memset(peer->capa.peer.ext_nh, 0, - sizeof(peer->capa.peer.ext_nh)); - break; - } - safi = tmp16; - if (afi2aid(afi, safi, &aid) == -1 || - !(aid == AID_INET || aid == AID_VPN_IPv4)) { - log_peer_warnx(&peer->conf, - "Received %s capability: " - " unsupported AFI %u, safi %u pair", - log_capability(CAPA_EXT_NEXTHOP), - afi, safi); - continue; - } - if (nhafi != AFI_IPv6) { - log_peer_warnx(&peer->conf, - "Received %s capability: " - " unsupported nexthop AFI %u", - log_capability(CAPA_EXT_NEXTHOP), - nhafi); - continue; - } - peer->capa.peer.ext_nh[aid] = 1; - } - break; - case CAPA_EXT_MSG: - peer->capa.peer.ext_msg = 1; - break; - case CAPA_ROLE: - if (capa_len != 1 || - ibuf_get_n8(&capabuf, &role) == -1) { - log_peer_warnx(&peer->conf, - "Received bad role capability"); - break; - } - if (!peer->conf.ebgp) { - log_peer_warnx(&peer->conf, - "Received role capability on iBGP session"); - break; - } - peer->capa.peer.policy = 1; - peer->remote_role = capa2role(role); - break; - case CAPA_RESTART: - if (capa_len == 2) { - /* peer only supports EoR marker */ - peer->capa.peer.grestart.restart = 1; - peer->capa.peer.grestart.timeout = 0; - break; - } else if (capa_len % 4 != 2) { - log_peer_warnx(&peer->conf, - "Bad graceful restart capability"); - peer->capa.peer.grestart.restart = 0; - peer->capa.peer.grestart.timeout = 0; - break; - } - - if (ibuf_get_n16(&capabuf, &gr_header) == -1) { - bad_gr_restart: - log_peer_warnx(&peer->conf, - "Bad graceful restart capability"); - peer->capa.peer.grestart.restart = 0; - peer->capa.peer.grestart.timeout = 0; - break; - } - - peer->capa.peer.grestart.timeout = - gr_header & CAPA_GR_TIMEMASK; - if (peer->capa.peer.grestart.timeout == 0) { - log_peer_warnx(&peer->conf, "Received " - "graceful restart with zero timeout"); - peer->capa.peer.grestart.restart = 0; - break; - } - - while (ibuf_size(&capabuf) > 0) { - if (ibuf_get_n16(&capabuf, &afi) == -1 || - ibuf_get_n8(&capabuf, &safi) == -1 || - ibuf_get_n8(&capabuf, &flags) == -1) - goto bad_gr_restart; - if (afi2aid(afi, safi, &aid) == -1) { - log_peer_warnx(&peer->conf, - "Received graceful restart capa: " - " unknown AFI %u, safi %u pair", - afi, safi); - continue; - } - peer->capa.peer.grestart.flags[aid] |= - CAPA_GR_PRESENT; - if (flags & CAPA_GR_F_FLAG) - peer->capa.peer.grestart.flags[aid] |= - CAPA_GR_FORWARD; - if (gr_header & CAPA_GR_R_FLAG) - peer->capa.peer.grestart.flags[aid] |= - CAPA_GR_RESTART; - peer->capa.peer.grestart.restart = 2; - } - if (gr_header & CAPA_GR_N_FLAG) - peer->capa.peer.grestart.grnotification = 1; - break; - case CAPA_AS4BYTE: - if (capa_len != 4 || - ibuf_get_n32(&capabuf, as) == -1) { - log_peer_warnx(&peer->conf, - "Received bad AS4BYTE capability"); - peer->capa.peer.as4byte = 0; - break; - } - if (*as == 0) { - log_peer_warnx(&peer->conf, - "peer requests unacceptable AS %u", *as); - session_notification(peer, ERR_OPEN, - ERR_OPEN_AS, NULL); - change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); - return (-1); - } - peer->capa.peer.as4byte = 1; - break; - case CAPA_ADD_PATH: - if (capa_len % 4 != 0) { - bad_add_path: - log_peer_warnx(&peer->conf, - "Received bad ADD-PATH capability"); - memset(peer->capa.peer.add_path, 0, - sizeof(peer->capa.peer.add_path)); - break; - } - while (ibuf_size(&capabuf) > 0) { - if (ibuf_get_n16(&capabuf, &afi) == -1 || - ibuf_get_n8(&capabuf, &safi) == -1 || - ibuf_get_n8(&capabuf, &flags) == -1) - goto bad_add_path; - if (afi2aid(afi, safi, &aid) == -1) { - log_peer_warnx(&peer->conf, - "Received ADD-PATH capa: " - " unknown AFI %u, safi %u pair", - afi, safi); - memset(peer->capa.peer.add_path, 0, - sizeof(peer->capa.peer.add_path)); - break; - } - if (flags & ~CAPA_AP_BIDIR) { - log_peer_warnx(&peer->conf, - "Received ADD-PATH capa: " - " bad flags %x", flags); - memset(peer->capa.peer.add_path, 0, - sizeof(peer->capa.peer.add_path)); - break; - } - peer->capa.peer.add_path[aid] = flags; - } - break; - case CAPA_ENHANCED_RR: - peer->capa.peer.enhanced_rr = 1; - break; - default: - break; - } - } - - return (0); -} - -int -capa_neg_calc(struct peer *p) -{ - struct ibuf *ebuf; - uint8_t i, hasmp = 0, capa_code, capa_len, capa_aid = 0; - - /* a capability is accepted only if both sides announced it */ - - p->capa.neg.refresh = - (p->capa.ann.refresh && p->capa.peer.refresh) != 0; - p->capa.neg.enhanced_rr = - (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; - p->capa.neg.as4byte = - (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; - p->capa.neg.ext_msg = - (p->capa.ann.ext_msg && p->capa.peer.ext_msg) != 0; - - /* MP: both side must agree on the AFI,SAFI pair */ - if (p->capa.peer.mp[AID_UNSPEC]) - hasmp = 1; - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) - p->capa.neg.mp[i] = 1; - else - p->capa.neg.mp[i] = 0; - if (p->capa.ann.mp[i] || p->capa.peer.mp[i]) - hasmp = 1; - } - /* if no MP capability present default to IPv4 unicast mode */ - if (!hasmp) - p->capa.neg.mp[AID_INET] = 1; - - /* - * graceful restart: the peer capabilities are of interest here. - * It is necessary to compare the new values with the previous ones - * and act accordingly. AFI/SAFI that are not part in the MP capability - * are treated as not being present. - * Also make sure that a flush happens if the session stopped - * supporting graceful restart. - */ - - for (i = AID_MIN; i < AID_MAX; i++) { - int8_t negflags; - - /* disable GR if the AFI/SAFI is not present */ - if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && - p->capa.neg.mp[i] == 0)) - p->capa.peer.grestart.flags[i] = 0; /* disable */ - /* look at current GR state and decide what to do */ - negflags = p->capa.neg.grestart.flags[i]; - p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; - if (negflags & CAPA_GR_RESTARTING) { - if (p->capa.ann.grestart.restart != 0 && - p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { - p->capa.neg.grestart.flags[i] |= - CAPA_GR_RESTARTING; - } else { - if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, - &i, sizeof(i)) == -1) { - log_peer_warnx(&p->conf, - "imsg send failed"); - return (-1); - } - log_peer_warnx(&p->conf, "graceful restart of " - "%s, not restarted, flushing", aid2str(i)); - } - } - } - p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; - p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; - if (p->capa.ann.grestart.restart == 0) - p->capa.neg.grestart.restart = 0; - - /* RFC 8538 graceful notification: both sides need to agree */ - p->capa.neg.grestart.grnotification = - (p->capa.ann.grestart.grnotification && - p->capa.peer.grestart.grnotification) != 0; - - /* RFC 8950 extended nexthop encoding: both sides need to agree */ - memset(p->capa.neg.ext_nh, 0, sizeof(p->capa.neg.ext_nh)); - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.neg.mp[i] == 0) - continue; - if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) { - p->capa.neg.ext_nh[i] = 1; - } - } - - /* - * ADD-PATH: set only those bits where both sides agree. - * For this compare our send bit with the recv bit from the peer - * and vice versa. - * The flags are stored from this systems view point. - * At index 0 the flags are set if any per-AID flag is set. - */ - memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.neg.mp[i] == 0) - continue; - if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && - (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { - p->capa.neg.add_path[i] |= CAPA_AP_RECV; - p->capa.neg.add_path[0] |= CAPA_AP_RECV; - } - if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && - (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { - p->capa.neg.add_path[i] |= CAPA_AP_SEND; - p->capa.neg.add_path[0] |= CAPA_AP_SEND; - } - } - - /* - * Open policy: check that the policy is sensible. - * - * Make sure that the roles match and set the negotiated capability - * to the role of the peer. So the RDE can inject the OTC attribute. - * See RFC 9234, section 4.2. - * These checks should only happen on ebgp sessions. - */ - if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 && - p->conf.ebgp) { - switch (p->conf.role) { - case ROLE_PROVIDER: - if (p->remote_role != ROLE_CUSTOMER) - goto policyfail; - break; - case ROLE_RS: - if (p->remote_role != ROLE_RS_CLIENT) - goto policyfail; - break; - case ROLE_RS_CLIENT: - if (p->remote_role != ROLE_RS) - goto policyfail; - break; - case ROLE_CUSTOMER: - if (p->remote_role != ROLE_PROVIDER) - goto policyfail; - break; - case ROLE_PEER: - if (p->remote_role != ROLE_PEER) - goto policyfail; - break; - default: - policyfail: - log_peer_warnx(&p->conf, "open policy role mismatch: " - "our role %s, their role %s", - log_policy(p->conf.role), - log_policy(p->remote_role)); - session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); - return (-1); - } - p->capa.neg.policy = 1; - } - - /* enforce presence of open policy role capability */ - if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 && - p->conf.ebgp) { - log_peer_warnx(&p->conf, "open policy role enforced but " - "not present"); - session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); - return (-1); - } - - /* enforce presence of other capabilities */ - if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) { - capa_code = CAPA_REFRESH; - capa_len = 0; - goto fail; - } - /* enforce presence of other capabilities */ - if (p->capa.ann.ext_msg == 2 && p->capa.neg.ext_msg == 0) { - capa_code = CAPA_EXT_MSG; - capa_len = 0; - goto fail; - } - if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) { - capa_code = CAPA_ENHANCED_RR; - capa_len = 0; - goto fail; - } - if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) { - capa_code = CAPA_AS4BYTE; - capa_len = 4; - goto fail; - } - if (p->capa.ann.grestart.restart == 2 && - p->capa.neg.grestart.restart == 0) { - capa_code = CAPA_RESTART; - capa_len = 2; - goto fail; - } - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) { - capa_code = CAPA_MP; - capa_len = 4; - capa_aid = i; - goto fail; - } - } - - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.neg.mp[i] == 0) - continue; - if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) && - (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) { - capa_code = CAPA_ADD_PATH; - capa_len = 4; - capa_aid = i; - goto fail; - } - if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) && - (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) { - capa_code = CAPA_ADD_PATH; - capa_len = 4; - capa_aid = i; - goto fail; - } - } - - for (i = AID_MIN; i < AID_MAX; i++) { - if (p->capa.neg.mp[i] == 0) - continue; - if (p->capa.ann.ext_nh[i] == 2 && - p->capa.neg.ext_nh[i] == 0) { - capa_code = CAPA_EXT_NEXTHOP; - capa_len = 6; - capa_aid = i; - goto fail; - } - } - return (0); - - fail: - if ((ebuf = ibuf_dynamic(2, 256)) == NULL) - return (-1); - /* best effort, no problem if it fails */ - session_capa_add(ebuf, capa_code, capa_len); - if (capa_code == CAPA_MP) - session_capa_add_mp(ebuf, capa_aid); - else if (capa_code == CAPA_ADD_PATH) - session_capa_add_afi(ebuf, capa_aid, 0); - else if (capa_code == CAPA_EXT_NEXTHOP) - session_capa_add_ext_nh(ebuf, capa_aid); - else if (capa_len > 0) - ibuf_add_zero(ebuf, capa_len); - - session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf); - ibuf_free(ebuf); - return (-1); -} - -void -session_mrt_dump_state(struct peer *p, enum session_state oldstate, - enum session_state newstate) +session_mrt_dump_state(struct peer *p, enum session_state oldstate, + enum session_state newstate) { struct mrt *mrt; @@ -3050,6 +1153,41 @@ session_mrt_dump_bgp_msg(struct peer *p, } } +static int +la_cmp(struct listen_addr *a, struct listen_addr *b) +{ + struct sockaddr_in *in_a, *in_b; + struct sockaddr_in6 *in6_a, *in6_b; + + if (a->sa.ss_family != b->sa.ss_family) + return (1); + + switch (a->sa.ss_family) { + case AF_INET: + in_a = (struct sockaddr_in *)&a->sa; + in_b = (struct sockaddr_in *)&b->sa; + if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) + return (1); + if (in_a->sin_port != in_b->sin_port) + return (1); + break; + case AF_INET6: + in6_a = (struct sockaddr_in6 *)&a->sa; + in6_b = (struct sockaddr_in6 *)&b->sa; + if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, + sizeof(struct in6_addr))) + return (1); + if (in6_a->sin6_port != in6_b->sin6_port) + return (1); + break; + default: + fatal("king bula sez: unknown address family"); + /* NOTREACHED */ + } + + return (0); +} + void session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt) { @@ -3467,41 +1605,6 @@ session_dispatch_imsg(struct imsgbuf *im } } -int -la_cmp(struct listen_addr *a, struct listen_addr *b) -{ - struct sockaddr_in *in_a, *in_b; - struct sockaddr_in6 *in6_a, *in6_b; - - if (a->sa.ss_family != b->sa.ss_family) - return (1); - - switch (a->sa.ss_family) { - case AF_INET: - in_a = (struct sockaddr_in *)&a->sa; - in_b = (struct sockaddr_in *)&b->sa; - if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) - return (1); - if (in_a->sin_port != in_b->sin_port) - return (1); - break; - case AF_INET6: - in6_a = (struct sockaddr_in6 *)&a->sa; - in6_b = (struct sockaddr_in6 *)&b->sa; - if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, - sizeof(struct in6_addr))) - return (1); - if (in6_a->sin6_port != in6_b->sin6_port) - return (1); - break; - default: - fatal("king bula sez: unknown address family"); - /* NOTREACHED */ - } - - return (0); -} - struct peer * getpeerbydesc(struct bgpd_config *c, const char *descr) { @@ -3765,6 +1868,15 @@ session_demote(struct peer *p, int level } void +session_md5_reload(struct peer *p) +{ + if (!p->template) + if (imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, + p->conf.id, 0, -1, NULL, 0) == -1) + fatalx("imsg_compose error"); +} + +void session_stop(struct peer *peer, uint8_t subcode, const char *reason) { struct ibuf *ibuf; @@ -3853,6 +1965,8 @@ merge_peers(struct bgpd_config *c, struc p->conf.holdtime = conf->holdtime; if (p->conf.min_holdtime == 0) p->conf.min_holdtime = conf->min_holdtime; + p->connectretry = conf->connectretry; + p->local_bgpid = conf->bgpid; /* had demotion, is demoted, demote removed? */ if (p->demoted && !p->conf.demote_group[0]) Index: session.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/session.h,v diff -u -p -r1.187 session.h --- session.h 20 Feb 2025 19:47:31 -0000 1.187 +++ session.h 25 Feb 2025 14:17:48 -0000 @@ -216,6 +216,7 @@ struct peer { u_int errcnt; u_int IdleHoldTime; unsigned int if_scope; /* interface scope for IPv6 */ + uint32_t local_bgpid; uint32_t remote_bgpid; enum session_state state; enum session_state prev_state; @@ -225,6 +226,7 @@ struct peer { uint16_t holdtime; uint16_t local_port; uint16_t remote_port; + uint16_t connectretry; uint8_t depend_ok; uint8_t demoted; uint8_t passive; @@ -328,11 +330,16 @@ void rtr_recalc(void); RB_PROTOTYPE(peer_head, peer, entry, peer_compare); void session_main(int, int); -void bgp_fsm(struct peer *, enum session_events, struct ibuf *); int session_neighbor_rrefresh(struct peer *p); +void get_alternate_addr(struct bgpd_addr *, struct bgpd_addr *, + struct bgpd_addr *, unsigned int *); struct peer *getpeerbydesc(struct bgpd_config *, const char *); struct peer *getpeerbyip(struct bgpd_config *, struct sockaddr *); struct peer *getpeerbyid(struct bgpd_config *, uint32_t); +int session_hanlde_update(struct peer *, struct ibuf *); +int session_handle_rrefresh(struct peer *, struct route_refresh *); +int session_graceful_restart(struct peer *); +int session_graceful_flush(struct peer *, uint8_t, const char *); void session_mrt_dump_state(struct peer *, enum session_state, enum session_state); void session_mrt_dump_bgp_msg(struct peer *, struct ibuf *, @@ -341,8 +348,31 @@ int peer_matched(struct peer *, struct int imsg_ctl_parent(struct imsg *); int imsg_ctl_rde(struct imsg *); int imsg_ctl_rde_msg(int, uint32_t, pid_t); +int session_connect(struct peer *); +void session_close(struct peer *); +void session_up(struct peer *); +void session_down(struct peer *); +void session_demote(struct peer *, int); +void session_md5_reload(struct peer *); void session_stop(struct peer *, uint8_t, const char *); struct bgpd_addr *session_localaddr(struct peer *); + +/* session_bgp.c */ +void session_open(struct peer *); +void session_keepalive(struct peer *); +void session_update(struct peer *, struct ibuf *); +void session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *); +void session_notification_data(struct peer *, uint8_t, uint8_t, void *, + size_t); +void session_rrefresh(struct peer *, uint8_t, uint8_t); +int session_dispatch_msg(struct pollfd *, struct peer *); +void session_process_msg(struct peer *); + +struct ibuf *parse_header(struct ibuf *, void *, int *); + +void start_timer_sendholdtime(struct peer *); +void bgp_fsm(struct peer *, enum session_events, struct ibuf *); +void change_state(struct peer *, enum session_state, enum session_events); /* timer.c */ struct timer *timer_get(struct timer_head *, enum Timer); Index: session_bgp.c =================================================================== RCS file: session_bgp.c diff -N session_bgp.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ session_bgp.c 25 Feb 2025 14:16:47 -0000 @@ -0,0 +1,1930 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2004 - 2025 Claudio Jeker + * Copyright (c) 2003, 2004, 2005 Henning Brauer + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include + +#include +#include +#include + +#include +#include +#include + +#include "bgpd.h" +#include "session.h" +#include "log.h" + +static void start_timer_holdtime(struct peer *); +static void start_timer_keepalive(struct peer *); +struct ibuf *session_newmsg(enum msg_type, uint16_t); +void session_sendmsg(struct ibuf *, struct peer *, enum msg_type); +void session_open(struct peer *); +void session_keepalive(struct peer *); +void session_update(struct peer *, struct ibuf *); +void session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *); +void session_notification_data(struct peer *, uint8_t, uint8_t, void *, + size_t); +void session_rrefresh(struct peer *, uint8_t, uint8_t); +int capa_neg_calc(struct peer *); + +static const uint8_t marker[MSGSIZE_HEADER_MARKER] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +}; + +struct ibuf * +session_newmsg(enum msg_type msgtype, uint16_t len) +{ + struct ibuf *buf; + int errs = 0; + + if ((buf = ibuf_open(len)) == NULL) + return (NULL); + + errs += ibuf_add(buf, marker, sizeof(marker)); + errs += ibuf_add_n16(buf, len); + errs += ibuf_add_n8(buf, msgtype); + + if (errs) { + ibuf_free(buf); + return (NULL); + } + + return (buf); +} + +void +session_sendmsg(struct ibuf *msg, struct peer *p, enum msg_type msgtype) +{ + session_mrt_dump_bgp_msg(p, msg, msgtype, DIR_OUT); + + ibuf_close(p->wbuf, msg); +} + +/* + * Translate between internal roles and the value expected by RFC 9234. + */ +static uint8_t +role2capa(enum role role) +{ + switch (role) { + case ROLE_CUSTOMER: + return CAPA_ROLE_CUSTOMER; + case ROLE_PROVIDER: + return CAPA_ROLE_PROVIDER; + case ROLE_RS: + return CAPA_ROLE_RS; + case ROLE_RS_CLIENT: + return CAPA_ROLE_RS_CLIENT; + case ROLE_PEER: + return CAPA_ROLE_PEER; + default: + fatalx("Unsupported role for role capability"); + } +} + +static enum role +capa2role(uint8_t val) +{ + switch (val) { + case CAPA_ROLE_PROVIDER: + return ROLE_PROVIDER; + case CAPA_ROLE_RS: + return ROLE_RS; + case CAPA_ROLE_RS_CLIENT: + return ROLE_RS_CLIENT; + case CAPA_ROLE_CUSTOMER: + return ROLE_CUSTOMER; + case CAPA_ROLE_PEER: + return ROLE_PEER; + default: + return ROLE_NONE; + } +} + +static int +session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len) +{ + int errs = 0; + + errs += ibuf_add_n8(opb, capa_code); + errs += ibuf_add_n8(opb, capa_len); + return (errs); +} + +static int +session_capa_add_mp(struct ibuf *buf, uint8_t aid) +{ + uint16_t afi; + uint8_t safi; + int errs = 0; + + if (aid2afi(aid, &afi, &safi) == -1) { + log_warn("%s: bad AID", __func__); + return (-1); + } + + errs += ibuf_add_n16(buf, afi); + errs += ibuf_add_zero(buf, 1); + errs += ibuf_add_n8(buf, safi); + + return (errs); +} + +static int +session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags) +{ + int errs = 0; + uint16_t afi; + uint8_t safi; + + if (aid2afi(aid, &afi, &safi)) { + log_warn("%s: bad AID", __func__); + return (-1); + } + + errs += ibuf_add_n16(b, afi); + errs += ibuf_add_n8(b, safi); + errs += ibuf_add_n8(b, flags); + + return (errs); +} + +static int +session_capa_add_ext_nh(struct ibuf *b, uint8_t aid) +{ + int errs = 0; + uint16_t afi; + uint8_t safi; + + if (aid2afi(aid, &afi, &safi)) { + log_warn("%s: bad AID", __func__); + return (-1); + } + + errs += ibuf_add_n16(b, afi); + errs += ibuf_add_n16(b, safi); + errs += ibuf_add_n16(b, AFI_IPv6); + + return (errs); +} + +void +session_open(struct peer *p) +{ + struct ibuf *buf, *opb; + size_t len, optparamlen; + uint8_t i; + int errs = 0, extlen = 0; + int mpcapa = 0; + + + if ((opb = ibuf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN - 6)) == + NULL) { + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + /* multiprotocol extensions, RFC 4760 */ + for (i = AID_MIN; i < AID_MAX; i++) + if (p->capa.ann.mp[i]) { /* 4 bytes data */ + errs += session_capa_add(opb, CAPA_MP, 4); + errs += session_capa_add_mp(opb, i); + mpcapa++; + } + + /* route refresh, RFC 2918 */ + if (p->capa.ann.refresh) /* no data */ + errs += session_capa_add(opb, CAPA_REFRESH, 0); + + /* extended nexthop encoding, RFC 8950 */ + if (p->capa.ann.ext_nh[AID_INET]) { + uint8_t enhlen = 0; + + if (p->capa.ann.mp[AID_INET]) + enhlen += 6; + if (p->capa.ann.mp[AID_VPN_IPv4]) + enhlen += 6; + errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen); + if (p->capa.ann.mp[AID_INET]) + errs += session_capa_add_ext_nh(opb, AID_INET); + if (p->capa.ann.mp[AID_VPN_IPv4]) + errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4); + } + + /* extended message support, RFC 8654 */ + if (p->capa.ann.ext_msg) /* no data */ + errs += session_capa_add(opb, CAPA_EXT_MSG, 0); + + /* BGP open policy, RFC 9234, only for ebgp sessions */ + if (p->conf.ebgp && p->capa.ann.policy && + p->conf.role != ROLE_NONE && + (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] || + mpcapa == 0)) { + errs += session_capa_add(opb, CAPA_ROLE, 1); + errs += ibuf_add_n8(opb, role2capa(p->conf.role)); + } + + /* graceful restart and End-of-RIB marker, RFC 4724 */ + if (p->capa.ann.grestart.restart) { + int rst = 0; + uint16_t hdr = 0; + + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) + rst++; + } + + /* Only set the R-flag if no graceful restart is ongoing */ + if (!rst) + hdr |= CAPA_GR_R_FLAG; + if (p->capa.ann.grestart.grnotification) + hdr |= CAPA_GR_N_FLAG; + errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); + errs += ibuf_add_n16(opb, hdr); + } + + /* 4-bytes AS numbers, RFC6793 */ + if (p->capa.ann.as4byte) { /* 4 bytes data */ + errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t)); + errs += ibuf_add_n32(opb, p->conf.local_as); + } + + /* advertisement of multiple paths, RFC7911 */ + if (p->capa.ann.add_path[AID_MIN]) { /* variable */ + uint8_t aplen; + + if (mpcapa) + aplen = 4 * mpcapa; + else /* AID_INET */ + aplen = 4; + errs += session_capa_add(opb, CAPA_ADD_PATH, aplen); + if (mpcapa) { + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.ann.mp[i]) { + errs += session_capa_add_afi(opb, + i, p->capa.ann.add_path[i] & + CAPA_AP_MASK); + } + } + } else { /* AID_INET */ + errs += session_capa_add_afi(opb, AID_INET, + p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK); + } + } + + /* enhanced route-refresh, RFC7313 */ + if (p->capa.ann.enhanced_rr) /* no data */ + errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0); + + if (errs) { + ibuf_free(opb); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + optparamlen = ibuf_size(opb); + len = MSGSIZE_OPEN_MIN + optparamlen; + if (optparamlen == 0) { + /* nothing */ + } else if (optparamlen + 2 >= 255) { + /* RFC9072: use 255 as magic size and request extra header */ + optparamlen = 255; + extlen = 1; + /* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */ + len += 2 * 3; + } else { + /* regular capabilities header */ + optparamlen += 2; + len += 2; + } + + if ((buf = session_newmsg(BGP_OPEN, len)) == NULL) { + ibuf_free(opb); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + errs += ibuf_add_n8(buf, 4); + errs += ibuf_add_n16(buf, p->conf.local_short_as); + errs += ibuf_add_n16(buf, p->conf.holdtime); + /* is already in network byte order */ + errs += ibuf_add_n32(buf, p->local_bgpid); + errs += ibuf_add_n8(buf, optparamlen); + + if (extlen) { + /* RFC9072 extra header which spans over the capabilities hdr */ + errs += ibuf_add_n8(buf, OPT_PARAM_EXT_LEN); + errs += ibuf_add_n16(buf, ibuf_size(opb) + 1 + 2); + } + + if (optparamlen) { + errs += ibuf_add_n8(buf, OPT_PARAM_CAPABILITIES); + + if (extlen) { + /* RFC9072: 2-byte extended length */ + errs += ibuf_add_n16(buf, ibuf_size(opb)); + } else { + errs += ibuf_add_n8(buf, ibuf_size(opb)); + } + errs += ibuf_add_ibuf(buf, opb); + } + + ibuf_free(opb); + + if (errs) { + ibuf_free(buf); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + session_sendmsg(buf, p, BGP_OPEN); + p->stats.msg_sent_open++; +} + +void +session_keepalive(struct peer *p) +{ + struct ibuf *buf; + + if ((buf = session_newmsg(BGP_KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL) { + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + session_sendmsg(buf, p, BGP_KEEPALIVE); + start_timer_keepalive(p); + p->stats.msg_sent_keepalive++; +} + +void +session_update(struct peer *p, struct ibuf *ibuf) +{ + struct ibuf *buf; + size_t len, maxsize = MAX_PKTSIZE; + + if (p->state != STATE_ESTABLISHED) + return; + + if (p->capa.neg.ext_msg) + maxsize = MAX_EXT_PKTSIZE; + len = ibuf_size(ibuf); + if (len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER || + len > maxsize - MSGSIZE_HEADER) { + log_peer_warnx(&p->conf, "bad UPDATE from RDE"); + return; + } + + if ((buf = session_newmsg(BGP_UPDATE, MSGSIZE_HEADER + len)) == NULL) { + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + if (ibuf_add_ibuf(buf, ibuf)) { + ibuf_free(buf); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + session_sendmsg(buf, p, BGP_UPDATE); + start_timer_keepalive(p); + p->stats.msg_sent_update++; +} + +/* Return 1 if a hard reset should be issued, 0 for a graceful notification */ +static int +session_req_hard_reset(enum err_codes errcode, uint8_t subcode) +{ + switch (errcode) { + case ERR_HEADER: + case ERR_OPEN: + case ERR_UPDATE: + case ERR_FSM: + case ERR_RREFRESH: + /* + * Protocol errors trigger a hard reset. The peer + * is not trustworthy and so there is no realistic + * hope that forwarding can continue. + */ + break; + case ERR_HOLDTIMEREXPIRED: + case ERR_SENDHOLDTIMEREXPIRED: + /* Keep forwarding and hope the other side is back soon. */ + return 0; + case ERR_CEASE: + switch (subcode) { + case ERR_CEASE_CONN_REJECT: + case ERR_CEASE_OTHER_CHANGE: + case ERR_CEASE_COLLISION: + case ERR_CEASE_RSRC_EXHAUST: + /* Per RFC8538 suggestion make these graceful. */ + return 0; + } + break; + } + return 1; +} + +void +session_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode, + void *data, size_t datalen) +{ + struct ibuf ibuf; + + ibuf_from_buffer(&ibuf, data, datalen); + session_notification(p, errcode, subcode, &ibuf); +} + +void +session_notification(struct peer *p, uint8_t errcode, uint8_t subcode, + struct ibuf *ibuf) +{ + struct ibuf *buf; + const char *reason = "sending"; + int errs = 0, need_hard_reset = 0; + size_t datalen = 0; + + switch (p->state) { + case STATE_OPENSENT: + case STATE_OPENCONFIRM: + case STATE_ESTABLISHED: + break; + default: + /* session not open, no need to send notification */ + log_notification(p, errcode, subcode, ibuf, "dropping"); + return; + } + + if (p->capa.neg.grestart.grnotification) { + if (session_req_hard_reset(errcode, subcode)) { + need_hard_reset = 1; + datalen += 2; + reason = "sending hard-reset"; + } else { + reason = "sending graceful"; + } + } + + log_notification(p, errcode, subcode, ibuf, reason); + + /* cap to maximum size */ + if (ibuf != NULL) { + if (ibuf_size(ibuf) > + MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN - datalen) { + log_peer_warnx(&p->conf, + "oversized notification, data trunkated"); + ibuf_truncate(ibuf, MAX_PKTSIZE - + MSGSIZE_NOTIFICATION_MIN - datalen); + } + datalen += ibuf_size(ibuf); + } + + if ((buf = session_newmsg(BGP_NOTIFICATION, + MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + if (need_hard_reset) { + errs += ibuf_add_n8(buf, ERR_CEASE); + errs += ibuf_add_n8(buf, ERR_CEASE_HARD_RESET); + } + + errs += ibuf_add_n8(buf, errcode); + errs += ibuf_add_n8(buf, subcode); + + if (ibuf != NULL) + errs += ibuf_add_ibuf(buf, ibuf); + + if (errs) { + ibuf_free(buf); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + session_sendmsg(buf, p, BGP_NOTIFICATION); + p->stats.msg_sent_notification++; + p->stats.last_sent_errcode = errcode; + p->stats.last_sent_suberr = subcode; +} + +int +session_neighbor_rrefresh(struct peer *p) +{ + uint8_t i; + + if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) + return (-1); + + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.neg.mp[i] != 0) + session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); + } + + return (0); +} + +void +session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype) +{ + struct ibuf *buf; + int errs = 0; + uint16_t afi; + uint8_t safi; + + switch (subtype) { + case ROUTE_REFRESH_REQUEST: + p->stats.refresh_sent_req++; + break; + case ROUTE_REFRESH_BEGIN_RR: + case ROUTE_REFRESH_END_RR: + /* requires enhanced route refresh */ + if (!p->capa.neg.enhanced_rr) + return; + if (subtype == ROUTE_REFRESH_BEGIN_RR) + p->stats.refresh_sent_borr++; + else + p->stats.refresh_sent_eorr++; + break; + default: + fatalx("session_rrefresh: bad subtype %d", subtype); + } + + if (aid2afi(aid, &afi, &safi) == -1) + fatalx("session_rrefresh: bad afi/safi pair"); + + if ((buf = session_newmsg(BGP_RREFRESH, MSGSIZE_RREFRESH)) == NULL) { + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + errs += ibuf_add_n16(buf, afi); + errs += ibuf_add_n8(buf, subtype); + errs += ibuf_add_n8(buf, safi); + + if (errs) { + ibuf_free(buf); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + return; + } + + session_sendmsg(buf, p, BGP_RREFRESH); + p->stats.msg_sent_rrefresh++; +} + +struct ibuf * +parse_header(struct ibuf *msg, void *arg, int *fd) +{ + struct peer *peer = arg; + struct ibuf *b; + u_char m[MSGSIZE_HEADER_MARKER]; + uint16_t len, maxlen = MAX_PKTSIZE; + uint8_t type; + + if (ibuf_get(msg, m, sizeof(m)) == -1 || + ibuf_get_n16(msg, &len) == -1 || + ibuf_get_n8(msg, &type) == -1) + return (NULL); + /* caller MUST make sure we are getting 19 bytes! */ + if (memcmp(m, marker, sizeof(marker))) { + log_peer_warnx(&peer->conf, "sync error"); + session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL); + bgp_fsm(peer, EVNT_CON_FATAL, NULL); + errno = EINVAL; + return (NULL); + } + + if (peer->capa.ann.ext_msg) + maxlen = MAX_EXT_PKTSIZE; + + if (len < MSGSIZE_HEADER || len > maxlen) { + log_peer_warnx(&peer->conf, + "received message: illegal length: %u byte", len); + goto badlen; + } + + switch (type) { + case BGP_OPEN: + if (len < MSGSIZE_OPEN_MIN || len > MAX_PKTSIZE) { + log_peer_warnx(&peer->conf, + "received OPEN: illegal len: %u byte", len); + goto badlen; + } + break; + case BGP_NOTIFICATION: + if (len < MSGSIZE_NOTIFICATION_MIN) { + log_peer_warnx(&peer->conf, + "received NOTIFICATION: illegal len: %u byte", len); + goto badlen; + } + break; + case BGP_UPDATE: + if (len < MSGSIZE_UPDATE_MIN) { + log_peer_warnx(&peer->conf, + "received UPDATE: illegal len: %u byte", len); + goto badlen; + } + break; + case BGP_KEEPALIVE: + if (len != MSGSIZE_KEEPALIVE) { + log_peer_warnx(&peer->conf, + "received KEEPALIVE: illegal len: %u byte", len); + goto badlen; + } + break; + case BGP_RREFRESH: + if (len < MSGSIZE_RREFRESH_MIN) { + log_peer_warnx(&peer->conf, + "received RREFRESH: illegal len: %u byte", len); + goto badlen; + } + break; + default: + log_peer_warnx(&peer->conf, + "received msg with unknown type %u", type); + session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE, + &type, sizeof(type)); + bgp_fsm(peer, EVNT_CON_FATAL, NULL); + errno = EINVAL; + return (NULL); + } + + if ((b = ibuf_open(len)) == NULL) + return (NULL); + return (b); + + badlen: + len = htons(len); + session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, + &len, sizeof(len)); + bgp_fsm(peer, EVNT_CON_FATAL, NULL); + errno = ERANGE; + return (NULL); +} + +static int +parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as) +{ + struct ibuf capabuf; + uint16_t afi, nhafi, gr_header; + uint8_t capa_code, capa_len; + uint8_t safi, aid, role, flags; + + while (ibuf_size(buf) > 0) { + if (ibuf_get_n8(buf, &capa_code) == -1 || + ibuf_get_n8(buf, &capa_len) == -1) { + log_peer_warnx(&peer->conf, "Bad capabilities attr " + "length: too short"); + return (-1); + } + if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) { + log_peer_warnx(&peer->conf, + "Received bad capabilities attr length: " + "len %zu smaller than capa_len %u", + ibuf_size(buf), capa_len); + return (-1); + } + + switch (capa_code) { + case CAPA_MP: /* RFC 4760 */ + if (capa_len != 4 || + ibuf_get_n16(&capabuf, &afi) == -1 || + ibuf_skip(&capabuf, 1) == -1 || + ibuf_get_n8(&capabuf, &safi) == -1) { + log_peer_warnx(&peer->conf, + "Received bad multi protocol capability"); + break; + } + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "Received multi protocol capability: " + " unknown AFI %u, safi %u pair", + afi, safi); + peer->capa.peer.mp[AID_UNSPEC] = 1; + break; + } + peer->capa.peer.mp[aid] = 1; + break; + case CAPA_REFRESH: + peer->capa.peer.refresh = 1; + break; + case CAPA_EXT_NEXTHOP: + while (ibuf_size(&capabuf) > 0) { + uint16_t tmp16; + if (ibuf_get_n16(&capabuf, &afi) == -1 || + ibuf_get_n16(&capabuf, &tmp16) == -1 || + ibuf_get_n16(&capabuf, &nhafi) == -1) { + log_peer_warnx(&peer->conf, + "Received bad %s capability", + log_capability(CAPA_EXT_NEXTHOP)); + memset(peer->capa.peer.ext_nh, 0, + sizeof(peer->capa.peer.ext_nh)); + break; + } + safi = tmp16; + if (afi2aid(afi, safi, &aid) == -1 || + !(aid == AID_INET || aid == AID_VPN_IPv4)) { + log_peer_warnx(&peer->conf, + "Received %s capability: " + " unsupported AFI %u, safi %u pair", + log_capability(CAPA_EXT_NEXTHOP), + afi, safi); + continue; + } + if (nhafi != AFI_IPv6) { + log_peer_warnx(&peer->conf, + "Received %s capability: " + " unsupported nexthop AFI %u", + log_capability(CAPA_EXT_NEXTHOP), + nhafi); + continue; + } + peer->capa.peer.ext_nh[aid] = 1; + } + break; + case CAPA_EXT_MSG: + peer->capa.peer.ext_msg = 1; + break; + case CAPA_ROLE: + if (capa_len != 1 || + ibuf_get_n8(&capabuf, &role) == -1) { + log_peer_warnx(&peer->conf, + "Received bad role capability"); + break; + } + if (!peer->conf.ebgp) { + log_peer_warnx(&peer->conf, + "Received role capability on iBGP session"); + break; + } + peer->capa.peer.policy = 1; + peer->remote_role = capa2role(role); + break; + case CAPA_RESTART: + if (capa_len == 2) { + /* peer only supports EoR marker */ + peer->capa.peer.grestart.restart = 1; + peer->capa.peer.grestart.timeout = 0; + break; + } else if (capa_len % 4 != 2) { + log_peer_warnx(&peer->conf, + "Bad graceful restart capability"); + peer->capa.peer.grestart.restart = 0; + peer->capa.peer.grestart.timeout = 0; + break; + } + + if (ibuf_get_n16(&capabuf, &gr_header) == -1) { + bad_gr_restart: + log_peer_warnx(&peer->conf, + "Bad graceful restart capability"); + peer->capa.peer.grestart.restart = 0; + peer->capa.peer.grestart.timeout = 0; + break; + } + + peer->capa.peer.grestart.timeout = + gr_header & CAPA_GR_TIMEMASK; + if (peer->capa.peer.grestart.timeout == 0) { + log_peer_warnx(&peer->conf, "Received " + "graceful restart with zero timeout"); + peer->capa.peer.grestart.restart = 0; + break; + } + + while (ibuf_size(&capabuf) > 0) { + if (ibuf_get_n16(&capabuf, &afi) == -1 || + ibuf_get_n8(&capabuf, &safi) == -1 || + ibuf_get_n8(&capabuf, &flags) == -1) + goto bad_gr_restart; + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "Received graceful restart capa: " + " unknown AFI %u, safi %u pair", + afi, safi); + continue; + } + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_PRESENT; + if (flags & CAPA_GR_F_FLAG) + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_FORWARD; + if (gr_header & CAPA_GR_R_FLAG) + peer->capa.peer.grestart.flags[aid] |= + CAPA_GR_RESTART; + peer->capa.peer.grestart.restart = 2; + } + if (gr_header & CAPA_GR_N_FLAG) + peer->capa.peer.grestart.grnotification = 1; + break; + case CAPA_AS4BYTE: + if (capa_len != 4 || + ibuf_get_n32(&capabuf, as) == -1) { + log_peer_warnx(&peer->conf, + "Received bad AS4BYTE capability"); + peer->capa.peer.as4byte = 0; + break; + } + if (*as == 0) { + log_peer_warnx(&peer->conf, + "peer requests unacceptable AS %u", *as); + session_notification(peer, ERR_OPEN, + ERR_OPEN_AS, NULL); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + peer->capa.peer.as4byte = 1; + break; + case CAPA_ADD_PATH: + if (capa_len % 4 != 0) { + bad_add_path: + log_peer_warnx(&peer->conf, + "Received bad ADD-PATH capability"); + memset(peer->capa.peer.add_path, 0, + sizeof(peer->capa.peer.add_path)); + break; + } + while (ibuf_size(&capabuf) > 0) { + if (ibuf_get_n16(&capabuf, &afi) == -1 || + ibuf_get_n8(&capabuf, &safi) == -1 || + ibuf_get_n8(&capabuf, &flags) == -1) + goto bad_add_path; + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "Received ADD-PATH capa: " + " unknown AFI %u, safi %u pair", + afi, safi); + memset(peer->capa.peer.add_path, 0, + sizeof(peer->capa.peer.add_path)); + break; + } + if (flags & ~CAPA_AP_BIDIR) { + log_peer_warnx(&peer->conf, + "Received ADD-PATH capa: " + " bad flags %x", flags); + memset(peer->capa.peer.add_path, 0, + sizeof(peer->capa.peer.add_path)); + break; + } + peer->capa.peer.add_path[aid] = flags; + } + break; + case CAPA_ENHANCED_RR: + peer->capa.peer.enhanced_rr = 1; + break; + default: + break; + } + } + + return (0); +} + +static int +parse_open(struct peer *peer, struct ibuf *msg) +{ + uint8_t version, rversion; + uint16_t short_as; + uint16_t holdtime; + uint32_t as, bgpid; + uint8_t optparamlen; + + if (ibuf_get_n8(msg, &version) == -1 || + ibuf_get_n16(msg, &short_as) == -1 || + ibuf_get_n16(msg, &holdtime) == -1 || + ibuf_get_n32(msg, &bgpid) == -1 || + ibuf_get_n8(msg, &optparamlen) == -1) + goto bad_len; + + if (version != BGP_VERSION) { + log_peer_warnx(&peer->conf, + "peer wants unrecognized version %u", version); + if (version > BGP_VERSION) + rversion = version - BGP_VERSION; + else + rversion = BGP_VERSION; + session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION, + &rversion, sizeof(rversion)); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + + as = peer->short_as = short_as; + if (as == 0) { + log_peer_warnx(&peer->conf, + "peer requests unacceptable AS %u", as); + session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + + if (holdtime != 0 && holdtime < peer->conf.min_holdtime) { + log_peer_warnx(&peer->conf, + "peer requests unacceptable holdtime %u", holdtime); + session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + + if (holdtime < peer->conf.holdtime) + peer->holdtime = holdtime; + else + peer->holdtime = peer->conf.holdtime; + + /* check bgpid for validity - just disallow 0 */ + if (bgpid == 0) { + log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable"); + session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + peer->remote_bgpid = bgpid; + + if (optparamlen != 0) { + struct ibuf oparams, op; + uint8_t ext_type, op_type; + uint16_t ext_len, op_len; + + ibuf_from_ibuf(&oparams, msg); + + /* check for RFC9072 encoding */ + if (ibuf_get_n8(&oparams, &ext_type) == -1) + goto bad_len; + if (ext_type == OPT_PARAM_EXT_LEN) { + if (ibuf_get_n16(&oparams, &ext_len) == -1) + goto bad_len; + /* skip RFC9072 header */ + if (ibuf_skip(msg, 3) == -1) + goto bad_len; + } else { + ext_len = optparamlen; + ibuf_rewind(&oparams); + } + + if (ibuf_truncate(&oparams, ext_len) == -1 || + ibuf_skip(msg, ext_len) == -1) + goto bad_len; + + while (ibuf_size(&oparams) > 0) { + if (ibuf_get_n8(&oparams, &op_type) == -1) + goto bad_len; + + if (ext_type == OPT_PARAM_EXT_LEN) { + if (ibuf_get_n16(&oparams, &op_len) == -1) + goto bad_len; + } else { + uint8_t tmp; + if (ibuf_get_n8(&oparams, &tmp) == -1) + goto bad_len; + op_len = tmp; + } + + if (ibuf_get_ibuf(&oparams, op_len, &op) == -1) + goto bad_len; + + switch (op_type) { + case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ + if (parse_capabilities(peer, &op, &as) == -1) { + session_notification(peer, ERR_OPEN, 0, + NULL); + change_state(peer, STATE_IDLE, + EVNT_RCVD_OPEN); + return (-1); + } + break; + case OPT_PARAM_AUTH: /* deprecated */ + default: + /* + * unsupported type + * the RFCs tell us to leave the data section + * empty and notify the peer with ERR_OPEN, + * ERR_OPEN_OPT. How the peer should know + * _which_ optional parameter we don't support + * is beyond me. + */ + log_peer_warnx(&peer->conf, + "received OPEN message with unsupported " + "optional parameter: type %u", op_type); + session_notification(peer, ERR_OPEN, + ERR_OPEN_OPT, NULL); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + } + } + + if (ibuf_size(msg) != 0) { + bad_len: + log_peer_warnx(&peer->conf, + "corrupt OPEN message received: length mismatch"); + session_notification(peer, ERR_OPEN, 0, NULL); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + + /* + * if remote-as is zero and it's a cloned neighbor, accept any + * but only on the first connect, after that the remote-as needs + * to remain the same. + */ + if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { + peer->conf.remote_as = as; + peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); + if (!peer->conf.ebgp) + /* force enforce_as off for iBGP sessions */ + peer->conf.enforce_as = ENFORCE_AS_OFF; + } + + if (peer->conf.remote_as != as) { + log_peer_warnx(&peer->conf, "peer sent wrong AS %s", + log_as(as)); + session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + + /* on iBGP sessions check for bgpid collision */ + if (!peer->conf.ebgp && peer->remote_bgpid == peer->local_bgpid) { + struct in_addr ina; + ina.s_addr = htonl(bgpid); + log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours", + inet_ntoa(ina)); + session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + + if (capa_neg_calc(peer) == -1) { + change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); + return (-1); + } + + return (0); +} + +static int +parse_update(struct peer *peer, struct ibuf *msg) +{ + return session_hanlde_update(peer, msg); +} + +static int +parse_rrefresh(struct peer *peer, struct ibuf *msg) +{ + struct route_refresh rr; + uint16_t afi, datalen; + uint8_t aid, safi, subtype; + + datalen = ibuf_size(msg) + MSGSIZE_HEADER; + + if (ibuf_get_n16(msg, &afi) == -1 || + ibuf_get_n8(msg, &subtype) == -1 || + ibuf_get_n8(msg, &safi) == -1) { + /* minimum size checked in session_process_msg() */ + fatalx("%s: message too small", __func__); + } + + /* check subtype if peer announced enhanced route refresh */ + if (peer->capa.neg.enhanced_rr) { + switch (subtype) { + case ROUTE_REFRESH_REQUEST: + /* no ORF support, so no oversized RREFRESH msgs */ + if (datalen != MSGSIZE_RREFRESH) { + log_peer_warnx(&peer->conf, + "received RREFRESH: illegal len: %u byte", + datalen); + datalen = htons(datalen); + session_notification_data(peer, ERR_HEADER, + ERR_HDR_LEN, &datalen, sizeof(datalen)); + bgp_fsm(peer, EVNT_CON_FATAL, NULL); + return (-1); + } + peer->stats.refresh_rcvd_req++; + break; + case ROUTE_REFRESH_BEGIN_RR: + case ROUTE_REFRESH_END_RR: + /* special handling for RFC7313 */ + if (datalen != MSGSIZE_RREFRESH) { + log_peer_warnx(&peer->conf, + "received RREFRESH: illegal len: %u byte", + datalen); + ibuf_rewind(msg); + session_notification(peer, ERR_RREFRESH, + ERR_RR_INV_LEN, msg); + bgp_fsm(peer, EVNT_CON_FATAL, NULL); + return (-1); + } + if (subtype == ROUTE_REFRESH_BEGIN_RR) + peer->stats.refresh_rcvd_borr++; + else + peer->stats.refresh_rcvd_eorr++; + break; + default: + log_peer_warnx(&peer->conf, "peer sent bad refresh, " + "bad subtype %d", subtype); + return (0); + } + } else { + /* force subtype to default */ + subtype = ROUTE_REFRESH_REQUEST; + peer->stats.refresh_rcvd_req++; + } + + /* afi/safi unchecked - unrecognized values will be ignored anyway */ + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, "peer sent bad refresh, " + "invalid afi/safi pair"); + return (0); + } + + if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) { + log_peer_warnx(&peer->conf, "peer sent unexpected refresh"); + return (0); + } + + rr.aid = aid; + rr.subtype = subtype; + + return session_handle_rrefresh(peer, &rr); +} + +static void +parse_notification(struct peer *peer, struct ibuf *msg) +{ + const char *reason = "received"; + uint8_t errcode, subcode; + uint8_t reason_len; + enum session_events event = EVNT_RCVD_NOTIFICATION; + + if (ibuf_get_n8(msg, &errcode) == -1 || + ibuf_get_n8(msg, &subcode) == -1) { + log_peer_warnx(&peer->conf, "received bad notification"); + goto done; + } + + /* RFC8538: check for hard-reset or graceful notification */ + if (peer->capa.neg.grestart.grnotification) { + if (errcode == ERR_CEASE && subcode == ERR_CEASE_HARD_RESET) { + if (ibuf_get_n8(msg, &errcode) == -1 || + ibuf_get_n8(msg, &subcode) == -1) { + log_peer_warnx(&peer->conf, + "received bad hard-reset notification"); + goto done; + } + reason = "received hard-reset"; + } else { + reason = "received graceful"; + event = EVNT_RCVD_GRACE_NOTIFICATION; + } + } + + peer->errcnt++; + peer->stats.last_rcvd_errcode = errcode; + peer->stats.last_rcvd_suberr = subcode; + + log_notification(peer, errcode, subcode, msg, reason); + + CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX); + memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason)); + if (errcode == ERR_CEASE && + (subcode == ERR_CEASE_ADMIN_DOWN || + subcode == ERR_CEASE_ADMIN_RESET)) { + /* check if shutdown reason is included */ + if (ibuf_get_n8(msg, &reason_len) != -1 && reason_len != 0) { + if (ibuf_get(msg, peer->stats.last_reason, + reason_len) == -1) + log_peer_warnx(&peer->conf, + "received truncated shutdown reason"); + } + } + +done: + change_state(peer, STATE_IDLE, event); +} + +void +session_process_msg(struct peer *p) +{ + struct ibuf *msg; + int processed = 0; + uint8_t msgtype; + + p->rpending = 0; + if (p->wbuf == NULL) + return; + + /* + * session might drop to IDLE -> all buffers are flushed + */ + while ((msg = msgbuf_get(p->wbuf)) != NULL) { + /* skip msg header and extract type */ + if (ibuf_skip(msg, MSGSIZE_HEADER_MARKER) == -1 || + ibuf_skip(msg, sizeof(uint16_t)) == -1 || + ibuf_get_n8(msg, &msgtype) == -1) { + log_peer_warn(&p->conf, "process message failed"); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + ibuf_free(msg); + return; + } + ibuf_rewind(msg); + + session_mrt_dump_bgp_msg(p, msg, msgtype, DIR_IN); + + ibuf_skip(msg, MSGSIZE_HEADER); + + switch (msgtype) { + case BGP_OPEN: + bgp_fsm(p, EVNT_RCVD_OPEN, msg); + p->stats.msg_rcvd_open++; + break; + case BGP_UPDATE: + bgp_fsm(p, EVNT_RCVD_UPDATE, msg); + p->stats.msg_rcvd_update++; + break; + case BGP_NOTIFICATION: + bgp_fsm(p, EVNT_RCVD_NOTIFICATION, msg); + p->stats.msg_rcvd_notification++; + break; + case BGP_KEEPALIVE: + bgp_fsm(p, EVNT_RCVD_KEEPALIVE, msg); + p->stats.msg_rcvd_keepalive++; + break; + case BGP_RREFRESH: + parse_rrefresh(p, msg); + p->stats.msg_rcvd_rrefresh++; + break; + default: /* cannot happen */ + session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE, + &msgtype, 1); + log_peer_warnx(&p->conf, + "received message with unknown type %u", msgtype); + bgp_fsm(p, EVNT_CON_FATAL, NULL); + } + ibuf_free(msg); + if (++processed > MSG_PROCESS_LIMIT) { + p->rpending = 1; + break; + } + } +} + +int +capa_neg_calc(struct peer *p) +{ + struct ibuf *ebuf; + uint8_t i, hasmp = 0, capa_code, capa_len, capa_aid = 0; + + /* a capability is accepted only if both sides announced it */ + + p->capa.neg.refresh = + (p->capa.ann.refresh && p->capa.peer.refresh) != 0; + p->capa.neg.enhanced_rr = + (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; + p->capa.neg.as4byte = + (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; + p->capa.neg.ext_msg = + (p->capa.ann.ext_msg && p->capa.peer.ext_msg) != 0; + + /* MP: both side must agree on the AFI,SAFI pair */ + if (p->capa.peer.mp[AID_UNSPEC]) + hasmp = 1; + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) + p->capa.neg.mp[i] = 1; + else + p->capa.neg.mp[i] = 0; + if (p->capa.ann.mp[i] || p->capa.peer.mp[i]) + hasmp = 1; + } + /* if no MP capability present default to IPv4 unicast mode */ + if (!hasmp) + p->capa.neg.mp[AID_INET] = 1; + + /* + * graceful restart: the peer capabilities are of interest here. + * It is necessary to compare the new values with the previous ones + * and act accordingly. AFI/SAFI that are not part in the MP capability + * are treated as not being present. + * Also make sure that a flush happens if the session stopped + * supporting graceful restart. + */ + + for (i = AID_MIN; i < AID_MAX; i++) { + int8_t negflags; + + /* disable GR if the AFI/SAFI is not present */ + if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && + p->capa.neg.mp[i] == 0)) + p->capa.peer.grestart.flags[i] = 0; /* disable */ + /* look at current GR state and decide what to do */ + negflags = p->capa.neg.grestart.flags[i]; + p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; + if (negflags & CAPA_GR_RESTARTING) { + if (p->capa.ann.grestart.restart != 0 && + p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { + p->capa.neg.grestart.flags[i] |= + CAPA_GR_RESTARTING; + } else { + if (session_graceful_flush(p, i, + "not restarted") == -1) + return (-1); + } + } + } + p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; + p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; + if (p->capa.ann.grestart.restart == 0) + p->capa.neg.grestart.restart = 0; + + /* RFC 8538 graceful notification: both sides need to agree */ + p->capa.neg.grestart.grnotification = + (p->capa.ann.grestart.grnotification && + p->capa.peer.grestart.grnotification) != 0; + + /* RFC 8950 extended nexthop encoding: both sides need to agree */ + memset(p->capa.neg.ext_nh, 0, sizeof(p->capa.neg.ext_nh)); + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.neg.mp[i] == 0) + continue; + if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) { + p->capa.neg.ext_nh[i] = 1; + } + } + + /* + * ADD-PATH: set only those bits where both sides agree. + * For this compare our send bit with the recv bit from the peer + * and vice versa. + * The flags are stored from this systems view point. + * At index 0 the flags are set if any per-AID flag is set. + */ + memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.neg.mp[i] == 0) + continue; + if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && + (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { + p->capa.neg.add_path[i] |= CAPA_AP_RECV; + p->capa.neg.add_path[0] |= CAPA_AP_RECV; + } + if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && + (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { + p->capa.neg.add_path[i] |= CAPA_AP_SEND; + p->capa.neg.add_path[0] |= CAPA_AP_SEND; + } + } + + /* + * Open policy: check that the policy is sensible. + * + * Make sure that the roles match and set the negotiated capability + * to the role of the peer. So the RDE can inject the OTC attribute. + * See RFC 9234, section 4.2. + * These checks should only happen on ebgp sessions. + */ + if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 && + p->conf.ebgp) { + switch (p->conf.role) { + case ROLE_PROVIDER: + if (p->remote_role != ROLE_CUSTOMER) + goto policyfail; + break; + case ROLE_RS: + if (p->remote_role != ROLE_RS_CLIENT) + goto policyfail; + break; + case ROLE_RS_CLIENT: + if (p->remote_role != ROLE_RS) + goto policyfail; + break; + case ROLE_CUSTOMER: + if (p->remote_role != ROLE_PROVIDER) + goto policyfail; + break; + case ROLE_PEER: + if (p->remote_role != ROLE_PEER) + goto policyfail; + break; + default: + policyfail: + log_peer_warnx(&p->conf, "open policy role mismatch: " + "our role %s, their role %s", + log_policy(p->conf.role), + log_policy(p->remote_role)); + session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); + return (-1); + } + p->capa.neg.policy = 1; + } + + /* enforce presence of open policy role capability */ + if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 && + p->conf.ebgp) { + log_peer_warnx(&p->conf, "open policy role enforced but " + "not present"); + session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); + return (-1); + } + + /* enforce presence of other capabilities */ + if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) { + capa_code = CAPA_REFRESH; + capa_len = 0; + goto fail; + } + /* enforce presence of other capabilities */ + if (p->capa.ann.ext_msg == 2 && p->capa.neg.ext_msg == 0) { + capa_code = CAPA_EXT_MSG; + capa_len = 0; + goto fail; + } + if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) { + capa_code = CAPA_ENHANCED_RR; + capa_len = 0; + goto fail; + } + if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) { + capa_code = CAPA_AS4BYTE; + capa_len = 4; + goto fail; + } + if (p->capa.ann.grestart.restart == 2 && + p->capa.neg.grestart.restart == 0) { + capa_code = CAPA_RESTART; + capa_len = 2; + goto fail; + } + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) { + capa_code = CAPA_MP; + capa_len = 4; + capa_aid = i; + goto fail; + } + } + + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.neg.mp[i] == 0) + continue; + if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) && + (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) { + capa_code = CAPA_ADD_PATH; + capa_len = 4; + capa_aid = i; + goto fail; + } + if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) && + (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) { + capa_code = CAPA_ADD_PATH; + capa_len = 4; + capa_aid = i; + goto fail; + } + } + + for (i = AID_MIN; i < AID_MAX; i++) { + if (p->capa.neg.mp[i] == 0) + continue; + if (p->capa.ann.ext_nh[i] == 2 && + p->capa.neg.ext_nh[i] == 0) { + capa_code = CAPA_EXT_NEXTHOP; + capa_len = 6; + capa_aid = i; + goto fail; + } + } + return (0); + + fail: + if ((ebuf = ibuf_dynamic(2, 256)) == NULL) + return (-1); + /* best effort, no problem if it fails */ + session_capa_add(ebuf, capa_code, capa_len); + if (capa_code == CAPA_MP) + session_capa_add_mp(ebuf, capa_aid); + else if (capa_code == CAPA_ADD_PATH) + session_capa_add_afi(ebuf, capa_aid, 0); + else if (capa_code == CAPA_EXT_NEXTHOP) + session_capa_add_ext_nh(ebuf, capa_aid); + else if (capa_len > 0) + ibuf_add_zero(ebuf, capa_len); + + session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf); + ibuf_free(ebuf); + return (-1); +} + +static void +session_tcp_established(struct peer *peer) +{ + struct sockaddr_storage ss; + socklen_t len; + + len = sizeof(ss); + if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) + log_warn("getsockname"); + sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); + len = sizeof(ss); + if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) + log_warn("getpeername"); + sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); + + get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt, + &peer->if_scope); +} + +void +bgp_fsm(struct peer *peer, enum session_events event, struct ibuf *msg) +{ + switch (peer->state) { + case STATE_NONE: + /* nothing */ + break; + case STATE_IDLE: + switch (event) { + case EVNT_START: + timer_stop(&peer->timers, Timer_Hold); + timer_stop(&peer->timers, Timer_SendHold); + timer_stop(&peer->timers, Timer_Keepalive); + timer_stop(&peer->timers, Timer_IdleHold); + + if (!peer->depend_ok) + timer_stop(&peer->timers, Timer_ConnectRetry); + else if (peer->passive || peer->conf.passive || + peer->conf.template) { + change_state(peer, STATE_ACTIVE, event); + timer_stop(&peer->timers, Timer_ConnectRetry); + } else { + change_state(peer, STATE_CONNECT, event); + timer_set(&peer->timers, Timer_ConnectRetry, + peer->connectretry); + session_connect(peer); + } + peer->passive = 0; + break; + case EVNT_STOP: + timer_stop(&peer->timers, Timer_IdleHold); + break; + default: + /* ignore */ + break; + } + break; + case STATE_CONNECT: + switch (event) { + case EVNT_START: + /* ignore */ + break; + case EVNT_CON_OPEN: + session_tcp_established(peer); + session_open(peer); + timer_stop(&peer->timers, Timer_ConnectRetry); + peer->holdtime = INTERVAL_HOLD_INITIAL; + start_timer_holdtime(peer); + change_state(peer, STATE_OPENSENT, event); + break; + case EVNT_CON_OPENFAIL: + timer_set(&peer->timers, Timer_ConnectRetry, + peer->connectretry); + session_close(peer); + change_state(peer, STATE_ACTIVE, event); + break; + case EVNT_TIMER_CONNRETRY: + timer_set(&peer->timers, Timer_ConnectRetry, + peer->connectretry); + session_connect(peer); + break; + default: + change_state(peer, STATE_IDLE, event); + break; + } + break; + case STATE_ACTIVE: + switch (event) { + case EVNT_START: + /* ignore */ + break; + case EVNT_CON_OPEN: + session_tcp_established(peer); + session_open(peer); + timer_stop(&peer->timers, Timer_ConnectRetry); + peer->holdtime = INTERVAL_HOLD_INITIAL; + start_timer_holdtime(peer); + change_state(peer, STATE_OPENSENT, event); + break; + case EVNT_CON_OPENFAIL: + timer_set(&peer->timers, Timer_ConnectRetry, + peer->connectretry); + session_close(peer); + change_state(peer, STATE_ACTIVE, event); + break; + case EVNT_TIMER_CONNRETRY: + timer_set(&peer->timers, Timer_ConnectRetry, + peer->holdtime); + change_state(peer, STATE_CONNECT, event); + session_connect(peer); + break; + default: + change_state(peer, STATE_IDLE, event); + break; + } + break; + case STATE_OPENSENT: + switch (event) { + case EVNT_START: + /* ignore */ + break; + case EVNT_STOP: + change_state(peer, STATE_IDLE, event); + break; + case EVNT_CON_CLOSED: + session_close(peer); + timer_set(&peer->timers, Timer_ConnectRetry, + peer->connectretry); + change_state(peer, STATE_ACTIVE, event); + break; + case EVNT_CON_FATAL: + change_state(peer, STATE_IDLE, event); + break; + case EVNT_TIMER_HOLDTIME: + session_notification(peer, ERR_HOLDTIMEREXPIRED, + 0, NULL); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_TIMER_SENDHOLD: + session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, + 0, NULL); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_RCVD_OPEN: + /* parse_open calls change_state itself on failure */ + if (parse_open(peer, msg)) + break; + session_keepalive(peer); + change_state(peer, STATE_OPENCONFIRM, event); + break; + case EVNT_RCVD_NOTIFICATION: + parse_notification(peer, msg); + break; + default: + session_notification(peer, + ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL); + change_state(peer, STATE_IDLE, event); + break; + } + break; + case STATE_OPENCONFIRM: + switch (event) { + case EVNT_START: + /* ignore */ + break; + case EVNT_STOP: + change_state(peer, STATE_IDLE, event); + break; + case EVNT_CON_CLOSED: + case EVNT_CON_FATAL: + change_state(peer, STATE_IDLE, event); + break; + case EVNT_TIMER_HOLDTIME: + session_notification(peer, ERR_HOLDTIMEREXPIRED, + 0, NULL); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_TIMER_SENDHOLD: + session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, + 0, NULL); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_TIMER_KEEPALIVE: + session_keepalive(peer); + break; + case EVNT_RCVD_KEEPALIVE: + start_timer_holdtime(peer); + change_state(peer, STATE_ESTABLISHED, event); + break; + case EVNT_RCVD_NOTIFICATION: + parse_notification(peer, msg); + break; + default: + session_notification(peer, + ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL); + change_state(peer, STATE_IDLE, event); + break; + } + break; + case STATE_ESTABLISHED: + switch (event) { + case EVNT_START: + /* ignore */ + break; + case EVNT_STOP: + change_state(peer, STATE_IDLE, event); + break; + case EVNT_CON_CLOSED: + case EVNT_CON_FATAL: + change_state(peer, STATE_IDLE, event); + break; + case EVNT_TIMER_HOLDTIME: + session_notification(peer, ERR_HOLDTIMEREXPIRED, + 0, NULL); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_TIMER_SENDHOLD: + session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, + 0, NULL); + change_state(peer, STATE_IDLE, event); + break; + case EVNT_TIMER_KEEPALIVE: + session_keepalive(peer); + break; + case EVNT_RCVD_KEEPALIVE: + start_timer_holdtime(peer); + break; + case EVNT_RCVD_UPDATE: + start_timer_holdtime(peer); + if (parse_update(peer, msg)) + change_state(peer, STATE_IDLE, event); + else + start_timer_holdtime(peer); + break; + case EVNT_RCVD_NOTIFICATION: + parse_notification(peer, msg); + break; + default: + session_notification(peer, + ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL); + change_state(peer, STATE_IDLE, event); + break; + } + break; + } +} + +static void +start_timer_holdtime(struct peer *peer) +{ + if (peer->holdtime > 0) + timer_set(&peer->timers, Timer_Hold, peer->holdtime); + else + timer_stop(&peer->timers, Timer_Hold); +} + +void +start_timer_sendholdtime(struct peer *peer) +{ + uint16_t holdtime = INTERVAL_HOLD; + + if (peer->holdtime > INTERVAL_HOLD) + holdtime = peer->holdtime; + + if (peer->holdtime > 0) + timer_set(&peer->timers, Timer_SendHold, holdtime); + else + timer_stop(&peer->timers, Timer_SendHold); +} + +static void +start_timer_keepalive(struct peer *peer) +{ + if (peer->holdtime > 0) + timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); + else + timer_stop(&peer->timers, Timer_Keepalive); +} + +void +change_state(struct peer *peer, enum session_state state, + enum session_events event) +{ + switch (state) { + case STATE_IDLE: + /* carp demotion first. new peers handled in init_peer */ + if (peer->state == STATE_ESTABLISHED && + peer->conf.demote_group[0] && !peer->demoted) + session_demote(peer, +1); + + /* + * try to write out what's buffered (maybe a notification), + * don't bother if it fails + */ + if (peer->state >= STATE_OPENSENT && + msgbuf_queuelen(peer->wbuf) > 0) + ibuf_write(peer->fd, peer->wbuf); + + /* + * we must start the timer for the next EVNT_START + * if we are coming here due to an error and the + * session was not established successfully before, the + * starttimerinterval needs to be exponentially increased + */ + if (peer->IdleHoldTime == 0) + peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; + peer->holdtime = INTERVAL_HOLD_INITIAL; + timer_stop(&peer->timers, Timer_ConnectRetry); + timer_stop(&peer->timers, Timer_Keepalive); + timer_stop(&peer->timers, Timer_Hold); + timer_stop(&peer->timers, Timer_SendHold); + timer_stop(&peer->timers, Timer_IdleHold); + timer_stop(&peer->timers, Timer_IdleHoldReset); + session_close(peer); + msgbuf_clear(peer->wbuf); + peer->rpending = 0; + memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); + session_md5_reload(peer); + + if (peer->state == STATE_ESTABLISHED) { + if (peer->capa.neg.grestart.restart == 2 && + (event == EVNT_CON_CLOSED || + event == EVNT_CON_FATAL || + (peer->capa.neg.grestart.grnotification && + (event == EVNT_RCVD_GRACE_NOTIFICATION || + event == EVNT_TIMER_HOLDTIME || + event == EVNT_TIMER_SENDHOLD)))) { + /* don't punish graceful restart */ + timer_set(&peer->timers, Timer_IdleHold, 0); + session_graceful_restart(peer); + } else if (event != EVNT_STOP) { + timer_set(&peer->timers, Timer_IdleHold, + peer->IdleHoldTime); + if (event != EVNT_NONE && + peer->IdleHoldTime < MAX_IDLE_HOLD/2) + peer->IdleHoldTime *= 2; + session_down(peer); + } else { + session_down(peer); + } + } else if (event != EVNT_STOP) { + timer_set(&peer->timers, Timer_IdleHold, + peer->IdleHoldTime); + if (event != EVNT_NONE && + peer->IdleHoldTime < MAX_IDLE_HOLD / 2) + peer->IdleHoldTime *= 2; + } + + if (peer->state == STATE_NONE || + peer->state == STATE_ESTABLISHED) { + /* initialize capability negotiation structures */ + memcpy(&peer->capa.ann, &peer->conf.capabilities, + sizeof(peer->capa.ann)); + } + break; + case STATE_CONNECT: + if (peer->state == STATE_ESTABLISHED && + peer->capa.neg.grestart.restart == 2) { + /* do the graceful restart dance */ + session_graceful_restart(peer); + peer->holdtime = INTERVAL_HOLD_INITIAL; + timer_stop(&peer->timers, Timer_ConnectRetry); + timer_stop(&peer->timers, Timer_Keepalive); + timer_stop(&peer->timers, Timer_Hold); + timer_stop(&peer->timers, Timer_SendHold); + timer_stop(&peer->timers, Timer_IdleHold); + timer_stop(&peer->timers, Timer_IdleHoldReset); + session_close(peer); + msgbuf_clear(peer->wbuf); + memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); + } + break; + case STATE_ACTIVE: + session_md5_reload(peer); + break; + case STATE_OPENSENT: + break; + case STATE_OPENCONFIRM: + break; + case STATE_ESTABLISHED: + timer_set(&peer->timers, Timer_IdleHoldReset, + peer->IdleHoldTime); + if (peer->demoted) + timer_set(&peer->timers, Timer_CarpUndemote, + INTERVAL_HOLD_DEMOTED); + session_up(peer); + break; + default: /* something seriously fucked */ + break; + } + + log_statechange(peer, state, event); + + session_mrt_dump_state(peer, peer->state, state); + + peer->prev_state = peer->state; + peer->state = state; +}