Download raw body.
bgpd: more paranoia when hitting the UPDATE size limit
When creating UPDATE messages bgpd can hit some error cases that can be
triggered from remote.
Mainly up_dump_update() can fail if the produced attributes for the UPDATE
are so large that they no longer fit into the default BGP packet size.
bgpd kind of handles this but in a not so good way. Right now the bad prefix
is silently dropped and the peer remains with stale data (the previous
update).
Here is a diff that improves the situation. It withdraws the prefix in
question and also logs the issue. On top of this I moved the ibuf
allocation from rde_update_queue_runner() into up_dump_withdraws() and
up_dump_update() which makes the queue runner simpler.
I have a regress test that currently tickles the goto drop cases for IPv4
(but not yet the MP one).
--
:wq Claudio
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.633 rde.c
--- rde.c 10 Sep 2024 09:38:45 -0000 1.633
+++ rde.c 10 Sep 2024 12:16:23 -0000
@@ -3383,11 +3383,7 @@ rde_update_queue_runner(uint8_t aid)
if (RB_EMPTY(&peer->withdraws[aid]))
continue;
- if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) ==
- NULL)
- fatal("%s", __func__);
- if (up_dump_withdraws(buf, peer, aid) == -1) {
- ibuf_free(buf);
+ if ((buf = up_dump_withdraws(peer, aid)) == NULL) {
continue;
}
if (imsg_compose_ibuf(ibuf_se, IMSG_UPDATE,
@@ -3422,11 +3418,7 @@ rde_update_queue_runner(uint8_t aid)
continue;
}
- if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) ==
- NULL)
- fatal("%s", __func__);
- if (up_dump_update(buf, peer, aid) == -1) {
- ibuf_free(buf);
+ if ((buf = up_dump_update(peer, aid)) == NULL) {
continue;
}
if (imsg_compose_ibuf(ibuf_se, IMSG_UPDATE,
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.305 rde.h
--- rde.h 28 Aug 2024 13:21:39 -0000 1.305
+++ rde.h 4 Sep 2024 12:14:12 -0000
@@ -702,8 +702,8 @@ void up_generate_addpath_all(struct rd
struct prefix *, struct prefix *);
void up_generate_default(struct rde_peer *, uint8_t);
int up_is_eor(struct rde_peer *, uint8_t);
-int up_dump_withdraws(struct ibuf *, struct rde_peer *, uint8_t);
-int up_dump_update(struct ibuf *, struct rde_peer *, uint8_t);
+struct ibuf *up_dump_withdraws(struct rde_peer *, uint8_t);
+struct ibuf *up_dump_update(struct rde_peer *, uint8_t);
/* rde_aspa.c */
void aspa_validation(struct rde_aspa *, struct aspath *,
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
diff -u -p -r1.168 rde_update.c
--- rde_update.c 30 May 2024 08:29:30 -0000 1.168
+++ rde_update.c 25 Sep 2024 12:07:21 -0000
@@ -25,6 +25,7 @@
#include <stdio.h>
#include "bgpd.h"
+#include "session.h"
#include "rde.h"
#include "log.h"
@@ -948,7 +949,7 @@ up_generate_mp_reach(struct ibuf *buf, s
if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
/* no prefixes written, fail update */
- return (-1);
+ return -1;
/* update MP_REACH attribute length field */
len = ibuf_size(buf) - off - sizeof(len);
@@ -983,61 +984,144 @@ up_generate_mp_reach(struct ibuf *buf, s
* how may routes can be added. Return 0 on success -1 on error which
* includes generating an empty withdraw message.
*/
-int
-up_dump_withdraws(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
+struct ibuf *
+up_dump_withdraws(struct rde_peer *peer, uint8_t aid)
{
+ struct ibuf *buf;
size_t off;
uint16_t afi, len;
uint8_t safi;
+ if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL)
+ goto fail;
+
/* reserve space for the withdrawn routes length field */
off = ibuf_size(buf);
if (ibuf_add_zero(buf, sizeof(len)) == -1)
- return -1;
+ goto fail;
if (aid != AID_INET) {
/* reserve space for 2-byte path attribute length */
off = ibuf_size(buf);
if (ibuf_add_zero(buf, sizeof(len)) == -1)
- return -1;
+ goto fail;
/* attribute header, defaulting to extended length one */
if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
- return -1;
+ goto fail;
if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
- return -1;
+ goto fail;
if (ibuf_add_zero(buf, sizeof(len)) == -1)
- return -1;
+ goto fail;
/* afi & safi */
if (aid2afi(aid, &afi, &safi))
- fatalx("up_dump_mp_unreach: bad AID");
+ fatalx("%s: bad AID", __func__);
if (ibuf_add_n16(buf, afi) == -1)
- return -1;
+ goto fail;
if (ibuf_add_n8(buf, safi) == -1)
- return -1;
+ goto fail;
}
if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
- return -1;
+ goto fail;
/* update length field (either withdrawn routes or attribute length) */
len = ibuf_size(buf) - off - sizeof(len);
if (ibuf_set_n16(buf, off, len) == -1)
- return -1;
+ goto fail;
if (aid != AID_INET) {
/* write MP_UNREACH_NLRI attribute length (always extended) */
len -= 4; /* skip attribute header */
if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
- return -1;
+ goto fail;
} else {
/* no extra attributes so set attribute len to 0 */
+ if (ibuf_add_zero(buf, sizeof(len)) == -1) {
+ goto fail;
+ }
+ }
+
+ return buf;
+
+ fail:
+ /* something went horribly worng */
+ log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
+ ibuf_free(buf);
+ return NULL;
+}
+
+/*
+ * Withdraw a single prefix after an error.
+ */
+static struct ibuf *
+up_dump_withdraw_one(struct rde_peer *peer, struct prefix *p, struct ibuf *buf)
+{
+ size_t off;
+ int has_ap;
+ uint16_t afi, len;
+ uint8_t safi;
+
+ /* reset the buffer and start fresh */
+ ibuf_truncate(buf, 0);
+
+ /* reserve space for the withdrawn routes length field */
+ off = ibuf_size(buf);
+ if (ibuf_add_zero(buf, sizeof(len)) == -1)
+ goto fail;
+
+ if (p->pt->aid != AID_INET) {
+ /* reserve space for 2-byte path attribute length */
+ off = ibuf_size(buf);
if (ibuf_add_zero(buf, sizeof(len)) == -1)
- return -1;
+ goto fail;
+
+ /* attribute header, defaulting to extended length one */
+ if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
+ goto fail;
+ if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
+ goto fail;
+ if (ibuf_add_zero(buf, sizeof(len)) == -1)
+ goto fail;
+
+ /* afi & safi */
+ if (aid2afi(p->pt->aid, &afi, &safi))
+ fatalx("%s: bad AID", __func__);
+ if (ibuf_add_n16(buf, afi) == -1)
+ goto fail;
+ if (ibuf_add_n8(buf, safi) == -1)
+ goto fail;
}
- return 0;
+ has_ap = peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND);
+ if (pt_writebuf(buf, p->pt, 1, has_ap, p->path_id_tx) == -1)
+ goto fail;
+
+ /* update length field (either withdrawn routes or attribute length) */
+ len = ibuf_size(buf) - off - sizeof(len);
+ if (ibuf_set_n16(buf, off, len) == -1)
+ goto fail;
+
+ if (p->pt->aid != AID_INET) {
+ /* write MP_UNREACH_NLRI attribute length (always extended) */
+ len -= 4; /* skip attribute header */
+ if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
+ goto fail;
+ } else {
+ /* no extra attributes so set attribute len to 0 */
+ if (ibuf_add_zero(buf, sizeof(len)) == -1) {
+ goto fail;
+ }
+ }
+
+ return buf;
+
+ fail:
+ /* something went horribly worng */
+ log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
+ ibuf_free(buf);
+ return NULL;
}
/*
@@ -1046,9 +1130,10 @@ up_dump_withdraws(struct ibuf *buf, stru
* and then tries to add as many prefixes using these attributes.
* Return 0 on success -1 on error which includes producing an empty message.
*/
-int
-up_dump_update(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
+struct ibuf *
+up_dump_update(struct rde_peer *peer, uint8_t aid)
{
+ struct ibuf *buf;
struct bgpd_addr addr;
struct prefix *p;
size_t off;
@@ -1056,20 +1141,23 @@ up_dump_update(struct ibuf *buf, struct
p = RB_MIN(prefix_tree, &peer->updates[aid]);
if (p == NULL)
- return -1;
+ return NULL;
+
+ if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL)
+ goto fail;
/* withdrawn routes length field is 0 */
if (ibuf_add_zero(buf, sizeof(len)) == -1)
- return -1;
+ goto fail;
/* reserve space for 2-byte path attribute length */
off = ibuf_size(buf);
if (ibuf_add_zero(buf, sizeof(len)) == -1)
- return -1;
+ goto fail;
if (up_generate_attr(buf, peer, prefix_aspath(p),
prefix_communities(p), prefix_nexthop(p), aid) == -1)
- goto fail;
+ goto drop;
if (aid != AID_INET) {
/* write mp attribute including nlri */
@@ -1082,29 +1170,35 @@ up_dump_update(struct ibuf *buf, struct
*/
if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
-1)
- goto fail;
+ goto drop;
}
/* update attribute length field */
len = ibuf_size(buf) - off - sizeof(len);
if (ibuf_set_n16(buf, off, len) == -1)
- return -1;
+ goto fail;
if (aid == AID_INET) {
/* last but not least dump the IPv4 nlri */
if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
- goto fail;
+ goto drop;
}
- return 0;
+ return buf;
-fail:
- /* Not enough space. Drop prefix, it will never fit. */
+ drop:
+ /* Not enough space. Drop current prefix, it will never fit. */
+ p = RB_MIN(prefix_tree, &peer->updates[aid]);
pt_getaddr(p->pt, &addr);
- log_peer_warnx(&peer->conf, "dump of path attributes failed, "
+ log_peer_warnx(&peer->conf, "generating update failed, "
"prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);
up_prefix_free(&peer->updates[aid], p, peer, 0);
- /* XXX should probably send a withdraw for this prefix */
- return -1;
+ return up_dump_withdraw_one(peer, p, buf);
+
+ fail:
+ /* something went horribly worng */
+ log_peer_warn(&peer->conf, "generating update failed, peer desynced");
+ ibuf_free(buf);
+ return NULL;
}
bgpd: more paranoia when hitting the UPDATE size limit