From: Claudio Jeker Subject: bgpd: rfc8654 extended message support To: tech@openbsd.org Date: Tue, 3 Dec 2024 15:51:22 +0100 Since we can handle imsg > 16k now we can also handle 64k BGP messages. So implement the bits for extended message support. This should mostly follow rfc8654 with the following differences: - NOTIFICATIONS are always truncated to fit in 4096 bytes. This is so that early errors do not ship too large packets. Also I see little point in having huge NOTIFICATIONS. - I did not implement the SHOULD in: When propagating that UPDATE onward to a neighbor that has not advertised the BGP Extended Message Capability, the speaker SHOULD try to reduce the outgoing message size by removing attributes eligible under the "attribute discard" approach of [RFC7606]. bgpd does the withdraw of previous NLRI since a while. There are only a few attributes that use "attribute discard" and in most cases their size is not the problem (or we already discarded them on input). So implementing that seems like a waste of time. - I decided that bgpd only sends large messages if both sides announce extended messages. In other words if the local bgpd has not set it then no large message will be sent (even though the remote annouces it) so we take the MAY in: A BGP speaker MAY send BGP Extended Messages to a peer only if the BGP Extended Message Capability was received from that peer. I tested this against a modifed version of the maxattr.sh regress test. That part will be committed once this is in. -- :wq Claudio Index: usr.sbin/bgpctl/bgpctl.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v diff -u -p -r1.312 bgpctl.c --- usr.sbin/bgpctl/bgpctl.c 2 Dec 2024 15:03:46 -0000 1.312 +++ usr.sbin/bgpctl/bgpctl.c 3 Dec 2024 14:39:29 -0000 @@ -1465,6 +1465,9 @@ print_capability(uint8_t capa_code, stru case CAPA_ENHANCED_RR: printf("enhanced route refresh capability"); break; + case CAPA_EXT_MSG: + printf("extended message capability"); + break; default: printf("unknown capability %u length %zu", capa_code, ibuf_size(b)); Index: usr.sbin/bgpctl/output.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v diff -u -p -r1.56 output.c --- usr.sbin/bgpctl/output.c 1 Oct 2024 18:33:16 -0000 1.56 +++ usr.sbin/bgpctl/output.c 3 Dec 2024 14:39:29 -0000 @@ -342,6 +342,8 @@ show_neighbor_full(struct peer *p, struc printf(" Route Refresh\n"); if (p->capa.peer.enhanced_rr) printf(" Enhanced Route Refresh\n"); + if (p->capa.peer.ext_msg) + printf(" Extended message\n"); if (p->capa.peer.grestart.restart) show_neighbor_capa_restart(&p->capa.peer); if (hascapaap) @@ -372,6 +374,8 @@ show_neighbor_full(struct peer *p, struc printf(" Route Refresh\n"); if (p->capa.neg.enhanced_rr) printf(" Enhanced Route Refresh\n"); + if (p->capa.neg.ext_msg) + printf(" Extended message\n"); if (p->capa.neg.grestart.restart) show_neighbor_capa_restart(&p->capa.neg); if (hascapaap) Index: usr.sbin/bgpctl/output_json.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v diff -u -p -r1.48 output_json.c --- usr.sbin/bgpctl/output_json.c 1 Oct 2024 18:33:16 -0000 1.48 +++ usr.sbin/bgpctl/output_json.c 3 Dec 2024 14:39:29 -0000 @@ -57,6 +57,7 @@ json_neighbor_capabilities(struct capabi json_do_bool("as4byte", capa->as4byte); json_do_bool("refresh", capa->refresh); json_do_bool("enhanced_refresh", capa->enhanced_rr); + json_do_bool("extended_message", capa->ext_msg); if (hascapamp) { json_do_array("multiprotocol"); Index: usr.sbin/bgpd/bgpd.8 =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.8,v diff -u -p -r1.79 bgpd.8 --- usr.sbin/bgpd/bgpd.8 6 Nov 2024 12:01:39 -0000 1.79 +++ usr.sbin/bgpd/bgpd.8 3 Dec 2024 14:39:29 -0000 @@ -478,6 +478,15 @@ has been started. .Re .Pp .Rs +.%A R. Bush +.%A K. Patel +.%A D. Ward +.%D October 2019 +.%R RFC 8654 +.%T Extended Message Support for BGP +.Re +.Pp +.Rs .%A C. Loibl .%A S. Hares .%A R. Raszuk Index: usr.sbin/bgpd/bgpd.conf.5 =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v diff -u -p -r1.242 bgpd.conf.5 --- usr.sbin/bgpd/bgpd.conf.5 14 Aug 2024 19:09:51 -0000 1.242 +++ usr.sbin/bgpd/bgpd.conf.5 3 Dec 2024 14:39:29 -0000 @@ -1091,6 +1091,22 @@ The default is .Ic no . .Pp .It Xo +.Ic announce extended +.Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce +.Xc +If set to +.Ic yes , +the extended message capability is announced. +If negotiated the default maximum message size is increaded from 4096 to 65535 +bytes. +If +.Ic enforce +is set, the session will only be established if the neighbor also announces +the capability. +The default is +.Ic no . +.Pp +.It Xo .Ic announce policy .Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce .Xc Index: usr.sbin/bgpd/bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v diff -u -p -r1.501 bgpd.h --- usr.sbin/bgpd/bgpd.h 2 Dec 2024 15:03:17 -0000 1.501 +++ usr.sbin/bgpd/bgpd.h 3 Dec 2024 14:39:29 -0000 @@ -49,13 +49,14 @@ #define SET_NAME_LEN 128 #define MAX_PKTSIZE 4096 -#define MIN_HOLDTIME 3 +#define MAX_EXT_PKTSIZE 65535 #define MAX_BGPD_IMSGSIZE (128 * 1024) #define MAX_SOCK_BUF (4 * IBUF_READ_SIZE) #define RT_BUF_SIZE 16384 #define MAX_RTSOCK_BUF (2 * 1024 * 1024) #define MAX_COMM_MATCH 3 #define MAX_ASPA_SPAS_COUNT 10000 +#define MIN_HOLDTIME 3 #define BGPD_OPT_VERBOSE 0x0001 #define BGPD_OPT_VERBOSE2 0x0002 @@ -411,12 +412,14 @@ struct capabilities { int8_t as4byte; /* 4-byte ASnum, RFC 4893 */ int8_t enhanced_rr; /* enhanced route refresh, RFC 7313 */ int8_t policy; /* Open Policy, RFC 9234, 2 = enforce */ + int8_t ext_msg; /* Extended Msg, RFC8654 */ }; enum capa_codes { CAPA_NONE = 0, CAPA_MP = 1, CAPA_REFRESH = 2, + CAPA_EXT_MSG = 6, CAPA_ROLE = 9, CAPA_RESTART = 64, CAPA_AS4BYTE = 65, Index: usr.sbin/bgpd/parse.y =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v diff -u -p -r1.470 parse.y --- usr.sbin/bgpd/parse.y 9 Oct 2024 10:01:29 -0000 1.470 +++ usr.sbin/bgpd/parse.y 3 Dec 2024 14:39:29 -0000 @@ -248,7 +248,7 @@ typedef struct { %token EBGP IBGP %token FLOWSPEC PROTO FLAGS FRAGMENT TOS LENGTH ICMPTYPE CODE %token LOCALAS REMOTEAS DESCR LOCALADDR MULTIHOP PASSIVE MAXPREFIX RESTART -%token ANNOUNCE REFRESH AS4BYTE CONNECTRETRY ENHANCED ADDPATH +%token ANNOUNCE REFRESH AS4BYTE CONNECTRETRY ENHANCED ADDPATH EXTENDED %token SEND RECV PLUS POLICY ROLE %token DEMOTE ENFORCE NEIGHBORAS ASOVERRIDE REFLECTOR DEPEND DOWN %token DUMP IN OUT SOCKET RESTRICTED @@ -2011,6 +2011,9 @@ peeropts : REMOTEAS as4number { | ANNOUNCE POLICY yesnoenforce { curpeer->conf.capabilities.policy = $3; } + | ANNOUNCE EXTENDED yesnoenforce { + curpeer->conf.capabilities.ext_msg = $3; + } | ROLE STRING { if (strcmp($2, "provider") == 0) { curpeer->conf.role = ROLE_PROVIDER; @@ -3536,6 +3539,7 @@ lookup(char *s) { "export", EXPORT}, { "export-target", EXPORTTRGT}, { "ext-community", EXTCOMMUNITY}, + { "extended", EXTENDED }, { "fib-priority", FIBPRIORITY}, { "fib-update", FIBUPDATE}, { "filtered", FILTERED}, Index: usr.sbin/bgpd/printconf.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/printconf.c,v diff -u -p -r1.176 printconf.c --- usr.sbin/bgpd/printconf.c 9 Oct 2024 10:01:29 -0000 1.176 +++ usr.sbin/bgpd/printconf.c 3 Dec 2024 14:39:29 -0000 @@ -960,6 +960,11 @@ print_announce(struct peer_config *p, co else if (p->capabilities.as4byte == 0) printf("%s\tannounce as4byte no\n", c); + if (p->capabilities.ext_msg == 2) + printf("%s\tannounce extended enforce\n", c); + else if (p->capabilities.ext_msg == 1) + printf("%s\tannounce extended yes\n", c); + if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV_ENFORCE) printf("%s\tannounce add-path recv enforce\n", c); else if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV) Index: usr.sbin/bgpd/rde_update.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v diff -u -p -r1.169 rde_update.c --- usr.sbin/bgpd/rde_update.c 25 Sep 2024 14:46:51 -0000 1.169 +++ usr.sbin/bgpd/rde_update.c 3 Dec 2024 14:39:29 -0000 @@ -988,11 +988,14 @@ struct ibuf * up_dump_withdraws(struct rde_peer *peer, uint8_t aid) { struct ibuf *buf; - size_t off; + size_t off, pkgsize = MAX_PKTSIZE; uint16_t afi, len; uint8_t safi; - if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL) + if (peer->capa.ext_msg) + pkgsize = MAX_EXT_PKTSIZE; + + if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL) goto fail; /* reserve space for the withdrawn routes length field */ @@ -1136,14 +1139,17 @@ up_dump_update(struct rde_peer *peer, ui struct ibuf *buf; struct bgpd_addr addr; struct prefix *p; - size_t off; + size_t off, pkgsize = MAX_PKTSIZE; uint16_t len; p = RB_MIN(prefix_tree, &peer->updates[aid]); if (p == NULL) return NULL; - if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL) + if (peer->capa.ext_msg) + pkgsize = MAX_EXT_PKTSIZE; + + if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL) goto fail; /* withdrawn routes length field is 0 */ Index: usr.sbin/bgpd/session.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/session.c,v diff -u -p -r1.500 session.c --- usr.sbin/bgpd/session.c 2 Dec 2024 15:03:18 -0000 1.500 +++ usr.sbin/bgpd/session.c 3 Dec 2024 14:39:29 -0000 @@ -1478,7 +1478,8 @@ session_open(struct peer *p) int mpcapa = 0; - if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) { + if ((opb = ibuf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN - 6)) == + NULL) { bgp_fsm(p, EVNT_CON_FATAL, NULL); return; } @@ -1495,6 +1496,10 @@ session_open(struct peer *p) if (p->capa.ann.refresh) /* no data */ errs += session_capa_add(opb, CAPA_REFRESH, 0); + /* extended message support, RFC8654 */ + if (p->capa.ann.ext_msg) /* no data */ + errs += session_capa_add(opb, CAPA_EXT_MSG, 0); + /* BGP open policy, RFC 9234, only for ebgp sessions */ if (p->conf.ebgp && p->capa.ann.policy && p->conf.role != ROLE_NONE && @@ -1642,19 +1647,28 @@ session_keepalive(struct peer *p) void session_update(uint32_t peerid, struct ibuf *ibuf) { - struct peer *p; - struct ibuf *buf; + struct peer *p; + struct ibuf *buf; + size_t len, maxsize = MAX_PKTSIZE; if ((p = getpeerbyid(conf, peerid)) == NULL) { - log_warnx("no such peer: id=%u", peerid); + log_warnx("%s: no such peer: id=%u", __func__, peerid); return; } if (p->state != STATE_ESTABLISHED) return; - if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + ibuf_size(ibuf))) == - NULL) { + if (p->capa.neg.ext_msg) + maxsize = MAX_EXT_PKTSIZE; + len = ibuf_size(ibuf); + if (len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER || + len > maxsize - MSGSIZE_HEADER) { + log_peer_warnx(&p->conf, "bad UDPATE from RDE"); + return; + } + + if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + len)) == NULL) { bgp_fsm(p, EVNT_CON_FATAL, NULL); return; } @@ -2023,7 +2037,7 @@ parse_header(struct ibuf *msg, void *arg struct peer *peer = arg; struct ibuf *b; u_char m[MSGSIZE_HEADER_MARKER]; - uint16_t len; + uint16_t len, maxlen = MAX_PKTSIZE; uint8_t type; if (ibuf_get(msg, m, sizeof(m)) == -1 || @@ -2039,7 +2053,10 @@ parse_header(struct ibuf *msg, void *arg return (NULL); } - if (len < MSGSIZE_HEADER || len > MAX_PKTSIZE) { + if (peer->capa.ann.ext_msg) + maxlen = MAX_EXT_PKTSIZE; + + if (len < MSGSIZE_HEADER || len > maxlen) { log_peer_warnx(&peer->conf, "received message: illegal length: %u byte", len); goto badlen; @@ -2047,7 +2064,7 @@ parse_header(struct ibuf *msg, void *arg switch (type) { case OPEN: - if (len < MSGSIZE_OPEN_MIN) { + if (len < MSGSIZE_OPEN_MIN || len > MAX_PKTSIZE) { log_peer_warnx(&peer->conf, "received OPEN: illegal len: %u byte", len); goto badlen; @@ -2465,6 +2482,9 @@ parse_capabilities(struct peer *peer, st case CAPA_REFRESH: peer->capa.peer.refresh = 1; break; + case CAPA_EXT_MSG: + peer->capa.peer.ext_msg = 1; + break; case CAPA_ROLE: if (capa_len != 1 || ibuf_get_n8(&capabuf, &role) == -1) { @@ -2612,6 +2632,8 @@ capa_neg_calc(struct peer *p) (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; p->capa.neg.as4byte = (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; + p->capa.neg.ext_msg = + (p->capa.ann.ext_msg && p->capa.peer.ext_msg) != 0; /* MP: both side must agree on the AFI,SAFI pair */ if (p->capa.peer.mp[AID_UNSPEC]) @@ -2750,6 +2772,12 @@ capa_neg_calc(struct peer *p) capa_len = 0; goto fail; } + /* enforce presence of other capabilities */ + if (p->capa.ann.ext_msg == 2 && p->capa.neg.ext_msg == 0) { + capa_code = CAPA_EXT_MSG; + capa_len = 0; + goto fail; + } if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) { capa_code = CAPA_ENHANCED_RR; capa_len = 0; @@ -2826,7 +2854,6 @@ session_dispatch_imsg(struct imsgbuf *im struct listen_addr *la, *next, nla; struct session_dependon sdon; struct bgpd_config tconf; - size_t len; uint32_t peerid; int n, fd, depend_ok, restricted; uint16_t t; @@ -3119,11 +3146,8 @@ session_dispatch_imsg(struct imsgbuf *im case IMSG_UPDATE: if (idx != PFD_PIPE_ROUTE) fatalx("update request not from RDE"); - len = imsg_get_len(&imsg); - if (imsg_get_ibuf(&imsg, &ibuf) == -1 || - len > MAX_PKTSIZE - MSGSIZE_HEADER || - len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) - log_warnx("RDE sent invalid update"); + if (imsg_get_ibuf(&imsg, &ibuf) == -1) + log_warn("RDE sent invalid update"); else session_update(peerid, &ibuf); break;