Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: rfc8654 extended message support
To:
tech@openbsd.org
Date:
Tue, 3 Dec 2024 15:51:22 +0100

Download raw body.

Thread
Since we can handle imsg > 16k now we can also handle 64k BGP messages.
So implement the bits for extended message support.

This should mostly follow rfc8654 with the following differences:

- NOTIFICATIONS are always truncated to fit in 4096 bytes. This is so that
  early errors do not ship too large packets. Also I see little point in
  having huge NOTIFICATIONS.

- I did not implement the SHOULD in:
    When propagating that UPDATE onward to a neighbor that has not advertised
    the BGP Extended Message Capability, the speaker SHOULD try to reduce the
    outgoing message size by removing attributes eligible under the "attribute
    discard" approach of [RFC7606].
  bgpd does the withdraw of previous NLRI since a while. There are only a
  few attributes that use "attribute discard" and in most cases their size
  is not the problem (or we already discarded them on input). So
  implementing that seems like a waste of time.

- I decided that bgpd only sends large messages if both sides announce
  extended messages. In other words if the local bgpd has not set it then
  no large message will be sent (even though the remote annouces it) so we
  take the MAY in:
    A BGP speaker MAY send BGP Extended Messages to a peer only if the BGP
    Extended Message Capability was received from that peer.

I tested this against a modifed version of the maxattr.sh regress test.
That part will be committed once this is in.
-- 
:wq Claudio

Index: usr.sbin/bgpctl/bgpctl.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
diff -u -p -r1.312 bgpctl.c
--- usr.sbin/bgpctl/bgpctl.c	2 Dec 2024 15:03:46 -0000	1.312
+++ usr.sbin/bgpctl/bgpctl.c	3 Dec 2024 14:39:29 -0000
@@ -1465,6 +1465,9 @@ print_capability(uint8_t capa_code, stru
 	case CAPA_ENHANCED_RR:
 		printf("enhanced route refresh capability");
 		break;
+	case CAPA_EXT_MSG:
+		printf("extended message capability");
+		break;
 	default:
 		printf("unknown capability %u length %zu",
 		    capa_code, ibuf_size(b));
Index: usr.sbin/bgpctl/output.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
diff -u -p -r1.56 output.c
--- usr.sbin/bgpctl/output.c	1 Oct 2024 18:33:16 -0000	1.56
+++ usr.sbin/bgpctl/output.c	3 Dec 2024 14:39:29 -0000
@@ -342,6 +342,8 @@ show_neighbor_full(struct peer *p, struc
 			printf("    Route Refresh\n");
 		if (p->capa.peer.enhanced_rr)
 			printf("    Enhanced Route Refresh\n");
+		if (p->capa.peer.ext_msg)
+			printf("    Extended message\n");
 		if (p->capa.peer.grestart.restart)
 			show_neighbor_capa_restart(&p->capa.peer);
 		if (hascapaap)
@@ -372,6 +374,8 @@ show_neighbor_full(struct peer *p, struc
 			printf("    Route Refresh\n");
 		if (p->capa.neg.enhanced_rr)
 			printf("    Enhanced Route Refresh\n");
+		if (p->capa.neg.ext_msg)
+			printf("    Extended message\n");
 		if (p->capa.neg.grestart.restart)
 			show_neighbor_capa_restart(&p->capa.neg);
 		if (hascapaap)
Index: usr.sbin/bgpctl/output_json.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
diff -u -p -r1.48 output_json.c
--- usr.sbin/bgpctl/output_json.c	1 Oct 2024 18:33:16 -0000	1.48
+++ usr.sbin/bgpctl/output_json.c	3 Dec 2024 14:39:29 -0000
@@ -57,6 +57,7 @@ json_neighbor_capabilities(struct capabi
 	json_do_bool("as4byte", capa->as4byte);
 	json_do_bool("refresh", capa->refresh);
 	json_do_bool("enhanced_refresh", capa->enhanced_rr);
+	json_do_bool("extended_message", capa->ext_msg);
 
 	if (hascapamp) {
 		json_do_array("multiprotocol");
Index: usr.sbin/bgpd/bgpd.8
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.8,v
diff -u -p -r1.79 bgpd.8
--- usr.sbin/bgpd/bgpd.8	6 Nov 2024 12:01:39 -0000	1.79
+++ usr.sbin/bgpd/bgpd.8	3 Dec 2024 14:39:29 -0000
@@ -478,6 +478,15 @@ has been started.
 .Re
 .Pp
 .Rs
+.%A R. Bush
+.%A K. Patel
+.%A D. Ward
+.%D October 2019
+.%R RFC 8654
+.%T Extended Message Support for BGP
+.Re
+.Pp
+.Rs
 .%A C. Loibl
 .%A S. Hares
 .%A R. Raszuk
Index: usr.sbin/bgpd/bgpd.conf.5
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v
diff -u -p -r1.242 bgpd.conf.5
--- usr.sbin/bgpd/bgpd.conf.5	14 Aug 2024 19:09:51 -0000	1.242
+++ usr.sbin/bgpd/bgpd.conf.5	3 Dec 2024 14:39:29 -0000
@@ -1091,6 +1091,22 @@ The default is
 .Ic no .
 .Pp
 .It Xo
+.Ic announce extended
+.Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce
+.Xc
+If set to
+.Ic yes ,
+the extended message capability is announced.
+If negotiated the default maximum message size is increaded from 4096 to 65535
+bytes.
+If
+.Ic enforce
+is set, the session will only be established if the neighbor also announces
+the capability.
+The default is
+.Ic no .
+.Pp
+.It Xo
 .Ic announce policy
 .Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce
 .Xc
Index: usr.sbin/bgpd/bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.501 bgpd.h
--- usr.sbin/bgpd/bgpd.h	2 Dec 2024 15:03:17 -0000	1.501
+++ usr.sbin/bgpd/bgpd.h	3 Dec 2024 14:39:29 -0000
@@ -49,13 +49,14 @@
 #define	SET_NAME_LEN			128
 
 #define	MAX_PKTSIZE			4096
-#define	MIN_HOLDTIME			3
+#define	MAX_EXT_PKTSIZE			65535
 #define	MAX_BGPD_IMSGSIZE		(128 * 1024)
 #define	MAX_SOCK_BUF			(4 * IBUF_READ_SIZE)
 #define	RT_BUF_SIZE			16384
 #define	MAX_RTSOCK_BUF			(2 * 1024 * 1024)
 #define	MAX_COMM_MATCH			3
 #define	MAX_ASPA_SPAS_COUNT		10000
+#define	MIN_HOLDTIME			3
 
 #define	BGPD_OPT_VERBOSE		0x0001
 #define	BGPD_OPT_VERBOSE2		0x0002
@@ -411,12 +412,14 @@ struct capabilities {
 	int8_t	as4byte;		/* 4-byte ASnum, RFC 4893 */
 	int8_t	enhanced_rr;		/* enhanced route refresh, RFC 7313 */
 	int8_t	policy;			/* Open Policy, RFC 9234, 2 = enforce */
+	int8_t	ext_msg;		/* Extended Msg, RFC8654 */
 };
 
 enum capa_codes {
 	CAPA_NONE = 0,
 	CAPA_MP = 1,
 	CAPA_REFRESH = 2,
+	CAPA_EXT_MSG = 6,
 	CAPA_ROLE = 9,
 	CAPA_RESTART = 64,
 	CAPA_AS4BYTE = 65,
Index: usr.sbin/bgpd/parse.y
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
diff -u -p -r1.470 parse.y
--- usr.sbin/bgpd/parse.y	9 Oct 2024 10:01:29 -0000	1.470
+++ usr.sbin/bgpd/parse.y	3 Dec 2024 14:39:29 -0000
@@ -248,7 +248,7 @@ typedef struct {
 %token	EBGP IBGP
 %token	FLOWSPEC PROTO FLAGS FRAGMENT TOS LENGTH ICMPTYPE CODE
 %token	LOCALAS REMOTEAS DESCR LOCALADDR MULTIHOP PASSIVE MAXPREFIX RESTART
-%token	ANNOUNCE REFRESH AS4BYTE CONNECTRETRY ENHANCED ADDPATH
+%token	ANNOUNCE REFRESH AS4BYTE CONNECTRETRY ENHANCED ADDPATH EXTENDED
 %token	SEND RECV PLUS POLICY ROLE
 %token	DEMOTE ENFORCE NEIGHBORAS ASOVERRIDE REFLECTOR DEPEND DOWN
 %token	DUMP IN OUT SOCKET RESTRICTED
@@ -2011,6 +2011,9 @@ peeropts	: REMOTEAS as4number	{
 		| ANNOUNCE POLICY yesnoenforce {
 			curpeer->conf.capabilities.policy = $3;
 		}
+		| ANNOUNCE EXTENDED yesnoenforce {
+			curpeer->conf.capabilities.ext_msg = $3;
+		}
 		| ROLE STRING {
 			if (strcmp($2, "provider") == 0) {
 				curpeer->conf.role = ROLE_PROVIDER;
@@ -3536,6 +3539,7 @@ lookup(char *s)
 		{ "export",		EXPORT},
 		{ "export-target",	EXPORTTRGT},
 		{ "ext-community",	EXTCOMMUNITY},
+		{ "extended",		EXTENDED },
 		{ "fib-priority",	FIBPRIORITY},
 		{ "fib-update",		FIBUPDATE},
 		{ "filtered",		FILTERED},
Index: usr.sbin/bgpd/printconf.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/printconf.c,v
diff -u -p -r1.176 printconf.c
--- usr.sbin/bgpd/printconf.c	9 Oct 2024 10:01:29 -0000	1.176
+++ usr.sbin/bgpd/printconf.c	3 Dec 2024 14:39:29 -0000
@@ -960,6 +960,11 @@ print_announce(struct peer_config *p, co
 	else if (p->capabilities.as4byte == 0)
 		printf("%s\tannounce as4byte no\n", c);
 
+	if (p->capabilities.ext_msg == 2)
+		printf("%s\tannounce extended enforce\n", c);
+	else if (p->capabilities.ext_msg == 1)
+		printf("%s\tannounce extended yes\n", c);
+
 	if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV_ENFORCE)
 		printf("%s\tannounce add-path recv enforce\n", c);
 	else if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV)
Index: usr.sbin/bgpd/rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
diff -u -p -r1.169 rde_update.c
--- usr.sbin/bgpd/rde_update.c	25 Sep 2024 14:46:51 -0000	1.169
+++ usr.sbin/bgpd/rde_update.c	3 Dec 2024 14:39:29 -0000
@@ -988,11 +988,14 @@ struct ibuf *
 up_dump_withdraws(struct rde_peer *peer, uint8_t aid)
 {
 	struct ibuf *buf;
-	size_t off;
+	size_t off, pkgsize = MAX_PKTSIZE;
 	uint16_t afi, len;
 	uint8_t safi;
 
-	if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL)
+	if (peer->capa.ext_msg)
+		pkgsize = MAX_EXT_PKTSIZE;
+
+	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
 		goto fail;
 
 	/* reserve space for the withdrawn routes length field */
@@ -1136,14 +1139,17 @@ up_dump_update(struct rde_peer *peer, ui
 	struct ibuf *buf;
 	struct bgpd_addr addr;
 	struct prefix *p;
-	size_t off;
+	size_t off, pkgsize = MAX_PKTSIZE;
 	uint16_t len;
 
 	p = RB_MIN(prefix_tree, &peer->updates[aid]);
 	if (p == NULL)
 		return NULL;
 
-	if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL)
+	if (peer->capa.ext_msg)
+		pkgsize = MAX_EXT_PKTSIZE;
+
+	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
 		goto fail;
 
 	/* withdrawn routes length field is 0 */
Index: usr.sbin/bgpd/session.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
diff -u -p -r1.500 session.c
--- usr.sbin/bgpd/session.c	2 Dec 2024 15:03:18 -0000	1.500
+++ usr.sbin/bgpd/session.c	3 Dec 2024 14:39:29 -0000
@@ -1478,7 +1478,8 @@ session_open(struct peer *p)
 	int			 mpcapa = 0;
 
 
-	if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) {
+	if ((opb = ibuf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN - 6)) ==
+	    NULL) {
 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
 		return;
 	}
@@ -1495,6 +1496,10 @@ session_open(struct peer *p)
 	if (p->capa.ann.refresh)	/* no data */
 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
 
+	/* extended message support, RFC8654 */
+	if (p->capa.ann.ext_msg)	/* no data */
+		errs += session_capa_add(opb, CAPA_EXT_MSG, 0);
+
 	/* BGP open policy, RFC 9234, only for ebgp sessions */
 	if (p->conf.ebgp && p->capa.ann.policy &&
 	    p->conf.role != ROLE_NONE &&
@@ -1642,19 +1647,28 @@ session_keepalive(struct peer *p)
 void
 session_update(uint32_t peerid, struct ibuf *ibuf)
 {
-	struct peer		*p;
-	struct ibuf		*buf;
+	struct peer	*p;
+	struct ibuf	*buf;
+	size_t		 len, maxsize = MAX_PKTSIZE;
 
 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
-		log_warnx("no such peer: id=%u", peerid);
+		log_warnx("%s: no such peer: id=%u", __func__, peerid);
 		return;
 	}
 
 	if (p->state != STATE_ESTABLISHED)
 		return;
 
-	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + ibuf_size(ibuf))) ==
-	    NULL) {
+	if (p->capa.neg.ext_msg)
+		maxsize = MAX_EXT_PKTSIZE;
+	len = ibuf_size(ibuf);
+	if (len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER ||
+	    len > maxsize - MSGSIZE_HEADER) {
+		log_peer_warnx(&p->conf, "bad UDPATE from RDE");
+		return;
+	}
+
+	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + len)) == NULL) {
 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
 		return;
 	}
@@ -2023,7 +2037,7 @@ parse_header(struct ibuf *msg, void *arg
 	struct peer		*peer = arg;
 	struct ibuf		*b;
 	u_char			 m[MSGSIZE_HEADER_MARKER];
-	uint16_t		 len;
+	uint16_t		 len, maxlen = MAX_PKTSIZE;
 	uint8_t			 type;
 
 	if (ibuf_get(msg, m, sizeof(m)) == -1 ||
@@ -2039,7 +2053,10 @@ parse_header(struct ibuf *msg, void *arg
 		return (NULL);
 	}
 
-	if (len < MSGSIZE_HEADER || len > MAX_PKTSIZE) {
+	if (peer->capa.ann.ext_msg)
+		maxlen = MAX_EXT_PKTSIZE;
+
+	if (len < MSGSIZE_HEADER || len > maxlen) {
 		log_peer_warnx(&peer->conf,
 		    "received message: illegal length: %u byte", len);
 		goto badlen;
@@ -2047,7 +2064,7 @@ parse_header(struct ibuf *msg, void *arg
 
 	switch (type) {
 	case OPEN:
-		if (len < MSGSIZE_OPEN_MIN) {
+		if (len < MSGSIZE_OPEN_MIN || len > MAX_PKTSIZE) {
 			log_peer_warnx(&peer->conf,
 			    "received OPEN: illegal len: %u byte", len);
 			goto badlen;
@@ -2465,6 +2482,9 @@ parse_capabilities(struct peer *peer, st
 		case CAPA_REFRESH:
 			peer->capa.peer.refresh = 1;
 			break;
+		case CAPA_EXT_MSG:
+			peer->capa.peer.ext_msg = 1;
+			break;
 		case CAPA_ROLE:
 			if (capa_len != 1 ||
 			    ibuf_get_n8(&capabuf, &role) == -1) {
@@ -2612,6 +2632,8 @@ capa_neg_calc(struct peer *p)
 	    (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0;
 	p->capa.neg.as4byte =
 	    (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0;
+	p->capa.neg.ext_msg =
+	    (p->capa.ann.ext_msg && p->capa.peer.ext_msg) != 0;
 
 	/* MP: both side must agree on the AFI,SAFI pair */
 	if (p->capa.peer.mp[AID_UNSPEC])
@@ -2750,6 +2772,12 @@ capa_neg_calc(struct peer *p)
 		capa_len = 0;
 		goto fail;
 	}
+	/* enforce presence of other capabilities */
+	if (p->capa.ann.ext_msg == 2 && p->capa.neg.ext_msg == 0) {
+		capa_code = CAPA_EXT_MSG;
+		capa_len = 0;
+		goto fail;
+	}
 	if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) {
 		capa_code = CAPA_ENHANCED_RR;
 		capa_len = 0;
@@ -2826,7 +2854,6 @@ session_dispatch_imsg(struct imsgbuf *im
 	struct listen_addr	*la, *next, nla;
 	struct session_dependon	 sdon;
 	struct bgpd_config	 tconf;
-	size_t			 len;
 	uint32_t		 peerid;
 	int			 n, fd, depend_ok, restricted;
 	uint16_t		 t;
@@ -3119,11 +3146,8 @@ session_dispatch_imsg(struct imsgbuf *im
 		case IMSG_UPDATE:
 			if (idx != PFD_PIPE_ROUTE)
 				fatalx("update request not from RDE");
-			len = imsg_get_len(&imsg);
-			if (imsg_get_ibuf(&imsg, &ibuf) == -1 ||
-			    len > MAX_PKTSIZE - MSGSIZE_HEADER ||
-			    len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
-				log_warnx("RDE sent invalid update");
+			if (imsg_get_ibuf(&imsg, &ibuf) == -1)
+				log_warn("RDE sent invalid update");
 			else
 				session_update(peerid, &ibuf);
 			break;