Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: RFC8950 extended nexthop encoding support in RIB
To:
tech@openbsd.org
Date:
Fri, 10 Jan 2025 14:04:20 +0100

Download raw body.

Thread
This diff is enough to use RFC 8950 on route reflectors or route servers.
It adds the support for IPv6 nexthops for IPv4 routes to the RDE / RIB.

The problem is that the FIB is not ready for that yet (the nexthop of a
kroute has to be of the same address family as the prefix) and fixing that
is not trivial. Additionally the OpenBSD network stack is also not ready
but let's go step by step :)

I only tested this against OpenBGPD and have not done any interop tests yet.
-- 
:wq Claudio

Index: bgpd.8
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.8,v
diff -u -p -r1.82 bgpd.8
--- bgpd.8	7 Jan 2025 12:00:36 -0000	1.82
+++ bgpd.8	10 Jan 2025 13:03:25 -0000
@@ -536,6 +536,16 @@ has been started.
 .Re
 .Pp
 .Rs
+.%A S. Litkowski
+.%A S. Agrawal
+.%A K. Ananthamurthy
+.%A K. Patel
+.%D November 2020
+.%R RFC 8950
+.%T Advertising IPv4 Network Layer Reachability Information (NLRI) with an IPv6 Next Hop
+.Re
+.Pp
+.Rs
 .%A C. Loibl
 .%A S. Hares
 .%A R. Raszuk
Index: bgpd.conf.5
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v
diff -u -p -r1.247 bgpd.conf.5
--- bgpd.conf.5	9 Jan 2025 15:57:31 -0000	1.247
+++ bgpd.conf.5	10 Jan 2025 12:12:58 -0000
@@ -1119,6 +1119,25 @@ The default is
 .Ic no .
 .Pp
 .It Xo
+.Ic announce extended nexthop
+.Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce
+.Xc
+If set to
+.Ic yes ,
+the extended nexthop encoding capability is announced.
+If negotiated,
+.Ic IPv4 unicast
+and
+.Ic vpn
+sessions can send paths with an IPv6 nexthop.
+If
+.Ic enforce
+is set, the session will only be established if the neighbor also announces
+the capability.
+The default is
+.Ic no .
+.Pp
+.It Xo
 .Ic announce graceful notification
 .Pq Ic yes Ns | Ns Ic no
 .Xc
Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.508 bgpd.h
--- bgpd.h	9 Jan 2025 12:16:21 -0000	1.508
+++ bgpd.h	9 Jan 2025 13:39:37 -0000
@@ -415,7 +415,7 @@ struct capabilities {
 	}	grestart;
 	int8_t	mp[AID_MAX];		/* multiprotocol extensions, RFC 4760 */
 	int8_t	add_path[AID_MAX];	/* ADD_PATH, RFC 7911 */
-	int8_t	ext_nexthop[AID_MAX];	/* Ext Nexthop Encoding, RFC 8950 */
+	int8_t	ext_nh[AID_MAX];	/* Ext Nexthop Encoding, RFC 8950 */
 	int8_t	refresh;		/* route refresh, RFC 2918 */
 	int8_t	as4byte;		/* 4-byte ASnum, RFC 4893 */
 	int8_t	enhanced_rr;		/* enhanced route refresh, RFC 7313 */
Index: parse.y
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
diff -u -p -r1.475 parse.y
--- parse.y	9 Jan 2025 15:57:31 -0000	1.475
+++ parse.y	10 Jan 2025 12:13:31 -0000
@@ -2033,6 +2033,10 @@ peeropts	: REMOTEAS as4number	{
 		| ANNOUNCE EXTENDED MESSAGE yesnoenforce {
 			curpeer->conf.capabilities.ext_msg = $4;
 		}
+		| ANNOUNCE EXTENDED NEXTHOP yesnoenforce {
+			curpeer->conf.capabilities.ext_nh[AID_VPN_IPv4] =
+			    curpeer->conf.capabilities.ext_nh[AID_INET] = $4;
+		}
 		| ROLE STRING {
 			if (strcmp($2, "provider") == 0) {
 				curpeer->conf.role = ROLE_PROVIDER;
Index: printconf.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/printconf.c,v
diff -u -p -r1.179 printconf.c
--- printconf.c	9 Jan 2025 15:57:31 -0000	1.179
+++ printconf.c	10 Jan 2025 12:13:21 -0000
@@ -973,6 +973,11 @@ print_announce(struct peer_config *p, co
 	else if (p->capabilities.ext_msg == 1)
 		printf("%s\tannounce extended message yes\n", c);
 
+	if (p->capabilities.ext_nh[AID_INET] == 2)
+		printf("%s\tannounce extended nexthop enforce\n", c);
+	else if (p->capabilities.ext_nh[AID_INET] == 1)
+		printf("%s\tannounce extended nexthop yes\n", c);
+
 	if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV_ENFORCE)
 		printf("%s\tannounce add-path recv enforce\n", c);
 	else if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV)
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.648 rde.c
--- rde.c	9 Jan 2025 12:16:21 -0000	1.648
+++ rde.c	10 Jan 2025 12:53:22 -0000
@@ -3345,6 +3345,10 @@ rde_send_kroute(struct rib *rib, struct 
 
 	switch (kf.prefix.aid) {
 	case AID_VPN_IPv4:
+		/* XXX FIB can not handle non-IPv4 nexthop */
+		if (kf.nexthop.aid != AID_INET)
+			type = IMSG_KROUTE_DELETE;
+		/* FALLTHROUGH */
 	case AID_VPN_IPv6:
 		if (!(rib->flags & F_RIB_LOCAL))
 			/* not Loc-RIB, no update for VPNs */
@@ -3361,6 +3365,11 @@ rde_send_kroute(struct rib *rib, struct 
 				    __LINE__);
 		}
 		break;
+	case AID_INET:
+		/* XXX FIB can not handle non-IPv4 nexthop */
+		if (kf.nexthop.aid != AID_INET)
+			type = IMSG_KROUTE_DELETE;
+		/* FALLTHROUGH */
 	default:
 		if (imsg_compose(ibuf_main, type, rib->rtableid, 0, -1,
 		    &kf, sizeof(kf)) == -1)
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.44 rde_peer.c
--- rde_peer.c	9 Jan 2025 12:16:21 -0000	1.44
+++ rde_peer.c	9 Jan 2025 13:39:53 -0000
@@ -68,7 +68,7 @@ peer_has_ext_nexthop(struct rde_peer *pe
 {
 	if (aid >= AID_MAX)
 		return 0;
-	return peer->capa.ext_nexthop[aid];
+	return peer->capa.ext_nh[aid];
 }
 
 int
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
diff -u -p -r1.173 rde_update.c
--- rde_update.c	9 Jan 2025 12:16:21 -0000	1.173
+++ rde_update.c	9 Jan 2025 21:28:31 -0000
@@ -467,7 +467,10 @@ up_get_nexthop(struct rde_peer *peer, st
 	switch (aid) {
 	case AID_INET:
 	case AID_VPN_IPv4:
-		if (peer->local_v4_addr.aid == AID_INET)
+		if (peer_has_ext_nexthop(peer, aid) &&
+		    peer->remote_addr.aid == AID_INET6)
+			peer_local = &peer->local_v6_addr;
+		else if (peer->local_v4_addr.aid == AID_INET)
 			peer_local = &peer->local_v4_addr;
 		break;
 	case AID_INET6:
@@ -625,6 +628,11 @@ up_generate_attr(struct ibuf *buf, struc
 			case AID_INET:
 				if (nh == NULL)
 					return -1;
+				if (nh->exit_nexthop.aid != AID_INET) {
+					if (peer_has_ext_nexthop(peer, aid))
+						break;
+					return -1;
+				}
 				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
 				    ATTR_NEXTHOP, &nh->exit_nexthop.v4,
 				    sizeof(nh->exit_nexthop.v4)) == -1)
Index: session.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
diff -u -p -r1.506 session.c
--- session.c	3 Jan 2025 12:57:49 -0000	1.506
+++ session.c	9 Jan 2025 13:40:55 -0000
@@ -67,8 +67,6 @@ void	session_accept(int);
 int	session_connect(struct peer *);
 void	session_tcp_established(struct peer *);
 int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
-int	session_capa_add_mp(struct ibuf *, uint8_t);
-int	session_capa_add_afi(struct ibuf *, uint8_t, uint8_t);
 struct ibuf	*session_newmsg(enum msg_type, uint16_t);
 void	session_sendmsg(struct ibuf *, struct peer *, enum msg_type);
 void	session_open(struct peer *);
@@ -1364,7 +1362,7 @@ session_capa_add(struct ibuf *opb, uint8
 	return (errs);
 }
 
-int
+static int
 session_capa_add_mp(struct ibuf *buf, uint8_t aid)
 {
 	uint16_t		 afi;
@@ -1383,10 +1381,10 @@ session_capa_add_mp(struct ibuf *buf, ui
 	return (errs);
 }
 
-int
+static int
 session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags)
 {
-	u_int		errs = 0;
+	int		errs = 0;
 	uint16_t	afi;
 	uint8_t		safi;
 
@@ -1402,6 +1400,25 @@ session_capa_add_afi(struct ibuf *b, uin
 	return (errs);
 }
 
+static int
+session_capa_add_ext_nh(struct ibuf *b, uint8_t aid)
+{
+	int		errs = 0;
+	uint16_t	afi;
+	uint8_t		safi;
+
+	if (aid2afi(aid, &afi, &safi)) {
+		log_warn("%s: bad AID", __func__);
+		return (-1);
+	}
+	/* capability tuple: NLRI AFI, NLRI SAFI, nexthop AFI; 16 bits each */
+	errs += ibuf_add_n16(b, afi);
+	errs += ibuf_add_n16(b, safi);
+	errs += ibuf_add_n16(b, AFI_IPv6);
+
+	return (errs);
+}
+
 struct ibuf *
 session_newmsg(enum msg_type msgtype, uint16_t len)
 {
@@ -1517,7 +1534,22 @@ session_open(struct peer *p)
 	if (p->capa.ann.refresh)	/* no data */
 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
 
-	/* extended message support, RFC8654 */
+	/* extended nexthop encoding, RFC 8950 */
+	if (p->capa.ann.ext_nh[AID_INET]) {
+		uint8_t enhlen = 0;
+
+		if (p->capa.ann.mp[AID_INET])
+			enhlen += 6;
+		if (p->capa.ann.mp[AID_VPN_IPv4])
+			enhlen += 6;
+		errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen);
+		if (p->capa.ann.mp[AID_INET])
+			errs += session_capa_add_ext_nh(opb, AID_INET);
+		if (p->capa.ann.mp[AID_VPN_IPv4])
+			errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4);
+	}
+
+	/* extended message support, RFC 8654 */
 	if (p->capa.ann.ext_msg)	/* no data */
 		errs += session_capa_add(opb, CAPA_EXT_MSG, 0);
 
@@ -2540,7 +2572,7 @@ int
 parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as)
 {
 	struct ibuf	 capabuf;
-	uint16_t	 afi, gr_header;
+	uint16_t	 afi, nhafi, tmp16, gr_header;
 	uint8_t		 capa_code, capa_len;
 	uint8_t		 safi, aid, role, flags;
 
@@ -2582,6 +2614,38 @@ parse_capabilities(struct peer *peer, st
 		case CAPA_REFRESH:
 			peer->capa.peer.refresh = 1;
 			break;
+		case CAPA_EXT_NEXTHOP:
+			while (ibuf_size(&capabuf) > 0) {
+				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
+				    ibuf_get_n16(&capabuf, &tmp16) == -1 ||
+				    ibuf_get_n16(&capabuf, &nhafi) == -1) {
+					log_peer_warnx(&peer->conf,
+					    "Received bad %s capability",
+					    log_capability(CAPA_EXT_NEXTHOP));
+					memset(peer->capa.peer.ext_nh, 0,
+					    sizeof(peer->capa.peer.ext_nh));
+					break;
+				}
+				if (afi2aid(afi, tmp16, &aid) == -1 ||
+				    !(aid == AID_INET || aid == AID_VPN_IPv4)) {
+					log_peer_warnx(&peer->conf,
+					    "Received %s capability: "
+					    " unsupported AFI %u, safi %u pair",
+					    log_capability(CAPA_EXT_NEXTHOP),
+					    afi, tmp16);
+					continue;
+				}
+				if (nhafi != AFI_IPv6) {
+					log_peer_warnx(&peer->conf,
+					    "Received %s capability: "
+					    " unsupported nexthop AFI %u",
+					    log_capability(CAPA_EXT_NEXTHOP),
+					    nhafi);
+					continue;
+				}
+				peer->capa.peer.ext_nh[aid] = 1;
+			}
+			break;
 		case CAPA_EXT_MSG:
 			peer->capa.peer.ext_msg = 1;
 			break;
@@ -2798,6 +2862,16 @@ capa_neg_calc(struct peer *p)
 	    (p->capa.ann.grestart.grnotification &&
 	    p->capa.peer.grestart.grnotification) != 0;
 
+	/* RFC 8950 extended nexthop encoding: both sides need to agree */
+	memset(p->capa.neg.ext_nh, 0, sizeof(p->capa.neg.ext_nh));
+	for (i = AID_MIN; i < AID_MAX; i++) {
+		if (p->capa.neg.mp[i] == 0)
+			continue;
+		if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) {
+			p->capa.neg.ext_nh[i] = 1;
+		}
+	}
+
 	/*
 	 * ADD-PATH: set only those bits where both sides agree.
 	 * For this compare our send bit with the recv bit from the peer
@@ -2929,6 +3003,17 @@ capa_neg_calc(struct peer *p)
 		}
 	}
 
+	for (i = AID_MIN; i < AID_MAX; i++) {
+		if (p->capa.neg.mp[i] == 0)
+			continue;
+		if (p->capa.ann.ext_nh[i] == 2 &&
+		    p->capa.neg.ext_nh[i] == 0) {
+			capa_code = CAPA_EXT_NEXTHOP;
+			capa_len = 6;
+			capa_aid = i;
+			goto fail;
+		}
+	}
 	return (0);
 
  fail:
@@ -2940,6 +3025,8 @@ capa_neg_calc(struct peer *p)
 		session_capa_add_mp(ebuf, capa_aid);
 	else if (capa_code == CAPA_ADD_PATH)
 		session_capa_add_afi(ebuf, capa_aid, 0);
+	else if (capa_code == CAPA_EXT_NEXTHOP)
+		session_capa_add_ext_nh(ebuf, capa_aid);
 	else if (capa_len > 0)
 		ibuf_add_zero(ebuf, capa_len);