Download raw body.
bgpd: RFC8950 extended nexthop encoding support in RIB
This diff is enough to use RFC 8950 on route reflectors or route servers.
It adds support for IPv6 nexthops for IPv4 routes to the RDE / RIB.
The problem is that the FIB is not ready for that yet (the nexthop of a
kroute has to be of the same address family as the prefix) and fixing that
is not trivial. Additionally, the OpenBSD network stack is not ready either,
but let's go step by step :)
So far I have only tested this against OpenBGPD; no interop tests have been done yet.
--
:wq Claudio
Index: bgpd.8
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.8,v
diff -u -p -r1.82 bgpd.8
--- bgpd.8 7 Jan 2025 12:00:36 -0000 1.82
+++ bgpd.8 10 Jan 2025 13:03:25 -0000
@@ -536,6 +536,16 @@ has been started.
.Re
.Pp
.Rs
+.%A S. Litkowski
+.%A S. Agrawal
+.%A K. Ananthamurthy
+.%A K. Patel
+.%D November 2020
+.%R RFC 8950
+.%T Advertising IPv4 Network Layer Reachability Information (NLRI) with an IPv6 Next Hop
+.Re
+.Pp
+.Rs
.%A C. Loibl
.%A S. Hares
.%A R. Raszuk
Index: bgpd.conf.5
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v
diff -u -p -r1.247 bgpd.conf.5
--- bgpd.conf.5 9 Jan 2025 15:57:31 -0000 1.247
+++ bgpd.conf.5 10 Jan 2025 12:12:58 -0000
@@ -1119,6 +1119,25 @@ The default is
.Ic no .
.Pp
.It Xo
+.Ic announce extended nexthop
+.Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce
+.Xc
+If set to
+.Ic yes ,
+the extended nexthop encoding capability is announced.
+If negotiated,
+.Ic IPv4 unicast
+and
+.Ic vpn
+sessions can send paths with an IPv6 nexthop.
+If
+.Ic enforce
+is set, the session will only be established if the neighbor also announces
+the capability.
+The default is
+.Ic no .
+.Pp
+.It Xo
.Ic announce graceful notification
.Pq Ic yes Ns | Ns Ic no
.Xc
Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.508 bgpd.h
--- bgpd.h 9 Jan 2025 12:16:21 -0000 1.508
+++ bgpd.h 9 Jan 2025 13:39:37 -0000
@@ -415,7 +415,7 @@ struct capabilities {
} grestart;
int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */
int8_t add_path[AID_MAX]; /* ADD_PATH, RFC 7911 */
- int8_t ext_nexthop[AID_MAX]; /* Ext Nexthop Encoding, RFC 8950 */
+ int8_t ext_nh[AID_MAX]; /* Ext Nexthop Encoding, RFC 8950 */
int8_t refresh; /* route refresh, RFC 2918 */
int8_t as4byte; /* 4-byte ASnum, RFC 4893 */
int8_t enhanced_rr; /* enhanced route refresh, RFC 7313 */
Index: parse.y
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
diff -u -p -r1.475 parse.y
--- parse.y 9 Jan 2025 15:57:31 -0000 1.475
+++ parse.y 10 Jan 2025 12:13:31 -0000
@@ -2033,6 +2033,10 @@ peeropts : REMOTEAS as4number {
| ANNOUNCE EXTENDED MESSAGE yesnoenforce {
curpeer->conf.capabilities.ext_msg = $4;
}
+ | ANNOUNCE EXTENDED NEXTHOP yesnoenforce {
+ curpeer->conf.capabilities.ext_nh[AID_VPN_IPv4] =
+ curpeer->conf.capabilities.ext_nh[AID_INET] = $4;
+ }
| ROLE STRING {
if (strcmp($2, "provider") == 0) {
curpeer->conf.role = ROLE_PROVIDER;
Index: printconf.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/printconf.c,v
diff -u -p -r1.179 printconf.c
--- printconf.c 9 Jan 2025 15:57:31 -0000 1.179
+++ printconf.c 10 Jan 2025 12:13:21 -0000
@@ -973,6 +973,11 @@ print_announce(struct peer_config *p, co
else if (p->capabilities.ext_msg == 1)
printf("%s\tannounce extended message yes\n", c);
+ if (p->capabilities.ext_nh[AID_INET] == 2)
+ printf("%s\tannounce extended nexthop enforce\n", c);
+ else if (p->capabilities.ext_nh[AID_INET] == 1)
+ printf("%s\tannounce extended nexthop yes\n", c);
+
if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV_ENFORCE)
printf("%s\tannounce add-path recv enforce\n", c);
else if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV)
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.648 rde.c
--- rde.c 9 Jan 2025 12:16:21 -0000 1.648
+++ rde.c 10 Jan 2025 12:53:22 -0000
@@ -3345,6 +3345,10 @@ rde_send_kroute(struct rib *rib, struct
switch (kf.prefix.aid) {
case AID_VPN_IPv4:
+ /* XXX FIB can not handle non-IPv4 nexthop */
+ if (kf.nexthop.aid != AID_INET)
+ type = IMSG_KROUTE_DELETE;
+ /* FALLTHROUGH */
case AID_VPN_IPv6:
if (!(rib->flags & F_RIB_LOCAL))
/* not Loc-RIB, no update for VPNs */
@@ -3361,6 +3365,11 @@ rde_send_kroute(struct rib *rib, struct
__LINE__);
}
break;
+ case AID_INET:
+ /* XXX FIB can not handle non-IPv4 nexthop */
+ if (kf.nexthop.aid != AID_INET)
+ type = IMSG_KROUTE_DELETE;
+ /* FALLTHROUGH */
default:
if (imsg_compose(ibuf_main, type, rib->rtableid, 0, -1,
&kf, sizeof(kf)) == -1)
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.44 rde_peer.c
--- rde_peer.c 9 Jan 2025 12:16:21 -0000 1.44
+++ rde_peer.c 9 Jan 2025 13:39:53 -0000
@@ -68,7 +68,7 @@ peer_has_ext_nexthop(struct rde_peer *pe
{
if (aid >= AID_MAX)
return 0;
- return peer->capa.ext_nexthop[aid];
+ return peer->capa.ext_nh[aid];
}
int
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
diff -u -p -r1.173 rde_update.c
--- rde_update.c 9 Jan 2025 12:16:21 -0000 1.173
+++ rde_update.c 9 Jan 2025 21:28:31 -0000
@@ -467,7 +467,10 @@ up_get_nexthop(struct rde_peer *peer, st
switch (aid) {
case AID_INET:
case AID_VPN_IPv4:
- if (peer->local_v4_addr.aid == AID_INET)
+ if (peer_has_ext_nexthop(peer, aid) &&
+ peer->remote_addr.aid == AID_INET6)
+ peer_local = &peer->local_v6_addr;
+ else if (peer->local_v4_addr.aid == AID_INET)
peer_local = &peer->local_v4_addr;
break;
case AID_INET6:
@@ -625,6 +628,11 @@ up_generate_attr(struct ibuf *buf, struc
case AID_INET:
if (nh == NULL)
return -1;
+ if (nh->exit_nexthop.aid != AID_INET) {
+ if (peer_has_ext_nexthop(peer, aid))
+ break;
+ return -1;
+ }
if (attr_writebuf(buf, ATTR_WELL_KNOWN,
ATTR_NEXTHOP, &nh->exit_nexthop.v4,
sizeof(nh->exit_nexthop.v4)) == -1)
Index: session.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
diff -u -p -r1.506 session.c
--- session.c 3 Jan 2025 12:57:49 -0000 1.506
+++ session.c 9 Jan 2025 13:40:55 -0000
@@ -67,8 +67,6 @@ void session_accept(int);
int session_connect(struct peer *);
void session_tcp_established(struct peer *);
int session_capa_add(struct ibuf *, uint8_t, uint8_t);
-int session_capa_add_mp(struct ibuf *, uint8_t);
-int session_capa_add_afi(struct ibuf *, uint8_t, uint8_t);
struct ibuf *session_newmsg(enum msg_type, uint16_t);
void session_sendmsg(struct ibuf *, struct peer *, enum msg_type);
void session_open(struct peer *);
@@ -1364,7 +1362,7 @@ session_capa_add(struct ibuf *opb, uint8
return (errs);
}
-int
+static int
session_capa_add_mp(struct ibuf *buf, uint8_t aid)
{
uint16_t afi;
@@ -1383,10 +1381,10 @@ session_capa_add_mp(struct ibuf *buf, ui
return (errs);
}
-int
+static int
session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags)
{
- u_int errs = 0;
+ int errs = 0;
uint16_t afi;
uint8_t safi;
@@ -1402,6 +1400,25 @@ session_capa_add_afi(struct ibuf *b, uin
return (errs);
}
+static int
+session_capa_add_ext_nh(struct ibuf *b, uint8_t aid)
+{
+ int errs = 0; /* sum of the ibuf_add_n16() results */
+ uint16_t afi;
+ uint8_t safi;
+
+ if (aid2afi(aid, &afi, &safi)) {
+ log_warn("%s: bad AID", __func__);
+ return (-1);
+ }
+
+ errs += ibuf_add_n16(b, afi); /* NLRI AFI */
+ errs += ibuf_add_n16(b, safi); /* NLRI SAFI, widened to 16 bits */
+ errs += ibuf_add_n16(b, AFI_IPv6); /* nexthop AFI, always IPv6 */
+
+ return (errs);
+}
+
struct ibuf *
session_newmsg(enum msg_type msgtype, uint16_t len)
{
@@ -1517,7 +1534,22 @@ session_open(struct peer *p)
if (p->capa.ann.refresh) /* no data */
errs += session_capa_add(opb, CAPA_REFRESH, 0);
- /* extended message support, RFC8654 */
+ /* extended nexthop encoding, RFC 8950 */
+ if (p->capa.ann.ext_nh[AID_INET]) {
+ uint8_t enhlen = 0;
+
+ if (p->capa.ann.mp[AID_INET])
+ enhlen += 6;
+ if (p->capa.ann.mp[AID_VPN_IPv4])
+ enhlen += 6;
+ errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen);
+ if (p->capa.ann.mp[AID_INET])
+ errs += session_capa_add_ext_nh(opb, AID_INET);
+ if (p->capa.ann.mp[AID_VPN_IPv4])
+ errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4);
+ }
+
+ /* extended message support, RFC 8654 */
if (p->capa.ann.ext_msg) /* no data */
errs += session_capa_add(opb, CAPA_EXT_MSG, 0);
@@ -2540,7 +2572,7 @@ int
parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as)
{
struct ibuf capabuf;
- uint16_t afi, gr_header;
+ uint16_t afi, nhafi, tmp16, gr_header;
uint8_t capa_code, capa_len;
uint8_t safi, aid, role, flags;
@@ -2582,6 +2614,38 @@ parse_capabilities(struct peer *peer, st
case CAPA_REFRESH:
peer->capa.peer.refresh = 1;
break;
+ case CAPA_EXT_NEXTHOP: /* list of AFI, SAFI, nexthop AFI */
+ while (ibuf_size(&capabuf) > 0) {
+ if (ibuf_get_n16(&capabuf, &afi) == -1 ||
+ ibuf_get_n16(&capabuf, &tmp16) == -1 ||
+ ibuf_get_n16(&capabuf, &nhafi) == -1) {
+ log_peer_warnx(&peer->conf,
+ "Received bad %s capability",
+ log_capability(CAPA_EXT_NEXTHOP));
+ memset(peer->capa.peer.ext_nh, 0,
+ sizeof(peer->capa.peer.ext_nh));
+ break;
+ }
+ if (afi2aid(afi, tmp16, &aid) == -1 ||
+ !(aid == AID_INET || aid == AID_VPN_IPv4)) {
+ log_peer_warnx(&peer->conf,
+ "Received %s capability: "
+ " unsupported AFI %u, safi %u pair",
+ log_capability(CAPA_EXT_NEXTHOP),
+ afi, tmp16); /* SAFI was read into tmp16 */
+ continue;
+ }
+ if (nhafi != AFI_IPv6) {
+ log_peer_warnx(&peer->conf,
+ "Received %s capability: "
+ " unsupported nexthop AFI %u",
+ log_capability(CAPA_EXT_NEXTHOP),
+ nhafi);
+ continue;
+ }
+ peer->capa.peer.ext_nh[aid] = 1;
+ }
+ break;
case CAPA_EXT_MSG:
peer->capa.peer.ext_msg = 1;
break;
@@ -2798,6 +2862,16 @@ capa_neg_calc(struct peer *p)
(p->capa.ann.grestart.grnotification &&
p->capa.peer.grestart.grnotification) != 0;
+ /* RFC 8950 extended nexthop encoding: both sides need to agree */
+ memset(p->capa.neg.ext_nh, 0, sizeof(p->capa.neg.ext_nh));
+ for (i = AID_MIN; i < AID_MAX; i++) {
+ if (p->capa.neg.mp[i] == 0)
+ continue; /* AID itself was not negotiated */
+ if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) {
+ p->capa.neg.ext_nh[i] = 1;
+ }
+ }
+
/*
* ADD-PATH: set only those bits where both sides agree.
* For this compare our send bit with the recv bit from the peer
@@ -2929,6 +3003,17 @@ capa_neg_calc(struct peer *p)
}
}
+ for (i = AID_MIN; i < AID_MAX; i++) {
+ if (p->capa.neg.mp[i] == 0)
+ continue;
+ if (p->capa.ann.ext_nh[i] == 2 &&
+ p->capa.neg.ext_nh[i] == 0) {
+ capa_code = CAPA_EXT_NEXTHOP;
+ capa_len = 6;
+ capa_aid = i;
+ goto fail;
+ }
+ }
return (0);
fail:
@@ -2940,6 +3025,8 @@ capa_neg_calc(struct peer *p)
session_capa_add_mp(ebuf, capa_aid);
else if (capa_code == CAPA_ADD_PATH)
session_capa_add_afi(ebuf, capa_aid, 0);
+ else if (capa_code == CAPA_EXT_NEXTHOP)
+ session_capa_add_ext_nh(ebuf, capa_aid);
else if (capa_len > 0)
ibuf_add_zero(ebuf, capa_len);
bgpd: RFC8950 extended nexthop encoding support in RIB