Download raw body.
bgpd: RFC8950 extended nexthop encoding support in RIB
On Fri, Jan 10, 2025 at 02:04:20PM +0100, Claudio Jeker wrote:
> This diff is enough to use RFC 8950 on route reflectors or route servers.
> It adds the support for IPv6 nexthops for IPv4 routes to the RDE / RIB.
>
> The problem is that the FIB is not ready for that yet (the nexthop of a
> kroute has to be of the same address family as the prefix) and fixing that
> is not trivial. Additionally the OpenBSD network stack is also not ready
> but let's go step by step :)
>
> I only tested this against OpenBGPD but did no interop tests yet.
I don't think that's a blocker. Diff reads fine
ok tb
> --
> :wq Claudio
>
> Index: bgpd.8
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.8,v
> diff -u -p -r1.82 bgpd.8
> --- bgpd.8 7 Jan 2025 12:00:36 -0000 1.82
> +++ bgpd.8 10 Jan 2025 13:03:25 -0000
> @@ -536,6 +536,16 @@ has been started.
> .Re
> .Pp
> .Rs
> +.%A S. Litkowski
> +.%A S. Agrawal
> +.%A K. Ananthamurthy
> +.%A K. Patel
> +.%D November 2020
> +.%R RFC 8950
> +.%T Advertising IPv4 Network Layer Reachability Information (NLRI) with an IPv6 Next Hop
> +.Re
> +.Pp
> +.Rs
> .%A C. Loibl
> .%A S. Hares
> .%A R. Raszuk
> Index: bgpd.conf.5
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v
> diff -u -p -r1.247 bgpd.conf.5
> --- bgpd.conf.5 9 Jan 2025 15:57:31 -0000 1.247
> +++ bgpd.conf.5 10 Jan 2025 12:12:58 -0000
> @@ -1119,6 +1119,25 @@ The default is
> .Ic no .
> .Pp
> .It Xo
> +.Ic announce extended nexthop
> +.Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce
> +.Xc
> +If set to
> +.Ic yes ,
> +the extended nexthop encoding capability is announced.
> +If negotiated,
> +.Ic IPv4 unicast
> +and
> +.Ic vpn
> +sessions can send paths with an IPv6 nexthop.
> +If
> +.Ic enforce
> +is set, the session will only be established if the neighbor also announces
> +the capability.
> +The default is
> +.Ic no .
> +.Pp
> +.It Xo
> .Ic announce graceful notification
> .Pq Ic yes Ns | Ns Ic no
> .Xc
> Index: bgpd.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
> diff -u -p -r1.508 bgpd.h
> --- bgpd.h 9 Jan 2025 12:16:21 -0000 1.508
> +++ bgpd.h 9 Jan 2025 13:39:37 -0000
> @@ -415,7 +415,7 @@ struct capabilities {
> } grestart;
> int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */
> int8_t add_path[AID_MAX]; /* ADD_PATH, RFC 7911 */
> - int8_t ext_nexthop[AID_MAX]; /* Ext Nexthop Encoding, RFC 8950 */
> + int8_t ext_nh[AID_MAX]; /* Ext Nexthop Encoding, RFC 8950 */
> int8_t refresh; /* route refresh, RFC 2918 */
> int8_t as4byte; /* 4-byte ASnum, RFC 4893 */
> int8_t enhanced_rr; /* enhanced route refresh, RFC 7313 */
> Index: parse.y
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
> diff -u -p -r1.475 parse.y
> --- parse.y 9 Jan 2025 15:57:31 -0000 1.475
> +++ parse.y 10 Jan 2025 12:13:31 -0000
> @@ -2033,6 +2033,10 @@ peeropts : REMOTEAS as4number {
> | ANNOUNCE EXTENDED MESSAGE yesnoenforce {
> curpeer->conf.capabilities.ext_msg = $4;
> }
> + | ANNOUNCE EXTENDED NEXTHOP yesnoenforce {
> + curpeer->conf.capabilities.ext_nh[AID_VPN_IPv4] =
> + curpeer->conf.capabilities.ext_nh[AID_INET] = $4;
> + }
> | ROLE STRING {
> if (strcmp($2, "provider") == 0) {
> curpeer->conf.role = ROLE_PROVIDER;
> Index: printconf.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/printconf.c,v
> diff -u -p -r1.179 printconf.c
> --- printconf.c 9 Jan 2025 15:57:31 -0000 1.179
> +++ printconf.c 10 Jan 2025 12:13:21 -0000
> @@ -973,6 +973,11 @@ print_announce(struct peer_config *p, co
> else if (p->capabilities.ext_msg == 1)
> printf("%s\tannounce extended message yes\n", c);
>
> + if (p->capabilities.ext_nh[AID_INET] == 2)
> + printf("%s\tannounce extended nexthop enforce\n", c);
> + else if (p->capabilities.ext_nh[AID_INET] == 1)
> + printf("%s\tannounce extended nexthop yes\n", c);
> +
> if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV_ENFORCE)
> printf("%s\tannounce add-path recv enforce\n", c);
> else if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV)
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> diff -u -p -r1.648 rde.c
> --- rde.c 9 Jan 2025 12:16:21 -0000 1.648
> +++ rde.c 10 Jan 2025 12:53:22 -0000
> @@ -3345,6 +3345,10 @@ rde_send_kroute(struct rib *rib, struct
>
> switch (kf.prefix.aid) {
> case AID_VPN_IPv4:
> + /* XXX FIB can not handle non-IPv4 nexthop */
> + if (kf.nexthop.aid != AID_INET)
> + type = IMSG_KROUTE_DELETE;
> + /* FALLTHROUGH */
> case AID_VPN_IPv6:
> if (!(rib->flags & F_RIB_LOCAL))
> /* not Loc-RIB, no update for VPNs */
> @@ -3361,6 +3365,11 @@ rde_send_kroute(struct rib *rib, struct
> __LINE__);
> }
> break;
> + case AID_INET:
> + /* XXX FIB can not handle non-IPv4 nexthop */
> + if (kf.nexthop.aid != AID_INET)
> + type = IMSG_KROUTE_DELETE;
> + /* FALLTHROUGH */
> default:
> if (imsg_compose(ibuf_main, type, rib->rtableid, 0, -1,
> &kf, sizeof(kf)) == -1)
> Index: rde_peer.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
> diff -u -p -r1.44 rde_peer.c
> --- rde_peer.c 9 Jan 2025 12:16:21 -0000 1.44
> +++ rde_peer.c 9 Jan 2025 13:39:53 -0000
> @@ -68,7 +68,7 @@ peer_has_ext_nexthop(struct rde_peer *pe
> {
> if (aid >= AID_MAX)
> return 0;
> - return peer->capa.ext_nexthop[aid];
> + return peer->capa.ext_nh[aid];
> }
>
> int
> Index: rde_update.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> diff -u -p -r1.173 rde_update.c
> --- rde_update.c 9 Jan 2025 12:16:21 -0000 1.173
> +++ rde_update.c 9 Jan 2025 21:28:31 -0000
> @@ -467,7 +467,10 @@ up_get_nexthop(struct rde_peer *peer, st
> switch (aid) {
> case AID_INET:
> case AID_VPN_IPv4:
> - if (peer->local_v4_addr.aid == AID_INET)
> + if (peer_has_ext_nexthop(peer, aid) &&
> + peer->remote_addr.aid == AID_INET6)
> + peer_local = &peer->local_v6_addr;
> + else if (peer->local_v4_addr.aid == AID_INET)
> peer_local = &peer->local_v4_addr;
> break;
> case AID_INET6:
> @@ -625,6 +628,11 @@ up_generate_attr(struct ibuf *buf, struc
> case AID_INET:
> if (nh == NULL)
> return -1;
> + if (nh->exit_nexthop.aid != AID_INET) {
> + if (peer_has_ext_nexthop(peer, aid))
> + break;
> + return -1;
> + }
> if (attr_writebuf(buf, ATTR_WELL_KNOWN,
> ATTR_NEXTHOP, &nh->exit_nexthop.v4,
> sizeof(nh->exit_nexthop.v4)) == -1)
> Index: session.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
> diff -u -p -r1.506 session.c
> --- session.c 3 Jan 2025 12:57:49 -0000 1.506
> +++ session.c 9 Jan 2025 13:40:55 -0000
> @@ -67,8 +67,6 @@ void session_accept(int);
> int session_connect(struct peer *);
> void session_tcp_established(struct peer *);
> int session_capa_add(struct ibuf *, uint8_t, uint8_t);
> -int session_capa_add_mp(struct ibuf *, uint8_t);
> -int session_capa_add_afi(struct ibuf *, uint8_t, uint8_t);
> struct ibuf *session_newmsg(enum msg_type, uint16_t);
> void session_sendmsg(struct ibuf *, struct peer *, enum msg_type);
> void session_open(struct peer *);
> @@ -1364,7 +1362,7 @@ session_capa_add(struct ibuf *opb, uint8
> return (errs);
> }
>
> -int
> +static int
> session_capa_add_mp(struct ibuf *buf, uint8_t aid)
> {
> uint16_t afi;
> @@ -1383,10 +1381,10 @@ session_capa_add_mp(struct ibuf *buf, ui
> return (errs);
> }
>
> -int
> +static int
> session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags)
> {
> - u_int errs = 0;
> + int errs = 0;
> uint16_t afi;
> uint8_t safi;
>
> @@ -1402,6 +1400,25 @@ session_capa_add_afi(struct ibuf *b, uin
> return (errs);
> }
>
> +static int
> +session_capa_add_ext_nh(struct ibuf *b, uint8_t aid)
> +{
> + int errs = 0;
> + uint16_t afi;
> + uint8_t safi;
> +
> + if (aid2afi(aid, &afi, &safi)) {
> + log_warn("%s: bad AID", __func__);
> + return (-1);
> + }
> +
> + errs += ibuf_add_n16(b, afi);
> + errs += ibuf_add_n16(b, safi);
> + errs += ibuf_add_n16(b, AFI_IPv6);
> +
> + return (errs);
> +}
> +
> struct ibuf *
> session_newmsg(enum msg_type msgtype, uint16_t len)
> {
> @@ -1517,7 +1534,22 @@ session_open(struct peer *p)
> if (p->capa.ann.refresh) /* no data */
> errs += session_capa_add(opb, CAPA_REFRESH, 0);
>
> - /* extended message support, RFC8654 */
> + /* extended nexthop encoding, RFC 8950 */
> + if (p->capa.ann.ext_nh[AID_INET]) {
> + uint8_t enhlen = 0;
> +
> + if (p->capa.ann.mp[AID_INET])
> + enhlen += 6;
> + if (p->capa.ann.mp[AID_VPN_IPv4])
> + enhlen += 6;
> + errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen);
> + if (p->capa.ann.mp[AID_INET])
> + errs += session_capa_add_ext_nh(opb, AID_INET);
> + if (p->capa.ann.mp[AID_VPN_IPv4])
> + errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4);
> + }
> +
> + /* extended message support, RFC 8654 */
> if (p->capa.ann.ext_msg) /* no data */
> errs += session_capa_add(opb, CAPA_EXT_MSG, 0);
>
> @@ -2540,7 +2572,7 @@ int
> parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as)
> {
> struct ibuf capabuf;
> - uint16_t afi, gr_header;
> + uint16_t afi, nhafi, tmp16, gr_header;
> uint8_t capa_code, capa_len;
> uint8_t safi, aid, role, flags;
>
> @@ -2582,6 +2614,38 @@ parse_capabilities(struct peer *peer, st
> case CAPA_REFRESH:
> peer->capa.peer.refresh = 1;
> break;
> + case CAPA_EXT_NEXTHOP:
> + while (ibuf_size(&capabuf) > 0) {
> + if (ibuf_get_n16(&capabuf, &afi) == -1 ||
> + ibuf_get_n16(&capabuf, &tmp16) == -1 ||
> + ibuf_get_n16(&capabuf, &nhafi) == -1) {
> + log_peer_warnx(&peer->conf,
> + "Received bad %s capability",
> + log_capability(CAPA_EXT_NEXTHOP));
> + memset(peer->capa.peer.ext_nh, 0,
> + sizeof(peer->capa.peer.ext_nh));
> + break;
> + }
> + if (afi2aid(afi, tmp16, &aid) == -1 ||
> + !(aid == AID_INET || aid == AID_VPN_IPv4)) {
> + log_peer_warnx(&peer->conf,
> + "Received %s capability: "
> + " unsupported AFI %u, safi %u pair",
> + log_capability(CAPA_EXT_NEXTHOP),
> + afi, safi);
> + continue;
> + }
> + if (nhafi != AFI_IPv6) {
> + log_peer_warnx(&peer->conf,
> + "Received %s capability: "
> + " unsupported nexthop AFI %u",
> + log_capability(CAPA_EXT_NEXTHOP),
> + nhafi);
> + continue;
> + }
> + peer->capa.peer.ext_nh[aid] = 1;
> + }
> + break;
> case CAPA_EXT_MSG:
> peer->capa.peer.ext_msg = 1;
> break;
> @@ -2798,6 +2862,16 @@ capa_neg_calc(struct peer *p)
> (p->capa.ann.grestart.grnotification &&
> p->capa.peer.grestart.grnotification) != 0;
>
> + /* RFC 8950 extended nexthop encoding: both sides need to agree */
> + memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
> + for (i = AID_MIN; i < AID_MAX; i++) {
> + if (p->capa.neg.mp[i] == 0)
> + continue;
> + if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) {
> + p->capa.neg.ext_nh[i] = 1;
> + }
> + }
> +
> /*
> * ADD-PATH: set only those bits where both sides agree.
> * For this compare our send bit with the recv bit from the peer
> @@ -2929,6 +3003,17 @@ capa_neg_calc(struct peer *p)
> }
> }
>
> + for (i = AID_MIN; i < AID_MAX; i++) {
> + if (p->capa.neg.mp[i] == 0)
> + continue;
> + if (p->capa.ann.ext_nh[i] == 2 &&
> + p->capa.neg.ext_nh[i] == 0) {
> + capa_code = CAPA_EXT_NEXTHOP;
> + capa_len = 6;
> + capa_aid = i;
> + goto fail;
> + }
> + }
> return (0);
>
> fail:
> @@ -2940,6 +3025,8 @@ capa_neg_calc(struct peer *p)
> session_capa_add_mp(ebuf, capa_aid);
> else if (capa_code == CAPA_ADD_PATH)
> session_capa_add_afi(ebuf, capa_aid, 0);
> + else if (capa_code == CAPA_EXT_NEXTHOP)
> + session_capa_add_ext_nh(ebuf, capa_aid);
> else if (capa_len > 0)
> ibuf_add_zero(ebuf, capa_len);
>
>
bgpd: RFC8950 extended nexthop encoding support in RIB