From: Theo Buehler Subject: Re: bgpd: RFC8950 extended nexthop encoding support in RIB To: tech@openbsd.org Date: Mon, 13 Jan 2025 09:09:20 +0100 On Fri, Jan 10, 2025 at 02:04:20PM +0100, Claudio Jeker wrote: > This diff is enough to use RFC 8950 on route reflectors or route servers. > It adds the support for IPv6 nexthops for IPv4 routes to the RDE / RIB. > > The problem is that the FIB is not ready for that yet (the nexthop of a > kroute has to be of the same address family as the prefix) and fixing that > is not trivial. Additionally the OpenBSD network stack is also not ready > but let's go step by step :) > > I only tested this against OpenBGPD but did no interop tests yet. I don't think that's a blocker. Diff reads fine ok tb > -- > :wq Claudio > > Index: bgpd.8 > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.8,v > diff -u -p -r1.82 bgpd.8 > --- bgpd.8 7 Jan 2025 12:00:36 -0000 1.82 > +++ bgpd.8 10 Jan 2025 13:03:25 -0000 > @@ -536,6 +536,16 @@ has been started. > .Re > .Pp > .Rs > +.%A S. Litkowski > +.%A S. Agrawal > +.%A K. Ananthamurthy > +.%A K. Patel > +.%D November 2020 > +.%R RFC 8950 > +.%T Advertising IPv4 Network Layer Reachability Information (NLRI) with an IPv6 Next Hop > +.Re > +.Pp > +.Rs > .%A C. Loibl > .%A S. Hares > .%A R. Raszuk > Index: bgpd.conf.5 > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v > diff -u -p -r1.247 bgpd.conf.5 > --- bgpd.conf.5 9 Jan 2025 15:57:31 -0000 1.247 > +++ bgpd.conf.5 10 Jan 2025 12:12:58 -0000 > @@ -1119,6 +1119,25 @@ The default is > .Ic no . > .Pp > .It Xo > +.Ic announce extended nexthop > +.Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce > +.Xc > +If set to > +.Ic yes , > +the extended nexthop encoding capability is announced. > +If negotiated, > +.Ic IPv4 unicast > +and > +.Ic vpn > +sessions can send paths with an IPv6 nexthop. 
> +If > +.Ic enforce > +is set, the session will only be established if the neighbor also announces > +the capability. > +The default is > +.Ic no . > +.Pp > +.It Xo > .Ic announce graceful notification > .Pq Ic yes Ns | Ns Ic no > .Xc > Index: bgpd.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v > diff -u -p -r1.508 bgpd.h > --- bgpd.h 9 Jan 2025 12:16:21 -0000 1.508 > +++ bgpd.h 9 Jan 2025 13:39:37 -0000 > @@ -415,7 +415,7 @@ struct capabilities { > } grestart; > int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */ > int8_t add_path[AID_MAX]; /* ADD_PATH, RFC 7911 */ > - int8_t ext_nexthop[AID_MAX]; /* Ext Nexthop Encoding, RFC 8950 */ > + int8_t ext_nh[AID_MAX]; /* Ext Nexthop Encoding, RFC 8950 */ > int8_t refresh; /* route refresh, RFC 2918 */ > int8_t as4byte; /* 4-byte ASnum, RFC 4893 */ > int8_t enhanced_rr; /* enhanced route refresh, RFC 7313 */ > Index: parse.y > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v > diff -u -p -r1.475 parse.y > --- parse.y 9 Jan 2025 15:57:31 -0000 1.475 > +++ parse.y 10 Jan 2025 12:13:31 -0000 > @@ -2033,6 +2033,10 @@ peeropts : REMOTEAS as4number { > | ANNOUNCE EXTENDED MESSAGE yesnoenforce { > curpeer->conf.capabilities.ext_msg = $4; > } > + | ANNOUNCE EXTENDED NEXTHOP yesnoenforce { > + curpeer->conf.capabilities.ext_nh[AID_VPN_IPv4] = > + curpeer->conf.capabilities.ext_nh[AID_INET] = $4; > + } > | ROLE STRING { > if (strcmp($2, "provider") == 0) { > curpeer->conf.role = ROLE_PROVIDER; > Index: printconf.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/printconf.c,v > diff -u -p -r1.179 printconf.c > --- printconf.c 9 Jan 2025 15:57:31 -0000 1.179 > +++ printconf.c 10 Jan 2025 12:13:21 -0000 > @@ -973,6 +973,11 @@ print_announce(struct peer_config *p, co > else if (p->capabilities.ext_msg == 1) > printf("%s\tannounce extended 
message yes\n", c); > > + if (p->capabilities.ext_nh[AID_INET] == 2) > + printf("%s\tannounce extended nexthop enforce\n", c); > + else if (p->capabilities.ext_nh[AID_INET] == 1) > + printf("%s\tannounce extended nexthop yes\n", c); > + > if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV_ENFORCE) > printf("%s\tannounce add-path recv enforce\n", c); > else if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV) > Index: rde.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v > diff -u -p -r1.648 rde.c > --- rde.c 9 Jan 2025 12:16:21 -0000 1.648 > +++ rde.c 10 Jan 2025 12:53:22 -0000 > @@ -3345,6 +3345,10 @@ rde_send_kroute(struct rib *rib, struct > > switch (kf.prefix.aid) { > case AID_VPN_IPv4: > + /* XXX FIB can not handle non-IPv4 nexthop */ > + if (kf.nexthop.aid != AID_INET) > + type = IMSG_KROUTE_DELETE; > + /* FALLTHROUGH */ > case AID_VPN_IPv6: > if (!(rib->flags & F_RIB_LOCAL)) > /* not Loc-RIB, no update for VPNs */ > @@ -3361,6 +3365,11 @@ rde_send_kroute(struct rib *rib, struct > __LINE__); > } > break; > + case AID_INET: > + /* XXX FIB can not handle non-IPv4 nexthop */ > + if (kf.nexthop.aid != AID_INET) > + type = IMSG_KROUTE_DELETE; > + /* FALLTHROUGH */ > default: > if (imsg_compose(ibuf_main, type, rib->rtableid, 0, -1, > &kf, sizeof(kf)) == -1) > Index: rde_peer.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v > diff -u -p -r1.44 rde_peer.c > --- rde_peer.c 9 Jan 2025 12:16:21 -0000 1.44 > +++ rde_peer.c 9 Jan 2025 13:39:53 -0000 > @@ -68,7 +68,7 @@ peer_has_ext_nexthop(struct rde_peer *pe > { > if (aid >= AID_MAX) > return 0; > - return peer->capa.ext_nexthop[aid]; > + return peer->capa.ext_nh[aid]; > } > > int > Index: rde_update.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v > diff -u -p -r1.173 rde_update.c > --- rde_update.c 9 Jan 2025 
12:16:21 -0000 1.173 > +++ rde_update.c 9 Jan 2025 21:28:31 -0000 > @@ -467,7 +467,10 @@ up_get_nexthop(struct rde_peer *peer, st > switch (aid) { > case AID_INET: > case AID_VPN_IPv4: > - if (peer->local_v4_addr.aid == AID_INET) > + if (peer_has_ext_nexthop(peer, aid) && > + peer->remote_addr.aid == AID_INET6) > + peer_local = &peer->local_v6_addr; > + else if (peer->local_v4_addr.aid == AID_INET) > peer_local = &peer->local_v4_addr; > break; > case AID_INET6: > @@ -625,6 +628,11 @@ up_generate_attr(struct ibuf *buf, struc > case AID_INET: > if (nh == NULL) > return -1; > + if (nh->exit_nexthop.aid != AID_INET) { > + if (peer_has_ext_nexthop(peer, aid)) > + break; > + return -1; > + } > if (attr_writebuf(buf, ATTR_WELL_KNOWN, > ATTR_NEXTHOP, &nh->exit_nexthop.v4, > sizeof(nh->exit_nexthop.v4)) == -1) > Index: session.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/session.c,v > diff -u -p -r1.506 session.c > --- session.c 3 Jan 2025 12:57:49 -0000 1.506 > +++ session.c 9 Jan 2025 13:40:55 -0000 > @@ -67,8 +67,6 @@ void session_accept(int); > int session_connect(struct peer *); > void session_tcp_established(struct peer *); > int session_capa_add(struct ibuf *, uint8_t, uint8_t); > -int session_capa_add_mp(struct ibuf *, uint8_t); > -int session_capa_add_afi(struct ibuf *, uint8_t, uint8_t); > struct ibuf *session_newmsg(enum msg_type, uint16_t); > void session_sendmsg(struct ibuf *, struct peer *, enum msg_type); > void session_open(struct peer *); > @@ -1364,7 +1362,7 @@ session_capa_add(struct ibuf *opb, uint8 > return (errs); > } > > -int > +static int > session_capa_add_mp(struct ibuf *buf, uint8_t aid) > { > uint16_t afi; > @@ -1383,10 +1381,10 @@ session_capa_add_mp(struct ibuf *buf, ui > return (errs); > } > > -int > +static int > session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags) > { > - u_int errs = 0; > + int errs = 0; > uint16_t afi; > uint8_t safi; > > @@ -1402,6 +1400,25 @@ 
session_capa_add_afi(struct ibuf *b, uin > return (errs); > } > > +static int > +session_capa_add_ext_nh(struct ibuf *b, uint8_t aid) > +{ > + int errs = 0; > + uint16_t afi; > + uint8_t safi; > + > + if (aid2afi(aid, &afi, &safi)) { > + log_warn("%s: bad AID", __func__); > + return (-1); > + } > + > + errs += ibuf_add_n16(b, afi); > + errs += ibuf_add_n16(b, safi); > + errs += ibuf_add_n16(b, AFI_IPv6); > + > + return (errs); > +} > + > struct ibuf * > session_newmsg(enum msg_type msgtype, uint16_t len) > { > @@ -1517,7 +1534,22 @@ session_open(struct peer *p) > if (p->capa.ann.refresh) /* no data */ > errs += session_capa_add(opb, CAPA_REFRESH, 0); > > - /* extended message support, RFC8654 */ > + /* extended nexthop encoding, RFC 8950 */ > + if (p->capa.ann.ext_nh[AID_INET]) { > + uint8_t enhlen = 0; > + > + if (p->capa.ann.mp[AID_INET]) > + enhlen += 6; > + if (p->capa.ann.mp[AID_VPN_IPv4]) > + enhlen += 6; > + errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen); > + if (p->capa.ann.mp[AID_INET]) > + errs += session_capa_add_ext_nh(opb, AID_INET); > + if (p->capa.ann.mp[AID_VPN_IPv4]) > + errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4); > + } > + > + /* extended message support, RFC 8654 */ > if (p->capa.ann.ext_msg) /* no data */ > errs += session_capa_add(opb, CAPA_EXT_MSG, 0); > > @@ -2540,7 +2572,7 @@ int > parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as) > { > struct ibuf capabuf; > - uint16_t afi, gr_header; > + uint16_t afi, nhafi, tmp16, gr_header; > uint8_t capa_code, capa_len; > uint8_t safi, aid, role, flags; > > @@ -2582,6 +2614,38 @@ parse_capabilities(struct peer *peer, st > case CAPA_REFRESH: > peer->capa.peer.refresh = 1; > break; > + case CAPA_EXT_NEXTHOP: > + while (ibuf_size(&capabuf) > 0) { > + if (ibuf_get_n16(&capabuf, &afi) == -1 || > + ibuf_get_n16(&capabuf, &tmp16) == -1 || > + ibuf_get_n16(&capabuf, &nhafi) == -1) { > + log_peer_warnx(&peer->conf, > + "Received bad %s capability", > + 
log_capability(CAPA_EXT_NEXTHOP)); > + memset(peer->capa.peer.ext_nh, 0, > + sizeof(peer->capa.peer.ext_nh)); > + break; > + } > + if (afi2aid(afi, tmp16, &aid) == -1 || > + !(aid == AID_INET || aid == AID_VPN_IPv4)) { > + log_peer_warnx(&peer->conf, > + "Received %s capability: " > + " unsupported AFI %u, safi %u pair", > + log_capability(CAPA_EXT_NEXTHOP), > + afi, safi); > + continue; > + } > + if (nhafi != AFI_IPv6) { > + log_peer_warnx(&peer->conf, > + "Received %s capability: " > + " unsupported nexthop AFI %u", > + log_capability(CAPA_EXT_NEXTHOP), > + nhafi); > + continue; > + } > + peer->capa.peer.ext_nh[aid] = 1; > + } > + break; > case CAPA_EXT_MSG: > peer->capa.peer.ext_msg = 1; > break; > @@ -2798,6 +2862,16 @@ capa_neg_calc(struct peer *p) > (p->capa.ann.grestart.grnotification && > p->capa.peer.grestart.grnotification) != 0; > > + /* RFC 8950 extended nexthop encoding: both sides need to agree */ > + memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); > + for (i = AID_MIN; i < AID_MAX; i++) { > + if (p->capa.neg.mp[i] == 0) > + continue; > + if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) { > + p->capa.neg.ext_nh[i] = 1; > + } > + } > + > /* > * ADD-PATH: set only those bits where both sides agree. > * For this compare our send bit with the recv bit from the peer > @@ -2929,6 +3003,17 @@ capa_neg_calc(struct peer *p) > } > } > > + for (i = AID_MIN; i < AID_MAX; i++) { > + if (p->capa.neg.mp[i] == 0) > + continue; > + if (p->capa.ann.ext_nh[i] == 2 && > + p->capa.neg.ext_nh[i] == 0) { > + capa_code = CAPA_EXT_NEXTHOP; > + capa_len = 6; > + capa_aid = i; > + goto fail; > + } > + } > return (0); > > fail: > @@ -2940,6 +3025,8 @@ capa_neg_calc(struct peer *p) > session_capa_add_mp(ebuf, capa_aid); > else if (capa_code == CAPA_ADD_PATH) > session_capa_add_afi(ebuf, capa_aid, 0); > + else if (capa_code == CAPA_EXT_NEXTHOP) > + session_capa_add_ext_nh(ebuf, capa_aid); > else if (capa_len > 0) > ibuf_add_zero(ebuf, capa_len); > >