Index | Thread | Search

From:
Theo Buehler <tb@theobuehler.org>
Subject:
Re: bgpd: RFC8950 extended nexthop encoding support in RIB
To:
tech@openbsd.org
Date:
Mon, 13 Jan 2025 09:09:20 +0100

Download raw body.

Thread
On Fri, Jan 10, 2025 at 02:04:20PM +0100, Claudio Jeker wrote:
> This diff is enough to use RFC 8950 on route reflectors or route servers.
> It adds the support for IPv6 nexthops for IPv4 routes to the RDE / RIB.
> 
> The problem is that the FIB is not ready for that yet (the nexthop of a
> kroute has to be of the same address family as the prefix) and fixing that
> is not trivial. Additionally, the OpenBSD network stack is not ready either,
> but let's go step by step :)
> 
> I only tested this against OpenBGPD but did no interop tests yet.

I don't think that's a blocker. The diff reads fine.

ok tb

> -- 
> :wq Claudio
> 
> Index: bgpd.8
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.8,v
> diff -u -p -r1.82 bgpd.8
> --- bgpd.8	7 Jan 2025 12:00:36 -0000	1.82
> +++ bgpd.8	10 Jan 2025 13:03:25 -0000
> @@ -536,6 +536,16 @@ has been started.
>  .Re
>  .Pp
>  .Rs
> +.%A S. Litkowski
> +.%A S. Agrawal
> +.%A K. Ananthamurthy
> +.%A K. Patel
> +.%D November 2020
> +.%R RFC 8950
> +.%T Advertising IPv4 Network Layer Reachability Information (NLRI) with an IPv6 Next Hop
> +.Re
> +.Pp
> +.Rs
>  .%A C. Loibl
>  .%A S. Hares
>  .%A R. Raszuk
> Index: bgpd.conf.5
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v
> diff -u -p -r1.247 bgpd.conf.5
> --- bgpd.conf.5	9 Jan 2025 15:57:31 -0000	1.247
> +++ bgpd.conf.5	10 Jan 2025 12:12:58 -0000
> @@ -1119,6 +1119,25 @@ The default is
>  .Ic no .
>  .Pp
>  .It Xo
> +.Ic announce extended nexthop
> +.Pq Ic yes Ns | Ns Ic no Ns | Ns Ic enforce
> +.Xc
> +If set to
> +.Ic yes ,
> +the extended nexthop encoding capability is announced.
> +If negotiated,
> +.Ic IPv4 unicast
> +and
> +.Ic vpn
> +sessions can send paths with an IPv6 nexthop.
> +If
> +.Ic enforce
> +is set, the session will only be established if the neighbor also announces
> +the capability.
> +The default is
> +.Ic no .
> +.Pp
> +.It Xo
>  .Ic announce graceful notification
>  .Pq Ic yes Ns | Ns Ic no
>  .Xc
> Index: bgpd.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
> diff -u -p -r1.508 bgpd.h
> --- bgpd.h	9 Jan 2025 12:16:21 -0000	1.508
> +++ bgpd.h	9 Jan 2025 13:39:37 -0000
> @@ -415,7 +415,7 @@ struct capabilities {
>  	}	grestart;
>  	int8_t	mp[AID_MAX];		/* multiprotocol extensions, RFC 4760 */
>  	int8_t	add_path[AID_MAX];	/* ADD_PATH, RFC 7911 */
> -	int8_t	ext_nexthop[AID_MAX];	/* Ext Nexthop Encoding, RFC 8950 */
> +	int8_t	ext_nh[AID_MAX];	/* Ext Nexthop Encoding, RFC 8950 */
>  	int8_t	refresh;		/* route refresh, RFC 2918 */
>  	int8_t	as4byte;		/* 4-byte ASnum, RFC 4893 */
>  	int8_t	enhanced_rr;		/* enhanced route refresh, RFC 7313 */
> Index: parse.y
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
> diff -u -p -r1.475 parse.y
> --- parse.y	9 Jan 2025 15:57:31 -0000	1.475
> +++ parse.y	10 Jan 2025 12:13:31 -0000
> @@ -2033,6 +2033,10 @@ peeropts	: REMOTEAS as4number	{
>  		| ANNOUNCE EXTENDED MESSAGE yesnoenforce {
>  			curpeer->conf.capabilities.ext_msg = $4;
>  		}
> +		| ANNOUNCE EXTENDED NEXTHOP yesnoenforce {
> +			curpeer->conf.capabilities.ext_nh[AID_VPN_IPv4] =
> +			    curpeer->conf.capabilities.ext_nh[AID_INET] = $4;
> +		}
>  		| ROLE STRING {
>  			if (strcmp($2, "provider") == 0) {
>  				curpeer->conf.role = ROLE_PROVIDER;
> Index: printconf.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/printconf.c,v
> diff -u -p -r1.179 printconf.c
> --- printconf.c	9 Jan 2025 15:57:31 -0000	1.179
> +++ printconf.c	10 Jan 2025 12:13:21 -0000
> @@ -973,6 +973,11 @@ print_announce(struct peer_config *p, co
>  	else if (p->capabilities.ext_msg == 1)
>  		printf("%s\tannounce extended message yes\n", c);
>  
> +	if (p->capabilities.ext_nh[AID_INET] == 2)
> +		printf("%s\tannounce extended nexthop enforce\n", c);
> +	else if (p->capabilities.ext_nh[AID_INET] == 1)
> +		printf("%s\tannounce extended nexthop yes\n", c);
> +
>  	if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV_ENFORCE)
>  		printf("%s\tannounce add-path recv enforce\n", c);
>  	else if (p->capabilities.add_path[AID_MIN] & CAPA_AP_RECV)
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> diff -u -p -r1.648 rde.c
> --- rde.c	9 Jan 2025 12:16:21 -0000	1.648
> +++ rde.c	10 Jan 2025 12:53:22 -0000
> @@ -3345,6 +3345,10 @@ rde_send_kroute(struct rib *rib, struct 
>  
>  	switch (kf.prefix.aid) {
>  	case AID_VPN_IPv4:
> +		/* XXX FIB can not handle non-IPv4 nexthop */
> +		if (kf.nexthop.aid != AID_INET)
> +			type = IMSG_KROUTE_DELETE;
> +		/* FALLTHROUGH */
>  	case AID_VPN_IPv6:
>  		if (!(rib->flags & F_RIB_LOCAL))
>  			/* not Loc-RIB, no update for VPNs */
> @@ -3361,6 +3365,11 @@ rde_send_kroute(struct rib *rib, struct 
>  				    __LINE__);
>  		}
>  		break;
> +	case AID_INET:
> +		/* XXX FIB can not handle non-IPv4 nexthop */
> +		if (kf.nexthop.aid != AID_INET)
> +			type = IMSG_KROUTE_DELETE;
> +		/* FALLTHROUGH */
>  	default:
>  		if (imsg_compose(ibuf_main, type, rib->rtableid, 0, -1,
>  		    &kf, sizeof(kf)) == -1)
> Index: rde_peer.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
> diff -u -p -r1.44 rde_peer.c
> --- rde_peer.c	9 Jan 2025 12:16:21 -0000	1.44
> +++ rde_peer.c	9 Jan 2025 13:39:53 -0000
> @@ -68,7 +68,7 @@ peer_has_ext_nexthop(struct rde_peer *pe
>  {
>  	if (aid >= AID_MAX)
>  		return 0;
> -	return peer->capa.ext_nexthop[aid];
> +	return peer->capa.ext_nh[aid];
>  }
>  
>  int
> Index: rde_update.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> diff -u -p -r1.173 rde_update.c
> --- rde_update.c	9 Jan 2025 12:16:21 -0000	1.173
> +++ rde_update.c	9 Jan 2025 21:28:31 -0000
> @@ -467,7 +467,10 @@ up_get_nexthop(struct rde_peer *peer, st
>  	switch (aid) {
>  	case AID_INET:
>  	case AID_VPN_IPv4:
> -		if (peer->local_v4_addr.aid == AID_INET)
> +		if (peer_has_ext_nexthop(peer, aid) &&
> +		    peer->remote_addr.aid == AID_INET6)
> +			peer_local = &peer->local_v6_addr;
> +		else if (peer->local_v4_addr.aid == AID_INET)
>  			peer_local = &peer->local_v4_addr;
>  		break;
>  	case AID_INET6:
> @@ -625,6 +628,11 @@ up_generate_attr(struct ibuf *buf, struc
>  			case AID_INET:
>  				if (nh == NULL)
>  					return -1;
> +				if (nh->exit_nexthop.aid != AID_INET) {
> +					if (peer_has_ext_nexthop(peer, aid))
> +						break;
> +					return -1;
> +				}
>  				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
>  				    ATTR_NEXTHOP, &nh->exit_nexthop.v4,
>  				    sizeof(nh->exit_nexthop.v4)) == -1)
> Index: session.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
> diff -u -p -r1.506 session.c
> --- session.c	3 Jan 2025 12:57:49 -0000	1.506
> +++ session.c	9 Jan 2025 13:40:55 -0000
> @@ -67,8 +67,6 @@ void	session_accept(int);
>  int	session_connect(struct peer *);
>  void	session_tcp_established(struct peer *);
>  int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
> -int	session_capa_add_mp(struct ibuf *, uint8_t);
> -int	session_capa_add_afi(struct ibuf *, uint8_t, uint8_t);
>  struct ibuf	*session_newmsg(enum msg_type, uint16_t);
>  void	session_sendmsg(struct ibuf *, struct peer *, enum msg_type);
>  void	session_open(struct peer *);
> @@ -1364,7 +1362,7 @@ session_capa_add(struct ibuf *opb, uint8
>  	return (errs);
>  }
>  
> -int
> +static int
>  session_capa_add_mp(struct ibuf *buf, uint8_t aid)
>  {
>  	uint16_t		 afi;
> @@ -1383,10 +1381,10 @@ session_capa_add_mp(struct ibuf *buf, ui
>  	return (errs);
>  }
>  
> -int
> +static int
>  session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags)
>  {
> -	u_int		errs = 0;
> +	int		errs = 0;
>  	uint16_t	afi;
>  	uint8_t		safi;
>  
> @@ -1402,6 +1400,25 @@ session_capa_add_afi(struct ibuf *b, uin
>  	return (errs);
>  }
>  
> +static int
> +session_capa_add_ext_nh(struct ibuf *b, uint8_t aid)
> +{
> +	int		errs = 0;
> +	uint16_t	afi;
> +	uint8_t		safi;
> +
> +	if (aid2afi(aid, &afi, &safi)) {
> +		log_warn("%s: bad AID", __func__);
> +		return (-1);
> +	}
> +
> +	errs += ibuf_add_n16(b, afi);
> +	errs += ibuf_add_n16(b, safi);
> +	errs += ibuf_add_n16(b, AFI_IPv6);
> +
> +	return (errs);
> +}
> +
>  struct ibuf *
>  session_newmsg(enum msg_type msgtype, uint16_t len)
>  {
> @@ -1517,7 +1534,22 @@ session_open(struct peer *p)
>  	if (p->capa.ann.refresh)	/* no data */
>  		errs += session_capa_add(opb, CAPA_REFRESH, 0);
>  
> -	/* extended message support, RFC8654 */
> +	/* extended nexthop encoding, RFC 8950 */
> +	if (p->capa.ann.ext_nh[AID_INET]) {
> +		uint8_t enhlen = 0;
> +
> +		if (p->capa.ann.mp[AID_INET])
> +			enhlen += 6;
> +		if (p->capa.ann.mp[AID_VPN_IPv4])
> +			enhlen += 6;
> +		errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen);
> +		if (p->capa.ann.mp[AID_INET])
> +			errs += session_capa_add_ext_nh(opb, AID_INET);
> +		if (p->capa.ann.mp[AID_VPN_IPv4])
> +			errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4);
> +	}
> +
> +	/* extended message support, RFC 8654 */
>  	if (p->capa.ann.ext_msg)	/* no data */
>  		errs += session_capa_add(opb, CAPA_EXT_MSG, 0);
>  
> @@ -2540,7 +2572,7 @@ int
>  parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as)
>  {
>  	struct ibuf	 capabuf;
> -	uint16_t	 afi, gr_header;
> +	uint16_t	 afi, nhafi, tmp16, gr_header;
>  	uint8_t		 capa_code, capa_len;
>  	uint8_t		 safi, aid, role, flags;
>  
> @@ -2582,6 +2614,38 @@ parse_capabilities(struct peer *peer, st
>  		case CAPA_REFRESH:
>  			peer->capa.peer.refresh = 1;
>  			break;
> +		case CAPA_EXT_NEXTHOP:
> +			while (ibuf_size(&capabuf) > 0) {
> +				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
> +				    ibuf_get_n16(&capabuf, &tmp16) == -1 ||
> +				    ibuf_get_n16(&capabuf, &nhafi) == -1) {
> +					log_peer_warnx(&peer->conf,
> +					    "Received bad %s capability",
> +					    log_capability(CAPA_EXT_NEXTHOP));
> +					memset(peer->capa.peer.ext_nh, 0,
> +					    sizeof(peer->capa.peer.ext_nh));
> +					break;
> +				}
> +				if (afi2aid(afi, tmp16, &aid) == -1 ||
> +				    !(aid == AID_INET || aid == AID_VPN_IPv4)) {
> +					log_peer_warnx(&peer->conf,
> +					    "Received %s capability: "
> +					    " unsupported AFI %u, safi %u pair",
> +					    log_capability(CAPA_EXT_NEXTHOP),
> +					    afi, safi);
> +					continue;
> +				}
> +				if (nhafi != AFI_IPv6) {
> +					log_peer_warnx(&peer->conf,
> +					    "Received %s capability: "
> +					    " unsupported nexthop AFI %u",
> +					    log_capability(CAPA_EXT_NEXTHOP),
> +					    nhafi);
> +					continue;
> +				}
> +				peer->capa.peer.ext_nh[aid] = 1;
> +			}
> +			break;
>  		case CAPA_EXT_MSG:
>  			peer->capa.peer.ext_msg = 1;
>  			break;
> @@ -2798,6 +2862,16 @@ capa_neg_calc(struct peer *p)
>  	    (p->capa.ann.grestart.grnotification &&
>  	    p->capa.peer.grestart.grnotification) != 0;
>  
> +	/* RFC 8950 extended nexthop encoding: both sides need to agree */
> +	memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
> +	for (i = AID_MIN; i < AID_MAX; i++) {
> +		if (p->capa.neg.mp[i] == 0)
> +			continue;
> +		if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) {
> +			p->capa.neg.ext_nh[i] = 1;
> +		}
> +	}
> +
>  	/*
>  	 * ADD-PATH: set only those bits where both sides agree.
>  	 * For this compare our send bit with the recv bit from the peer
> @@ -2929,6 +3003,17 @@ capa_neg_calc(struct peer *p)
>  		}
>  	}
>  
> +	for (i = AID_MIN; i < AID_MAX; i++) {
> +		if (p->capa.neg.mp[i] == 0)
> +			continue;
> +		if (p->capa.ann.ext_nh[i] == 2 &&
> +		    p->capa.neg.ext_nh[i] == 0) {
> +			capa_code = CAPA_EXT_NEXTHOP;
> +			capa_len = 6;
> +			capa_aid = i;
> +			goto fail;
> +		}
> +	}
>  	return (0);
>  
>   fail:
> @@ -2940,6 +3025,8 @@ capa_neg_calc(struct peer *p)
>  		session_capa_add_mp(ebuf, capa_aid);
>  	else if (capa_code == CAPA_ADD_PATH)
>  		session_capa_add_afi(ebuf, capa_aid, 0);
> +	else if (capa_code == CAPA_EXT_NEXTHOP)
> +		session_capa_add_ext_nh(ebuf, capa_aid);
>  	else if (capa_len > 0)
>  		ibuf_add_zero(ebuf, capa_len);
>  
>