Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: initial steps for RFC 8950 support
To:
tech@openbsd.org
Date:
Wed, 8 Jan 2025 16:20:45 +0100

Download raw body.

Thread
These are the first bits to support IPv6 nexthops for IPv4 routes, which
is defined by RFC 8950.

The big change of RFC 8950 is that when enabled some updates will use
MP_REACH_ATTR even for AID_INET. So this diff kind of implements those
bits but all of this is currently unreachable since peer_has_ext_nexthop()
never returns true.

This diff tries to error hard when a peer uses MP encoding for AID_INET
unless peer_has_ext_nexthop() is true. Also we never accept a
MP_UNREACH_ATTR for AID_INET.

It seems that the standards allow for 48-byte nexthops in AID_VPN_IPv6,
so adjust the check there as well. We always ignore the link-local
address, so just accept the extra length.

I rewrote the nexthop handling of up_generate_mp_reach() since fitting
different nexthops into the old code resulted in a lot of ugly code.

-- 
:wq Claudio

Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.507 bgpd.h
--- bgpd.h	7 Jan 2025 17:43:31 -0000	1.507
+++ bgpd.h	8 Jan 2025 15:08:24 -0000
@@ -415,17 +415,19 @@ struct capabilities {
 	}	grestart;
 	int8_t	mp[AID_MAX];		/* multiprotocol extensions, RFC 4760 */
 	int8_t	add_path[AID_MAX];	/* ADD_PATH, RFC 7911 */
+	int8_t	ext_nexthop[AID_MAX];	/* Ext Nexthop Encoding, RFC 8950 */
 	int8_t	refresh;		/* route refresh, RFC 2918 */
 	int8_t	as4byte;		/* 4-byte ASnum, RFC 4893 */
 	int8_t	enhanced_rr;		/* enhanced route refresh, RFC 7313 */
 	int8_t	policy;			/* Open Policy, RFC 9234, 2 = enforce */
-	int8_t	ext_msg;		/* Extended Msg, RFC8654 */
+	int8_t	ext_msg;		/* Extended Msg, RFC 8654 */
 };
 
 enum capa_codes {
 	CAPA_NONE = 0,
 	CAPA_MP = 1,
 	CAPA_REFRESH = 2,
+	CAPA_EXT_NEXTHOP = 5,
 	CAPA_EXT_MSG = 6,
 	CAPA_ROLE = 9,
 	CAPA_RESTART = 64,
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.647 rde.c
--- rde.c	4 Jan 2025 16:58:46 -0000	1.647
+++ rde.c	8 Jan 2025 13:48:38 -0000
@@ -1560,6 +1560,12 @@ rde_update_dispatch(struct rde_peer *pee
 				pathid = 0;
 
 			switch (aid) {
+			case AID_INET:
+				log_peer_warnx(&peer->conf,
+				    "bad MP withdraw for %s", aid2str(aid));
+				rde_update_err(peer, ERR_UPDATE,
+				    ERR_UPD_OPTATTR, &unreachbuf);
+				goto done;
 			case AID_INET6:
 				if (nlri_get_prefix6(&unreachbuf,
 				    &prefix, &prefixlen) == -1) {
@@ -1689,7 +1695,7 @@ rde_update_dispatch(struct rde_peer *pee
 			goto done;
 		}
 
-		if (aid == AID_INET6) {
+		if (aid == AID_INET6 || aid == AID_INET) {
 			/* inject open policy OTC attribute if needed */
 			if ((state.aspath.flags & F_ATTR_OTC) == 0) {
 				uint32_t tmp;
@@ -1740,6 +1746,20 @@ rde_update_dispatch(struct rde_peer *pee
 				pathid = 0;
 
 			switch (aid) {
+			case AID_INET:
+				/*
+				 * rde_get_mp_nexthop already enforces that
+				 * this is only used for RFC 8950.
+				 */
+				if (nlri_get_prefix(&reachbuf,
+				    &prefix, &prefixlen) == -1) {
+					log_peer_warnx(&peer->conf,
+					    "bad IPv4 MP nlri prefix");
+					rde_update_err(peer, ERR_UPDATE,
+					    ERR_UPD_OPTATTR, &reachbuf);
+					goto done;
+				}
+				break;
 			case AID_INET6:
 				if (nlri_get_prefix6(&reachbuf,
 				    &prefix, &prefixlen) == -1) {
@@ -2411,8 +2431,19 @@ rde_get_mp_nexthop(struct ibuf *buf, uin
 	if (ibuf_skip(buf, 1) == -1)
 		return (-1);
 
+	if (aid == AID_INET && peer_has_ext_nexthop(peer, AID_INET) &&
+	    (nhlen == 16 || nhlen == 32))
+		aid = AID_INET6;
+	if (aid == AID_VPN_IPv4 && peer_has_ext_nexthop(peer, AID_VPN_IPv4) &&
+	    (nhlen == 24 || nhlen == 48))
+		aid = AID_VPN_IPv6;
+
 	memset(&nexthop, 0, sizeof(nexthop));
 	switch (aid) {
+	case AID_INET:
+		log_peer_warnx(&peer->conf, "bad multiprotocol nexthop, "
+		    "IPv4 unexpected");
+		return (-1);
 	case AID_INET6:
 		/*
 		 * RFC2545 describes that there may be a link-local
@@ -2466,7 +2497,7 @@ rde_get_mp_nexthop(struct ibuf *buf, uin
 		nexthop.aid = AID_INET;
 		break;
 	case AID_VPN_IPv6:
-		if (nhlen != 24) {
+		if (nhlen != 24 && nhlen != 48) {
 			log_peer_warnx(&peer->conf, "bad %s nexthop, "
 			    "bad size %d", aid2str(aid), nhlen);
 			return (-1);
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.311 rde.h
--- rde.h	7 Jan 2025 17:43:31 -0000	1.311
+++ rde.h	8 Jan 2025 13:41:14 -0000
@@ -355,6 +355,8 @@ int		rde_match_peer(struct rde_peer *, s
 /* rde_peer.c */
 int		 peer_has_as4byte(struct rde_peer *);
 int		 peer_has_add_path(struct rde_peer *, uint8_t, int);
+int		 peer_has_ext_msg(struct rde_peer *);
+int		 peer_has_ext_nexthop(struct rde_peer *, uint8_t);
 int		 peer_accept_no_as_set(struct rde_peer *);
 void		 peer_init(struct filter_head *);
 void		 peer_shutdown(void);
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.43 rde_peer.c
--- rde_peer.c	7 Jan 2025 17:43:31 -0000	1.43
+++ rde_peer.c	8 Jan 2025 13:48:52 -0000
@@ -58,6 +58,20 @@ peer_has_add_path(struct rde_peer *peer,
 }
 
 int
+peer_has_ext_msg(struct rde_peer *peer)
+{
+	return (peer->capa.ext_msg);
+}
+
+int
+peer_has_ext_nexthop(struct rde_peer *peer, uint8_t aid)
+{
+	if (aid >= AID_MAX)
+		return 0;
+	return (peer->capa.ext_nexthop[aid]);
+}
+
+int
 peer_accept_no_as_set(struct rde_peer *peer)
 {
 	return (peer->flags & PEERFLAG_NO_AS_SET);
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
diff -u -p -r1.172 rde_update.c
--- rde_update.c	7 Jan 2025 12:11:45 -0000	1.172
+++ rde_update.c	8 Jan 2025 14:55:10 -0000
@@ -877,7 +877,7 @@ up_generate_mp_reach(struct ibuf *buf, s
     struct nexthop *nh, uint8_t aid)
 {
 	struct bgpd_addr *nexthop;
-	size_t off;
+	size_t off, nhoff;
 	uint16_t len, afi;
 	uint8_t safi;
 
@@ -898,59 +898,61 @@ up_generate_mp_reach(struct ibuf *buf, s
 		return -1;
 	if (ibuf_add_n8(buf, safi) == -1)
 		return -1;
+	nhoff = ibuf_size(buf);
+	if (ibuf_add_zero(buf, 1) == -1)
+		return -1;
 
 	switch (aid) {
-	case AID_INET6:
-		if (nh == NULL)
-			return -1;
-		/* NH LEN */
-		if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1)
-			return -1;
-		/* write nexthop */
-		nexthop = &nh->exit_nexthop;
-		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
-			return -1;
-		break;
 	case AID_VPN_IPv4:
-		if (nh == NULL)
-			return -1;
-		/* NH LEN */
-		if (ibuf_add_n8(buf,
-		    sizeof(uint64_t) + sizeof(struct in_addr)) == -1)
-			return -1;
+	case AID_VPN_IPv6:
 		/* write zero rd */
 		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
 			return -1;
-		/* write nexthop */
+	}
+
+	switch (aid) {
+	case AID_INET:
+	case AID_VPN_IPv4:
+		if (nh == NULL)
+			return -1;
 		nexthop = &nh->exit_nexthop;
-		if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1)
+		/* AID_INET must only use this path with an IPv6 nexthop */
+		if (nexthop->aid == AID_INET && aid != AID_INET) {
+			if (ibuf_add(buf, &nexthop->v4,
+			    sizeof(nexthop->v4)) == -1)
+				return -1;
+			break;
+		} else if (nexthop->aid == AID_INET6 &&
+		    peer_has_ext_nexthop(peer, aid)) {
+			if (ibuf_add(buf, &nexthop->v6,
+			    sizeof(nexthop->v6)) == -1)
+				return -1;
+		} else {
+			/* can't encode nexthop, give up and withdraw prefix */
 			return -1;
+		}
 		break;
+	case AID_INET6:
 	case AID_VPN_IPv6:
 		if (nh == NULL)
 			return -1;
-		/* NH LEN */
-		if (ibuf_add_n8(buf,
-		    sizeof(uint64_t) + sizeof(struct in6_addr)) == -1)
-			return -1;
-		/* write zero rd */
-		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
-			return -1;
-		/* write nexthop */
 		nexthop = &nh->exit_nexthop;
-		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
+		if (ibuf_add(buf, &nexthop->v6, sizeof(nexthop->v6)) == -1)
 			return -1;
 		break;
 	case AID_FLOWSPECv4:
 	case AID_FLOWSPECv6:
-		if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */
-			return -1;
 		/* no NH */
 		break;
 	default:
 		fatalx("up_generate_mp_reach: unknown AID");
 	}
 
+	/* update nexthop len */
+	len = ibuf_size(buf) - nhoff - 1;
+	if (ibuf_set_n8(buf, nhoff, len) == -1)
+		return -1;
+
 	if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
 		return -1;
 
@@ -999,7 +1001,7 @@ up_dump_withdraws(struct rde_peer *peer,
 	uint16_t afi, len;
 	uint8_t safi;
 
-	if (peer->capa.ext_msg)
+	if (peer_has_ext_msg(peer))
 		pkgsize = MAX_EXT_PKTSIZE;
 
 	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
@@ -1148,14 +1150,21 @@ up_dump_update(struct rde_peer *peer, ui
 	struct prefix *p;
 	size_t off, pkgsize = MAX_PKTSIZE;
 	uint16_t len;
+	int force_ip4mp = 0;
 
 	p = RB_MIN(prefix_tree, &peer->updates[aid]);
 	if (p == NULL)
 		return NULL;
 
-	if (peer->capa.ext_msg)
+	if (peer_has_ext_msg(peer))
 		pkgsize = MAX_EXT_PKTSIZE;
 
+	if (aid == AID_INET && peer_has_ext_nexthop(peer, AID_INET)) {
+		struct nexthop *nh = prefix_nexthop(p);
+		if (nh != NULL && nh->exit_nexthop.aid == AID_INET6)
+			force_ip4mp = 1;
+	}
+
 	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
 		goto fail;
 
@@ -1172,7 +1181,7 @@ up_dump_update(struct rde_peer *peer, ui
 	    prefix_communities(p), prefix_nexthop(p), aid) == -1)
 		goto drop;
 
-	if (aid != AID_INET) {
+	if (aid != AID_INET || force_ip4mp) {
 		/* write mp attribute including nlri */
 
 		/*
@@ -1191,7 +1200,7 @@ up_dump_update(struct rde_peer *peer, ui
 	if (ibuf_set_n16(buf, off, len) == -1)
 		goto fail;
 
-	if (aid == AID_INET) {
+	if (aid == AID_INET && !force_ip4mp) {
 		/* last but not least dump the IPv4 nlri */
 		if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
 			goto drop;
Index: util.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/util.c,v
diff -u -p -r1.90 util.c
--- util.c	7 Jan 2025 19:24:53 -0000	1.90
+++ util.c	8 Jan 2025 10:24:50 -0000
@@ -323,6 +323,8 @@ log_capability(uint8_t capa)
 		return "Multiprotocol Extensions";
 	case CAPA_REFRESH:
 		return "Route Refresh";
+	case CAPA_EXT_NEXTHOP:
+		return "Extended Nexhop Encoding";
 	case CAPA_EXT_MSG:
 		return "Extended Message";
 	case CAPA_ROLE: