Index | Thread | Search

From:
David Gwynne <david@gwynne.id.au>
Subject:
erspan(4): ERSPAN Type II collection
To:
tech@openbsd.org
Date:
Mon, 12 May 2025 11:27:59 +1000

Download raw body.

Thread
we were exploring how to better let us see what's happening on access
networks or specific ports on a switch at work. our switches are
pretty much all cisco, which has ERSPAN.

ERSPAN in its various forms ships Ethernet packets over GRE for
collection and analysis on another system. There are 3 types of ERSPAN
encapsulation, but Type II seems broadly implemented.

this implements support for ERSPAN Type II in OpenBSD as a tunnel
interface. this allows OpenBSD to collect ERSPAN encapsulated packets
with existing tooling, particularly those built around BPF (like
tcpdump and bpflogd). it also supports transmitting packets, so it can
be added as a span port on bridge(4) or veb(4). unfortunately this means
it can actually work as a tunnel interface, but i can't stop people
doing every silly thing they want to do.

ERSPAN type II is supposed to be configured between two endpoints, a
sender and receiver, and includes a Session ID in its shim header so
you can tell different capture sessions apart. this configuration is
mapped to ifconfig erspanX tunnel LOCALIP REMOTEIP for the endpoint
addresses, and ifconfig erspanX vnetid SESSIONID for the session id.

you can wildcard the remote ip by not specifying one, ie, ifconfig
erspan0 tunneladdr LOCALIP. this lets the interface capture all erspan
packets sent to it, regardless of where they come from. however, this
disables its ability to transmit packets. in this mode you can also
unset the vnetid to allow reception of packets for all ERSPAN sessions.

the driver shows the encapsulated Ethernet packets via BPF, ie, tcpdump
-i erspanX will show the Ethernet packets that are captured on the remote
device. it also supports BPF with the DLT_LOOP type to show the
encapsulating IP, GRE, and ERSPAN headers. eg:

if i have a collector interface set up like this:

$ ifconfig erspan0
erspan0: flags=1008843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST,MONITOR> mtu 1500
	lladdr fe:e1:ba:d0:9e:d8
	index 7 priority 0 llprio 3
	encap: vnetid none txprio packet rxprio payload
	groups: erspan
	tunnel: inet 192.168.2.3 ttl 64 nodf

and the remote device captured this Ethernet frame:

11:16:26.975309 78:72:5d:29:b9:bf 01:00:5e:00:00:66 8100 118: 802.1Q vid 33 pri 6 10.153.135.3.1985 > 224.0.0.102.1985: [udp sum ok] HSRPv1 [tos 0xc0] [ttl 1] (id 0, len 100)

you can also capture the transport header so you can tell where it
came from with tcpdump -y LOOP (-vveeni erspan0 -s1600), which looks
like this:

11:16:26.975309 192.168.128.153 > 192.168.2.3: gre [S] 88be seq 0 erspan II session 11 vlan payload cos 0 index 2752: 78:72:5d:29:b9:bf 01:00:5e:00:00:66 8100 118: 802.1Q vid 333 pri 6 192.168.135.3.1985 > 224.0.0.102.1985: [udp sum ok] HSRPv1 [tos 0xc0] [ttl 1] (id 0, len 100) (ttl 248, id 55158, len 154)

192.168.128.153 is a switch that has a capture session configured.

wiring this into bpf also means i can use bpflogd to write all the
collected packets to disk without stopping people running tcpdump
against the interface to watch packets live, or even use something like
the remote ssh capture stuff in wireshark.

cos erspan is GRE, it can reuse a lot of what's already there in
if_gre.c

as well as cisco devices, i've tested this as a collector for monitoring
sessions from a vmware dvswitch.

i don't know if anyone else is interested in this stuff or if it's just
me.

Index: if_gre.c
===================================================================
RCS file: /cvs/src/sys/net/if_gre.c,v
diff -u -p -r1.184 if_gre.c
--- if_gre.c	2 Mar 2025 21:28:31 -0000	1.184
+++ if_gre.c	12 May 2025 00:51:19 -0000
@@ -152,7 +152,9 @@ struct gre_h_wccp {
 	uint8_t			pri_bucket;
 } __packed __aligned(4);
 
-#define GRE_WCCP 0x883e
+#define GRE_WCCP		0x883e
+#define GRE_ERSPAN		0x88be /* ERSPAN Type I and Type II */
+#define GRE_ERSPAN_III		0x22eb /* ERSPAN Type III */
 
 #define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
 
@@ -535,6 +537,75 @@ struct if_clone eoip_cloner =
 struct eoip_tree eoip_tree = RBT_INITIALIZER();
 
 /*
+ * ERSPAN support
+ */
+
+/* ERSPAN Type II header; both words are big-endian on the wire. */
+struct gre_h_erspan {
+	uint32_t		hdr;
+#define ERSPAN_II_VER_SHIFT		28
+#define ERSPAN_II_VER_MASK		0xf
+#define ERSPAN_II_VER			0x1
+#define ERSPAN_II_VLAN_SHIFT		16
+#define ERSPAN_II_VLAN_MASK		0xfff
+#define ERSPAN_II_COS_SHIFT		13
+#define ERSPAN_II_COS_MASK		0x7
+#define ERSPAN_II_EN_SHIFT		11 /* Encapsulation type */
+#define ERSPAN_II_EN_MASK		0x3
+#define ERSPAN_II_EN_NONE		0x0
+#define ERSPAN_II_EN_ISL		0x1
+#define ERSPAN_II_EN_VLAN		0x2
+#define ERSPAN_II_EN_PRESERVED		0x3
+#define ERSPAN_II_T			(0x1 << 10)
+#define ERSPAN_II_SESSION_ID_SHIFT	0
+#define ERSPAN_II_SESSION_ID_MASK	0x3ff /* 10 bits */
+	uint32_t		index;
+#define ERSPAN_II_INDEX_SHIFT		0
+#define ERSPAN_II_INDEX_MASK		0xfffff /* 20 bits */
+};
+
+/*
+ * Per-interface state for an erspan interface.
+ *
+ * erspan_input() looks interfaces up by casting a struct gre_tunnel
+ * pointer to a struct erspan_softc pointer, so sc_tunnel has to stay
+ * the first member.
+ */
+struct erspan_softc {
+	struct gre_tunnel	sc_tunnel; /* must be first */
+	/*
+	 * NOTE(review): sc_session_id is not referenced by the code in
+	 * this diff; the session id is kept in sc_tunnel.t_key.
+	 */
+	int			sc_session_id;
+	/* linkage in erspan_tree while the interface is running */
+	RBT_ENTRY(erspan_softc)	sc_entry;
+
+	struct arpcom		sc_ac;
+	/* GRE sequence number for the next transmitted packet */
+	uint32_t		sc_seq;
+	/* DLT_LOOP tap that sees the encapsulating headers */
+	caddr_t			sc_bpf;
+};
+
+RBT_HEAD(erspan_tree, erspan_softc);
+
+static inline int
+		erspan_cmp(const struct erspan_softc *,
+		    const struct erspan_softc *);
+
+RBT_PROTOTYPE(erspan_tree, erspan_softc, sc_entry, erspan_cmp);
+
+static int	erspan_clone_create(struct if_clone *, int);
+static int	erspan_clone_destroy(struct ifnet *);
+
+static void	erspan_start(struct ifnet *);
+static int	erspan_ioctl(struct ifnet *, u_long, caddr_t);
+
+static int	erspan_up(struct erspan_softc *);
+static int	erspan_down(struct erspan_softc *);
+
+static struct mbuf *
+		erspan_encap(struct erspan_softc *, struct mbuf *, uint8_t,
+		    uint32_t);
+
+static struct mbuf *
+		erspan_input(struct gre_tunnel *, struct mbuf *, int,
+		    const struct gre_header *, uint8_t, struct netstack *);
+
+struct if_clone erspan_cloner =
+    IF_CLONE_INITIALIZER("erspan", erspan_clone_create, erspan_clone_destroy);
+
+/* protected by NET_LOCK */
+struct erspan_tree erspan_tree = RBT_INITIALIZER();
+
+/*
  * It is not easy to calculate the right value for a GRE MTU.
  * We leave this task to the admin and use the same default that
  * other vendors use.
@@ -561,6 +632,7 @@ greattach(int n)
 	if_clone_attach(&egre_cloner);
 	if_clone_attach(&nvgre_cloner);
 	if_clone_attach(&eoip_cloner);
+	if_clone_attach(&erspan_cloner);
 }
 
 static int
@@ -889,6 +961,7 @@ eoip_clone_destroy(struct ifnet *ifp)
 	return (0);
 }
 
+
 int
 gre_input(struct mbuf **mp, int *offp, int type, int af, struct netstack *ns)
 {
@@ -1044,6 +1117,18 @@ gre_input_key(struct mbuf **mp, int *off
 		goto decline;
 	}
 
+	/*
+	 * ERSPAN I uses no bits in the header, and II uses sequence numbers.
+	 * handle them before limiting what flags we support. 
+	 */
+	if (gh->gre_proto == htons(GRE_ERSPAN)) {
+		m = erspan_input(key, m, iphlen, gh, otos, ns);
+		if (m == NULL)
+			return (IPPROTO_DONE);
+
+		goto decline;
+	}
+
 	/* the only optional bit in the header is K flag */
 	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
 		goto decline;
@@ -4237,10 +4322,8 @@ RBT_GENERATE(nvgre_ucast_tree, nvgre_sof
 RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
 
 static inline int
-eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
+gre_tunnel_key_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
 {
-	const struct gre_tunnel *a = &ea->sc_tunnel;
-	const struct gre_tunnel *b = &eb->sc_tunnel;
 	int rv;
 
 	if (a->t_key > b->t_key)
@@ -4269,6 +4352,13 @@ eoip_cmp(const struct eoip_softc *ea, co
 		return (rv);
 
 	return (0);
+
+}
+
+/* Order eoip interfaces in eoip_tree by their tunnel configuration. */
+static inline int
+eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
+{
+	return (gre_tunnel_key_cmp(&ea->sc_tunnel, &eb->sc_tunnel));
 }
 
 RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
@@ -4342,3 +4432,652 @@ nvgre_eb_port_sa(void *arg, struct socka
 		unhandled_af(sc->sc_tunnel.t_af);
 	}
 }
+
+/*
+ * ERSPAN
+ */
+
+/*
+ * Create the erspan interface with the given unit number.
+ *
+ * Allocates a zeroed softc, sets it up as a cloned Ethernet interface
+ * with the monitor flag set, fills in the tunnel defaults, and attaches
+ * it along with a second BPF tap using DLT_LOOP.  Always returns 0.
+ */
+static int
+erspan_clone_create(struct if_clone *ifc, int unit)
+{
+	struct erspan_softc *sc;
+	struct ifnet *ifp;
+
+	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+	ifp = &sc->sc_ac.ac_if;
+
+	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
+	    ifc->ifc_name, unit);
+
+	ifp->if_softc = sc;
+	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
+	ifp->if_ioctl = erspan_ioctl;
+	ifp->if_start = erspan_start;
+	ifp->if_xflags = IFXF_CLONED | IFXF_MONITOR;
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+#if 0 && NVLAN > 0
+	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
+#endif
+	ether_fakeaddr(ifp);
+
+	/* a key of ~0 means no session id (vnetid) is configured */
+	sc->sc_tunnel.t_key = ~0;
+	sc->sc_tunnel.t_ttl = ip_defttl;
+	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PACKET; /* XXX */
+	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PAYLOAD;
+	sc->sc_tunnel.t_df = htons(0);
+
+	if_counters_alloc(ifp);
+	if_attach(ifp);
+	ether_ifattach(ifp);
+
+#if NBPFILTER > 0
+	/* attach after Ethernet */
+	bpfattach(&sc->sc_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
+#endif
+
+	return (0);
+}
+
+/*
+ * Destroy an erspan interface.
+ *
+ * Takes the interface down under the net lock if it is still running,
+ * detaches it, and frees the softc.  Always returns 0.
+ */
+static int
+erspan_clone_destroy(struct ifnet *ifp)
+{
+	struct erspan_softc *sc = ifp->if_softc;
+
+	/* erspan_down() asserts the net lock is held */
+	NET_LOCK();
+	if (ISSET(ifp->if_flags, IFF_RUNNING))
+		erspan_down(sc);
+	NET_UNLOCK();
+
+	ether_ifdetach(ifp);
+	if_detach(ifp);
+
+	free(sc, M_DEVBUF, sizeof(*sc));
+
+	return (0);
+}
+
+/*
+ * Configure the tunnel endpoints from a SIOCSLIFPHYADDR request.
+ *
+ * req->addr is the local address and is mandatory.  req->dstaddr is
+ * either AF_UNSPEC, which leaves the remote address as a wildcard, or
+ * an address of the same family as the local one.  t_key_mask records
+ * whether a remote address was given.
+ *
+ * Nothing in the tunnel is modified unless the whole request is valid,
+ * so a rejected request cannot clobber the current configuration.
+ *
+ * Returns 0 on success, EINVAL for malformed or unusable addresses,
+ * EAFNOSUPPORT for an unsupported local address family, or the error
+ * from in6_embedscope().
+ */
+static int
+erspan_set_tunnel(struct erspan_softc *sc, struct if_laddrreq *req)
+{
+	struct gre_tunnel *tunnel = &sc->sc_tunnel;
+	struct sockaddr *addr = (struct sockaddr *)&req->addr;
+	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
+	struct sockaddr_in *src4, *dst4;
+	struct in_addr in4dst;
+#ifdef INET6
+	struct sockaddr_in6 *src6, *dst6;
+	struct in6_addr in6src, in6dst;
+	int error;
+#endif
+	uint32_t mask = 0;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		/* validate */
+		if (addr->sa_len != sizeof(*src4))
+			return (EINVAL);
+
+		src4 = (struct sockaddr_in *)addr;
+		if (in_nullhost(src4->sin_addr) ||
+		    IN_MULTICAST(src4->sin_addr.s_addr))
+			return (EINVAL);
+
+		if (dstaddr->sa_family == AF_UNSPEC) {
+			/* wildcard remote address */
+			in4dst.s_addr = INADDR_ANY;
+		} else {
+			if (dstaddr->sa_family != AF_INET ||
+			    dstaddr->sa_len != sizeof(*dst4))
+				return (EINVAL);
+
+			dst4 = satosin(dstaddr);
+			if (in_nullhost(dst4->sin_addr) ||
+			    IN_MULTICAST(dst4->sin_addr.s_addr))
+				return (EINVAL);
+
+			in4dst = dst4->sin_addr;
+			mask = 1;
+		}
+
+		/* commit */
+		tunnel->t_src4 = src4->sin_addr;
+		tunnel->t_dst4 = in4dst;
+
+		break;
+#ifdef INET6
+	case AF_INET6:
+		/* validate */
+		if (addr->sa_len != sizeof(*src6))
+			return (EINVAL);
+
+		src6 = (struct sockaddr_in6 *)addr;
+		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
+		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
+			return (EINVAL);
+
+		/* embed into locals so a later failure has no side effects */
+		error = in6_embedscope(&in6src, src6, NULL, NULL);
+		if (error != 0)
+			return (error);
+
+		if (dstaddr->sa_family == AF_UNSPEC) {
+			/* wildcard remote address */
+			memset(&in6dst, 0, sizeof(in6dst));
+		} else {
+			if (dstaddr->sa_family != AF_INET6 ||
+			    dstaddr->sa_len != sizeof(*dst6))
+				return (EINVAL);
+
+			dst6 = satosin6(dstaddr);
+			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
+			    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
+				return (EINVAL);
+
+			if (src6->sin6_scope_id != dst6->sin6_scope_id)
+				return (EINVAL);
+
+			error = in6_embedscope(&in6dst, dst6, NULL, NULL);
+			if (error != 0)
+				return (error);
+			mask = 1;
+		}
+
+		/* commit */
+		tunnel->t_src6 = in6src;
+		tunnel->t_dst6 = in6dst;
+
+		break;
+#endif
+	default:
+		return (EAFNOSUPPORT);
+	}
+
+	/* commit */
+	tunnel->t_af = addr->sa_family;
+	tunnel->t_key_mask = mask; /* set if dstaddr set */
+
+	return (0);
+}
+
+/*
+ * Report the tunnel endpoints for SIOCGLIFPHYADDR.
+ *
+ * req->addr is filled in with the local address.  req->dstaddr is
+ * filled in with the remote address if one is configured (t_key_mask
+ * is set), otherwise it is reported as AF_UNSPEC.
+ *
+ * Returns 0 on success, or EADDRNOTAVAIL if no tunnel addresses are
+ * configured.
+ */
+static int
+erspan_get_tunnel(struct erspan_softc *sc, struct if_laddrreq *req)
+{
+	struct gre_tunnel *tunnel = &sc->sc_tunnel;
+	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
+	struct sockaddr_in *sin;
+#ifdef INET6
+	struct sockaddr_in6 *sin6;
+#endif
+
+	switch (tunnel->t_af) {
+	case AF_UNSPEC:
+		return (EADDRNOTAVAIL);
+	case AF_INET:
+		sin = (struct sockaddr_in *)&req->addr;
+		memset(sin, 0, sizeof(*sin));
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr = tunnel->t_src4;
+
+		/* no remote address configured */
+		if (!tunnel->t_key_mask)
+			goto unspec;
+
+		sin = (struct sockaddr_in *)dstaddr;
+		memset(sin, 0, sizeof(*sin));
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr = tunnel->t_dst4;
+		break;
+
+#ifdef INET6
+	case AF_INET6:
+		sin6 = (struct sockaddr_in6 *)&req->addr;
+		memset(sin6, 0, sizeof(*sin6));
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_len = sizeof(*sin6);
+		in6_recoverscope(sin6, &tunnel->t_src6);
+
+		/* no remote address configured */
+		if (!tunnel->t_key_mask)
+			goto unspec;
+
+		sin6 = (struct sockaddr_in6 *)dstaddr;
+		memset(sin6, 0, sizeof(*sin6));
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_len = sizeof(*sin6);
+		in6_recoverscope(sin6, &tunnel->t_dst6);
+		break;
+#endif
+	default:
+		unhandled_af(tunnel->t_af);
+	}
+
+	return (0);
+
+unspec:
+	/* only the sockaddr header is filled in for the missing dstaddr */
+	dstaddr->sa_len = 2;
+	dstaddr->sa_family = AF_UNSPEC;
+
+	return (0);
+}
+
+/*
+ * ioctl handler for erspan interfaces.
+ *
+ * Handles interface flags, the session id (vnetid), the tunnel
+ * addresses, routing table, TTL, DF, and header priority settings.
+ * Anything else is passed to ether_ioctl().  The session id, tunnel
+ * addresses, and routing table can only be changed while the interface
+ * is not running; attempts to do so return EBUSY.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+erspan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct erspan_softc *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *)data;
+	int error = 0;
+
+	switch(cmd) {
+	case SIOCSIFADDR:
+		break;
+	case SIOCSIFFLAGS:
+		if (ISSET(ifp->if_flags, IFF_UP)) {
+			if (!ISSET(ifp->if_flags, IFF_RUNNING))
+				error = erspan_up(sc);
+			else
+				error = 0;
+		} else {
+			if (ISSET(ifp->if_flags, IFF_RUNNING))
+				error = erspan_down(sc);
+		}
+		break;
+
+	case SIOCSVNETID:
+		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+			error = EBUSY;
+			break;
+		}
+		/* the session id has to fit in the ERSPAN header field */
+		if (ifr->ifr_vnetid < 0 ||
+		    ifr->ifr_vnetid > ERSPAN_II_SESSION_ID_MASK)
+			return (EINVAL);
+
+		sc->sc_tunnel.t_key = ifr->ifr_vnetid; /* for cmp */
+		break;
+	case SIOCGVNETID:
+		/* ~0 means no session id is configured */
+		if (sc->sc_tunnel.t_key == ~0)
+			return (EADDRNOTAVAIL);
+		ifr->ifr_vnetid = sc->sc_tunnel.t_key;
+		break;
+	case SIOCDVNETID:
+		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+			error = EBUSY;
+			break;
+		}
+		sc->sc_tunnel.t_key = ~0;
+		break;
+
+	case SIOCSLIFPHYADDR:
+		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+			error = EBUSY;
+			break;
+		}
+
+		error = erspan_set_tunnel(sc, (struct if_laddrreq *)data);
+		break;
+	case SIOCGLIFPHYADDR:
+		error = erspan_get_tunnel(sc, (struct if_laddrreq *)data);
+		break;
+	case SIOCDIFPHYADDR:
+		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+			error = EBUSY;
+			break;
+		}
+
+		/* commit */
+		sc->sc_tunnel.t_af = AF_UNSPEC;
+		sc->sc_tunnel.t_key_mask = 0; /* dstaddr is not set */
+		break;
+
+	case SIOCSLIFPHYRTABLE:
+		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+			error = EBUSY;
+			break;
+		}
+
+		if (ifr->ifr_rdomainid < 0 ||
+		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
+		    !rtable_exists(ifr->ifr_rdomainid)) {
+			error = EINVAL;
+			break;
+		}
+		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
+		break;
+	case SIOCGLIFPHYRTABLE:
+		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
+		break;
+
+	case SIOCSLIFPHYTTL:
+		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
+			error = EINVAL;
+			break;
+		}
+
+		/* commit */
+		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
+		break;
+	case SIOCGLIFPHYTTL:
+		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
+		break;
+
+	case SIOCSLIFPHYDF:
+		/* commit */
+		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
+		break;
+	case SIOCGLIFPHYDF:
+		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
+		break;
+
+	case SIOCSTXHPRIO:
+		/*
+		 * NOTE(review): erspan_start() turns t_txhprio into a tos
+		 * with gre_l2_tos(); confirm every value accepted by the
+		 * l3 check here is one gre_l2_tos() handles.
+		 */
+		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
+		if (error != 0)
+			break;
+
+		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
+		break;
+	case SIOCGTXHPRIO:
+		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
+		break;
+
+	case SIOCSRXHPRIO:
+		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
+		if (error != 0)
+			break;
+
+		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
+		break;
+	case SIOCGRXHPRIO:
+		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		break;
+
+	default:
+		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
+		break;
+	}
+
+	if (error == ENETRESET) {
+		/* no hardware to program */
+		error = 0;
+	}
+
+	return (error);
+}
+
+/*
+ * Bring the interface up by inserting it into erspan_tree.
+ *
+ * Returns EDESTADDRREQ if no tunnel addresses are configured or if the
+ * session id is a wildcard while a remote address is set, EADDRINUSE if
+ * an interface with the same configuration is already in the tree, and
+ * 0 on success.
+ */
+static int
+erspan_up(struct erspan_softc *sc)
+{
+	struct gre_tunnel *tunnel = &sc->sc_tunnel;
+
+	if (tunnel->t_af == AF_UNSPEC)
+		return (EDESTADDRREQ);
+	if (tunnel->t_key == ~0 && tunnel->t_key_mask) {
+		/* wildcard session id is only allowed if t_dst is not set */
+		return (EDESTADDRREQ);
+	}
+
+	NET_ASSERT_LOCKED();
+
+	if (RBT_INSERT(erspan_tree, &erspan_tree, sc) != NULL)
+		return (EADDRINUSE);
+
+	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
+
+	return (0);
+}
+
+/*
+ * Take the interface down: clear IFF_RUNNING and remove it from
+ * erspan_tree.  The net lock must be held.  Always returns 0.
+ */
+static int
+erspan_down(struct erspan_softc *sc)
+{
+	NET_ASSERT_LOCKED();
+	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
+
+	RBT_REMOVE(erspan_tree, &erspan_tree, sc);
+
+	return (0);
+}
+
+/*
+ * Transmit the packets queued on the interface.
+ *
+ * Each Ethernet packet is shown to the Ethernet BPF tap, encapsulated
+ * with erspan_encap(), shown to the DLT_LOOP BPF tap, and sent with
+ * gre_ip_output().  The send queue is purged instead if gre_allow is
+ * off, no remote address is configured, or no session id is set.
+ * Every packet that is dropped in the loop is counted in if_oerrors.
+ */
+static void
+erspan_start(struct ifnet *ifp)
+{
+	struct erspan_softc *sc = ifp->if_softc;
+	struct mbuf *m0, *m;
+	uint32_t session_id = sc->sc_tunnel.t_key;
+#if NBPFILTER > 0
+	caddr_t if_bpf;
+#endif
+
+	if (!atomic_load_int(&gre_allow) ||
+	    !sc->sc_tunnel.t_key_mask || /* dstaddr is not set */
+	    session_id == ~0) {
+		ifq_purge(&ifp->if_snd);
+		return;
+	}
+
+	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
+#if NBPFILTER > 0
+		if_bpf = ifp->if_bpf;
+		if (if_bpf)
+			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
+#endif
+
+		/* force prepend mbuf because of alignment problems */
+		m = m_get(M_DONTWAIT, m0->m_type);
+		if (m == NULL) {
+			m_freem(m0);
+			ifp->if_oerrors++;
+			continue;
+		}
+
+		M_MOVE_PKTHDR(m, m0);
+		m->m_next = m0;
+
+		m_align(m, 0);
+		m->m_len = 0;
+
+		m = erspan_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m),
+		    session_id);
+		if (m == NULL) {
+			ifp->if_oerrors++;
+			continue;
+		}
+#if NBPFILTER > 0
+		if_bpf = sc->sc_bpf;
+		if (if_bpf) {
+			bpf_mtap_af(if_bpf, sc->sc_tunnel.t_af, m,
+			    BPF_DIRECTION_OUT);
+		}
+#endif
+		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
+			ifp->if_oerrors++;
+			continue;
+		}
+	}
+}
+
+/*
+ * Prepend the GRE, GRE sequence number, and ERSPAN Type II headers to
+ * m, then pass it to gre_encap_ip() with the tunnel TTL and tos.
+ *
+ * The ERSPAN header carries the version, the packet priority as the
+ * CoS, and session_id.  The index word is the packet's ph_ifidx if
+ * IFF_LINK0 is set on the interface, otherwise 0.  Every value is
+ * masked to the width of its field so it cannot spill into its
+ * neighbours.
+ *
+ * Returns the result of gre_encap_ip(), or NULL if m_prepend() failed.
+ */
+static struct mbuf *
+erspan_encap(struct erspan_softc *sc, struct mbuf *m, uint8_t tos,
+    uint32_t session_id)
+{
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+	struct gre_header *gh;
+	struct gre_h_seq *seqh;
+	struct gre_h_erspan *erspanh;
+	uint32_t hdr, index;
+
+	m = m_prepend(m, sizeof(*gh) + sizeof(*seqh) + sizeof(*erspanh),
+	    M_DONTWAIT);
+	if (m == NULL)
+		return (NULL);
+
+	gh = mtod(m, struct gre_header *);
+	gh->gre_flags = htons(GRE_VERS_0 | GRE_SP);
+	gh->gre_proto = htons(GRE_ERSPAN);
+
+	seqh = (struct gre_h_seq *)(gh + 1);
+	htobem32(&seqh->gre_seq, sc->sc_seq++);
+
+	hdr = (session_id & ERSPAN_II_SESSION_ID_MASK) <<
+	    ERSPAN_II_SESSION_ID_SHIFT;
+	hdr |= (m->m_pkthdr.pf.prio & ERSPAN_II_COS_MASK) <<
+	    ERSPAN_II_COS_SHIFT;
+#if 0 && NVLAN > 0
+	if (ISSET(m->m_flags, M_VLANTAG)) {
+		hdr |= ERSPAN_II_EN_VLAN << ERSPAN_II_EN_SHIFT;
+		hdr |= (m->m_pkthdr.ether_vlan & ERSPAN_II_VLAN_MASK) <<
+		    ERSPAN_II_VLAN_SHIFT;
+		CLR(m->m_flags, M_VLANTAG);
+	} /* else?? */
+#endif
+	hdr |= ERSPAN_II_VER << ERSPAN_II_VER_SHIFT;
+
+	index = 0;
+	if (ISSET(ifp->if_flags, IFF_LINK0))
+		index = m->m_pkthdr.ph_ifidx & ERSPAN_II_INDEX_MASK;
+
+	erspanh = (struct gre_h_erspan *)(seqh + 1);
+	htobem32(&erspanh->hdr, hdr);
+	htobem32(&erspanh->index, index << ERSPAN_II_INDEX_SHIFT);
+
+	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
+}
+
+/*
+ * Handle a received GRE packet carrying the ERSPAN protocol type.
+ *
+ * key describes the outer addresses and routing table of the packet;
+ * its t_key is overwritten here with the session id from the ERSPAN
+ * header.  The interface is looked up in erspan_tree in three steps:
+ * an exact match on key, then an interface without a remote address
+ * but with the same session id, then an interface with neither.
+ *
+ * Only a packet that matched exactly, passed the DLT_LOOP BPF tap,
+ * and does not have the T bit set is handed to if_vinput().  Every
+ * other accepted packet is only shown to the Ethernet BPF tap and then
+ * freed.
+ *
+ * Returns NULL if the mbuf was consumed (delivered, freed, or lost in
+ * m_pullup() or gre_ether_align()), or the mbuf if the packet is
+ * declined and the caller should keep processing it.
+ */
+static struct mbuf *
+erspan_input(struct gre_tunnel *key, struct mbuf *m, int iphlen,
+    const struct gre_header *gh, uint8_t otos, struct netstack *ns)
+{
+	struct erspan_softc *sc;
+	struct ifnet *ifp;
+	struct gre_h_seq *seqh;
+	struct gre_h_erspan *erspanh;
+	uint32_t hdr;
+	int hlen;
+	caddr_t buf;
+	int input = 1;
+	int rxprio;
+#if NBPFILTER > 0
+	caddr_t if_bpf;
+#endif
+
+	/* ERSPAN Type II */
+	if (gh->gre_flags != htons(GRE_SP | GRE_VERS_0))
+		goto decline;
+
+	hlen = iphlen + sizeof(*gh) + sizeof(*seqh) + sizeof(*erspanh);
+	if (m->m_pkthdr.len < hlen)
+		goto decline;
+
+	m = m_pullup(m, hlen);
+	if (m == NULL)
+		return (NULL);
+
+	/* m_pullup may have moved the headers, so find them again */
+	buf = mtod(m, caddr_t);
+	gh = (struct gre_header *)(buf + iphlen);
+	seqh = (struct gre_h_seq *)(gh + 1);
+	erspanh = (struct gre_h_erspan *)(seqh + 1);
+
+	hdr = bemtoh32(&erspanh->hdr);
+
+	key->t_key = (hdr >> ERSPAN_II_SESSION_ID_SHIFT) &
+	    ERSPAN_II_SESSION_ID_MASK;
+
+	NET_ASSERT_LOCKED();
+	sc = RBT_FIND(erspan_tree, &erspan_tree,
+	    (const struct erspan_softc *)key);
+	if (sc == NULL) {
+		/* try for a wildcard listener */
+		struct gre_tunnel wkey = {
+			.t_af = key->t_af,
+			.t_rtableid = key->t_rtableid,
+			.t_src = key->t_src,
+			.t_key = key->t_key,
+		};
+
+		/* wildcard listeners only get to see the packet via BPF */
+		input = 0;
+		sc = RBT_FIND(erspan_tree, &erspan_tree,
+		    (const struct erspan_softc *)&wkey);
+		if (sc == NULL) {
+			/* last resort is a wildcard listener without a key */
+			wkey.t_key = ~0;
+			sc = RBT_FIND(erspan_tree, &erspan_tree,
+			    (const struct erspan_softc *)&wkey);
+			if (sc == NULL) {
+				goto decline;
+			}
+		}
+	}
+
+	/* it's ours now */
+	ifp = &sc->sc_ac.ac_if;
+
+#if NBPFILTER > 0
+	if_bpf = sc->sc_bpf;
+	if (if_bpf) {
+		if (bpf_mtap_af(if_bpf, key->t_af, m, BPF_DIRECTION_IN))
+			input = 0;
+	}
+#endif
+
+#if 0
+	/*
+	 * this appears to be metadata from the switch rather than
+	 * an offload for the payload.
+	 */
+	switch ((hdr >> ERSPAN_II_EN_SHIFT) & ERSPAN_II_EN_MASK) {
+	case ERSPAN_II_EN_ISL: /* this is cheeky */
+	case ERSPAN_II_EN_VLAN:
+#if NVLAN > 0
+		m->m_pkthdr.ether_vtag = (hdr >> ERSPAN_II_VLAN_SHIFT) &
+		    ERSPAN_II_VLAN_MASK;
+		m->m_pkthdr.ether_vtag |= ((hdr >> ERSPAN_II_COS_SHIFT) &
+		    ERSPAN_II_COS_MASK) << 13;
+		m->m_flags |= M_VLANTAG;
+#else
+		input = 0;
+#endif
+		break;
+	default:
+		break;
+	}
+#endif
+
+	rxprio = sc->sc_tunnel.t_rxhprio;
+	switch (rxprio) {
+	case IF_HDRPRIO_PACKET:
+		/* nop */
+		break;
+	case IF_HDRPRIO_OUTER:
+		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
+		break;
+	case IF_HDRPRIO_PAYLOAD:
+		m->m_pkthdr.pf.prio = (hdr >> ERSPAN_II_COS_SHIFT) &
+		    ERSPAN_II_COS_MASK;
+		break;
+	default:
+		m->m_pkthdr.pf.prio = rxprio;
+		break;
+	}
+
+	/* packets with the T bit set are not passed up the stack */
+	if (hdr & ERSPAN_II_T)
+		input = 0;
+
+	if (input) {
+		m = gre_ether_align(m, hlen);
+		if (m == NULL)
+			return (NULL);
+
+		CLR(m->m_flags, M_MCAST|M_BCAST);
+
+		if_vinput(ifp, m, ns);
+	} else {
+#if NBPFILTER > 0
+		if_bpf = ifp->if_bpf;
+		if (if_bpf) {
+			/* strip the outer headers for the Ethernet tap */
+			m_adj(m, hlen);
+			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN);
+		}
+#endif
+
+		goto drop;
+	}
+
+	return (NULL);
+
+decline:
+	return (m);
+drop:
+	m_freem(m);
+	return (NULL);
+}
+
+/*
+ * Order erspan interfaces in erspan_tree by their tunnel configuration.
+ * erspan_input() also calls this (via RBT_FIND) with a bare struct
+ * gre_tunnel cast to a softc, so only sc_tunnel may be looked at here.
+ */
+static inline int
+erspan_cmp(const struct erspan_softc *ea, const struct erspan_softc *eb)
+{
+	return (gre_tunnel_key_cmp(&ea->sc_tunnel, &eb->sc_tunnel));
+}
+
+RBT_GENERATE(erspan_tree, erspan_softc, sc_entry, erspan_cmp);