Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: erspan(4): ERSPAN Type II collection
To:
David Gwynne <david@gwynne.id.au>
Cc:
tech@openbsd.org
Date:
Mon, 12 May 2025 14:06:18 +0200

Download raw body.

Thread
On Mon, May 12, 2025 at 11:27:59AM +1000, David Gwynne wrote:
> we were exploring how to better let us see what's happening on access
> networks or specific ports on a switch at work. our switches are
> pretty much all cisco, which has ERSPAN.
> 
> ERSPAN in its various forms ships Ethernet packets over GRE for
> collection and analysis on another system. There are 3 types of ERSPAN
> encapsulation, but Type II seems broadly implemented.
> 
> this implements support for ERSPAN Type II in OpenBSD as a tunnel
> interface. this allows OpenBSD to collect ERSPAN encapsulated packets
> with existing tooling, particularly those built around BPF (like
> tcpdump and bpflogd). it also supports transmitting packets, so it can
> be added as a span port on bridge(4) or veb(4). unfortunately this means
> it can actually work as a tunnel interface, but i can't stop people
> doing every silly thing they want to do.
> 
> ERSPAN type II is supposed to be configured between two endpoints, a
> sender and receiver, and includes a Session ID in its shim header so
> you can tell different capture sessions apart. this configuration is
> mapped to ifconfig erspanX tunnel LOCALIP REMOTEIP for the endpoint
> addresses, and ifconfig erspanX vnetid SESSIONID for the session id.
> 
> you can wildcard the remote ip by not specifying one, ie, ifconfig
> erspan0 tunneladdr LOCALIP. this lets the interface capture all erspan
> packets sent to it, regardless of where they come from. however, this
> disables its ability to transmit packets. in this mode you can also
> unset the vnetid to allow reception of packets for all ERSPAN sessions.
> 
> the driver shows the encapsulated Ethernet packets via BPF, ie, tcpdump
> -i erspanX will show the Ethernet packets that are captured on the remote
> device. it also supports BPF with the DLT_LOOP type to show the
> encapsulating IP, GRE, and ERSPAN headers. eg:
> 
> if i have a collector interface set up like this:
> 
> $ ifconfig erspan0
> erspan0: flags=1008843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST,MONITOR> mtu 1500
> 	lladdr fe:e1:ba:d0:9e:d8
> 	index 7 priority 0 llprio 3
> 	encap: vnetid none txprio packet rxprio payload
> 	groups: erspan
> 	tunnel: inet 192.168.2.3 ttl 64 nodf
> 
> so if the remote device captured this Ethernet frame:
> 
> 11:16:26.975309 78:72:5d:29:b9:bf 01:00:5e:00:00:66 8100 118: 802.1Q vid 33 pri 6 10.153.135.3.1985 > 224.0.0.102.1985: [udp sum ok] HSRPv1 [tos 0xc0] [ttl 1] (id 0, len 100)
> 
> you can also capture the transport header so you can tell where it
> came from with tcpdump -y LOOP (-vveeni erspan0 -s1600), which looks
> like this:
> 
> 11:16:26.975309 192.168.128.153 > 192.168.2.3: gre [S] 88be seq 0 erspan II session 11 vlan payload cos 0 index 2752: 78:72:5d:29:b9:bf 01:00:5e:00:00:66 8100 118: 802.1Q vid 333 pri 6 192.168.135.3.1985 > 224.0.0.102.1985: [udp sum ok] HSRPv1 [tos 0xc0] [ttl 1] (id 0, len 100) (ttl 248, id 55158, len 154)
> 
> 192.168.128.153 is a switch that has a capture session configured.
> 
> wiring this into bpf also means i can use bpflogd to write all the
> collected packets to disk without stopping people running tcpdump
> against the interface to watch packets live, or even use something like
> the remote ssh capture stuff in wireshark.
> 
> cos erspan is GRE, it can reuse a lot of what's already there in
> if_gre.c
> 
> as well as cisco devices, ive tested this as a collector for monitoring
> sessions from a vmware dvswitch.
> 
> i dont know if anyone else is interested in this stuff or if it's just
> me.

I think this is a handy tool and the code looks reasonable.
IMO this should be added to the list of gre devices we have.

 
> Index: if_gre.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_gre.c,v
> diff -u -p -r1.184 if_gre.c
> --- if_gre.c	2 Mar 2025 21:28:31 -0000	1.184
> +++ if_gre.c	12 May 2025 00:51:19 -0000
> @@ -152,7 +152,9 @@ struct gre_h_wccp {
>  	uint8_t			pri_bucket;
>  } __packed __aligned(4);
>  
> -#define GRE_WCCP 0x883e
> +#define GRE_WCCP		0x883e
> +#define GRE_ERSPAN		0x88be /* also ERSPAN Type II */
> +#define GRE_ERSPAN_III		0x22eb
>  
>  #define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
>  
> @@ -535,6 +537,75 @@ struct if_clone eoip_cloner =
>  struct eoip_tree eoip_tree = RBT_INITIALIZER();
>  
>  /*
> + * ERSPAN support
> + */
> +
> +struct gre_h_erspan {
> +	uint32_t		hdr;
> +#define ERSPAN_II_VER_SHIFT		28
> +#define ERSPAN_II_VER_MASK		0xf
> +#define ERSPAN_II_VER			0x1
> +#define ERSPAN_II_VLAN_SHIFT		16
> +#define ERSPAN_II_VLAN_MASK		0xfff
> +#define ERSPAN_II_COS_SHIFT		13
> +#define ERSPAN_II_COS_MASK		0x7
> +#define ERSPAN_II_EN_SHIFT		11 /* Encapsulation type */
> +#define ERSPAN_II_EN_MASK		0x3
> +#define ERSPAN_II_EN_NONE		0x0
> +#define ERSPAN_II_EN_ISL		0x1
> +#define ERSPAN_II_EN_VLAN		0x2
> +#define ERSPAN_II_EN_PRESERVED		0x3
> +#define ERSPAN_II_EN_PRESERVED		0x3
> +#define ERSPAN_II_T			(0x1 << 10)
> +#define ERSPAN_II_SESSION_ID_SHIFT	0
> +#define ERSPAN_II_SESSION_ID_MASK	0x3ff /* 10 bits */
> +	uint32_t		index;
> +#define ERSPAN_II_INDEX_SHIFT		0
> +#define ERSPAN_II_INDEX_MASK		0xfffff /* 20 bits */
> +};
> +
> +struct erspan_softc {
> +	struct gre_tunnel	sc_tunnel; /* must be first */
> +	int			sc_session_id;
> +	RBT_ENTRY(erspan_softc)	sc_entry;
> +
> +	struct arpcom		sc_ac;
> +	uint32_t		sc_seq;
> +	caddr_t			sc_bpf;
> +};
> +
> +RBT_HEAD(erspan_tree, erspan_softc);
> +
> +static inline int
> +		erspan_cmp(const struct erspan_softc *,
> +		    const struct erspan_softc *);
> +
> +RBT_PROTOTYPE(erspan_tree, erspan_softc, sc_entry, erspan_cmp);
> +
> +static int	erspan_clone_create(struct if_clone *, int);
> +static int	erspan_clone_destroy(struct ifnet *);
> +
> +static void	erspan_start(struct ifnet *);
> +static int	erspan_ioctl(struct ifnet *, u_long, caddr_t);
> +
> +static int	erspan_up(struct erspan_softc *);
> +static int	erspan_down(struct erspan_softc *);
> +
> +static struct mbuf *
> +		erspan_encap(struct erspan_softc *, struct mbuf *, uint8_t,
> +		    uint32_t);
> +
> +static struct mbuf *
> +		erspan_input(struct gre_tunnel *, struct mbuf *, int,
> +		    const struct gre_header *, uint8_t, struct netstack *);
> +
> +struct if_clone erspan_cloner =
> +    IF_CLONE_INITIALIZER("erspan", erspan_clone_create, erspan_clone_destroy);
> +
> +/* protected by NET_LOCK */
> +struct erspan_tree erspan_tree = RBT_INITIALIZER();
> +
> +/*
>   * It is not easy to calculate the right value for a GRE MTU.
>   * We leave this task to the admin and use the same default that
>   * other vendors use.
> @@ -561,6 +632,7 @@ greattach(int n)
>  	if_clone_attach(&egre_cloner);
>  	if_clone_attach(&nvgre_cloner);
>  	if_clone_attach(&eoip_cloner);
> +	if_clone_attach(&erspan_cloner);
>  }
>  
>  static int
> @@ -889,6 +961,7 @@ eoip_clone_destroy(struct ifnet *ifp)
>  	return (0);
>  }
>  
> +
>  int
>  gre_input(struct mbuf **mp, int *offp, int type, int af, struct netstack *ns)
>  {
> @@ -1044,6 +1117,18 @@ gre_input_key(struct mbuf **mp, int *off
>  		goto decline;
>  	}
>  
> +	/*
> +	 * ERSPAN I uses no bits in the header, and II uses sequence numbers.
> +	 * handle them before limiting what flags we support. 
> +	 */
> +	if (gh->gre_proto == htons(GRE_ERSPAN)) {
> +		m = erspan_input(key, m, iphlen, gh, otos, ns);
> +		if (m == NULL)
> +			return (IPPROTO_DONE);
> +
> +		goto decline;
> +	}
> +
>  	/* the only optional bit in the header is K flag */
>  	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
>  		goto decline;
> @@ -4237,10 +4322,8 @@ RBT_GENERATE(nvgre_ucast_tree, nvgre_sof
>  RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
>  
>  static inline int
> -eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
> +gre_tunnel_key_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
>  {
> -	const struct gre_tunnel *a = &ea->sc_tunnel;
> -	const struct gre_tunnel *b = &eb->sc_tunnel;
>  	int rv;
>  
>  	if (a->t_key > b->t_key)
> @@ -4269,6 +4352,13 @@ eoip_cmp(const struct eoip_softc *ea, co
>  		return (rv);
>  
>  	return (0);
> +
> +}
> +
> +static inline int
> +eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
> +{
> +	return (gre_tunnel_key_cmp(&ea->sc_tunnel, &eb->sc_tunnel));
>  }
>  
>  RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
> @@ -4342,3 +4432,652 @@ nvgre_eb_port_sa(void *arg, struct socka
>  		unhandled_af(sc->sc_tunnel.t_af);
>  	}
>  }
> +
> +/*
> + * ERSPAN
> + */
> +
> +static int
> +erspan_clone_create(struct if_clone *ifc, int unit)
> +{
> +	struct erspan_softc *sc;
> +	struct ifnet *ifp;
> +
> +	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
> +	ifp = &sc->sc_ac.ac_if;
> +
> +	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
> +	    ifc->ifc_name, unit);
> +
> +	ifp->if_softc = sc;
> +	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
> +	ifp->if_ioctl = erspan_ioctl;
> +	ifp->if_start = erspan_start;
> +	ifp->if_xflags = IFXF_CLONED | IFXF_MONITOR;
> +	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> +#if 0 && NVLAN > 0
> +	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
> +#endif
> +	ether_fakeaddr(ifp);
> +
> +	sc->sc_tunnel.t_key = ~0;
> +	sc->sc_tunnel.t_ttl = ip_defttl;
> +	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PACKET; /* XXX */
> +	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PAYLOAD;
> +	sc->sc_tunnel.t_df = htons(0);
> +
> +	if_counters_alloc(ifp);
> +	if_attach(ifp);
> +	ether_ifattach(ifp);
> +
> +#if NBPFILTER > 0
> +	/* attach after Ethernet */
> +	bpfattach(&sc->sc_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
> +#endif
> +
> +	return (0);
> +}
> +
> +static int
> +erspan_clone_destroy(struct ifnet *ifp)
> +{
> +	struct erspan_softc *sc = ifp->if_softc;
> +
> +	NET_LOCK();
> +	if (ISSET(ifp->if_flags, IFF_RUNNING))
> +		erspan_down(sc);
> +	NET_UNLOCK();
> +
> +	ether_ifdetach(ifp);
> +	if_detach(ifp);
> +
> +	free(sc, M_DEVBUF, sizeof(*sc));
> +
> +	return (0);
> +}
> +
> +static int
> +erspan_set_tunnel(struct erspan_softc *sc, struct if_laddrreq *req)
> +{
> +	struct gre_tunnel *tunnel = &sc->sc_tunnel;
> +	struct sockaddr *addr = (struct sockaddr *)&req->addr;
> +	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
> +	struct sockaddr_in *src4;
> +#ifdef INET6
> +	struct sockaddr_in6 *src6;
> +	int error;
> +#endif
> +	uint32_t mask = 0;
> +
> +	/* validate */
> +	switch (addr->sa_family) {
> +	case AF_INET:
> +		if (addr->sa_len != sizeof(*src4))
> +			return (EINVAL);
> +
> +		src4 = (struct sockaddr_in *)addr;
> +		if (in_nullhost(src4->sin_addr) ||
> +		    IN_MULTICAST(src4->sin_addr.s_addr))
> +			return (EINVAL);
> +
> +		if (dstaddr->sa_family == AF_UNSPEC)
> +			tunnel->t_dst4.s_addr = INADDR_ANY;
> +		else if (dstaddr->sa_family != AF_INET)
> +			return (EINVAL);
> +		else {
> +			struct sockaddr_in *daddr4 = satosin(dstaddr);
> +			if (in_nullhost(daddr4->sin_addr) ||
> +			    IN_MULTICAST(daddr4->sin_addr.s_addr))
> +				return (EINVAL);
> +			
> +			tunnel->t_dst4 = daddr4->sin_addr;
> +			mask = 1;
> +		}
> +		tunnel->t_src4 = src4->sin_addr;
> +
> +		break;
> +#ifdef INET6
> +	case AF_INET6:
> +		if (addr->sa_len != sizeof(*src6))
> +			return (EINVAL);
> +
> +		src6 = (struct sockaddr_in6 *)addr;
> +		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
> +		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
> +			return (EINVAL);
> +
> +		error = in6_embedscope(&tunnel->t_src6, src6, NULL, NULL);
> +		if (error != 0)
> +			return (error);
> +
> +		if (dstaddr->sa_family == AF_UNSPEC)
> +			memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));
> +		else if (dstaddr->sa_family != AF_INET6)
> +			return (EINVAL);
> +		else {
> +			struct sockaddr_in6 *dst6 = satosin6(dstaddr);
> +			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
> +			    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
> +				return (EINVAL);
> +
> +			if (src6->sin6_scope_id != dst6->sin6_scope_id)
> +				return (EINVAL);
> +
> +			error = in6_embedscope(&tunnel->t_dst6, dst6,
> +			    NULL, NULL);
> +			if (error != 0)
> +				return (error);
> +			mask = 1;
> +		}
> +
> +		error = in6_embedscope(&tunnel->t_src6, src6, NULL, NULL);
> +		if (error != 0)
> +			return (error);
> +
> +		break;
> +#endif
> +	default:
> +		return (EAFNOSUPPORT);
> +	}
> +
> +	/* commit */
> +	tunnel->t_af = addr->sa_family;
> +	tunnel->t_key_mask = mask; /* set if dstaddr set */
> +
> +	return (0);
> +}
> +
> +static int
> +erspan_get_tunnel(struct erspan_softc *sc, struct if_laddrreq *req)
> +{
> +	struct gre_tunnel *tunnel = &sc->sc_tunnel;
> +	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
> +	struct sockaddr_in *sin;
> +#ifdef INET6
> +	struct sockaddr_in6 *sin6;
> +#endif
> +
> +	switch (tunnel->t_af) {
> +	case AF_UNSPEC:
> +		return (EADDRNOTAVAIL);
> +	case AF_INET:
> +		sin = (struct sockaddr_in *)&req->addr;
> +		memset(sin, 0, sizeof(*sin));
> +		sin->sin_family = AF_INET;
> +		sin->sin_len = sizeof(*sin);
> +		sin->sin_addr = tunnel->t_src4;
> +
> +		if (!tunnel->t_key_mask)
> +			goto unspec;
> +
> +		sin = (struct sockaddr_in *)dstaddr;
> +		memset(sin, 0, sizeof(*sin));
> +		sin->sin_family = AF_INET;
> +		sin->sin_len = sizeof(*sin);
> +		sin->sin_addr = tunnel->t_dst4;
> +		break;
> +
> +#ifdef INET6
> +	case AF_INET6:
> +		sin6 = (struct sockaddr_in6 *)&req->addr;
> +		memset(sin6, 0, sizeof(*sin6));
> +		sin6->sin6_family = AF_INET6;
> +		sin6->sin6_len = sizeof(*sin6);
> +		in6_recoverscope(sin6, &tunnel->t_src6);
> +
> +		if (!tunnel->t_key_mask)
> +			goto unspec;
> +
> +		sin6 = (struct sockaddr_in6 *)dstaddr;
> +		memset(sin6, 0, sizeof(*sin6));
> +		sin6->sin6_family = AF_INET6;
> +		sin6->sin6_len = sizeof(*sin6);
> +		in6_recoverscope(sin6, &tunnel->t_dst6);
> +		break;
> +#endif
> +	default:
> +		unhandled_af(tunnel->t_af);
> +	}
> +
> +	return (0);
> +
> +unspec:
> +	dstaddr->sa_len = 2;
> +	dstaddr->sa_family = AF_UNSPEC;
> +
> +	return (0);
> +}
> +
> +static int
> +erspan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
> +{
> +	struct erspan_softc *sc = ifp->if_softc;
> +	struct ifreq *ifr = (struct ifreq *)data;
> +	int error = 0;
> +
> +	switch(cmd) {
> +	case SIOCSIFADDR:
> +		break;
> +	case SIOCSIFFLAGS:
> +		if (ISSET(ifp->if_flags, IFF_UP)) {
> +			if (!ISSET(ifp->if_flags, IFF_RUNNING))
> +				error = erspan_up(sc);
> +			else
> +				error = 0;
> +		} else {
> +			if (ISSET(ifp->if_flags, IFF_RUNNING))
> +				error = erspan_down(sc);
> +		}
> +		break;
> +
> +	case SIOCSVNETID:
> +		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> +			error = EBUSY;
> +			break;
> +		}
> +		if (ifr->ifr_vnetid < 0 ||
> +		    ifr->ifr_vnetid > ERSPAN_II_SESSION_ID_MASK)
> +			return (EINVAL);
> +
> +		sc->sc_tunnel.t_key = ifr->ifr_vnetid; /* for cmp */
> +		break;
> +	case SIOCGVNETID:
> +		if (sc->sc_tunnel.t_key == ~0)
> +			return (EADDRNOTAVAIL);
> +		ifr->ifr_vnetid = sc->sc_tunnel.t_key;
> +		break;
> +	case SIOCDVNETID:
> +		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> +			error = EBUSY;
> +			break;
> +		}
> +		sc->sc_tunnel.t_key = ~0;
> +		break;
> +
> +	case SIOCSLIFPHYADDR:
> +		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> +			error = EBUSY;
> +			break;
> +		}
> +
> +		error = erspan_set_tunnel(sc, (struct if_laddrreq *)data);
> +		break;
> +	case SIOCGLIFPHYADDR:
> +		error = erspan_get_tunnel(sc, (struct if_laddrreq *)data);
> +		break;
> +	case SIOCDIFPHYADDR:
> +		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> +			error = EBUSY;
> +			break;
> +		}
> +
> +		/* commit */
> +		sc->sc_tunnel.t_af = AF_UNSPEC;
> +		sc->sc_tunnel.t_key_mask = 0; /* dstaddr is not set */
> +		break;
> +
> +	case SIOCSLIFPHYRTABLE:
> +		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> +			error = EBUSY;
> +			break;
> +		}
> +
> +		if (ifr->ifr_rdomainid < 0 ||
> +		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
> +		    !rtable_exists(ifr->ifr_rdomainid)) {
> +			error = EINVAL;
> +			break;
> +		}
> +		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
> +		break;
> +	case SIOCGLIFPHYRTABLE:
> +		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
> +		break;
> +
> +	case SIOCSLIFPHYTTL:
> +		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
> +			error = EINVAL;
> +			break;
> +		}
> +
> +		/* commit */
> +		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
> +		break;
> +	case SIOCGLIFPHYTTL:
> +		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
> +		break;
> +
> +	case SIOCSLIFPHYDF:
> +		/* commit */
> +		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
> +		break;
> +	case SIOCGLIFPHYDF:
> +		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
> +		break;
> +
> +	case SIOCSTXHPRIO:
> +		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
> +		if (error != 0)
> +			break;
> +
> +		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
> +		break;
> +	case SIOCGTXHPRIO:
> +		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
> +		break;
> +
> +	case SIOCSRXHPRIO:
> +		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
> +		if (error != 0)
> +			break;
> +
> +		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
> +		break;
> +	case SIOCGRXHPRIO:
> +		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
> +		break;
> +
> +	case SIOCADDMULTI:
> +	case SIOCDELMULTI:
> +		break;
> +
> +	default:
> +		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
> +		break;
> +	}
> +
> +	if (error == ENETRESET) {
> +		/* no hardware to program */
> +		error = 0;
> +	}
> +
> +	return (error);
> +}
> +
> +static int
> +erspan_up(struct erspan_softc *sc)
> +{
> +	struct gre_tunnel *tunnel = &sc->sc_tunnel;
> +
> +	if (tunnel->t_af == AF_UNSPEC)
> +		return (EDESTADDRREQ);
> +	if (tunnel->t_key == ~0 && tunnel->t_key_mask) {
> +		/* wildcard session id and t_dst is not set */
> +		return (EDESTADDRREQ);
> +	}
> +
> +	NET_ASSERT_LOCKED();
> +
> +	if (RBT_INSERT(erspan_tree, &erspan_tree, sc) != NULL)
> +		return (EADDRINUSE);
> +
> +	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
> +
> +	return (0);
> +}
> +
> +static int
> +erspan_down(struct erspan_softc *sc)
> +{
> +	NET_ASSERT_LOCKED();
> +	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
> +
> +	RBT_REMOVE(erspan_tree, &erspan_tree, sc);
> +
> +	return (0);
> +}
> +
> +static void
> +erspan_start(struct ifnet *ifp)
> +{
> +	struct erspan_softc *sc = ifp->if_softc;
> +	struct mbuf *m0, *m;
> +	uint32_t session_id = sc->sc_tunnel.t_key;
> +#if NBPFILTER > 0
> +	caddr_t if_bpf;
> +#endif
> +
> +	if (!atomic_load_int(&gre_allow) ||
> +	    !sc->sc_tunnel.t_key_mask || /* dstaddr is not set */
> +	    session_id == ~0) {
> +		ifq_purge(&ifp->if_snd);
> +		return;
> +	}
> +
> +	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
> +#if NBPFILTER > 0
> +		if_bpf = ifp->if_bpf;
> +		if (if_bpf)
> +			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
> +#endif
> +
> +		/* force prepend mbuf because of alignment problems */
> +		m = m_get(M_DONTWAIT, m0->m_type);
> +		if (m == NULL) {
> +			m_freem(m0);
> +			continue;
> +		}
> +
> +		M_MOVE_PKTHDR(m, m0);
> +		m->m_next = m0;
> +
> +		m_align(m, 0);
> +		m->m_len = 0;
> +
> +		m = erspan_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m),
> +		    session_id);
> +		if (m == NULL) {
> +			ifp->if_oerrors++;
> +			continue;
> +		}
> +#if NBPFILTER > 0
> +		if_bpf = sc->sc_bpf;
> +		if (if_bpf) {
> +			bpf_mtap_af(if_bpf, sc->sc_tunnel.t_af, m,
> +			    BPF_DIRECTION_OUT);
> +		}
> +#endif
> +		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
> +			ifp->if_oerrors++;
> +			continue;
> +		}
> +	}
> +}
> +
> +static struct mbuf *
> +erspan_encap(struct erspan_softc *sc, struct mbuf *m, uint8_t tos,
> +    uint32_t session_id)
> +{
> +	struct ifnet *ifp = &sc->sc_ac.ac_if;
> +	struct gre_header *gh;
> +	struct gre_h_seq *seqh;
> +	struct gre_h_erspan *erspanh;
> +	uint32_t hdr;
> +
> +	m = m_prepend(m, sizeof(*gh) + sizeof(*seqh) + sizeof(*erspanh),
> +	    M_DONTWAIT);
> +	if (m == NULL)
> +		return (NULL);
> +
> +	gh = mtod(m, struct gre_header *);
> +	gh->gre_flags = htons(GRE_VERS_0 | GRE_SP);
> +	gh->gre_proto = htons(GRE_ERSPAN);
> +
> +	seqh = (struct gre_h_seq *)(gh + 1);
> +	htobem32(&seqh->gre_seq, sc->sc_seq++);
> +
> +	hdr = session_id << ERSPAN_II_SESSION_ID_SHIFT;
> +	hdr |= m->m_pkthdr.pf.prio << ERSPAN_II_COS_SHIFT;
> +#if 0 && NVLAN > 0
> +	if (ISSET(m->m_flags, M_VLANTAG)) {
> +		hdr |= ERSPAN_II_EN_VLAN << ERSPAN_II_EN_SHIFT;
> +		hdr |= (m->m_pkthdr.ether_vlan & ERSPAN_II_VLAN_MASK) <<
> +		    ERSPAN_II_VLAN_SHIFT;
> +		CLR(m->m_flags, M_VLANTAG);
> +	} /* else?? */
> +#endif
> +	hdr |= ERSPAN_II_VER << ERSPAN_II_VER_SHIFT;
> +
> +	erspanh = (struct gre_h_erspan *)(seqh + 1);
> +	htobem32(&erspanh->hdr, hdr);
> +	htobem32(&erspanh->index, ISSET(ifp->if_flags, IFF_LINK0) ?
> +	    m->m_pkthdr.ph_ifidx : 0);
> +
> +	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
> +}
> +
> +static struct mbuf *
> +erspan_input(struct gre_tunnel *key, struct mbuf *m, int iphlen,
> +    const struct gre_header *gh, uint8_t otos, struct netstack *ns)
> +{
> +	struct erspan_softc *sc;
> +	struct ifnet *ifp;
> +	struct gre_h_seq *seqh;
> +	struct gre_h_erspan *erspanh;
> +	uint32_t hdr;
> +	int hlen;
> +	caddr_t buf;
> +	int input = 1;
> +	int rxprio;
> +#if NBPFILTER > 0
> +	caddr_t if_bpf;
> +#endif
> +
> +	/* ERSPAN Type II */
> +	if (gh->gre_flags != htons(GRE_SP | GRE_VERS_0))
> +		goto decline;
> +
> +	hlen = iphlen + sizeof(*gh) + sizeof(*seqh) + sizeof(*erspanh);
> +	if (m->m_pkthdr.len < hlen)
> +		goto decline;
> +
> +	m = m_pullup(m, hlen);
> +	if (m == NULL)
> +		return (NULL);
> +
> +	buf = mtod(m, caddr_t);
> +	gh = (struct gre_header *)(buf + iphlen);
> +	seqh = (struct gre_h_seq *)(gh + 1);
> +	erspanh = (struct gre_h_erspan *)(seqh + 1);
> +
> +	hdr = bemtoh32(&erspanh->hdr);
> +
> +	key->t_key = (hdr >> ERSPAN_II_SESSION_ID_SHIFT) &
> +	    ERSPAN_II_SESSION_ID_MASK;
> +
> +	NET_ASSERT_LOCKED();
> +	sc = RBT_FIND(erspan_tree, &erspan_tree,
> +	    (const struct erspan_softc *)key);
> +	if (sc == NULL) {
> +		/* try for a wildcard listener */
> +		struct gre_tunnel wkey = {
> +			.t_af = key->t_af,
> +			.t_rtableid = key->t_rtableid,
> +			.t_src = key->t_src,
> +			.t_key = key->t_key,
> +		};
> +
> +		input = 0;
> +		sc = RBT_FIND(erspan_tree, &erspan_tree,
> +		    (const struct erspan_softc *)&wkey);
> +		if (sc == NULL) {
> +			/* last resort is a wildcard listener without a key */
> +			wkey.t_key = ~0;
> +			sc = RBT_FIND(erspan_tree, &erspan_tree,
> +			    (const struct erspan_softc *)&wkey);
> +			if (sc == NULL) {
> +				goto decline;
> +			}
> +		}
> +	}
> +
> +	/* it's ours now */
> +	ifp = &sc->sc_ac.ac_if;
> +
> +#if NBPFILTER > 0
> +	if_bpf = sc->sc_bpf;
> +	if (if_bpf) {
> +		if (bpf_mtap_af(if_bpf, key->t_af, m, BPF_DIRECTION_IN))
> +			input = 0;
> +	}
> +#endif
> +
> +#if 0
> +	/*
> +	 * this appears to be metadata from the switch rather than
> +	 * an offload for the payload.
> +	 */
> +	switch ((hdr >> ERSPAN_II_EN_SHIFT) & ERSPAN_II_EN_MASK) {
> +	case ERSPAN_II_EN_ISL: /* this is cheeky */
> +	case ERSPAN_II_EN_VLAN:
> +#if NVLAN > 0
> +		m->m_pkthdr.ether_vtag = (hdr >> ERSPAN_II_VLAN_SHIFT) &
> +		    ERSPAN_II_VLAN_MASK;
> +		m->m_pkthdr.ether_vtag |= ((hdr >> ERSPAN_II_COS_SHIFT) &
> +		    ERSPAN_II_COS_MASK) << 13;
> +                m->m_flags |= M_VLANTAG;
> +#else
> +		input = 0;
> +#endif
> +		break;
> +	default:
> +		break;
> +	}
> +#endif
> +
> +	rxprio = sc->sc_tunnel.t_rxhprio;
> +	switch (rxprio) {
> +	case IF_HDRPRIO_PACKET:
> +		/* nop */
> +		break;
> +	case IF_HDRPRIO_OUTER:
> +		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
> +		break;
> +	case IF_HDRPRIO_PAYLOAD:
> +		m->m_pkthdr.pf.prio = (hdr >> ERSPAN_II_COS_SHIFT) &
> +		    ERSPAN_II_COS_MASK;
> +		break;
> +	default:
> +		m->m_pkthdr.pf.prio = rxprio;
> +		break;
> +	}
> +
> +	if (hdr & ERSPAN_II_T)
> +		input = 0;
> +
> +	if (input) {
> +		m = gre_ether_align(m, hlen);
> +		if (m == NULL)
> +			return (NULL);
> +
> +		CLR(m->m_flags, M_MCAST|M_BCAST);
> +
> +		if_vinput(&sc->sc_ac.ac_if, m, ns);
> +	} else {
> +#if NBPFILTER > 0
> +		if_bpf = ifp->if_bpf;
> +		if (if_bpf) {
> +			m_adj(m, hlen);
> +			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN);
> +		}
> +#endif
> +
> +		goto drop;
> +	}
> +
> +	return (NULL);
> +
> +decline:
> +	return (m);
> +drop:
> +	m_freem(m);
> +	return (NULL);
> +}
> +
> +static inline int
> +erspan_cmp(const struct erspan_softc *ea, const struct erspan_softc *eb)
> +{
> +	return (gre_tunnel_key_cmp(&ea->sc_tunnel, &eb->sc_tunnel));
> +}
> +
> +RBT_GENERATE(erspan_tree, erspan_softc, sc_entry, erspan_cmp);
> 

-- 
:wq Claudio