Index | Thread | Search

From:
Jonathan Matthew <jonathan@d14n.org>
Subject:
Re: dwqe(4) VLAN offload
To:
tech@openbsd.org
Cc:
stsp@openbsd.org
Date:
Sun, 2 Jun 2024 21:55:43 +1000

Download raw body.

Thread
On Wed, May 22, 2024 at 12:40:12PM +0200, Stefan Sperling wrote:
> The patch below adds support for VLAN tag offloading to dwqe(4).
> 
> For now this patch only adds support for 802.1Q, i.e. vlan(4).
> The device supports QinQ offloading but our network stack does
> not use hardware offload for svlan(4) (yet?).
> 
> Tested on Elkhart Lake, against another vlan(4) interface running
> on top of em(4).
> 
> ok?

I've only tested the receive side of this so far, on an RK3568 system
that doesn't implement the transmit side.  A couple of comments below.

> 
> diff 84eef7829674053b45bf3760666e19ee4e754135 19a830bf9b66312d351d0fab852c538730f4c2bd
> commit - 84eef7829674053b45bf3760666e19ee4e754135
> commit + 19a830bf9b66312d351d0fab852c538730f4c2bd
> blob - 04cdc69851a5b230b81c8c46244384770207a025
> blob + bc9a16a5e003b09b6a0339a46a4bc2d7b39c7276
> --- sys/dev/ic/dwqe.c
> +++ sys/dev/ic/dwqe.c
> @@ -21,6 +21,7 @@
>   */
>  
>  #include "bpfilter.h"
> +#include "vlan.h"
>  
>  #include <sys/param.h>
>  #include <sys/systm.h>
> @@ -100,6 +101,40 @@ dwqe_have_tx_csum_offload(struct dwqe_softc *sc)
>  }
>  
>  int
> +dwqe_have_tx_vlan_offload(struct dwqe_softc *sc)
> +{
> +#if NVLAN > 0
> +	return (sc->sc_hw_feature[0] & GMAC_MAC_HW_FEATURE0_SAVLANINS);
> +#else
> +	return 0;
> +#endif
> +}
> +
> +void
> +dwqe_set_vlan_tx_mode(struct dwqe_softc *sc)
> +{
> +#if NVLAN > 0
> +	uint32_t reg;
> +
> +	reg = dwqe_read(sc, GMAC_VLAN_TAG_INCL);
> +
> +	/* Enable insertion of outer VLAN tag. */
> +	reg |= GMAC_VLAN_TAG_INCL_INSERT;
> +
> +	/*
> +	 * Generate C-VLAN tags (type 0x8100, 802.1Q). Setting this
> +	 * bit would result in S-VLAN tags (type 0x88A8, 802.1ad).
> +	 */
> +	reg &= ~GMAC_VLAN_TAG_INCL_CSVL;
> +
> +	/* Use VLAN tags provided in Tx context descriptors. */
> +	reg |= GMAC_VLAN_TAG_INCL_VLTI;
> +
> +	dwqe_write(sc, GMAC_VLAN_TAG_INCL, reg);
> +#endif
> +}
> +
> +int
>  dwqe_attach(struct dwqe_softc *sc)
>  {
>  	struct ifnet *ifp;
> @@ -127,6 +162,8 @@ dwqe_attach(struct dwqe_softc *sc)
>  	bcopy(sc->sc_dev.dv_xname, ifp->if_xname, IFNAMSIZ);
>  
>  	ifp->if_capabilities = IFCAP_VLAN_MTU;
> +	if (dwqe_have_tx_vlan_offload(sc))
> +		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
>  	if (dwqe_have_tx_csum_offload(sc)) {
>  		ifp->if_capabilities |= (IFCAP_CSUM_IPv4 |
>  		    IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
> @@ -218,6 +255,9 @@ dwqe_attach(struct dwqe_softc *sc)
>  	if (!sc->sc_fixed_link)
>  		dwqe_mii_attach(sc);
>  
> +	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
> +		dwqe_set_vlan_tx_mode(sc);
> +
>  	if_attach(ifp);
>  	ether_ifattach(ifp);
>  
> @@ -337,7 +377,15 @@ dwqe_start(struct ifqueue *ifq)
>  		m = ifq_dequeue(ifq);
>  		if (m == NULL)
>  			break;
> -
> +#if NVLAN > 0
> +		/* VLAN tags require an extra Tx context descriptor. */
> +		if (dwqe_have_tx_vlan_offload(sc) &&
> +		    (m->m_flags & M_VLANTAG) &&
> +		    used + DWQE_NTXSEGS + 2 > left) {
> +			ifq_set_oactive(ifq);
> +			break;
> +		}
> +#endif

Usually we just check that there's space to fit any possible packet at
the top of the loop, rather than dequeueing and checking if that
specific packet fits.  I think it'd be better to change the check above
this one to be 'used + DWQE_NTXSEGS + 2 > left', moving the comment about
when an extra descriptor is needed there too.

>  		error = dwqe_encap(sc, m, &idx, &used);
>  		if (error == EFBIG) {
>  			m_freem(m); /* give up: drop it */
> @@ -715,6 +763,21 @@ dwqe_rx_csum(struct dwqe_softc *sc, struct mbuf *m, st
>  }
>  
>  void
> +dwqe_vlan_strip(struct dwqe_softc *sc, struct mbuf *m, struct dwqe_desc *rxd)
> +{
> +#if NVLAN > 0
> +	uint16_t tag;
> +
> +	if ((rxd->sd_tdes3 & RDES3_RDES0_VALID) &&
> +	    (rxd->sd_tdes3 & RDES3_LD)) {
> +		tag = rxd->sd_tdes0 & RDES0_OVT;
> +		m->m_pkthdr.ether_vtag = le16toh(tag);
> +		m->m_flags |= M_VLANTAG;
> +	}
> +#endif
> +}
> +
> +void
>  dwqe_rx_proc(struct dwqe_softc *sc)
>  {
>  	struct ifnet *ifp = &sc->sc_ac.ac_if;
> @@ -763,6 +826,7 @@ dwqe_rx_proc(struct dwqe_softc *sc)
>  			m->m_pkthdr.len = m->m_len = len;
>  
>  			dwqe_rx_csum(sc, m, rxd);
> +			dwqe_vlan_strip(sc, m, rxd);
>  			ml_enqueue(&ml, m);
>  		}
>  
> @@ -944,6 +1008,13 @@ dwqe_up(struct dwqe_softc *sc)
>  		reg |= GMAC_MAC_CONF_IPC;
>  		dwqe_write(sc, GMAC_MAC_CONF, reg);
>  	}
> +
> +#if NVLAN > 0
> +	/* Enable outer VLAN tag stripping on Rx. */
> +	reg = dwqe_read(sc, GMAC_VLAN_TAG_CTRL);
> +	reg |= GMAC_VLAN_TAG_CTRL_EVLRXS | GMAC_VLAN_TAG_CTRL_STRIP_ALWAYS;
> +	dwqe_write(sc, GMAC_VLAN_TAG_CTRL, reg);
> +#endif

Would it make sense to do this in dwqe_attach() where the transmit side
is configured?

>  }
>  
>  void
> @@ -1107,12 +1178,34 @@ dwqe_tx_csum(struct dwqe_softc *sc, struct mbuf *m, st
>  		txd->sd_tdes3 |= TDES3_CSUM_IPHDR_PAYLOAD_PSEUDOHDR;
>  }
>  
> +uint16_t
> +dwqe_set_tx_context_desc(struct dwqe_softc *sc, struct mbuf *m, int idx)
> +{
> +	uint16_t tag = 0;
> +#if NVLAN > 0
> +	struct dwqe_desc *ctxt_txd;
> +
> +	if ((m->m_flags & M_VLANTAG) == 0)
> +		return 0;
> +
> +	tag = m->m_pkthdr.ether_vtag;
> +	if (tag) {
> +		ctxt_txd = &sc->sc_txdesc[idx];
> +		ctxt_txd->sd_tdes3 |= (htole16(tag) & TDES3_VLAN_TAG);
> +		ctxt_txd->sd_tdes3 |= TDES3_VLAN_TAG_VALID;
> +		ctxt_txd->sd_tdes3 |= (TDES3_CTXT | TDES3_OWN);
> +	}
> +#endif
> +	return tag;
> +}
> +
>  int
>  dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int *idx, int *used)
>  {
>  	struct dwqe_desc *txd, *txd_start;
>  	bus_dmamap_t map;
>  	int cur, frag, i;
> +	uint16_t vlan_tag = 0;
>  
>  	cur = frag = *idx;
>  	map = sc->sc_txbuf[cur].tb_map;
> @@ -1128,6 +1221,17 @@ dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int 
>  	bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
>  	    BUS_DMASYNC_PREWRITE);
>  
> +	if (dwqe_have_tx_vlan_offload(sc)) {
> +		vlan_tag = dwqe_set_tx_context_desc(sc, m, frag);
> +		if (vlan_tag) {
> +			(*used)++;
> +			if (frag == (DWQE_NTXDESC - 1))
> +				frag = 0;
> +			else
> +				frag++;
> +		}
> +	}
> +
>  	txd = txd_start = &sc->sc_txdesc[frag];
>  	for (i = 0; i < map->dm_nsegs; i++) {
>  		/* TODO: check for 32-bit vs 64-bit support */
> @@ -1140,6 +1244,8 @@ dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int 
>  		if (i == 0) {
>  			txd->sd_tdes3 |= TDES3_FS;
>  			dwqe_tx_csum(sc, m, txd);
> +			if (vlan_tag)
> +				txd->sd_tdes2 |= TDES2_VLAN_TAG_INSERT;
>  		}
>  		if (i == (map->dm_nsegs - 1)) {
>  			txd->sd_tdes2 |= TDES2_IC;
> blob - ab479d54c139b877b2a111e0f99d267db3a17dc0
> blob + 03f66fca83a72220fb718003aaec576c590448e2
> --- sys/dev/ic/dwqereg.h
> +++ sys/dev/ic/dwqereg.h
> @@ -44,6 +44,19 @@
>  #define  GMAC_INT_MASK_LPIIM		(1 << 10)
>  #define  GMAC_INT_MASK_PIM		(1 << 3)
>  #define  GMAC_INT_MASK_RIM		(1 << 0)
> +#define GMAC_VLAN_TAG_CTRL	0x0050
> +#define  GMAC_VLAN_TAG_CTRL_EVLRXS		(1 << 24)
> +#define  GMAC_VLAN_TAG_CTRL_STRIP_ALWAYS	((1 << 21) | (1 << 22))	
> +#define GMAC_VLAN_TAG_DATA	0x0054
> +#define GMAC_VLAN_TAG_INCL	0x0060
> +#define  GMAC_VLAN_TAG_INCL_VLTI	(1 << 20)
> +#define  GMAC_VLAN_TAG_INCL_CSVL	(1 << 19)
> +#define  GMAC_VLAN_TAG_INCL_DELETE	0x10000
> +#define  GMAC_VLAN_TAG_INCL_INSERT	0x20000
> +#define  GMAC_VLAN_TAG_INCL_REPLACE	0x30000
> +#define  GMAC_VLAN_TAG_INCL_VLT		0x0ffff
> +#define  GMAC_VLAN_TAG_INCL_RDWR	(1U << 30)
> +#define  GMAC_VLAN_TAG_INCL_BUSY	(1U << 31)
>  #define GMAC_QX_TX_FLOW_CTRL(x)	(0x0070 + (x) * 4)
>  #define  GMAC_QX_TX_FLOW_CTRL_PT_SHIFT	16
>  #define  GMAC_QX_TX_FLOW_CTRL_TFE	(1 << 0)
> @@ -64,6 +77,7 @@
>  #define GMAC_MAC_HW_FEATURE(x)	(0x011c + (x) * 0x4)
>  #define  GMAC_MAC_HW_FEATURE0_TXCOESEL	(1 << 14)
>  #define  GMAC_MAC_HW_FEATURE0_RXCOESEL	(1 << 16)
> +#define  GMAC_MAC_HW_FEATURE0_SAVLANINS	(1 << 27)
>  #define  GMAC_MAC_HW_FEATURE1_TXFIFOSIZE(x) (((x) >> 6) & 0x1f)
>  #define  GMAC_MAC_HW_FEATURE1_RXFIFOSIZE(x) (((x) >> 0) & 0x1f)
>  #define GMAC_MAC_MDIO_ADDR	0x0200
> @@ -230,6 +244,12 @@ struct dwqe_desc {
>  	uint32_t sd_tdes3;
>  };
>  
> +/* Tx context descriptor bits (host to device); precedes regular descriptor */
> +#define TDES3_CTXT		(1 << 30)
> +#define TDES3_VLAN_TAG_VALID	(1 << 16)
> +#define TDES3_VLAN_TAG		0xffff	
> +/* Bit 31 is the OWN bit, as in regular Tx descriptor. */
> +
>  /* Tx bits (read format; host to device) */
>  #define TDES2_HDR_LEN		0x000003ff	/* if TSO is enabled */
>  #define TDES2_BUF1_LEN		0x00003fff	/* if TSO is disabled */
> @@ -250,6 +270,11 @@ struct dwqe_desc {
>  #define   TDES3_CSUM_IPHDR_PAYLOAD		(0x2 << 16)
>  #define   TDES3_CSUM_IPHDR_PAYLOAD_PSEUDOHDR	(0x3 << 16)
>  #define TDES3_TSO_EN		(1 << 18)
> +#define TDES3_CPC		((1 << 26) | (1 << 27)) /* if TSO is disabled */
> +#define   TDES3_CPC_CRC_AND_PAD		(0x0 << 26)
> +#define   TDES3_CPC_CRC_NO_PAD		(0x1 << 26)
> +#define   TDES3_CPC_DISABLE		(0x2 << 26)
> +#define   TDES3_CPC_CRC_REPLACE		(0x3 << 26)
>  #define TDES3_LS		(1 << 28)
>  #define TDES3_FS		(1 << 29)
>  #define TDES3_OWN		(1U << 31)
> @@ -268,6 +293,8 @@ struct dwqe_desc {
>  #define RDES3_OWN		(1U << 31)
>  
>  /* Rx bits (writeback format; device to host) */
> +#define RDES0_IVT		0xffff0000
> +#define RDES0_OVT		0x0000ffff
>  #define RDES1_IP_PAYLOAD_TYPE	0x7
>  #define   RDES1_IP_PAYLOAD_UNKNOWN	0x0
>  #define   RDES1_IP_PAYLOAD_UDP		0x1
>