From: Mark Kettenis
Subject: Re: dwqe(4) VLAN offload
To: Stefan Sperling
Cc: tech@openbsd.org
Date: Sat, 25 May 2024 15:13:46 +0200

> Date: Wed, 22 May 2024 12:40:12 +0200
> From: Stefan Sperling
>
> The patch below adds support for VLAN tag offloading to dwqe(4).
>
> For now this patch only adds support for 802.1Q, i.e. vlan(4).
> The device supports QinQ offloading but our network stack does
> not use hardware offload for svlan(4) (yet?).
>
> Tested on Elkhart Lake, against another vlan(4) interface running
> on top of em(4).
>
> ok?

This feature isn't supported on the RK3568:

mendelssohn$ ifconfig dwqe0 hwfeatures
dwqe0: flags=808843 mtu 1500
	hwfeatures=197 hardmtu 1500
	lladdr 9e:cf:11:1f:dd:49
	index 1 priority 0 llprio 3
	groups: egress
	media: Ethernet autoselect (1000baseT full-duplex)
	status: active
	inet 192.168.0.141 netmask 0xffffff00 broadcast 192.168.0.255

I'm not using any VLANs, but it doesn't seem to break non-VLAN usage.

Does this actually make a difference in performance in any way?

> diff 84eef7829674053b45bf3760666e19ee4e754135 19a830bf9b66312d351d0fab852c538730f4c2bd
> commit - 84eef7829674053b45bf3760666e19ee4e754135
> commit + 19a830bf9b66312d351d0fab852c538730f4c2bd
> blob - 04cdc69851a5b230b81c8c46244384770207a025
> blob + bc9a16a5e003b09b6a0339a46a4bc2d7b39c7276
> --- sys/dev/ic/dwqe.c
> +++ sys/dev/ic/dwqe.c
> @@ -21,6 +21,7 @@
>   */
>
>  #include "bpfilter.h"
> +#include "vlan.h"
>
>  #include
>  #include
> @@ -100,6 +101,40 @@ dwqe_have_tx_csum_offload(struct dwqe_softc *sc)
>  }
>
>  int
> +dwqe_have_tx_vlan_offload(struct dwqe_softc *sc)
> +{
> +#if NVLAN > 0
> +	return (sc->sc_hw_feature[0] & GMAC_MAC_HW_FEATURE0_SAVLANINS);
> +#else
> +	return 0;
> +#endif
> +}
> +
> +void
> +dwqe_set_vlan_tx_mode(struct dwqe_softc *sc)
> +{
> +#if NVLAN > 0
> +	uint32_t reg;
> +
> +	reg = dwqe_read(sc, GMAC_VLAN_TAG_INCL);
> +
> +	/* Enable insertion of outer VLAN tag. */
> +	reg |= GMAC_VLAN_TAG_INCL_INSERT;
> +
> +	/*
> +	 * Generate C-VLAN tags (type 0x8100, 802.1Q). Setting this
> +	 * bit would result in S-VLAN tags (type 0x88A8, 802.1ad).
> +	 */
> +	reg &= ~GMAC_VLAN_TAG_INCL_CSVL;
> +
> +	/* Use VLAN tags provided in Tx context descriptors. */
> +	reg |= GMAC_VLAN_TAG_INCL_VLTI;
> +
> +	dwqe_write(sc, GMAC_VLAN_TAG_INCL, reg);
> +#endif
> +}
> +
> +int
>  dwqe_attach(struct dwqe_softc *sc)
>  {
>  	struct ifnet *ifp;
> @@ -127,6 +162,8 @@ dwqe_attach(struct dwqe_softc *sc)
>  	bcopy(sc->sc_dev.dv_xname, ifp->if_xname, IFNAMSIZ);
>
>  	ifp->if_capabilities = IFCAP_VLAN_MTU;
> +	if (dwqe_have_tx_vlan_offload(sc))
> +		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
>  	if (dwqe_have_tx_csum_offload(sc)) {
>  		ifp->if_capabilities |= (IFCAP_CSUM_IPv4 |
>  		    IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
> @@ -218,6 +255,9 @@ dwqe_attach(struct dwqe_softc *sc)
>  	if (!sc->sc_fixed_link)
>  		dwqe_mii_attach(sc);
>
> +	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
> +		dwqe_set_vlan_tx_mode(sc);
> +
>  	if_attach(ifp);
>  	ether_ifattach(ifp);
>
> @@ -337,7 +377,15 @@ dwqe_start(struct ifqueue *ifq)
>  		m = ifq_dequeue(ifq);
>  		if (m == NULL)
>  			break;
> -
> +#if NVLAN > 0
> +		/* VLAN tags require an extra Tx context descriptor. */
> +		if (dwqe_have_tx_vlan_offload(sc) &&
> +		    (m->m_flags & M_VLANTAG) &&
> +		    used + DWQE_NTXSEGS + 2 > left) {
> +			ifq_set_oactive(ifq);
> +			break;
> +		}
> +#endif
>  		error = dwqe_encap(sc, m, &idx, &used);
>  		if (error == EFBIG) {
>  			m_freem(m); /* give up: drop it */
> @@ -715,6 +763,21 @@ dwqe_rx_csum(struct dwqe_softc *sc, struct mbuf *m, st
>  }
>
>  void
> +dwqe_vlan_strip(struct dwqe_softc *sc, struct mbuf *m, struct dwqe_desc *rxd)
> +{
> +#if NVLAN > 0
> +	uint16_t tag;
> +
> +	if ((rxd->sd_tdes3 & RDES3_RDES0_VALID) &&
> +	    (rxd->sd_tdes3 & RDES3_LD)) {
> +		tag = rxd->sd_tdes0 & RDES0_OVT;
> +		m->m_pkthdr.ether_vtag = le16toh(tag);
> +		m->m_flags |= M_VLANTAG;
> +	}
> +#endif
> +}
> +
> +void
>  dwqe_rx_proc(struct dwqe_softc *sc)
>  {
>  	struct ifnet *ifp = &sc->sc_ac.ac_if;
> @@ -763,6 +826,7 @@ dwqe_rx_proc(struct dwqe_softc *sc)
>  			m->m_pkthdr.len = m->m_len = len;
>
>  			dwqe_rx_csum(sc, m, rxd);
> +			dwqe_vlan_strip(sc, m, rxd);
>  			ml_enqueue(&ml, m);
>  		}
>
> @@ -944,6 +1008,13 @@ dwqe_up(struct dwqe_softc *sc)
>  		reg |= GMAC_MAC_CONF_IPC;
>  		dwqe_write(sc, GMAC_MAC_CONF, reg);
>  	}
> +
> +#if NVLAN > 0
> +	/* Enable outer VLAN tag stripping on Rx. */
> +	reg = dwqe_read(sc, GMAC_VLAN_TAG_CTRL);
> +	reg |= GMAC_VLAN_TAG_CTRL_EVLRXS | GMAC_VLAN_TAG_CTRL_STRIP_ALWAYS;
> +	dwqe_write(sc, GMAC_VLAN_TAG_CTRL, reg);
> +#endif
>  }
>
>  void
> @@ -1107,12 +1178,34 @@ dwqe_tx_csum(struct dwqe_softc *sc, struct mbuf *m, st
>  		txd->sd_tdes3 |= TDES3_CSUM_IPHDR_PAYLOAD_PSEUDOHDR;
>  }
>
> +uint16_t
> +dwqe_set_tx_context_desc(struct dwqe_softc *sc, struct mbuf *m, int idx)
> +{
> +	uint16_t tag = 0;
> +#if NVLAN > 0
> +	struct dwqe_desc *ctxt_txd;
> +
> +	if ((m->m_flags & M_VLANTAG) == 0)
> +		return 0;
> +
> +	tag = m->m_pkthdr.ether_vtag;
> +	if (tag) {
> +		ctxt_txd = &sc->sc_txdesc[idx];
> +		ctxt_txd->sd_tdes3 |= (htole16(tag) & TDES3_VLAN_TAG);
> +		ctxt_txd->sd_tdes3 |= TDES3_VLAN_TAG_VALID;
> +		ctxt_txd->sd_tdes3 |= (TDES3_CTXT | TDES3_OWN);
> +	}
> +#endif
> +	return tag;
> +}
> +
>  int
>  dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int *idx, int *used)
>  {
>  	struct dwqe_desc *txd, *txd_start;
>  	bus_dmamap_t map;
>  	int cur, frag, i;
> +	uint16_t vlan_tag = 0;
>
>  	cur = frag = *idx;
>  	map = sc->sc_txbuf[cur].tb_map;
> @@ -1128,6 +1221,17 @@ dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int
>  	bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
>  	    BUS_DMASYNC_PREWRITE);
>
> +	if (dwqe_have_tx_vlan_offload(sc)) {
> +		vlan_tag = dwqe_set_tx_context_desc(sc, m, frag);
> +		if (vlan_tag) {
> +			(*used)++;
> +			if (frag == (DWQE_NTXDESC - 1))
> +				frag = 0;
> +			else
> +				frag++;
> +		}
> +	}
> +
>  	txd = txd_start = &sc->sc_txdesc[frag];
>  	for (i = 0; i < map->dm_nsegs; i++) {
>  		/* TODO: check for 32-bit vs 64-bit support */
> @@ -1140,6 +1244,8 @@ dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int
>  		if (i == 0) {
>  			txd->sd_tdes3 |= TDES3_FS;
>  			dwqe_tx_csum(sc, m, txd);
> +			if (vlan_tag)
> +				txd->sd_tdes2 |= TDES2_VLAN_TAG_INSERT;
>  		}
>  		if (i == (map->dm_nsegs - 1)) {
>  			txd->sd_tdes2 |= TDES2_IC;
> blob - ab479d54c139b877b2a111e0f99d267db3a17dc0
> blob + 03f66fca83a72220fb718003aaec576c590448e2
> --- sys/dev/ic/dwqereg.h
> +++ sys/dev/ic/dwqereg.h
> @@ -44,6 +44,19 @@
>  #define GMAC_INT_MASK_LPIIM (1 << 10)
>  #define GMAC_INT_MASK_PIM (1 << 3)
>  #define GMAC_INT_MASK_RIM (1 << 0)
> +#define GMAC_VLAN_TAG_CTRL 0x0050
> +#define GMAC_VLAN_TAG_CTRL_EVLRXS (1 << 24)
> +#define GMAC_VLAN_TAG_CTRL_STRIP_ALWAYS ((1 << 21) | (1 << 22))
> +#define GMAC_VLAN_TAG_DATA 0x0054
> +#define GMAC_VLAN_TAG_INCL 0x0060
> +#define GMAC_VLAN_TAG_INCL_VLTI (1 << 20)
> +#define GMAC_VLAN_TAG_INCL_CSVL (1 << 19)
> +#define GMAC_VLAN_TAG_INCL_DELETE 0x10000
> +#define GMAC_VLAN_TAG_INCL_INSERT 0x20000
> +#define GMAC_VLAN_TAG_INCL_REPLACE 0x30000
> +#define GMAC_VLAN_TAG_INCL_VLT 0x0ffff
> +#define GMAC_VLAN_TAG_INCL_RDWR (1U << 30)
> +#define GMAC_VLAN_TAG_INCL_BUSY (1U << 31)
>  #define GMAC_QX_TX_FLOW_CTRL(x) (0x0070 + (x) * 4)
>  #define GMAC_QX_TX_FLOW_CTRL_PT_SHIFT 16
>  #define GMAC_QX_TX_FLOW_CTRL_TFE (1 << 0)
> @@ -64,6 +77,7 @@
>  #define GMAC_MAC_HW_FEATURE(x) (0x011c + (x) * 0x4)
>  #define GMAC_MAC_HW_FEATURE0_TXCOESEL (1 << 14)
>  #define GMAC_MAC_HW_FEATURE0_RXCOESEL (1 << 16)
> +#define GMAC_MAC_HW_FEATURE0_SAVLANINS (1 << 27)
>  #define GMAC_MAC_HW_FEATURE1_TXFIFOSIZE(x) (((x) >> 6) & 0x1f)
>  #define GMAC_MAC_HW_FEATURE1_RXFIFOSIZE(x) (((x) >> 0) & 0x1f)
>  #define GMAC_MAC_MDIO_ADDR 0x0200
> @@ -230,6 +244,12 @@ struct dwqe_desc {
>  	uint32_t sd_tdes3;
>  };
>
> +/* Tx context descriptor bits (host to device); precedes regular descriptor */
> +#define TDES3_CTXT (1 << 30)
> +#define TDES3_VLAN_TAG_VALID (1 << 16)
> +#define TDES3_VLAN_TAG 0xffff
> +/* Bit 31 is the OWN bit, as in regular Tx descriptor. */
> +
>  /* Tx bits (read format; host to device) */
>  #define TDES2_HDR_LEN 0x000003ff /* if TSO is enabled */
>  #define TDES2_BUF1_LEN 0x00003fff /* if TSO is disabled */
> @@ -250,6 +270,11 @@ struct dwqe_desc {
>  #define TDES3_CSUM_IPHDR_PAYLOAD (0x2 << 16)
>  #define TDES3_CSUM_IPHDR_PAYLOAD_PSEUDOHDR (0x3 << 16)
>  #define TDES3_TSO_EN (1 << 18)
> +#define TDES3_CPC ((1 << 26) | (1 << 27)) /* if TSO is disabled */
> +#define TDES3_CPC_CRC_AND_PAD (0x0 << 26)
> +#define TDES3_CPC_CRC_NO_PAD (0x1 << 26)
> +#define TDES3_CPC_DISABLE (0x2 << 26)
> +#define TDES3_CPC_CRC_REPLACE (0x3 << 26)
>  #define TDES3_LS (1 << 28)
>  #define TDES3_FS (1 << 29)
>  #define TDES3_OWN (1U << 31)
> @@ -268,6 +293,8 @@ struct dwqe_desc {
>  #define RDES3_OWN (1U << 31)
>
>  /* Rx bits (writeback format; device to host) */
> +#define RDES0_IVT 0xffff0000
> +#define RDES0_OVT 0x0000ffff
>  #define RDES1_IP_PAYLOAD_TYPE 0x7
>  #define RDES1_IP_PAYLOAD_UNKNOWN 0x0
>  #define RDES1_IP_PAYLOAD_UDP 0x1
>
>