dwqe(4) VLAN offload
On Wed, May 22, 2024 at 12:40:12PM +0200, Stefan Sperling wrote:
> The patch below adds support for VLAN tag offloading to dwqe(4).
>
> For now this patch only adds support for 802.1Q, i.e. vlan(4).
> The device supports QinQ offloading but our network stack does
> not use hardware offload for svlan(4) (yet?).
>
> Tested on Elkhart Lake, against another vlan(4) interface running
> on top of em(4).
>
> ok?
I've only tested the receive side of this so far, on an RK3568 system
whose hardware doesn't implement the transmit side. A couple of comments below.
>
> diff 84eef7829674053b45bf3760666e19ee4e754135 19a830bf9b66312d351d0fab852c538730f4c2bd
> commit - 84eef7829674053b45bf3760666e19ee4e754135
> commit + 19a830bf9b66312d351d0fab852c538730f4c2bd
> blob - 04cdc69851a5b230b81c8c46244384770207a025
> blob + bc9a16a5e003b09b6a0339a46a4bc2d7b39c7276
> --- sys/dev/ic/dwqe.c
> +++ sys/dev/ic/dwqe.c
> @@ -21,6 +21,7 @@
> */
>
> #include "bpfilter.h"
> +#include "vlan.h"
>
> #include <sys/param.h>
> #include <sys/systm.h>
> @@ -100,6 +101,40 @@ dwqe_have_tx_csum_offload(struct dwqe_softc *sc)
> }
>
> int
> +dwqe_have_tx_vlan_offload(struct dwqe_softc *sc)
> +{
> +#if NVLAN > 0
> + return (sc->sc_hw_feature[0] & GMAC_MAC_HW_FEATURE0_SAVLANINS);
> +#else
> + return 0;
> +#endif
> +}
> +
> +void
> +dwqe_set_vlan_tx_mode(struct dwqe_softc *sc)
> +{
> +#if NVLAN > 0
> + uint32_t reg;
> +
> + reg = dwqe_read(sc, GMAC_VLAN_TAG_INCL);
> +
> + /* Enable insertion of outer VLAN tag. */
> + reg |= GMAC_VLAN_TAG_INCL_INSERT;
> +
> + /*
> + * Generate C-VLAN tags (type 0x8100, 802.1Q). Setting this
> + * bit would result in S-VLAN tags (type 0x88A8, 802.1ad).
> + */
> + reg &= ~GMAC_VLAN_TAG_INCL_CSVL;
> +
> + /* Use VLAN tags provided in Tx context descriptors. */
> + reg |= GMAC_VLAN_TAG_INCL_VLTI;
> +
> + dwqe_write(sc, GMAC_VLAN_TAG_INCL, reg);
> +#endif
> +}
> +
> +int
> dwqe_attach(struct dwqe_softc *sc)
> {
> struct ifnet *ifp;
> @@ -127,6 +162,8 @@ dwqe_attach(struct dwqe_softc *sc)
> bcopy(sc->sc_dev.dv_xname, ifp->if_xname, IFNAMSIZ);
>
> ifp->if_capabilities = IFCAP_VLAN_MTU;
> + if (dwqe_have_tx_vlan_offload(sc))
> + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
> if (dwqe_have_tx_csum_offload(sc)) {
> ifp->if_capabilities |= (IFCAP_CSUM_IPv4 |
> IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
> @@ -218,6 +255,9 @@ dwqe_attach(struct dwqe_softc *sc)
> if (!sc->sc_fixed_link)
> dwqe_mii_attach(sc);
>
> + if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
> + dwqe_set_vlan_tx_mode(sc);
> +
> if_attach(ifp);
> ether_ifattach(ifp);
>
> @@ -337,7 +377,15 @@ dwqe_start(struct ifqueue *ifq)
> m = ifq_dequeue(ifq);
> if (m == NULL)
> break;
> -
> +#if NVLAN > 0
> + /* VLAN tags require an extra Tx context descriptor. */
> + if (dwqe_have_tx_vlan_offload(sc) &&
> + (m->m_flags & M_VLANTAG) &&
> + used + DWQE_NTXSEGS + 2 > left) {
> + ifq_set_oactive(ifq);
> + break;
> + }
> +#endif
Usually we just check that there's space to fit any possible packet at
the top of the loop, rather than dequeueing and checking if that
specific packet fits. I think it'd be better to change the check above
this one to be 'used + DWQE_NTXSEGS + 2 > left', moving the comment about
when an extra descriptor is needed there too.
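Roughly like this at the top of the loop, before the ifq_dequeue() call
(untested sketch, assuming the current check there is
'used + DWQE_NTXSEGS + 1 > left'):

	/*
	 * Sketch of the suggested change: reserve room for the worst
	 * case up front, i.e. DWQE_NTXSEGS data descriptors plus the
	 * extra Tx context descriptor needed when a VLAN tag is
	 * inserted.
	 */
	if (used + DWQE_NTXSEGS + 2 > left) {
		ifq_set_oactive(ifq);
		break;
	}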
> error = dwqe_encap(sc, m, &idx, &used);
> if (error == EFBIG) {
> m_freem(m); /* give up: drop it */
> @@ -715,6 +763,21 @@ dwqe_rx_csum(struct dwqe_softc *sc, struct mbuf *m, st
> }
>
> void
> +dwqe_vlan_strip(struct dwqe_softc *sc, struct mbuf *m, struct dwqe_desc *rxd)
> +{
> +#if NVLAN > 0
> + uint16_t tag;
> +
> + if ((rxd->sd_tdes3 & RDES3_RDES0_VALID) &&
> + (rxd->sd_tdes3 & RDES3_LD)) {
> + tag = rxd->sd_tdes0 & RDES0_OVT;
> + m->m_pkthdr.ether_vtag = le16toh(tag);
> + m->m_flags |= M_VLANTAG;
> + }
> +#endif
> +}
> +
> +void
> dwqe_rx_proc(struct dwqe_softc *sc)
> {
> struct ifnet *ifp = &sc->sc_ac.ac_if;
> @@ -763,6 +826,7 @@ dwqe_rx_proc(struct dwqe_softc *sc)
> m->m_pkthdr.len = m->m_len = len;
>
> dwqe_rx_csum(sc, m, rxd);
> + dwqe_vlan_strip(sc, m, rxd);
> ml_enqueue(&ml, m);
> }
>
> @@ -944,6 +1008,13 @@ dwqe_up(struct dwqe_softc *sc)
> reg |= GMAC_MAC_CONF_IPC;
> dwqe_write(sc, GMAC_MAC_CONF, reg);
> }
> +
> +#if NVLAN > 0
> + /* Enable outer VLAN tag stripping on Rx. */
> + reg = dwqe_read(sc, GMAC_VLAN_TAG_CTRL);
> + reg |= GMAC_VLAN_TAG_CTRL_EVLRXS | GMAC_VLAN_TAG_CTRL_STRIP_ALWAYS;
> + dwqe_write(sc, GMAC_VLAN_TAG_CTRL, reg);
> +#endif
Would it make sense to do this in dwqe_attach() where the transmit side
is configured?
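For example (untested sketch; dwqe_set_vlan_rx_mode() is just a name I
made up, and this assumes the Rx stripping bits can be set once at
attach time like the Tx side):

void
dwqe_set_vlan_rx_mode(struct dwqe_softc *sc)
{
#if NVLAN > 0
	uint32_t reg;

	/* Enable outer VLAN tag stripping on Rx. */
	reg = dwqe_read(sc, GMAC_VLAN_TAG_CTRL);
	reg |= GMAC_VLAN_TAG_CTRL_EVLRXS | GMAC_VLAN_TAG_CTRL_STRIP_ALWAYS;
	dwqe_write(sc, GMAC_VLAN_TAG_CTRL, reg);
#endif
}

with a call to it in dwqe_attach() next to the dwqe_set_vlan_tx_mode() call.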
> }
>
> void
> @@ -1107,12 +1178,34 @@ dwqe_tx_csum(struct dwqe_softc *sc, struct mbuf *m, st
> txd->sd_tdes3 |= TDES3_CSUM_IPHDR_PAYLOAD_PSEUDOHDR;
> }
>
> +uint16_t
> +dwqe_set_tx_context_desc(struct dwqe_softc *sc, struct mbuf *m, int idx)
> +{
> + uint16_t tag = 0;
> +#if NVLAN > 0
> + struct dwqe_desc *ctxt_txd;
> +
> + if ((m->m_flags & M_VLANTAG) == 0)
> + return 0;
> +
> + tag = m->m_pkthdr.ether_vtag;
> + if (tag) {
> + ctxt_txd = &sc->sc_txdesc[idx];
> + ctxt_txd->sd_tdes3 |= (htole16(tag) & TDES3_VLAN_TAG);
> + ctxt_txd->sd_tdes3 |= TDES3_VLAN_TAG_VALID;
> + ctxt_txd->sd_tdes3 |= (TDES3_CTXT | TDES3_OWN);
> + }
> +#endif
> + return tag;
> +}
> +
> int
> dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int *idx, int *used)
> {
> struct dwqe_desc *txd, *txd_start;
> bus_dmamap_t map;
> int cur, frag, i;
> + uint16_t vlan_tag = 0;
>
> cur = frag = *idx;
> map = sc->sc_txbuf[cur].tb_map;
> @@ -1128,6 +1221,17 @@ dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int
> bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
> BUS_DMASYNC_PREWRITE);
>
> + if (dwqe_have_tx_vlan_offload(sc)) {
> + vlan_tag = dwqe_set_tx_context_desc(sc, m, frag);
> + if (vlan_tag) {
> + (*used)++;
> + if (frag == (DWQE_NTXDESC - 1))
> + frag = 0;
> + else
> + frag++;
> + }
> + }
> +
> txd = txd_start = &sc->sc_txdesc[frag];
> for (i = 0; i < map->dm_nsegs; i++) {
> /* TODO: check for 32-bit vs 64-bit support */
> @@ -1140,6 +1244,8 @@ dwqe_encap(struct dwqe_softc *sc, struct mbuf *m, int
> if (i == 0) {
> txd->sd_tdes3 |= TDES3_FS;
> dwqe_tx_csum(sc, m, txd);
> + if (vlan_tag)
> + txd->sd_tdes2 |= TDES2_VLAN_TAG_INSERT;
> }
> if (i == (map->dm_nsegs - 1)) {
> txd->sd_tdes2 |= TDES2_IC;
> blob - ab479d54c139b877b2a111e0f99d267db3a17dc0
> blob + 03f66fca83a72220fb718003aaec576c590448e2
> --- sys/dev/ic/dwqereg.h
> +++ sys/dev/ic/dwqereg.h
> @@ -44,6 +44,19 @@
> #define GMAC_INT_MASK_LPIIM (1 << 10)
> #define GMAC_INT_MASK_PIM (1 << 3)
> #define GMAC_INT_MASK_RIM (1 << 0)
> +#define GMAC_VLAN_TAG_CTRL 0x0050
> +#define GMAC_VLAN_TAG_CTRL_EVLRXS (1 << 24)
> +#define GMAC_VLAN_TAG_CTRL_STRIP_ALWAYS ((1 << 21) | (1 << 22))
> +#define GMAC_VLAN_TAG_DATA 0x0054
> +#define GMAC_VLAN_TAG_INCL 0x0060
> +#define GMAC_VLAN_TAG_INCL_VLTI (1 << 20)
> +#define GMAC_VLAN_TAG_INCL_CSVL (1 << 19)
> +#define GMAC_VLAN_TAG_INCL_DELETE 0x10000
> +#define GMAC_VLAN_TAG_INCL_INSERT 0x20000
> +#define GMAC_VLAN_TAG_INCL_REPLACE 0x30000
> +#define GMAC_VLAN_TAG_INCL_VLT 0x0ffff
> +#define GMAC_VLAN_TAG_INCL_RDWR (1U << 30)
> +#define GMAC_VLAN_TAG_INCL_BUSY (1U << 31)
> #define GMAC_QX_TX_FLOW_CTRL(x) (0x0070 + (x) * 4)
> #define GMAC_QX_TX_FLOW_CTRL_PT_SHIFT 16
> #define GMAC_QX_TX_FLOW_CTRL_TFE (1 << 0)
> @@ -64,6 +77,7 @@
> #define GMAC_MAC_HW_FEATURE(x) (0x011c + (x) * 0x4)
> #define GMAC_MAC_HW_FEATURE0_TXCOESEL (1 << 14)
> #define GMAC_MAC_HW_FEATURE0_RXCOESEL (1 << 16)
> +#define GMAC_MAC_HW_FEATURE0_SAVLANINS (1 << 27)
> #define GMAC_MAC_HW_FEATURE1_TXFIFOSIZE(x) (((x) >> 6) & 0x1f)
> #define GMAC_MAC_HW_FEATURE1_RXFIFOSIZE(x) (((x) >> 0) & 0x1f)
> #define GMAC_MAC_MDIO_ADDR 0x0200
> @@ -230,6 +244,12 @@ struct dwqe_desc {
> uint32_t sd_tdes3;
> };
>
> +/* Tx context descriptor bits (host to device); precedes regular descriptor */
> +#define TDES3_CTXT (1 << 30)
> +#define TDES3_VLAN_TAG_VALID (1 << 16)
> +#define TDES3_VLAN_TAG 0xffff
> +/* Bit 31 is the OWN bit, as in regular Tx descriptor. */
> +
> /* Tx bits (read format; host to device) */
> #define TDES2_HDR_LEN 0x000003ff /* if TSO is enabled */
> #define TDES2_BUF1_LEN 0x00003fff /* if TSO is disabled */
> @@ -250,6 +270,11 @@ struct dwqe_desc {
> #define TDES3_CSUM_IPHDR_PAYLOAD (0x2 << 16)
> #define TDES3_CSUM_IPHDR_PAYLOAD_PSEUDOHDR (0x3 << 16)
> #define TDES3_TSO_EN (1 << 18)
> +#define TDES3_CPC ((1 << 26) | (1 << 27)) /* if TSO is disabled */
> +#define TDES3_CPC_CRC_AND_PAD (0x0 << 26)
> +#define TDES3_CPC_CRC_NO_PAD (0x1 << 26)
> +#define TDES3_CPC_DISABLE (0x2 << 26)
> +#define TDES3_CPC_CRC_REPLACE (0x3 << 26)
> #define TDES3_LS (1 << 28)
> #define TDES3_FS (1 << 29)
> #define TDES3_OWN (1U << 31)
> @@ -268,6 +293,8 @@ struct dwqe_desc {
> #define RDES3_OWN (1U << 31)
>
> /* Rx bits (writeback format; device to host) */
> +#define RDES0_IVT 0xffff0000
> +#define RDES0_OVT 0x0000ffff
> #define RDES1_IP_PAYLOAD_TYPE 0x7
> #define RDES1_IP_PAYLOAD_UNKNOWN 0x0
> #define RDES1_IP_PAYLOAD_UDP 0x1
>