From: Mark Kettenis Subject: Re: enable rss/multiqueue for newer aq(4) models To: Jonathan Matthew Cc: tech@openbsd.org, kettenis@openbsd.org Date: Tue, 28 Jan 2025 20:22:38 +0100 > Date: Sun, 26 Jan 2025 17:50:27 +1000 > From: Jonathan Matthew > > This fills in the missing bits for RSS/multiqueue on 'aq2' hardware > (AQC113 up to AQC116). Like earlier models, aq2 is limited to 8 queues. > > ok? Doesn't seem to break aq(4) on my M2 Pro Mac mini. However, this made me realize that aplintc(4) doesn't actually support running interrupts on other CPUs. So all the queues end up on the primary CPU. That in itself shouldn't be a problem, but I think it means that intr_barrier(9) is broken on these machines. Need to dig into this a bit deeper and see if I can fix this. This hardware is interesting since it implements a mode where the hardware picks the most appropriate CPU to run the interrupt on. Not exactly sure how it does that, but I believe this helps save power since it can direct interrupts to an active CPU to avoid waking up a CPU that is in a deep sleep state. Did you test this diff on non-Apple hardware? 
> Index: if_aq_pci.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_aq_pci.c,v > diff -u -p -u -p -r1.28 if_aq_pci.c > --- if_aq_pci.c 24 May 2024 06:02:53 -0000 1.28 > +++ if_aq_pci.c 26 Jan 2025 07:43:45 -0000 > @@ -365,6 +365,7 @@ > #define TPB_TX_BUF_SCP_INS_EN (1 << 2) > #define TPB_TX_BUF_CLK_GATE_EN (1 << 5) > #define TPB_TX_BUF_TC_MODE_EN (1 << 8) > +#define TPB_TX_BUF_TC_Q_RAND_MAP_EN (1 << 9) > > > /* TPB_TXB_BUFSIZE_REG[AQ_TRAFFICCLASS_NUM] 0x7910-7990 */ > @@ -467,7 +468,7 @@ > > #define AQ2_RPF_REDIR2_REG 0x54c8 > #define AQ2_RPF_REDIR2_INDEX (1 << 12) > -#define AQ2_RPF_REDIR2_HASHTYPE 0x00000100 > +#define AQ2_RPF_REDIR2_HASHTYPE 0x000001FF > #define AQ2_RPF_REDIR2_HASHTYPE_NONE 0 > #define AQ2_RPF_REDIR2_HASHTYPE_IP (1 << 0) > #define AQ2_RPF_REDIR2_HASHTYPE_TCP4 (1 << 1) > @@ -478,7 +479,16 @@ > #define AQ2_RPF_REDIR2_HASHTYPE_IP6EX (1 << 6) > #define AQ2_RPF_REDIR2_HASHTYPE_TCP6EX (1 << 7) > #define AQ2_RPF_REDIR2_HASHTYPE_UDP6EX (1 << 8) > -#define AQ2_RPF_REDIR2_HASHTYPE_ALL 0x00000100 > +#define AQ2_RPF_REDIR2_HASHTYPE_ALL 0x000001FF > + > +#define AQ2_RX_Q_TC_MAP_REG(i) (0x5900 + (i) * 4) > +#define AQ2_TX_Q_TC_MAP_REG(i) (0x799c + (i) * 4) > + > +#define AQ2_RPF_RSS_REDIR_MAX 64 > +#define AQ2_RPF_RSS_REDIR_REG(tc, i) \ > + (0x6200 + (0x100 * ((tc) >> 2)) + (i) * 4) > +#define AQ2_RPF_RSS_REDIR_TC_MASK(tc) \ > + (0x1f << (5 * ((tc) & 3))) > > #define AQ2_RPF_REC_TAB_ENABLE_REG 0x6ff0 > #define AQ2_RPF_REC_TAB_ENABLE_MASK 0x0000ffff > @@ -1282,8 +1292,7 @@ aq_attach(struct device *parent, struct > > if (pci_intr_map_msix(pa, 0, &ih) == 0) { > int nmsix = pci_intr_msix_count(pa); > - /* don't do rss on aq2 yet */ > - if (aqp->aq_hwtype == HWTYPE_AQ1 && nmsix > 1) { > + if (nmsix > 1) { > nmsix--; > sc->sc_intrmap = intrmap_create(&sc->sc_dev, > nmsix, AQ_MAXQ, INTRMAP_POWEROF2); > @@ -2803,6 +2812,26 @@ aq_hw_qos_set(struct aq_softc *sc) > AQ_WRITE_REG_BIT(sc, RPF_RPB_RX_TC_UPT_REG, > 
RPF_RPB_RX_TC_UPT_MASK(i_priority), 0); > } > + > + /* ring to TC mapping */ > + if (HWTYPE_AQ2_P(sc)) { > + AQ_WRITE_REG_BIT(sc, TPB_TX_BUF_REG, > + TPB_TX_BUF_TC_Q_RAND_MAP_EN, 1); > + > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(0), 0x00000000); > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(1), 0x00000000); > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(2), 0x01010101); > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(3), 0x01010101); > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(4), 0x02020202); > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(5), 0x02020202); > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(6), 0x03030303); > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(7), 0x03030303); > + > + AQ_WRITE_REG(sc, AQ2_RX_Q_TC_MAP_REG(0), 0x00000000); > + AQ_WRITE_REG(sc, AQ2_RX_Q_TC_MAP_REG(1), 0x11111111); > + AQ_WRITE_REG(sc, AQ2_RX_Q_TC_MAP_REG(2), 0x22222222); > + AQ_WRITE_REG(sc, AQ2_RX_Q_TC_MAP_REG(3), 0x33333333); > + } > } > > int > @@ -2816,6 +2845,19 @@ aq_init_rss(struct aq_softc *sc) > > if (sc->sc_nqueues == 1) > return 0; > + > + if (HWTYPE_AQ2_P(sc)) { > + AQ_WRITE_REG_BIT(sc, AQ2_RPF_REDIR2_REG, AQ2_RPF_REDIR2_INDEX, 0); > + for (i = 0; i < AQ2_RPF_RSS_REDIR_MAX; i++) { > + int tc; > + int q; > + for (tc = 0; tc < 4; tc++) { > + q = (tc * 8) + (i % sc->sc_nqueues); > + AQ_WRITE_REG_BIT(sc, AQ2_RPF_RSS_REDIR_REG(tc, i), > + AQ2_RPF_RSS_REDIR_TC_MASK(tc), q); > + } > + } > + } > > /* rss key is composed of 32 bit registers */ > stoeplitz_to_key(rss_key, sizeof(rss_key)); > >