Download raw body.
enable rss/multiqueue for newer aq(4) models
On Tue, Jan 28, 2025 at 08:22:38PM +0100, Mark Kettenis wrote:
> > Date: Sun, 26 Jan 2025 17:50:27 +1000
> > From: Jonathan Matthew <jonathan@d14n.org>
> >
> > This fills in the missing bits for RSS/multiqueue on 'aq2' hardware
> > (AQC113 up to AQC116). Like earlier models, aq2 is limited to 8 queues.
> >
> > ok?
>
> Doesn't seem to break aq(4) on my M2 Pro Mac mini. However, this made
> me realize that aplintc(4) doesn't actually support running interrupts
> on other CPUs. So all the queues end up on the primary CPU. That in
> itself shouldn't be a problem, but I think it means that
> intr_barrier(9) is broken on these machines.
>
> Need to dig into this a bit deeper and see if I can fix this. This
> hardware is interesting since it implements a mode where the hardware
> picks the most appropriate CPU to run the interrupt on. Not exactly
> sure how it does that, but I believe this helps save power since it
> can direct interrupts to an active CPU to avoid waking up a CPU that
> is in a deep sleep state.
>
> Did you test this diff on non-Apple hardware?
I worked on this on a rockpro64 with an AQC113 pcie card, and dlg
tested it on an amd64 system with an onboard AQC11x.
Should I hold off committing this until you figure out what to do
with aplintc(4)? It does look like intr_barrier() won't work
properly, but I don't think that's a big problem for aq(4)
specifically. Not having interrupts distributed across cpus also
shouldn't be a problem.
>
>
> > Index: if_aq_pci.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/if_aq_pci.c,v
> > diff -u -p -u -p -r1.28 if_aq_pci.c
> > --- if_aq_pci.c 24 May 2024 06:02:53 -0000 1.28
> > +++ if_aq_pci.c 26 Jan 2025 07:43:45 -0000
> > @@ -365,6 +365,7 @@
> > #define TPB_TX_BUF_SCP_INS_EN (1 << 2)
> > #define TPB_TX_BUF_CLK_GATE_EN (1 << 5)
> > #define TPB_TX_BUF_TC_MODE_EN (1 << 8)
> > +#define TPB_TX_BUF_TC_Q_RAND_MAP_EN (1 << 9)
> >
> >
> > /* TPB_TXB_BUFSIZE_REG[AQ_TRAFFICCLASS_NUM] 0x7910-7990 */
> > @@ -467,7 +468,7 @@
> >
> > #define AQ2_RPF_REDIR2_REG 0x54c8
> > #define AQ2_RPF_REDIR2_INDEX (1 << 12)
> > -#define AQ2_RPF_REDIR2_HASHTYPE 0x00000100
> > +#define AQ2_RPF_REDIR2_HASHTYPE 0x000001FF
> > #define AQ2_RPF_REDIR2_HASHTYPE_NONE 0
> > #define AQ2_RPF_REDIR2_HASHTYPE_IP (1 << 0)
> > #define AQ2_RPF_REDIR2_HASHTYPE_TCP4 (1 << 1)
> > @@ -478,7 +479,16 @@
> > #define AQ2_RPF_REDIR2_HASHTYPE_IP6EX (1 << 6)
> > #define AQ2_RPF_REDIR2_HASHTYPE_TCP6EX (1 << 7)
> > #define AQ2_RPF_REDIR2_HASHTYPE_UDP6EX (1 << 8)
> > -#define AQ2_RPF_REDIR2_HASHTYPE_ALL 0x00000100
> > +#define AQ2_RPF_REDIR2_HASHTYPE_ALL 0x000001FF
> > +
> > +#define AQ2_RX_Q_TC_MAP_REG(i) (0x5900 + (i) * 4)
> > +#define AQ2_TX_Q_TC_MAP_REG(i) (0x799c + (i) * 4)
> > +
> > +#define AQ2_RPF_RSS_REDIR_MAX 64
> > +#define AQ2_RPF_RSS_REDIR_REG(tc, i) \
> > + (0x6200 + (0x100 * ((tc) >> 2)) + (i) * 4)
> > +#define AQ2_RPF_RSS_REDIR_TC_MASK(tc) \
> > + (0x1f << (5 * ((tc) & 3)))
> >
> > #define AQ2_RPF_REC_TAB_ENABLE_REG 0x6ff0
> > #define AQ2_RPF_REC_TAB_ENABLE_MASK 0x0000ffff
> > @@ -1282,8 +1292,7 @@ aq_attach(struct device *parent, struct
> >
> > if (pci_intr_map_msix(pa, 0, &ih) == 0) {
> > int nmsix = pci_intr_msix_count(pa);
> > - /* don't do rss on aq2 yet */
> > - if (aqp->aq_hwtype == HWTYPE_AQ1 && nmsix > 1) {
> > + if (nmsix > 1) {
> > nmsix--;
> > sc->sc_intrmap = intrmap_create(&sc->sc_dev,
> > nmsix, AQ_MAXQ, INTRMAP_POWEROF2);
> > @@ -2803,6 +2812,26 @@ aq_hw_qos_set(struct aq_softc *sc)
> > AQ_WRITE_REG_BIT(sc, RPF_RPB_RX_TC_UPT_REG,
> > RPF_RPB_RX_TC_UPT_MASK(i_priority), 0);
> > }
> > +
> > + /* ring to TC mapping */
> > + if (HWTYPE_AQ2_P(sc)) {
> > + AQ_WRITE_REG_BIT(sc, TPB_TX_BUF_REG,
> > + TPB_TX_BUF_TC_Q_RAND_MAP_EN, 1);
> > +
> > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(0), 0x00000000);
> > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(1), 0x00000000);
> > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(2), 0x01010101);
> > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(3), 0x01010101);
> > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(4), 0x02020202);
> > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(5), 0x02020202);
> > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(6), 0x03030303);
> > + AQ_WRITE_REG(sc, AQ2_TX_Q_TC_MAP_REG(7), 0x03030303);
> > +
> > + AQ_WRITE_REG(sc, AQ2_RX_Q_TC_MAP_REG(0), 0x00000000);
> > + AQ_WRITE_REG(sc, AQ2_RX_Q_TC_MAP_REG(1), 0x11111111);
> > + AQ_WRITE_REG(sc, AQ2_RX_Q_TC_MAP_REG(2), 0x22222222);
> > + AQ_WRITE_REG(sc, AQ2_RX_Q_TC_MAP_REG(3), 0x33333333);
> > + }
> > }
> >
> > int
> > @@ -2816,6 +2845,19 @@ aq_init_rss(struct aq_softc *sc)
> >
> > if (sc->sc_nqueues == 1)
> > return 0;
> > +
> > + if (HWTYPE_AQ2_P(sc)) {
> > + AQ_WRITE_REG_BIT(sc, AQ2_RPF_REDIR2_REG, AQ2_RPF_REDIR2_INDEX, 0);
> > + for (i = 0; i < AQ2_RPF_RSS_REDIR_MAX; i++) {
> > + int tc;
> > + int q;
> > + for (tc = 0; tc < 4; tc++) {
> > + q = (tc * 8) + (i % sc->sc_nqueues);
> > + AQ_WRITE_REG_BIT(sc, AQ2_RPF_RSS_REDIR_REG(tc, i),
> > + AQ2_RPF_RSS_REDIR_TC_MASK(tc), q);
> > + }
> > + }
> > + }
> >
> > /* rss key is composed of 32 bit registers */
> > stoeplitz_to_key(rss_key, sizeof(rss_key));
> >
> >
>
enable rss/multiqueue for newer aq(4) models