From: David Gwynne Subject: ix(4): tweak mbuf cluster choices for rx To: tech@openbsd.org Date: Thu, 23 Apr 2026 09:39:29 +1000 this diff switches to using 9k clusters for rx when LRO is enabled. the extra wiggle room i just added to the 9k clusters means that they're big enough to cope with the silly way intel has you specify rx buffer sizes while also providing enough extra space to still align the ip payload as required by the network stack. this has the dual benefits of minimising the amount of unused space in clusters and encouraging shorter mbuf chains. while here, this straightens out a bunch of kinks in how the chosen cluster size is used in the rx code. i've tried this on 82599s on arm64 and sparc64, with and without lro. djm has also tried this but suffers a performance regression with lro enabled that i don't understand. on paper it should Just Work(tm). i'm throwing it out here so other people can poke at it. Index: if_ix.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_ix.c,v diff -u -p -r1.225 if_ix.c --- if_ix.c 22 Apr 2026 22:12:49 -0000 1.225 +++ if_ix.c 22 Apr 2026 22:16:15 -0000 @@ -662,7 +662,7 @@ ixgbe_rxrinfo(struct ix_softc *sc, struc for (i = 0; i < sc->num_queues; i++) { rxr = &sc->rx_rings[i]; - ifr[n].ifr_size = MCLBYTES; + ifr[n].ifr_size = sc->rx_mbuf_sz; snprintf(ifr[n].ifr_name, sizeof(ifr[n].ifr_name), "%d", i); ifr[n].ifr_info = rxr->rx_ring; n++; @@ -778,14 +778,13 @@ ixgbe_init(void *arg) ixgbe_initialize_transmit_units(sc); /* - * Use 4k clusters in LRO mode to avoid m_defrag calls in case of - * socket splicing. Or, use 2k clusters in non-LRO mode, even for - * jumbo frames. + * LRO encourages large packets, so pick a cluster to match + * expectations. 
*/ if (ISSET(ifp->if_xflags, IFXF_LRO)) - sc->rx_mbuf_sz = MCLBYTES * 2 - ETHER_ALIGN; + sc->rx_mbuf_sz = 9 * 1024; else - sc->rx_mbuf_sz = MCLBYTES + ETHER_ALIGN; + sc->rx_mbuf_sz = MCLBYTES; /* Prepare receive descriptors and buffers */ if (ixgbe_setup_receive_structures(sc)) { @@ -2726,11 +2725,11 @@ ixgbe_get_buf(struct ix_rxring *rxr, int } /* needed in any case so preallocate since this one will fail for sure */ - mp = MCLGETL(NULL, M_DONTWAIT, sc->rx_mbuf_sz); + mp = MCLGETL(NULL, M_DONTWAIT, sc->rx_mbuf_sz + ETHER_ALIGN); if (!mp) return (ENOBUFS); - mp->m_data += (mp->m_ext.ext_size - sc->rx_mbuf_sz); + mp->m_data += ETHER_ALIGN; mp->m_len = mp->m_pkthdr.len = sc->rx_mbuf_sz; error = bus_dmamap_load_mbuf(rxr->rxdma.dma_tag, rxbuf->map, @@ -2804,6 +2803,7 @@ ixgbe_setup_receive_ring(struct ix_rxrin struct ix_softc *sc = rxr->sc; struct ifnet *ifp = &sc->arpcom.ac_if; int rsize, error; + unsigned int maxpktlen; rsize = roundup2(sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc), 4096); @@ -2817,7 +2817,10 @@ ixgbe_setup_receive_ring(struct ix_rxrin rxr->next_to_check = 0; rxr->last_desc_filled = sc->num_rx_desc - 1; - if_rxr_init(&rxr->rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1), + maxpktlen = ISSET(ifp->if_xflags, IFXF_LRO) ? 
+ MAXMCLBYTES : IXGBE_MAX_FRAME_SIZE; + + if_rxr_init(&rxr->rx_ring, 2 * howmany(maxpktlen, sc->rx_mbuf_sz) + 1, sc->num_rx_desc - 1); ixgbe_rxfill(rxr); @@ -2951,7 +2954,7 @@ ixgbe_initialize_receive_units(struct ix IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); } - bufsz = (sc->rx_mbuf_sz - ETHER_ALIGN) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + bufsz = sc->rx_mbuf_sz >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; for (i = 0; i < sc->num_queues; i++, rxr++) { uint64_t rdba = rxr->rxdma.dma_map->dm_segs[0].ds_addr; Index: if_ixv.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_ixv.c,v diff -u -p -r1.3 if_ixv.c --- if_ixv.c 13 Mar 2026 14:22:33 -0000 1.3 +++ if_ixv.c 22 Apr 2026 22:16:16 -0000 @@ -475,7 +475,7 @@ ixv_init(struct ix_softc *sc) ixv_iff(sc); /* Use 2k clusters, even for jumbo frames */ - sc->rx_mbuf_sz = MCLBYTES + ETHER_ALIGN; + sc->rx_mbuf_sz = MCLBYTES; /* Prepare receive descriptors and buffers */ if (ixgbe_setup_receive_structures(sc)) { @@ -947,7 +947,7 @@ ixv_initialize_receive_units(struct ix_s uint32_t reg, rxdctl, bufsz, psrtype; int i, j, k; - bufsz = (sc->rx_mbuf_sz - ETHER_ALIGN) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + bufsz = sc->rx_mbuf_sz >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR