Download raw body.
ix(4): tweak mbuf cluster choices for rx
This diff switches to using 9k clusters for rx when LRO is enabled.
The extra wiggle room I just added to the 9k clusters means that they're
big enough to cope with the silly way Intel has you specify rx buffer
sizes, while also providing enough extra space to still align the IP
payload as required by the network stack. This has the dual benefit
of minimising the amount of unused space in clusters and encouraging
shorter mbuf chains.
While here, this straightens out a bunch of kinks in how the chosen
cluster size is used in the rx code.
I've tried this on 82599s on arm64 and sparc64, with and without LRO.
djm has also tried this but suffers a performance regression with
LRO enabled that I don't understand. On paper it should Just Work(tm).
I'm throwing it out here so other people can poke at it.
Index: if_ix.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
diff -u -p -r1.225 if_ix.c
--- if_ix.c 22 Apr 2026 22:12:49 -0000 1.225
+++ if_ix.c 22 Apr 2026 22:16:15 -0000
@@ -662,7 +662,7 @@ ixgbe_rxrinfo(struct ix_softc *sc, struc
for (i = 0; i < sc->num_queues; i++) {
rxr = &sc->rx_rings[i];
- ifr[n].ifr_size = MCLBYTES;
+ ifr[n].ifr_size = sc->rx_mbuf_sz;
snprintf(ifr[n].ifr_name, sizeof(ifr[n].ifr_name), "%d", i);
ifr[n].ifr_info = rxr->rx_ring;
n++;
@@ -778,14 +778,13 @@ ixgbe_init(void *arg)
ixgbe_initialize_transmit_units(sc);
/*
- * Use 4k clusters in LRO mode to avoid m_defrag calls in case of
- * socket splicing. Or, use 2k clusters in non-LRO mode, even for
- * jumbo frames.
+ * LRO encourages large packets, so pick a cluster to match
+ * expectations.
*/
if (ISSET(ifp->if_xflags, IFXF_LRO))
- sc->rx_mbuf_sz = MCLBYTES * 2 - ETHER_ALIGN;
+ sc->rx_mbuf_sz = 9 * 1024;
else
- sc->rx_mbuf_sz = MCLBYTES + ETHER_ALIGN;
+ sc->rx_mbuf_sz = MCLBYTES;
/* Prepare receive descriptors and buffers */
if (ixgbe_setup_receive_structures(sc)) {
@@ -2726,11 +2725,11 @@ ixgbe_get_buf(struct ix_rxring *rxr, int
}
/* needed in any case so preallocate since this one will fail for sure */
- mp = MCLGETL(NULL, M_DONTWAIT, sc->rx_mbuf_sz);
+ mp = MCLGETL(NULL, M_DONTWAIT, sc->rx_mbuf_sz + ETHER_ALIGN);
if (!mp)
return (ENOBUFS);
- mp->m_data += (mp->m_ext.ext_size - sc->rx_mbuf_sz);
+ mp->m_data += ETHER_ALIGN;
mp->m_len = mp->m_pkthdr.len = sc->rx_mbuf_sz;
error = bus_dmamap_load_mbuf(rxr->rxdma.dma_tag, rxbuf->map,
@@ -2804,6 +2803,7 @@ ixgbe_setup_receive_ring(struct ix_rxrin
struct ix_softc *sc = rxr->sc;
struct ifnet *ifp = &sc->arpcom.ac_if;
int rsize, error;
+ unsigned int maxpktlen;
rsize = roundup2(sc->num_rx_desc *
sizeof(union ixgbe_adv_rx_desc), 4096);
@@ -2817,7 +2817,10 @@ ixgbe_setup_receive_ring(struct ix_rxrin
rxr->next_to_check = 0;
rxr->last_desc_filled = sc->num_rx_desc - 1;
- if_rxr_init(&rxr->rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
+ maxpktlen = ISSET(ifp->if_xflags, IFXF_LRO) ?
+ MAXMCLBYTES : IXGBE_MAX_FRAME_SIZE;
+
+ if_rxr_init(&rxr->rx_ring, 2 * howmany(maxpktlen, sc->rx_mbuf_sz) + 1,
sc->num_rx_desc - 1);
ixgbe_rxfill(rxr);
@@ -2951,7 +2954,7 @@ ixgbe_initialize_receive_units(struct ix
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
}
- bufsz = (sc->rx_mbuf_sz - ETHER_ALIGN) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ bufsz = sc->rx_mbuf_sz >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
for (i = 0; i < sc->num_queues; i++, rxr++) {
uint64_t rdba = rxr->rxdma.dma_map->dm_segs[0].ds_addr;
Index: if_ixv.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ixv.c,v
diff -u -p -r1.3 if_ixv.c
--- if_ixv.c 13 Mar 2026 14:22:33 -0000 1.3
+++ if_ixv.c 22 Apr 2026 22:16:16 -0000
@@ -475,7 +475,7 @@ ixv_init(struct ix_softc *sc)
ixv_iff(sc);
/* Use 2k clusters, even for jumbo frames */
- sc->rx_mbuf_sz = MCLBYTES + ETHER_ALIGN;
+ sc->rx_mbuf_sz = MCLBYTES;
/* Prepare receive descriptors and buffers */
if (ixgbe_setup_receive_structures(sc)) {
@@ -947,7 +947,7 @@ ixv_initialize_receive_units(struct ix_s
uint32_t reg, rxdctl, bufsz, psrtype;
int i, j, k;
- bufsz = (sc->rx_mbuf_sz - ETHER_ALIGN) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ bufsz = sc->rx_mbuf_sz >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
psrtype = IXGBE_PSRTYPE_TCPHDR
| IXGBE_PSRTYPE_UDPHDR
ix(4): tweak mbuf cluster choices for rx