Index | Thread | Search

From:
David Gwynne <david@gwynne.id.au>
Subject:
ix(4): tweak mbuf cluster choices for rx
To:
tech@openbsd.org
Date:
Thu, 23 Apr 2026 09:39:29 +1000

Download raw body.

Thread
  • David Gwynne:

    ix(4): tweak mbuf cluster choices for rx

this diff switches to using 9k clusters for rx when LRO is enabled.

the extra wiggle room i just added to the 9k clusters means that they're
big enough to cope with the silly way intel has you specify rx buffer
sizes while also providing enough extra space to still align the ip
payload as required by the network stack. this has the dual benefit
of minimising the amount of unused space in clusters and encourages
shorter mbuf chains.

while here, this straightens out a bunch of kinks in how the chosen
cluster size is used in the rx code.

I've tried this on 82599s on arm64 and sparc64, with and without lro.

djm has also tried this but suffers a performance regression with
lro enabled that I don't understand. on paper it should Just Work(tm).
I'm throwing it out here so other people can poke at it.

Index: if_ix.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
diff -u -p -r1.225 if_ix.c
--- if_ix.c	22 Apr 2026 22:12:49 -0000	1.225
+++ if_ix.c	22 Apr 2026 22:16:15 -0000
@@ -662,7 +662,7 @@ ixgbe_rxrinfo(struct ix_softc *sc, struc
 
 	for (i = 0; i < sc->num_queues; i++) {
 		rxr = &sc->rx_rings[i];
-		ifr[n].ifr_size = MCLBYTES;
+		ifr[n].ifr_size = sc->rx_mbuf_sz;
 		snprintf(ifr[n].ifr_name, sizeof(ifr[n].ifr_name), "%d", i);
 		ifr[n].ifr_info = rxr->rx_ring;
 		n++;
@@ -778,14 +778,13 @@ ixgbe_init(void *arg)
 	ixgbe_initialize_transmit_units(sc);
 
 	/*
-	 * Use 4k clusters in LRO mode to avoid m_defrag calls in case of
-	 * socket splicing.  Or, use 2k clusters in non-LRO mode, even for
-	 * jumbo frames.
+	 * LRO encourages large packets, so pick a cluster to match
+	 * expectations.
 	 */
 	if (ISSET(ifp->if_xflags, IFXF_LRO))
-		sc->rx_mbuf_sz = MCLBYTES * 2 - ETHER_ALIGN;
+		sc->rx_mbuf_sz = 9 * 1024;
 	else
-		sc->rx_mbuf_sz = MCLBYTES + ETHER_ALIGN;
+		sc->rx_mbuf_sz = MCLBYTES;
 
 	/* Prepare receive descriptors and buffers */
 	if (ixgbe_setup_receive_structures(sc)) {
@@ -2726,11 +2725,11 @@ ixgbe_get_buf(struct ix_rxring *rxr, int
 	}
 
 	/* needed in any case so preallocate since this one will fail for sure */
-	mp = MCLGETL(NULL, M_DONTWAIT, sc->rx_mbuf_sz);
+	mp = MCLGETL(NULL, M_DONTWAIT, sc->rx_mbuf_sz + ETHER_ALIGN);
 	if (!mp)
 		return (ENOBUFS);
 
-	mp->m_data += (mp->m_ext.ext_size - sc->rx_mbuf_sz);
+	mp->m_data += ETHER_ALIGN;
 	mp->m_len = mp->m_pkthdr.len = sc->rx_mbuf_sz;
 
 	error = bus_dmamap_load_mbuf(rxr->rxdma.dma_tag, rxbuf->map,
@@ -2804,6 +2803,7 @@ ixgbe_setup_receive_ring(struct ix_rxrin
 	struct ix_softc		*sc = rxr->sc;
 	struct ifnet		*ifp = &sc->arpcom.ac_if;
 	int			 rsize, error;
+	unsigned int		 maxpktlen;
 
 	rsize = roundup2(sc->num_rx_desc *
 	    sizeof(union ixgbe_adv_rx_desc), 4096);
@@ -2817,7 +2817,10 @@ ixgbe_setup_receive_ring(struct ix_rxrin
 	rxr->next_to_check = 0;
 	rxr->last_desc_filled = sc->num_rx_desc - 1;
 
-	if_rxr_init(&rxr->rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
+	maxpktlen = ISSET(ifp->if_xflags, IFXF_LRO) ?
+	    MAXMCLBYTES : IXGBE_MAX_FRAME_SIZE;
+
+	if_rxr_init(&rxr->rx_ring, 2 * howmany(maxpktlen, sc->rx_mbuf_sz) + 1,
 	    sc->num_rx_desc - 1);
 
 	ixgbe_rxfill(rxr);
@@ -2951,7 +2954,7 @@ ixgbe_initialize_receive_units(struct ix
 		IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
 	}
 
-	bufsz = (sc->rx_mbuf_sz - ETHER_ALIGN) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	bufsz = sc->rx_mbuf_sz >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
 
 	for (i = 0; i < sc->num_queues; i++, rxr++) {
 		uint64_t rdba = rxr->rxdma.dma_map->dm_segs[0].ds_addr;
Index: if_ixv.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ixv.c,v
diff -u -p -r1.3 if_ixv.c
--- if_ixv.c	13 Mar 2026 14:22:33 -0000	1.3
+++ if_ixv.c	22 Apr 2026 22:16:16 -0000
@@ -475,7 +475,7 @@ ixv_init(struct ix_softc *sc)
 	ixv_iff(sc);
 
 	/* Use 2k clusters, even for jumbo frames */
-	sc->rx_mbuf_sz = MCLBYTES + ETHER_ALIGN;
+	sc->rx_mbuf_sz = MCLBYTES;
 
 	/* Prepare receive descriptors and buffers */
 	if (ixgbe_setup_receive_structures(sc)) {
@@ -947,7 +947,7 @@ ixv_initialize_receive_units(struct ix_s
 	uint32_t          reg, rxdctl, bufsz, psrtype;
 	int               i, j, k;
 
-	bufsz = (sc->rx_mbuf_sz - ETHER_ALIGN) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	bufsz = sc->rx_mbuf_sz >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
 
 	psrtype = IXGBE_PSRTYPE_TCPHDR
 	        | IXGBE_PSRTYPE_UDPHDR