From: Jan Klemkow <jan@openbsd.org>
Subject: ixl/ice(4): Avoid unnecessary defrag of TSO packets
To: tech@openbsd.org
Date: Fri, 12 Sep 2025 16:00:39 +0200

Hi,

ixl(4) and ice(4) network cards can handle only up to 8 DMA segments for
a regular packet, but up to 128 segments for a TSO packet.  TSO packets
exceed the 8-segment limit very quickly (a 64 kB TSO payload built from
2 kB clusters alone spans about 32 mbufs, for example), so we run into
unnecessary m_defrag() calls, which cost throughput.

This diff raises the DMA map limit to 128 segments for ice(4) and ixl(4)
and adds a separate check that keeps non-TSO packets within the old
8-segment limit, as sketched below.
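
Condensed, the new load path in both drivers looks like this (a sketch,
with MAX_TX_SEGS standing in for the per-driver non-TSO limit; the
retry after m_defrag() follows the drivers' existing fallback path):

static inline int
load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
{
	struct mbuf *n;
	int segs = 0;
	int error;

	/*
	 * Use the chain length as a cheap proxy for the DMA segment
	 * count: each mbuf typically maps to one segment.  A too-long
	 * non-TSO chain goes straight to m_defrag() instead of paying
	 * for a bus_dmamap_load_mbuf() that would fail with EFBIG.
	 */
	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
		for (n = m; n != NULL; n = n->m_next)
			if (++segs > MAX_TX_SEGS)
				goto defrag;
	}

	error = bus_dmamap_load_mbuf(dmat, map, m,
	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
	if (error != EFBIG)
		return (error);

 defrag:
	/* Compact the chain into fewer mbufs and retry the load. */
	error = m_defrag(m, M_DONTWAIT);
	if (error != 0)
		return (error);

	return (bus_dmamap_load_mbuf(dmat, map, m,
	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT));
}

Since the DMA maps are now created with the 128-segment TSO limit, the
stricter hardware limit for non-TSO packets has to be enforced in
software, which is what the pre-check above does.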

ok?

bye,
jan

Index: dev/pci/if_ice.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ice.c,v
diff -u -p -r1.58 if_ice.c
--- dev/pci/if_ice.c	19 Aug 2025 11:46:52 -0000	1.58
+++ dev/pci/if_ice.c	12 Sep 2025 13:17:23 -0000
@@ -13868,7 +13868,7 @@ ice_tso_detect_sparse(struct mbuf *m, st
 		hlen -= seglen;
 	}
 
-	maxsegs = ICE_MAX_TX_SEGS - hdrs;
+	maxsegs = ICE_MAX_TSO_SEGS - hdrs;
 
 	/* We must count the headers, in order to verify that they take up
 	 * 3 or fewer descriptors. However, we don't need to check the data
@@ -13957,13 +13957,23 @@ ice_tx_setup_offload(struct mbuf *m0, st
 static inline int
 ice_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
 {
+	struct mbuf *n;
+	int segs = 0;
 	int error;
 
+	/* Non-TSO packets are limited to ICE_MAX_TX_SEGS segments. */
+	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+		for (n = m; n != NULL; n = n->m_next)
+			if (++segs > ICE_MAX_TX_SEGS)
+				goto defrag;
+	}
+
 	error = bus_dmamap_load_mbuf(dmat, map, m,
 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
 	if (error != EFBIG)
 		return (error);
 
+ defrag:
 	error = m_defrag(m, M_DONTWAIT);
 	if (error != 0)
 		return (error);
@@ -29671,7 +29681,7 @@ ice_tx_queues_alloc(struct ice_softc *sc
 		for (j = 0; j < sc->isc_ntxd[i]; j++) {
 			map = &txq->tx_map[j];
 			if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES,
-			    ICE_MAX_TX_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
+			    ICE_MAX_TSO_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
 			    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
 			    &map->txm_map) != 0) {
 				printf("%s: could not allocate Tx DMA map\n",
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
diff -u -p -r1.108 if_ixl.c
--- dev/pci/if_ixl.c	24 Jun 2025 11:03:10 -0000	1.108
+++ dev/pci/if_ixl.c	12 Sep 2025 13:17:23 -0000
@@ -900,6 +900,7 @@ struct ixl_rx_wb_desc_32 {
 } __packed __aligned(16);
 
 #define IXL_TX_PKT_DESCS		8
+#define IXL_TX_TSO_PKT_DESCS		128
 #define IXL_TX_QUEUE_ALIGN		128
 #define IXL_RX_QUEUE_ALIGN		128
 
@@ -2579,7 +2580,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
 		txm = &maps[i];
 
 		if (bus_dmamap_create(sc->sc_dmat,
-		    MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
+		    MAXMCLBYTES, IXL_TX_TSO_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
 		    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
 		    &txm->txm_map) != 0)
 			goto uncreate;
@@ -2749,13 +2750,23 @@ ixl_txr_free(struct ixl_softc *sc, struc
 static inline int
 ixl_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
 {
+	struct mbuf *n;
+	int segs = 0;
 	int error;
 
+	/* Non-TSO packets are limited to IXL_TX_PKT_DESCS segments. */
+	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+		for (n = m; n != NULL; n = n->m_next)
+			if (++segs > IXL_TX_PKT_DESCS)
+				goto defrag;
+	}
+
 	error = bus_dmamap_load_mbuf(dmat, map, m,
 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
 	if (error != EFBIG)
 		return (error);
 
+ defrag:
 	error = m_defrag(m, M_DONTWAIT);
 	if (error != 0)
 		return (error);
@@ -2885,7 +2896,7 @@ ixl_start(struct ifqueue *ifq)
 
 	for (;;) {
 		/* We need one extra descriptor for TSO packets. */
-		if (free <= (IXL_TX_PKT_DESCS + 1)) {
+		if (free <= (IXL_TX_TSO_PKT_DESCS + 1)) {
 			ifq_set_oactive(ifq);
 			break;
 		}