ixl/ice(4): Avoid unnecessary defrag of TSO packets
Hi,
ixl(4) and ice(4) network cards can handle only up to 8 DMA segments for
a regular packet, but up to 128 segments for a TSO packet. TSO packets
hit the 8-segment limit very quickly, so we run into unnecessary
m_defrag() calls, which cost throughput.
This diff raises the DMA map limit to 128 segments for ice(4) and ixl(4)
and adds an explicit segment-count check so that non-TSO packets are
still defragmented once they exceed 8 segments.
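To illustrate the idea outside the drivers, here is a minimal userland
sketch (not kernel code): the mbuf chain is modelled as a plain linked
list, and defragmentation is only requested when the chain exceeds the
limit that applies to the packet type. The names pkt_seg, needs_defrag,
MAX_TX_SEGS and MAX_TSO_SEGS are made up for this example; the real code
uses struct mbuf, m_defrag(9) and the drivers' ICE_/IXL_ constants.

	/* Standalone sketch only; illustrative names, not driver code. */
	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	#define MAX_TX_SEGS	8	/* regular packet limit */
	#define MAX_TSO_SEGS	128	/* TSO packet limit */

	struct pkt_seg {
		struct pkt_seg	*next;
		size_t		 len;
	};

	/*
	 * Count the chain's segments and compare against the limit for
	 * this packet type.  Only non-TSO packets are bounded by the
	 * small 8-segment limit; TSO packets may use up to 128.
	 */
	static bool
	needs_defrag(const struct pkt_seg *chain, bool is_tso)
	{
		const struct pkt_seg *s;
		int limit = is_tso ? MAX_TSO_SEGS : MAX_TX_SEGS;
		int segs = 0;

		for (s = chain; s != NULL; s = s->next) {
			if (++segs > limit)
				return true;
		}
		return false;
	}

	int
	main(void)
	{
		/* A 10-segment chain: too long for a regular packet,
		 * fine for TSO. */
		struct pkt_seg chain[10];
		int i;

		for (i = 0; i < 10; i++) {
			chain[i].len = 1024;
			chain[i].next = (i < 9) ? &chain[i + 1] : NULL;
		}

		printf("regular packet needs defrag: %d\n",
		    needs_defrag(chain, false));
		printf("TSO packet needs defrag: %d\n",
		    needs_defrag(chain, true));
		return 0;
	}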
ok?
bye,
jan
Index: dev/pci/if_ice.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ice.c,v
diff -u -p -r1.58 if_ice.c
--- dev/pci/if_ice.c 19 Aug 2025 11:46:52 -0000 1.58
+++ dev/pci/if_ice.c 12 Sep 2025 13:17:23 -0000
@@ -13868,7 +13868,7 @@ ice_tso_detect_sparse(struct mbuf *m, st
hlen -= seglen;
}
- maxsegs = ICE_MAX_TX_SEGS - hdrs;
+ maxsegs = ICE_MAX_TSO_SEGS - hdrs;
/* We must count the headers, in order to verify that they take up
* 3 or fewer descriptors. However, we don't need to check the data
@@ -13957,13 +13957,23 @@ ice_tx_setup_offload(struct mbuf *m0, st
static inline int
ice_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
{
+ struct mbuf *n;
+ int segs = 0;
int error;
+ /* Non-TSO packets may use at most ICE_MAX_TX_SEGS segments. */
+ if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+ for (n = m; n != NULL; n = n->m_next)
+ if (++segs > ICE_MAX_TX_SEGS)
+ goto defrag;
+ }
+
error = bus_dmamap_load_mbuf(dmat, map, m,
BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
if (error != EFBIG)
return (error);
+ defrag:
error = m_defrag(m, M_DONTWAIT);
if (error != 0)
return (error);
@@ -29671,7 +29681,7 @@ ice_tx_queues_alloc(struct ice_softc *sc
for (j = 0; j < sc->isc_ntxd[i]; j++) {
map = &txq->tx_map[j];
if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES,
- ICE_MAX_TX_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
+ ICE_MAX_TSO_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
&map->txm_map) != 0) {
printf("%s: could not allocate Tx DMA map\n",
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
diff -u -p -r1.108 if_ixl.c
--- dev/pci/if_ixl.c 24 Jun 2025 11:03:10 -0000 1.108
+++ dev/pci/if_ixl.c 12 Sep 2025 13:17:23 -0000
@@ -900,6 +900,7 @@ struct ixl_rx_wb_desc_32 {
} __packed __aligned(16);
#define IXL_TX_PKT_DESCS 8
+#define IXL_TX_TSO_PKT_DESCS 128
#define IXL_TX_QUEUE_ALIGN 128
#define IXL_RX_QUEUE_ALIGN 128
@@ -2579,7 +2580,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
txm = &maps[i];
if (bus_dmamap_create(sc->sc_dmat,
- MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
+ MAXMCLBYTES, IXL_TX_TSO_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
&txm->txm_map) != 0)
goto uncreate;
@@ -2749,13 +2750,23 @@ ixl_txr_free(struct ixl_softc *sc, struc
static inline int
ixl_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
{
+ struct mbuf *n;
+ int segs = 0;
int error;
+ /* Non-TSO packets may use at most IXL_TX_PKT_DESCS segments. */
+ if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+ for (n = m; n != NULL; n = n->m_next)
+ if (++segs > IXL_TX_PKT_DESCS)
+ goto defrag;
+ }
+
error = bus_dmamap_load_mbuf(dmat, map, m,
BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
if (error != EFBIG)
return (error);
+ defrag:
error = m_defrag(m, M_DONTWAIT);
if (error != 0)
return (error);
@@ -2885,7 +2896,7 @@ ixl_start(struct ifqueue *ifq)
for (;;) {
/* We need one extra descriptor for TSO packets. */
- if (free <= (IXL_TX_PKT_DESCS + 1)) {
+ if (free <= (IXL_TX_TSO_PKT_DESCS + 1)) {
ifq_set_oactive(ifq);
break;
}