From: Jan Klemkow
Subject: ixl/ice(4): Avoid unnecessary defrag of TSO packets
To: tech@openbsd.org
Date: Fri, 12 Sep 2025 16:00:39 +0200

Hi,

ixl(4) and ice(4) network cards can handle only up to 8 DMA segments for
a regular packet, but up to 128 segments for a TSO packet.  TSO packets
hit the 8-segment limit very quickly, so we end up with unnecessary
m_defrag() calls, which cost throughput.

This diff raises the DMA map limit to 128 segments for ice(4) and ixl(4)
and adds an explicit check so that non-TSO packets are still defragmented
once they exceed the old 8-segment limit.

ok?

bye,
jan

Index: dev/pci/if_ice.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ice.c,v
diff -u -p -r1.58 if_ice.c
--- dev/pci/if_ice.c	19 Aug 2025 11:46:52 -0000	1.58
+++ dev/pci/if_ice.c	12 Sep 2025 13:17:23 -0000
@@ -13868,7 +13868,7 @@ ice_tso_detect_sparse(struct mbuf *m, st
 		hlen -= seglen;
 	}
 
-	maxsegs = ICE_MAX_TX_SEGS - hdrs;
+	maxsegs = ICE_MAX_TSO_SEGS - hdrs;
 
 	/* We must count the headers, in order to verify that they take up
 	 * 3 or fewer descriptors. However, we don't need to check the data
@@ -13957,13 +13957,23 @@ ice_tx_setup_offload(struct mbuf *m0, st
 static inline int
 ice_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
 {
+	struct mbuf *n;
+	int segs = 0;
 	int error;
 
+	/* Non-TSO packets are limited to ICE_MAX_TX_SEGS segments. */
+	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+		for (n = m; n != NULL; n = n->m_next)
+			if (++segs > ICE_MAX_TX_SEGS)
+				goto defrag;
+	}
+
 	error = bus_dmamap_load_mbuf(dmat, map, m,
 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
 	if (error != EFBIG)
 		return (error);
 
+ defrag:
 	error = m_defrag(m, M_DONTWAIT);
 	if (error != 0)
 		return (error);
@@ -29671,7 +29681,7 @@ ice_tx_queues_alloc(struct ice_softc *sc
 		for (j = 0; j < sc->isc_ntxd[i]; j++) {
 			map = &txq->tx_map[j];
 			if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES,
-			    ICE_MAX_TX_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
+			    ICE_MAX_TSO_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
 			    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
 			    &map->txm_map) != 0) {
 				printf("%s: could not allocate Tx DMA map\n",
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
diff -u -p -r1.108 if_ixl.c
--- dev/pci/if_ixl.c	24 Jun 2025 11:03:10 -0000	1.108
+++ dev/pci/if_ixl.c	12 Sep 2025 13:17:23 -0000
@@ -900,6 +900,7 @@ struct ixl_rx_wb_desc_32 {
 } __packed __aligned(16);
 
 #define IXL_TX_PKT_DESCS		8
+#define IXL_TX_TSO_PKT_DESCS		128
 #define IXL_TX_QUEUE_ALIGN		128
 #define IXL_RX_QUEUE_ALIGN		128
 
@@ -2579,7 +2580,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
 		txm = &maps[i];
 
 		if (bus_dmamap_create(sc->sc_dmat,
-		    MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
+		    MAXMCLBYTES, IXL_TX_TSO_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
 		    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
 		    &txm->txm_map) != 0)
 			goto uncreate;
@@ -2749,13 +2750,23 @@ ixl_txr_free(struct ixl_softc *sc, struc
 static inline int
 ixl_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
 {
+	struct mbuf *n;
+	int segs = 0;
 	int error;
 
+	/* Non-TSO packets are limited to IXL_TX_PKT_DESCS segments. */
+	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+		for (n = m; n != NULL; n = n->m_next)
+			if (++segs > IXL_TX_PKT_DESCS)
+				goto defrag;
+	}
+
 	error = bus_dmamap_load_mbuf(dmat, map, m,
 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
 	if (error != EFBIG)
 		return (error);
 
+ defrag:
 	error = m_defrag(m, M_DONTWAIT);
 	if (error != 0)
 		return (error);
@@ -2885,7 +2896,7 @@ ixl_start(struct ifqueue *ifq)
 
 	for (;;) {
 		/* We need one extra descriptor for TSO packets. */
-		if (free <= (IXL_TX_PKT_DESCS + 1)) {
+		if (free <= (IXL_TX_TSO_PKT_DESCS + 1)) {
 			ifq_set_oactive(ifq);
 			break;
 		}
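
As a side note for review: the new non-TSO check is nothing more than a
walk over the mbuf chain.  A standalone sketch of the same logic follows;
the helper name tx_chain_too_long() and the maxsegs parameter are
illustrative only and not part of the diff above.

#include <sys/param.h>
#include <sys/mbuf.h>

/*
 * Illustrative sketch, not part of the diff: count the mbufs in the
 * chain and report whether there are more than maxsegs of them, in
 * which case the caller would fall back to m_defrag().
 */
static inline int
tx_chain_too_long(const struct mbuf *m, int maxsegs)
{
	const struct mbuf *n;
	int segs = 0;

	for (n = m; n != NULL; n = n->m_next) {
		if (++segs > maxsegs)
			return (1);
	}

	return (0);
}

Counting mbufs this way only bounds the number of chain links, not the
number of DMA segments the map will actually produce, so the existing
EFBIG fallback to m_defrag() still covers the remaining cases.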